Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
vertices_builder.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #pragma once
21 
22 #include <any>
23 #include <cassert>
24 #include <cstddef>
25 #include <memory>
26 #include <string>
27 #include <unordered_map>
28 #include <unordered_set>
29 #include <utility>
30 #include <vector>
31 
32 #include "graphar/arrow/chunk_writer.h"
33 #include "graphar/fwd.h"
34 #include "graphar/graph_info.h"
35 #include "graphar/result.h"
36 #include "graphar/status.h"
37 #include "graphar/types.h"
38 #include "graphar/writer_util.h"
39 
40 // forward declaration
41 namespace arrow {
42 class Array;
43 class Table;
44 } // namespace arrow
45 
46 namespace graphar::builder {
47 
52 class Vertex {
53  public:
54  Vertex() : empty_(true) {}
55 
61  explicit Vertex(IdType id) : id_(id), empty_(false) {}
62 
68  inline IdType GetId() const noexcept { return id_; }
69 
75  inline void SetId(IdType id) { id_ = id; }
76 
82  inline bool Empty() const noexcept { return empty_; }
83 
90  // TODO(@acezen): Enable the property to be a vector(list).
91  inline void AddProperty(const std::string& name, const std::any& val) {
92  empty_ = false;
93  properties_[name] = val;
94  }
95 
96  inline void AddProperty(const Cardinality cardinality,
97  const std::string& name, const std::any& val) {
98  if (cardinality == Cardinality::SINGLE) {
99  cardinalities_[name] = Cardinality::SINGLE;
100  AddProperty(name, val);
101  return;
102  }
103  empty_ = false;
104  if (cardinalities_.find(name) != cardinalities_.end()) {
105  if (cardinalities_[name] != cardinality) {
106  throw std::runtime_error("Cardinality mismatch for property: " + name);
107  }
108  auto property_value_list =
109  std::any_cast<std::vector<std::any>>(properties_[name]);
110  property_value_list.push_back(val);
111  properties_[name] = property_value_list;
112  } else {
113  auto property_value_list = std::vector<std::any>();
114  property_value_list.push_back(val);
115  properties_[name] = property_value_list;
116  }
117  cardinalities_[name] = cardinality;
118  }
119 
126  inline const std::any& GetProperty(const std::string& property) const {
127  return properties_.at(property);
128  }
129 
135  inline const std::unordered_map<std::string, std::any>& GetProperties()
136  const {
137  return properties_;
138  }
139 
146  inline bool ContainProperty(const std::string& property) {
147  return (properties_.find(property) != properties_.end());
148  }
149 
150  inline bool IsMultiProperty(const std::string& property) const {
151  return (cardinalities_.find(property) != cardinalities_.end() &&
152  cardinalities_.at(property) != Cardinality::SINGLE);
153  }
154 
155  template <typename T>
156  Status ValidatePropertyType(const std::string& property,
157  const Cardinality cardinality) const {
158  if (cardinality == Cardinality::SINGLE && IsMultiProperty(property)) {
159  return Status::TypeError(
160  "Invalid data cardinality for property ", property,
161  ", defined as SINGLE but got ",
162  cardinalities_.at(property) == Cardinality::LIST ? "LIST" : "SET");
163  }
164  if (IsMultiProperty(property) &&
165  (cardinality == Cardinality::SET ||
166  cardinalities_.at(property) == Cardinality::SET)) {
167  GAR_RETURN_NOT_OK(ValidateMultiPropertySet<T>(property));
168  }
169  if (IsMultiProperty(property)) {
170  auto value_list =
171  std::any_cast<std::vector<std::any>>(properties_.at(property));
172  for (auto value : value_list) {
173  auto& value_type = value.type();
174  if (value_type != typeid(T)) {
175  return Status::TypeError("Invalid data type for property ", property,
176  ", defined as ", typeid(T).name(),
177  ", but got ", value_type.name());
178  }
179  }
180  } else {
181  auto& value_type = properties_.at(property).type();
182  if (value_type != typeid(T)) {
183  return Status::TypeError("Invalid data type for property ", property,
184  ", defined as ", typeid(T).name(),
185  ", but got ", value_type.name());
186  }
187  }
188  return Status::OK();
189  }
190 
191  template <typename T>
192  Status ValidateMultiProperty(const std::string& property) const {
193  if (IsMultiProperty(property) &&
194  cardinalities_.at(property) == Cardinality::SET) {
195  GAR_RETURN_NOT_OK(ValidateMultiPropertySet<T>(property));
196  }
197  return Status::OK();
198  }
199 
200  template <typename T>
201  Status ValidateMultiPropertySet(const std::string& property) const {
202  auto vec = std::any_cast<std::vector<std::any>>(properties_.at(property));
203  std::unordered_set<T> seen;
204  for (const auto& item : vec) {
205  if (!seen.insert(std::any_cast<T>(item)).second) {
206  return Status::KeyError(
207  "Duplicate values exist in set type multi-property key: ", property,
208  " value: ", std::any_cast<T>(item));
209  }
210  }
211  return Status::OK();
212  }
213 
214  private:
215  IdType id_;
216  bool empty_;
217  std::unordered_map<std::string, std::any> properties_;
218  std::unordered_map<std::string, Cardinality> cardinalities_;
219 };
220 
227  public:
241  explicit VerticesBuilder(
242  const std::shared_ptr<VertexInfo>& vertex_info, const std::string& prefix,
243  IdType start_vertex_index = 0,
244  std::shared_ptr<WriterOptions> writerOptions = nullptr,
245  const ValidateLevel& validate_level = ValidateLevel::no_validate)
246  : vertex_info_(std::move(vertex_info)),
247  prefix_(prefix),
248  start_vertex_index_(start_vertex_index),
249  writer_options_(writerOptions),
250  validate_level_(validate_level) {
251  if (validate_level_ == ValidateLevel::default_validate) {
252  throw std::runtime_error(
253  "default_validate is not allowed to be set as the global validate "
254  "level for VerticesBuilder");
255  }
256  vertices_.clear();
257  num_vertices_ = 0;
258  is_saved_ = false;
259  }
263  inline void Clear() {
264  vertices_.clear();
265  num_vertices_ = 0;
266  is_saved_ = false;
267  }
268 
275  inline void SetWriterOptions(std::shared_ptr<WriterOptions> writer_options) {
276  this->writer_options_ = writer_options;
277  }
278 
285  inline std::shared_ptr<WriterOptions> GetWriterOptions() {
286  return this->writer_options_;
287  }
288 
294  inline void SetValidateLevel(const ValidateLevel& validate_level) {
295  if (validate_level == ValidateLevel::default_validate) {
296  return;
297  }
298  validate_level_ = validate_level;
299  }
300 
306  inline ValidateLevel GetValidateLevel() const { return validate_level_; }
307 
331  Vertex& v, IdType index = -1, // NOLINT
332  ValidateLevel validate_level = ValidateLevel::default_validate) {
// Adds vertex `v` to the collection, either appended (index == -1) or stored
// at slot `index`. `v` is taken by non-const reference and its id is
// overwritten via SetId() before it is copied into the collection.
// NOTE(review): the line carrying the return type and function name is not
// part of this listing; only the parameter list and body are visible.
// Run validation first; a non-OK Status is returned and nothing is stored.
333  // validate
334  GAR_RETURN_NOT_OK(validate(v, index, validate_level));
335  // add a vertex
336  if (index == -1) {
// Append: the new id is the current size of the buffer.
337  v.SetId(vertices_.size());
338  vertices_.push_back(v);
339  } else {
// Store at an explicit slot; the buffer grows with default-constructed
// (empty) vertices when `index` is past the end.
// NOTE(review): a negative index other than -1 would reach vertices_[index];
// presumably validate() rejects it — confirm.
340  v.SetId(index);
341  if (index >= static_cast<IdType>(vertices_.size()))
342  vertices_.resize(index + 1);
343  vertices_[index] = v;
344  }
// NOTE(review): incremented on every successful call, including when an
// already occupied slot is overwritten — confirm this is intended.
345  num_vertices_++;
346  return Status::OK();
347  }
348 
354  IdType GetNum() const { return num_vertices_; }
355 
// Writes the buffered vertices through a VertexPropertyWriter and then writes
// the vertex count. Any non-OK Status from conversion or writing is returned
// early, in which case is_saved_ stays unchanged and the buffer is kept.
// NOTE(review): the line carrying the return type and function name is not
// part of this listing; only the body is visible.
362  // construct the writer
363  VertexPropertyWriter writer(vertex_info_, prefix_, writer_options_,
364  validate_level_);
// Chunk index of the first vertex: integer division of the start index by
// the chunk size reported by the vertex info.
365  IdType start_chunk_index =
366  start_vertex_index_ / vertex_info_->GetChunkSize();
367  // convert to table
368  GAR_ASSIGN_OR_RAISE(auto input_table, convertToTable());
369  // write table
370  GAR_RETURN_NOT_OK(writer.WriteTable(input_table, start_chunk_index));
// The count written is the vertex counter offset by the start index.
371  GAR_RETURN_NOT_OK(
372  writer.WriteVerticesNum(num_vertices_ + start_vertex_index_));
373  is_saved_ = true;
// NOTE(review): the buffer is cleared here but num_vertices_ is not reset —
// confirm whether GetNum() is meant to keep the old count after a dump.
374  vertices_.clear();
375  return Status::OK();
376  }
377 
389  static Result<std::shared_ptr<VerticesBuilder>> Make(
390  const std::shared_ptr<VertexInfo>& vertex_info, const std::string& prefix,
391  std::shared_ptr<WriterOptions> writer_options,
392  IdType start_vertex_index = 0,
393  const ValidateLevel& validate_level = ValidateLevel::no_validate) {
394  return std::make_shared<VerticesBuilder>(vertex_info, prefix,
395  start_vertex_index, writer_options,
396  validate_level);
397  }
398 
399  static Result<std::shared_ptr<VerticesBuilder>> Make(
400  const std::shared_ptr<VertexInfo>& vertex_info, const std::string& prefix,
401  IdType start_vertex_index = 0,
402  const ValidateLevel& validate_level = ValidateLevel::no_validate) {
403  return std::make_shared<VerticesBuilder>(
404  vertex_info, prefix, start_vertex_index, nullptr, validate_level);
405  }
406 
418  static Result<std::shared_ptr<VerticesBuilder>> Make(
419  const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
420  std::shared_ptr<WriterOptions> writer_options,
421  IdType start_vertex_index = 0,
422  const ValidateLevel& validate_level = ValidateLevel::no_validate) {
423  const auto vertex_info = graph_info->GetVertexInfo(type);
424  if (!vertex_info) {
425  return Status::KeyError("The vertex type ", type,
426  " doesn't exist in graph ", graph_info->GetName(),
427  ".");
428  }
429  return Make(vertex_info, graph_info->GetPrefix(), writer_options,
430  start_vertex_index, validate_level);
431  }
432 
433  static Result<std::shared_ptr<VerticesBuilder>> Make(
434  const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
435  IdType start_vertex_index = 0,
436  const ValidateLevel& validate_level = ValidateLevel::no_validate) {
437  const auto vertex_info = graph_info->GetVertexInfo(type);
438  if (!vertex_info) {
439  return Status::KeyError("The vertex type ", type,
440  " doesn't exist in graph ", graph_info->GetName(),
441  ".");
442  }
443  return Make(vertex_info, graph_info->GetPrefix(), nullptr,
444  start_vertex_index, validate_level);
445  }
446 
447  private:
// Validation hook called by AddVertex before a vertex is stored; a non-OK
// Status makes AddVertex return without storing. Declaration only — the
// definition is not in this header, so the exact checks are not visible here.
// Parameters mirror AddVertex: the vertex, the requested index and the
// per-call validate level.
456  Status validate(const Vertex& v, IdType index,
457  ValidateLevel validate_level) const;
458 
// Declaration only; defined outside this header.
// NOTE(review): presumably builds the arrow array for `property_name` with
// data type `type` from the buffered vertices and returns it through the
// non-const reference `array` — confirm against the definition.
467  Status appendToArray(const std::shared_ptr<DataType>& type,
468  const std::string& property_name,
469  std::shared_ptr<arrow::Array>& array); // NOLINT
470 
// Declaration only; defined outside this header. Templated on a `Type`
// enumerator rather than on a C++ type.
// NOTE(review): presumably the per-type worker behind appendToArray, with
// `array` as the output — confirm against the definition.
479  template <Type type>
480  Status tryToAppend(const std::string& property_name,
481  std::shared_ptr<arrow::Array>& array); // NOLINT
482 
// Declaration only; defined outside this header. Called by Dump, which passes
// the resulting arrow table to VertexPropertyWriter::WriteTable; an error
// Result makes Dump return early.
486  Result<std::shared_ptr<arrow::Table>> convertToTable();
487 
488  private:
// Vertex info passed to the chunk writer; also supplies the chunk size.
489  std::shared_ptr<VertexInfo> vertex_info_;
// Path prefix passed to the chunk writer.
490  std::string prefix_;
// Buffered vertices; the position in the vector is the id assigned by
// AddVertex.
491  std::vector<Vertex> vertices_;
// Index of the first vertex; used to compute the start chunk index and added
// to the vertex counter when the vertex count is written.
492  IdType start_vertex_index_;
// Counter incremented by every successful AddVertex call; reset by Clear().
493  IdType num_vertices_;
// Set to true by a successful Dump, false on construction and Clear().
494  bool is_saved_;
// Options forwarded to the chunk writer; may be nullptr.
495  std::shared_ptr<WriterOptions> writer_options_;
// Global validate level; never ValidateLevel::default_validate.
496  ValidateLevel validate_level_;
497 };
498 
499 } // namespace graphar::builder
Status outcome object (success or error)
Definition: status.h:123
static Status TypeError(Args &&... args)
Definition: status.h:178
static Status KeyError(Args &&... args)
Definition: status.h:172
static Status OK()
Definition: status.h:157
The writer for vertex property group chunks.
Definition: chunk_writer.h:57
Status WriteVerticesNum(const IdType &count, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write the number of vertices into the file.
Status WriteTable(const std::shared_ptr< arrow::Table > &input_table, const std::shared_ptr< PropertyGroup > &property_group, IdType start_chunk_index, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write a single property group for multiple vertex chunks to corresponding files.
Vertex is designed for constructing vertices builder.
void SetId(IdType id)
Set id of the vertex.
const std::any & GetProperty(const std::string &property) const
Get a property of the vertex.
IdType GetId() const noexcept
Get id of the vertex.
void AddProperty(const std::string &name, const std::any &val)
Add a property to the vertex.
bool Empty() const noexcept
Check if the vertex is empty.
bool ContainProperty(const std::string &property)
Check if the vertex contains a property.
const std::unordered_map< std::string, std::any > & GetProperties() const
Get all properties of the vertex.
Vertex(IdType id)
Initialize the vertex with a given id.
VerticesBuilder is designed for building and writing a collection of vertices.
void SetValidateLevel(const ValidateLevel &validate_level)
Set the validate level.
void Clear()
Clear the vertices in this VerticesBuilder.
Status AddVertex(Vertex &v, IdType index=-1, ValidateLevel validate_level=ValidateLevel::default_validate)
Add a vertex with the given index.
void SetWriterOptions(std::shared_ptr< WriterOptions > writer_options)
Set the writerOptions.
static Result< std::shared_ptr< VerticesBuilder > > Make(const std::shared_ptr< VertexInfo > &vertex_info, const std::string &prefix, std::shared_ptr< WriterOptions > writer_options, IdType start_vertex_index=0, const ValidateLevel &validate_level=ValidateLevel::no_validate)
Construct a VerticesBuilder from vertex info.
IdType GetNum() const
Get the current number of vertices in the collection.
ValidateLevel GetValidateLevel() const
Get the validate level.
std::shared_ptr< WriterOptions > GetWriterOptions()
Get the writerOptions.
VerticesBuilder(const std::shared_ptr< VertexInfo > &vertex_info, const std::string &prefix, IdType start_vertex_index=0, std::shared_ptr< WriterOptions > writerOptions=nullptr, const ValidateLevel &validate_level=ValidateLevel::no_validate)
Initialize the VerticesBuilder.
Status Dump()
Dump the collection into files.
static Result< std::shared_ptr< VerticesBuilder > > Make(const std::shared_ptr< GraphInfo > &graph_info, const std::string &type, std::shared_ptr< WriterOptions > writer_options, IdType start_vertex_index=0, const ValidateLevel &validate_level=ValidateLevel::no_validate)
Construct a VerticesBuilder from graph info and vertex type.