Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
edges_builder.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #pragma once
21 
#include <algorithm>
#include <any>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
29 
30 #include "graphar/arrow/chunk_writer.h"
31 #include "graphar/fwd.h"
32 #include "graphar/graph_info.h"
33 #include "graphar/types.h"
34 
35 namespace arrow {
36 class Array;
37 }
38 
39 namespace graphar::builder {
40 
45 class Edge {
46  public:
53  explicit Edge(IdType src_id, IdType dst_id)
54  : src_id_(src_id), dst_id_(dst_id), empty_(true) {}
55 
61  inline bool Empty() const noexcept { return empty_; }
62 
68  inline IdType GetSource() const noexcept { return src_id_; }
69 
75  inline IdType GetDestination() const noexcept { return dst_id_; }
76 
83  // TODO(@acezen): Enable the property to be a vector(list).
84  inline void AddProperty(const std::string& name, const std::any& val) {
85  empty_ = false;
86  properties_[name] = val;
87  }
88 
95  inline const std::any& GetProperty(const std::string& property) const {
96  return properties_.at(property);
97  }
98 
104  inline const std::unordered_map<std::string, std::any>& GetProperties()
105  const {
106  return properties_;
107  }
108 
115  inline bool ContainProperty(const std::string& property) const {
116  return (properties_.find(property) != properties_.end());
117  }
118 
119  private:
120  IdType src_id_, dst_id_;
121  bool empty_;
122  std::unordered_map<std::string, std::any> properties_;
123 };
124 
132 inline bool cmp_src(const Edge& a, const Edge& b) {
133  return a.GetSource() < b.GetSource();
134 }
135 
143 inline bool cmp_dst(const Edge& a, const Edge& b) {
144  return a.GetDestination() < b.GetDestination();
145 }
146 
153  public:
168  explicit EdgesBuilder(
169  const std::shared_ptr<EdgeInfo>& edge_info, const std::string& prefix,
170  AdjListType adj_list_type, IdType num_vertices,
171  std::shared_ptr<WriterOptions> writerOptions = nullptr,
172  const ValidateLevel& validate_level = ValidateLevel::no_validate)
173  : edge_info_(std::move(edge_info)),
174  prefix_(prefix),
175  adj_list_type_(adj_list_type),
176  num_vertices_(num_vertices),
177  writer_options_(writerOptions),
178  validate_level_(validate_level) {
179  if (validate_level_ == ValidateLevel::default_validate) {
180  throw std::runtime_error(
181  "default_validate is not allowed to be set as the global validate "
182  "level for EdgesBuilder");
183  }
184  edges_.clear();
185  num_edges_ = 0;
186  is_saved_ = false;
187  switch (adj_list_type) {
188  case AdjListType::unordered_by_source:
189  vertex_chunk_size_ = edge_info_->GetSrcChunkSize();
190  break;
191  case AdjListType::ordered_by_source:
192  vertex_chunk_size_ = edge_info_->GetSrcChunkSize();
193  break;
194  case AdjListType::unordered_by_dest:
195  vertex_chunk_size_ = edge_info_->GetDstChunkSize();
196  break;
197  case AdjListType::ordered_by_dest:
198  vertex_chunk_size_ = edge_info_->GetDstChunkSize();
199  break;
200  default:
201  vertex_chunk_size_ = edge_info_->GetSrcChunkSize();
202  }
203  }
204 
210  inline void SetValidateLevel(const ValidateLevel& validate_level) {
211  if (validate_level == ValidateLevel::default_validate) {
212  return;
213  }
214  validate_level_ = validate_level;
215  }
216 
223  inline void SetWriterOptions(std::shared_ptr<WriterOptions> writer_options) {
224  this->writer_options_ = writer_options;
225  }
232  inline std::shared_ptr<WriterOptions> GetWriterOptions() {
233  return this->writer_options_;
234  }
235 
241  inline ValidateLevel GetValidateLevel() const { return validate_level_; }
242 
246  inline void Clear() {
247  edges_.clear();
248  num_edges_ = 0;
249  is_saved_ = false;
250  }
251 
273  Status AddEdge(const Edge& e, const ValidateLevel& validate_level =
274  ValidateLevel::default_validate) {
275  // validate
276  GAR_RETURN_NOT_OK(validate(e, validate_level));
277  // add an edge
278  IdType vertex_chunk_index = getVertexChunkIndex(e);
279  edges_[vertex_chunk_index].push_back(e);
280  num_edges_++;
281  return Status::OK();
282  }
283 
289  IdType GetNum() const { return num_edges_; }
290 
296  Status Dump();
297 
310  static Result<std::shared_ptr<EdgesBuilder>> Make(
311  const std::shared_ptr<EdgeInfo>& edge_info, const std::string& prefix,
312  AdjListType adj_list_type, IdType num_vertices,
313  std::shared_ptr<WriterOptions> writer_options,
314  const ValidateLevel& validate_level = ValidateLevel::no_validate) {
315  if (!edge_info->HasAdjacentListType(adj_list_type)) {
316  return Status::KeyError(
317  "The adjacent list type ", AdjListTypeToString(adj_list_type),
318  " doesn't exist in edge ", edge_info->GetEdgeType(), ".");
319  }
320  return std::make_shared<EdgesBuilder>(edge_info, prefix, adj_list_type,
321  num_vertices, writer_options,
322  validate_level);
323  }
324 
325  static Result<std::shared_ptr<EdgesBuilder>> Make(
326  const std::shared_ptr<EdgeInfo>& edge_info, const std::string& prefix,
327  AdjListType adj_list_type, IdType num_vertices,
328  const ValidateLevel& validate_level = ValidateLevel::no_validate) {
329  if (!edge_info->HasAdjacentListType(adj_list_type)) {
330  return Status::KeyError(
331  "The adjacent list type ", AdjListTypeToString(adj_list_type),
332  " doesn't exist in edge ", edge_info->GetEdgeType(), ".");
333  }
334  return std::make_shared<EdgesBuilder>(edge_info, prefix, adj_list_type,
335  num_vertices, nullptr,
336  validate_level);
337  }
338 
351  static Result<std::shared_ptr<EdgesBuilder>> Make(
352  const std::shared_ptr<GraphInfo>& graph_info, const std::string& src_type,
353  const std::string& edge_type, const std::string& dst_type,
354  const AdjListType& adj_list_type, IdType num_vertices,
355  std::shared_ptr<WriterOptions> writer_options,
356  const ValidateLevel& validate_level = ValidateLevel::no_validate) {
357  auto edge_info = graph_info->GetEdgeInfo(src_type, edge_type, dst_type);
358  if (!edge_info) {
359  return Status::KeyError("The edge ", src_type, " ", edge_type, " ",
360  dst_type, " doesn't exist.");
361  }
362  return Make(edge_info, graph_info->GetPrefix(), adj_list_type, num_vertices,
363  writer_options, validate_level);
364  }
365 
366  static Result<std::shared_ptr<EdgesBuilder>> Make(
367  const std::shared_ptr<GraphInfo>& graph_info, const std::string& src_type,
368  const std::string& edge_type, const std::string& dst_type,
369  const AdjListType& adj_list_type, IdType num_vertices,
370  const ValidateLevel& validate_level = ValidateLevel::no_validate) {
371  auto edge_info = graph_info->GetEdgeInfo(src_type, edge_type, dst_type);
372  if (!edge_info) {
373  return Status::KeyError("The edge ", src_type, " ", edge_type, " ",
374  dst_type, " doesn't exist.");
375  }
376  return Make(edge_info, graph_info->GetPrefix(), adj_list_type, num_vertices,
377  nullptr, validate_level);
378  }
379 
380  private:
387  IdType getVertexChunkIndex(const Edge& e) {
388  switch (adj_list_type_) {
389  case AdjListType::unordered_by_source:
390  return e.GetSource() / vertex_chunk_size_;
391  case AdjListType::ordered_by_source:
392  return e.GetSource() / vertex_chunk_size_;
393  case AdjListType::unordered_by_dest:
394  return e.GetDestination() / vertex_chunk_size_;
395  case AdjListType::ordered_by_dest:
396  return e.GetDestination() / vertex_chunk_size_;
397  default:
398  return e.GetSource() / vertex_chunk_size_;
399  }
400  }
401 
409  Status validate(const Edge& e, ValidateLevel validate_level) const;
410 
420  Status appendToArray(const std::shared_ptr<DataType>& type,
421  const std::string& property_name,
422  std::shared_ptr<arrow::Array>& array, // NOLINT
423  const std::vector<Edge>& edges);
424 
435  template <Type type>
436  Status tryToAppend(const std::string& property_name,
437  std::shared_ptr<arrow::Array>& array, // NOLINT
438  const std::vector<Edge>& edges);
439 
449  Status tryToAppend(int src_or_dest,
450  std::shared_ptr<arrow::Array>& array, // NOLINT
451  const std::vector<Edge>& edges);
452 
459  Result<std::shared_ptr<arrow::Table>> convertToTable(
460  const std::vector<Edge>& edges);
461 
468  Result<std::shared_ptr<arrow::Table>> getOffsetTable(
469  IdType vertex_chunk_index, const std::vector<Edge>& edges);
470 
471  private:
472  std::shared_ptr<EdgeInfo> edge_info_;
473  std::string prefix_;
474  AdjListType adj_list_type_;
475  std::unordered_map<IdType, std::vector<Edge>> edges_;
476  IdType vertex_chunk_size_;
477  IdType num_vertices_;
478  IdType num_edges_;
479  bool is_saved_;
480  std::shared_ptr<WriterOptions> writer_options_;
481  ValidateLevel validate_level_;
482 };
483 
484 } // namespace graphar::builder
Edge contains the information of a certain edge.
Definition: graph_reader.h:109
Status outcome object (success or error)
Definition: status.h:123
static Status KeyError(Args &&... args)
Definition: status.h:172
static Status OK()
Definition: status.h:157
Edge is designed for constructing edges builder.
Definition: edges_builder.h:45
const std::any & GetProperty(const std::string &property) const
Get a property of the edge.
Definition: edges_builder.h:95
const std::unordered_map< std::string, std::any > & GetProperties() const
Get all properties of the edge.
IdType GetSource() const noexcept
Get source id of the edge.
Definition: edges_builder.h:68
bool Empty() const noexcept
Check if the edge is empty.
Definition: edges_builder.h:61
void AddProperty(const std::string &name, const std::any &val)
Add a property to the edge.
Definition: edges_builder.h:84
IdType GetDestination() const noexcept
Get destination id of the edge.
Definition: edges_builder.h:75
Edge(IdType src_id, IdType dst_id)
Initialize the edge with its source and destination.
Definition: edges_builder.h:53
bool ContainProperty(const std::string &property) const
Check if the edge contains a property.
EdgesBuilder is designed for building and writing a collection of edges.
IdType GetNum() const
Get the current number of edges in the collection.
EdgesBuilder(const std::shared_ptr< EdgeInfo > &edge_info, const std::string &prefix, AdjListType adj_list_type, IdType num_vertices, std::shared_ptr< WriterOptions > writerOptions=nullptr, const ValidateLevel &validate_level=ValidateLevel::no_validate)
Initialize the EdgesBuilder.
static Result< std::shared_ptr< EdgesBuilder > > Make(const std::shared_ptr< EdgeInfo > &edge_info, const std::string &prefix, AdjListType adj_list_type, IdType num_vertices, std::shared_ptr< WriterOptions > writer_options, const ValidateLevel &validate_level=ValidateLevel::no_validate)
Construct an EdgesBuilder from edge info.
std::shared_ptr< WriterOptions > GetWriterOptions()
Get the writerOptions.
void SetValidateLevel(const ValidateLevel &validate_level)
Set the validate level.
static Result< std::shared_ptr< EdgesBuilder > > Make(const std::shared_ptr< GraphInfo > &graph_info, const std::string &src_type, const std::string &edge_type, const std::string &dst_type, const AdjListType &adj_list_type, IdType num_vertices, std::shared_ptr< WriterOptions > writer_options, const ValidateLevel &validate_level=ValidateLevel::no_validate)
Construct an EdgesBuilder from graph info.
void Clear()
Clear the edges in this EdgesBuilder.
ValidateLevel GetValidateLevel() const
Get the validate level.
Status Dump()
Dump the collection into files.
void SetWriterOptions(std::shared_ptr< WriterOptions > writer_options)
Set the writerOptions.
Status AddEdge(const Edge &e, const ValidateLevel &validate_level=ValidateLevel::default_validate)
Add an edge to the collection.