Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
graph_info.cc
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #include <unordered_set>
21 #include <utility>
22 
23 #include "graphar/status.h"
24 #include "mini-yaml/yaml/Yaml.hpp"
25 
26 #include "graphar/filesystem.h"
27 #include "graphar/graph_info.h"
28 #include "graphar/result.h"
29 #include "graphar/types.h"
30 #include "graphar/version_parser.h"
31 #include "graphar/yaml.h"
32 
33 namespace graphar {
34 
// Guard macro: if HasAdjacentListType(adj_list_type) is false, it makes the
// enclosing function return Status::KeyError naming the adjacency list type.
// Because it expands to a `return`, it can only be used inside a function
// where HasAdjacentListType is callable unqualified and whose return type can
// be built from a Status. The do/while(false) wrapper makes the expansion
// behave as a single statement.
35 #define CHECK_HAS_ADJ_LIST_TYPE(adj_list_type) \
36  do { \
37  if (!HasAdjacentListType(adj_list_type)) { \
38  return Status::KeyError( \
39  "Adjacency list type: ", AdjListTypeToString(adj_list_type), \
40  " is not found in edge info."); \
41  } \
42  } while (false)
43 
44 namespace {
45 
46 std::string ConcatEdgeTriple(const std::string& src_type,
47  const std::string& edge_type,
48  const std::string& dst_type) {
49  return src_type + REGULAR_SEPARATOR + edge_type + REGULAR_SEPARATOR +
50  dst_type;
51 }
52 
// Looks up `type` in `key_to_index` and returns the mapped index, or the
// compile-time NotFoundValue (default -1) when the key is absent.
template <int NotFoundValue = -1>
int LookupKeyIndex(const std::unordered_map<std::string, int>& key_to_index,
                   const std::string& type) {
  const auto entry = key_to_index.find(type);
  return entry != key_to_index.end() ? entry->second : NotFoundValue;
}
62 
// Returns a new vector holding a copy of every element of `values` followed by
// `new_element`; `values` itself is left untouched.
template <typename T>
std::vector<T> AddVectorElement(const std::vector<T>& values, T new_element) {
  std::vector<T> extended;
  extended.reserve(values.size() + 1);
  extended.insert(extended.end(), values.begin(), values.end());
  extended.push_back(std::move(new_element));
  return extended;
}
73 
// Concatenates path components into one directory-style path.
//
// Every non-empty component is appended and followed by exactly one '/' unless
// it already ends with one. Empty components are skipped: calling back() on an
// empty std::string is undefined behaviour, and an empty component contributes
// nothing to the path.
//
// @param paths components in order, e.g. {"vertex/person", "id"}
// @return the joined path, e.g. "vertex/person/id/"; empty if there is nothing
//         to join
std::string BuildPath(const std::vector<std::string>& paths) {
  std::string path;
  for (const auto& p : paths) {
    if (p.empty()) {
      continue;
    }
    path += p;
    if (p.back() != '/') {
      path += '/';
    }
  }
  return path;
}
85 } // namespace
86 
87 bool operator==(const Property& lhs, const Property& rhs) {
88  return (lhs.name == rhs.name) && (lhs.type == rhs.type) &&
89  (lhs.is_primary == rhs.is_primary) &&
90  (lhs.is_nullable == rhs.is_nullable) &&
91  (lhs.cardinality == rhs.cardinality);
92 }
93 
94 PropertyGroup::PropertyGroup(const std::vector<Property>& properties,
95  FileType file_type, const std::string& prefix)
96  : properties_(properties), file_type_(file_type), prefix_(prefix) {
97  if (prefix_.empty() && !properties_.empty()) {
98  for (const auto& p : properties_) {
99  prefix_ += p.name + REGULAR_SEPARATOR;
100  }
101  prefix_.back() = '/';
102  }
103 }
104 
105 const std::vector<Property>& PropertyGroup::GetProperties() const {
106  return properties_;
107 }
108 
109 bool PropertyGroup::HasProperty(const std::string& property_name) const {
110  for (const auto& p : properties_) {
111  if (p.name == property_name) {
112  return true;
113  }
114  }
115  return false;
116 }
117 
// NOTE(review): the signature line of this definition is missing from this
// listing (the embedded numbering jumps from 117 to 119). The body reads
// prefix_, file_type_ and properties_, so it is presumably
// PropertyGroup::IsValidated() — confirm against the original file.
//
// Returns false when the prefix is empty, when the file type is not CSV,
// PARQUET or ORC, when there are no properties, or when any property has an
// empty name, a null type, a name already seen in this group, or (for CSV
// files only) a LIST type or a non-SINGLE cardinality. Returns true otherwise.
119  if (prefix_.empty() ||
120  (file_type_ != FileType::CSV && file_type_ != FileType::PARQUET &&
121  file_type_ != FileType::ORC)) {
122  return false;
123  }
124  if (properties_.empty()) {
125  return false;
126  }
// Names seen so far; used to reject duplicates inside this group.
127  std::unordered_set<std::string> check_property_unique_set;
128  for (const auto& p : properties_) {
129  if (p.name.empty() || p.type == nullptr) {
130  return false;
131  }
132  if (check_property_unique_set.find(p.name) !=
133  check_property_unique_set.end()) {
134  return false;
135  } else {
136  check_property_unique_set.insert(p.name);
137  }
138  // TODO(@acezen): support list type in csv file
139  if (p.type->id() == Type::LIST && file_type_ == FileType::CSV) {
140  // list type is not supported in csv file
141  return false;
142  }
143  // TODO(@yangxk): support cardinality in csv file
144  if (p.cardinality != Cardinality::SINGLE && file_type_ == FileType::CSV) {
145  // list cardinality is not supported in csv file
146  return false;
147  }
148  }
149  return true;
150 }
151 
152 std::shared_ptr<PropertyGroup> CreatePropertyGroup(
153  const std::vector<Property>& properties, FileType file_type,
154  const std::string& prefix) {
155  if (properties.empty()) {
156  // empty property group is not allowed
157  return nullptr;
158  }
159  return std::make_shared<PropertyGroup>(properties, file_type, prefix);
160 }
161 
162 bool operator==(const PropertyGroup& lhs, const PropertyGroup& rhs) {
163  return (lhs.GetPrefix() == rhs.GetPrefix()) &&
164  (lhs.GetFileType() == rhs.GetFileType()) &&
165  (lhs.GetProperties() == rhs.GetProperties());
166 }
167 
168 AdjacentList::AdjacentList(AdjListType type, FileType file_type,
169  const std::string& prefix)
170  : type_(type), file_type_(file_type), prefix_(prefix) {
171  if (prefix_.empty()) {
172  prefix_ = std::string(AdjListTypeToString(type_)) + "/";
173  }
174 }
175 
// NOTE(review): the signature line of this definition is missing from this
// listing (the embedded numbering jumps from 175 to 177). The body reads
// type_, file_type_ and prefix_, so it is presumably
// AdjacentList::IsValidated() — confirm against the original file.
//
// Returns true only when the type is one of the four AdjListType values
// checked below, the prefix is non-empty, and the file type is CSV, PARQUET
// or ORC.
177  if (type_ != AdjListType::unordered_by_source &&
178  type_ != AdjListType::ordered_by_source &&
179  type_ != AdjListType::unordered_by_dest &&
180  type_ != AdjListType::ordered_by_dest) {
181  return false;
182  }
183  if (prefix_.empty() ||
184  (file_type_ != FileType::CSV && file_type_ != FileType::PARQUET &&
185  file_type_ != FileType::ORC)) {
186  return false;
187  }
188  return true;
189 }
190 
191 std::shared_ptr<AdjacentList> CreateAdjacentList(AdjListType type,
192  FileType file_type,
193  const std::string& prefix) {
194  return std::make_shared<AdjacentList>(type, file_type, prefix);
195 }
196 
// NOTE(review): the line that opens this definition is missing from this
// listing (the embedded numbering jumps from 196 to 198). VertexInfo builds it
// with `new Impl(...)`, so it is presumably the `VertexInfo::Impl` class
// header — confirm against the original file.
//
// Holds the state behind VertexInfo: type, chunk size, property groups,
// labels, prefix and version, plus per-property lookup maps keyed by name.
198  public:
// Copies the arguments into the members, replaces an empty prefix with
// "<type>/", and indexes every property of every non-null property group by
// name (group index, primary flag, nullable flag, type, cardinality).
// emplace() does not overwrite, so the first occurrence of a repeated property
// name is the one kept in the maps.
// NOTE(review): `property_groups` is a const reference, so the std::move in
// the initializer list still results in a copy.
199  Impl(const std::string& type, IdType chunk_size, const std::string& prefix,
200  const PropertyGroupVector& property_groups,
201  const std::vector<std::string>& labels,
202  std::shared_ptr<const InfoVersion> version)
203  : type_(type),
204  chunk_size_(chunk_size),
205  property_groups_(std::move(property_groups)),
206  labels_(labels),
207  prefix_(prefix),
208  version_(std::move(version)) {
209  if (prefix_.empty()) {
210  prefix_ = type_ + "/"; // default prefix
211  }
212  for (size_t i = 0; i < property_groups_.size(); i++) {
213  const auto& pg = property_groups_[i];
// Null groups are skipped here; is_validated() rejects them later.
214  if (!pg) {
215  continue;
216  }
217  for (const auto& p : pg->GetProperties()) {
218  property_name_to_index_.emplace(p.name, i);
219  property_name_to_primary_.emplace(p.name, p.is_primary);
220  property_name_to_nullable_.emplace(p.name, p.is_nullable);
221  property_name_to_type_.emplace(p.name, p.type);
222  property_name_to_cardinality_.emplace(p.name, p.cardinality);
223  }
224  }
225  }
226 
// Returns false when the type or prefix is empty, chunk_size_ is not positive,
// any property group is null or fails its own IsValidated(), or a property
// name appears more than once across all groups. Returns true otherwise.
227  bool is_validated() const noexcept {
228  if (type_.empty() || chunk_size_ <= 0 || prefix_.empty()) {
229  return false;
230  }
231  std::unordered_set<std::string> check_property_unique_set;
232  for (const auto& pg : property_groups_) {
233  // check if property group is validated
234  if (!pg || !pg->IsValidated()) {
235  return false;
236  }
237  // check if property name is unique in all property groups
238  for (const auto& p : pg->GetProperties()) {
239  if (check_property_unique_set.find(p.name) !=
240  check_property_unique_set.end()) {
241  return false;
242  } else {
243  check_property_unique_set.insert(p.name);
244  }
245  }
246  }
247 
248  return true;
249  }
250 
251  std::string type_;
252  IdType chunk_size_;
253  PropertyGroupVector property_groups_;
254  std::vector<std::string> labels_;
255  std::string prefix_;
256  std::shared_ptr<const InfoVersion> version_;
// Property name -> index of the owning group in property_groups_.
257  std::unordered_map<std::string, int> property_name_to_index_;
258  std::unordered_map<std::string, bool> property_name_to_primary_;
259  std::unordered_map<std::string, bool> property_name_to_nullable_;
260  std::unordered_map<std::string, std::shared_ptr<DataType>>
261  property_name_to_type_;
262  std::unordered_map<std::string, Cardinality> property_name_to_cardinality_;
263 };
264 
265 VertexInfo::VertexInfo(const std::string& type, IdType chunk_size,
266  const PropertyGroupVector& property_groups,
267  const std::vector<std::string>& labels,
268  const std::string& prefix,
269  std::shared_ptr<const InfoVersion> version)
270  : impl_(new Impl(type, chunk_size, prefix, property_groups, labels,
271  version)) {}
272 
273 VertexInfo::~VertexInfo() = default;
274 
275 const std::string& VertexInfo::GetType() const { return impl_->type_; }
276 
277 IdType VertexInfo::GetChunkSize() const { return impl_->chunk_size_; }
278 
279 const std::string& VertexInfo::GetPrefix() const { return impl_->prefix_; }
280 
281 const std::vector<std::string>& VertexInfo::GetLabels() const {
282  return impl_->labels_;
283 }
284 
285 const std::shared_ptr<const InfoVersion>& VertexInfo::version() const {
286  return impl_->version_;
287 }
288 
289 Result<std::string> VertexInfo::GetFilePath(
290  std::shared_ptr<PropertyGroup> property_group, IdType chunk_index) const {
291  if (property_group == nullptr) {
292  return Status::Invalid("property group is nullptr");
293  }
294  return BuildPath({impl_->prefix_, property_group->GetPrefix()}) + "chunk" +
295  std::to_string(chunk_index);
296 }
297 
298 Result<std::string> VertexInfo::GetPathPrefix(
299  std::shared_ptr<PropertyGroup> property_group) const {
300  if (property_group == nullptr) {
301  return Status::Invalid("property group is nullptr");
302  }
303  return BuildPath({impl_->prefix_, property_group->GetPrefix()});
304 }
305 
306 Result<std::string> VertexInfo::GetVerticesNumFilePath() const {
307  return BuildPath({impl_->prefix_}) + "vertex_count";
308 }
309 
// NOTE(review): the signature line of this definition is missing from this
// listing (the embedded numbering jumps from 309 to 311). Given its position
// it is presumably a VertexInfo member returning the property group count —
// confirm the name and signature against the original file.
// Returns the number of stored property groups, converted to int.
311  return static_cast<int>(impl_->property_groups_.size());
312 }
313 
314 std::shared_ptr<PropertyGroup> VertexInfo::GetPropertyGroup(
315  const std::string& property_name) const {
316  int i = LookupKeyIndex(impl_->property_name_to_index_, property_name);
317  return i == -1 ? nullptr : impl_->property_groups_[i];
318 }
319 
320 std::shared_ptr<PropertyGroup> VertexInfo::GetPropertyGroupByIndex(
321  int index) const {
322  if (index < 0 || index >= static_cast<int>(impl_->property_groups_.size())) {
323  return nullptr;
324  }
325  return impl_->property_groups_[index];
326 }
327 
328 const PropertyGroupVector& VertexInfo::GetPropertyGroups() const {
329  return impl_->property_groups_;
330 }
331 
332 bool VertexInfo::IsPrimaryKey(const std::string& property_name) const {
333  auto it = impl_->property_name_to_primary_.find(property_name);
334  if (it == impl_->property_name_to_primary_.end()) {
335  return false;
336  }
337  return it->second;
338 }
339 
340 bool VertexInfo::IsNullableKey(const std::string& property_name) const {
341  auto it = impl_->property_name_to_nullable_.find(property_name);
342  if (it == impl_->property_name_to_nullable_.end()) {
343  return false;
344  }
345  return it->second;
346 }
347 
348 bool VertexInfo::HasProperty(const std::string& property_name) const {
349  return impl_->property_name_to_index_.find(property_name) !=
350  impl_->property_name_to_index_.end();
351 }
352 
// NOTE(review): the first line of this definition (return type and name) is
// missing from this listing (the embedded numbering jumps from 352 to 354).
// Given its position it is presumably a VertexInfo member that tests for an
// equal property group — confirm against the original file.
//
// Returns false for a null argument; otherwise returns true if any stored
// property group compares equal (operator== on PropertyGroup) to the argument.
// NOTE(review): `*pg` is dereferenced without a null check, while the Impl
// constructor in this file tolerates null entries in property_groups_; a null
// entry would be dereferenced here.
354  const std::shared_ptr<PropertyGroup>& property_group) const {
355  if (property_group == nullptr) {
356  return false;
357  }
358  for (const auto& pg : impl_->property_groups_) {
359  if (*pg == *property_group) {
360  return true;
361  }
362  }
363  return false;
364 }
365 
366 Result<std::shared_ptr<DataType>> VertexInfo::GetPropertyType(
367  const std::string& property_name) const {
368  auto it = impl_->property_name_to_type_.find(property_name);
369  if (it == impl_->property_name_to_type_.end()) {
370  return Status::Invalid("property name not found: ", property_name);
371  }
372  return it->second;
373 }
374 
375 Result<Cardinality> VertexInfo::GetPropertyCardinality(
376  const std::string& property_name) const {
377  auto it = impl_->property_name_to_cardinality_.find(property_name);
378  if (it == impl_->property_name_to_cardinality_.end()) {
379  return Status::Invalid("property name not found: ", property_name);
380  }
381  return it->second;
382 }
383 
384 Result<std::shared_ptr<VertexInfo>> VertexInfo::AddPropertyGroup(
385  std::shared_ptr<PropertyGroup> property_group) const {
386  if (property_group == nullptr) {
387  return Status::Invalid("property group is nullptr");
388  }
389  for (const auto& property : property_group->GetProperties()) {
390  if (HasProperty(property.name)) {
391  return Status::Invalid("property in the property group already exists: ",
392  property.name);
393  }
394  }
395  return std::make_shared<VertexInfo>(
396  impl_->type_, impl_->chunk_size_,
397  AddVectorElement(impl_->property_groups_, property_group), impl_->labels_,
398  impl_->prefix_, impl_->version_);
399 }
400 
401 bool VertexInfo::IsValidated() const { return impl_->is_validated(); }
402 
403 std::shared_ptr<VertexInfo> CreateVertexInfo(
404  const std::string& type, IdType chunk_size,
405  const PropertyGroupVector& property_groups,
406  const std::vector<std::string>& labels, const std::string& prefix,
407  std::shared_ptr<const InfoVersion> version) {
408  if (type.empty() || chunk_size <= 0) {
409  return nullptr;
410  }
411  return std::make_shared<VertexInfo>(type, chunk_size, property_groups, labels,
412  prefix, version);
413 }
414 
415 Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(
416  std::shared_ptr<Yaml> yaml) {
417  if (yaml == nullptr) {
418  return Status::Invalid("yaml shared pointer is nullptr");
419  }
420  std::string type = yaml->operator[]("type").As<std::string>();
421  IdType chunk_size =
422  static_cast<IdType>(yaml->operator[]("chunk_size").As<int64_t>());
423  std::string prefix;
424  if (!yaml->operator[]("prefix").IsNone()) {
425  prefix = yaml->operator[]("prefix").As<std::string>();
426  }
427  std::vector<std::string> labels;
428  const auto& labels_node = yaml->operator[]("labels");
429  if (labels_node.IsSequence()) {
430  for (auto it = labels_node.Begin(); it != labels_node.End(); it++) {
431  labels.push_back((*it).second.As<std::string>());
432  }
433  }
434  std::shared_ptr<const InfoVersion> version = nullptr;
435  if (!yaml->operator[]("version").IsNone()) {
436  GAR_ASSIGN_OR_RAISE(
437  version,
438  InfoVersion::Parse(yaml->operator[]("version").As<std::string>()));
439  }
440  PropertyGroupVector property_groups;
441  auto property_groups_node = yaml->operator[]("property_groups");
442  if (!property_groups_node.IsNone()) { // property_groups exist
443  for (auto it = property_groups_node.Begin();
444  it != property_groups_node.End(); it++) {
445  std::string pg_prefix;
446  auto& node = (*it).second;
447  if (!node["prefix"].IsNone()) {
448  pg_prefix = node["prefix"].As<std::string>();
449  }
450  auto file_type = StringToFileType(node["file_type"].As<std::string>());
451  std::vector<Property> property_vec;
452  auto& properties = node["properties"];
453  for (auto iit = properties.Begin(); iit != properties.End(); iit++) {
454  auto& p_node = (*iit).second;
455  auto property_name = p_node["name"].As<std::string>();
456  auto property_type =
457  DataType::TypeNameToDataType(p_node["data_type"].As<std::string>());
458  bool is_primary = p_node["is_primary"].As<bool>();
459  bool is_nullable =
460  p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
461  Cardinality cardinality = Cardinality::SINGLE;
462  if (!p_node["cardinality"].IsNone()) {
463  cardinality =
464  StringToCardinality(p_node["cardinality"].As<std::string>());
465  }
466  property_vec.emplace_back(property_name, property_type, is_primary,
467  is_nullable, cardinality);
468  }
469  property_groups.push_back(
470  std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
471  }
472  }
473  return std::make_shared<VertexInfo>(type, chunk_size, property_groups, labels,
474  prefix, version);
475 }
476 
477 Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(const std::string& input) {
478  GAR_ASSIGN_OR_RAISE(auto yaml, Yaml::Load(input));
479  return VertexInfo::Load(yaml);
480 }
481 
482 Result<std::string> VertexInfo::Dump() const noexcept {
483  if (!IsValidated()) {
484  return Status::Invalid("The vertex info is not validated");
485  }
486  std::string dump_string;
487  ::Yaml::Node node;
488  try {
489  node["type"] = impl_->type_;
490  node["chunk_size"] = std::to_string(impl_->chunk_size_);
491  node["prefix"] = impl_->prefix_;
492  if (impl_->labels_.size() > 0) {
493  node["labels"];
494  for (const auto& label : impl_->labels_) {
495  node["labels"].PushBack();
496  node["labels"][node["labels"].Size() - 1] = label;
497  }
498  }
499  for (const auto& pg : impl_->property_groups_) {
500  ::Yaml::Node pg_node;
501  if (!pg->GetPrefix().empty()) {
502  pg_node["prefix"] = pg->GetPrefix();
503  }
504  pg_node["file_type"] = FileTypeToString(pg->GetFileType());
505  for (const auto& p : pg->GetProperties()) {
506  ::Yaml::Node p_node;
507  p_node["name"] = p.name;
508  p_node["data_type"] = p.type->ToTypeName();
509  p_node["is_primary"] = p.is_primary ? "true" : "false";
510  p_node["is_nullable"] = p.is_nullable ? "true" : "false";
511  if (p.cardinality != Cardinality::SINGLE) {
512  p_node["cardinality"] = CardinalityToString(p.cardinality);
513  }
514  pg_node["properties"].PushBack();
515  pg_node["properties"][pg_node["properties"].Size() - 1] = p_node;
516  }
517  node["property_groups"].PushBack();
518  node["property_groups"][node["property_groups"].Size() - 1] = pg_node;
519  }
520  if (impl_->version_ != nullptr) {
521  node["version"] = impl_->version_->ToString();
522  }
523  ::Yaml::Serialize(node, dump_string);
524  } catch (const std::exception& e) {
525  return Status::Invalid("Failed to dump vertex info: ", e.what());
526  }
527  return dump_string;
528 }
529 
530 Status VertexInfo::Save(const std::string& path) const {
531  std::string no_url_path;
532  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(path, &no_url_path));
533  GAR_ASSIGN_OR_RAISE(auto yaml_content, this->Dump());
534  return fs->WriteValueToFile(yaml_content, no_url_path);
535 }
536 
// NOTE(review): the line that opens this definition is missing from this
// listing (the embedded numbering jumps from 536 to 538). EdgeInfo builds it
// with `new Impl(...)`, so it is presumably the `EdgeInfo::Impl` class header
// — confirm against the original file.
//
// Holds the state behind EdgeInfo: the three type names, the chunk sizes, the
// directed flag, prefix, adjacency lists, property groups and version, plus
// lookup maps keyed by adjacency list type and by property name.
538  public:
// Copies the arguments into the members, replaces an empty prefix with
// "<src><sep><edge><sep><dst>/" (sep = REGULAR_SEPARATOR), maps each non-null
// adjacency list's type to its index, and indexes every property of every
// non-null property group by name. Unlike the vertex counterpart in this file,
// no cardinality map is built.
// NOTE(review): `adjacent_lists` and `property_groups` are const references,
// so the std::move calls in the initializer list still result in copies.
539  Impl(const std::string& src_type, const std::string& edge_type,
540  const std::string& dst_type, IdType chunk_size, IdType src_chunk_size,
541  IdType dst_chunk_size, bool directed, const std::string& prefix,
542  const AdjacentListVector& adjacent_lists,
543  const PropertyGroupVector& property_groups,
544  std::shared_ptr<const InfoVersion> version)
545  : src_type_(src_type),
546  edge_type_(edge_type),
547  dst_type_(dst_type),
548  chunk_size_(chunk_size),
549  src_chunk_size_(src_chunk_size),
550  dst_chunk_size_(dst_chunk_size),
551  directed_(directed),
552  prefix_(prefix),
553  adjacent_lists_(std::move(adjacent_lists)),
554  property_groups_(std::move(property_groups)),
555  version_(std::move(version)) {
556  if (prefix_.empty()) {
557  prefix_ = src_type_ + REGULAR_SEPARATOR + edge_type_ + REGULAR_SEPARATOR +
558  dst_type_ + "/"; // default prefix
559  }
560  for (size_t i = 0; i < adjacent_lists_.size(); i++) {
561  if (!adjacent_lists_[i]) {
562  continue;
563  }
564 
// operator[] assignment: when a type repeats, the later index wins.
565  auto adj_list_type = adjacent_lists_[i]->GetType();
566  adjacent_list_type_to_index_[adj_list_type] = i;
567  }
568  for (size_t i = 0; i < property_groups_.size(); i++) {
569  const auto& pg = property_groups_[i];
570  if (!pg) {
571  continue;
572  }
// emplace() does not overwrite: the first occurrence of a name is kept.
573  for (const auto& p : pg->GetProperties()) {
574  property_name_to_index_.emplace(p.name, i);
575  property_name_to_primary_.emplace(p.name, p.is_primary);
576  property_name_to_nullable_.emplace(p.name, p.is_nullable);
577  property_name_to_type_.emplace(p.name, p.type);
578  }
579  }
580  }
581 
// Returns false when any type name or the prefix is empty, any chunk size is
// not positive, there are no adjacency lists, any adjacency list or property
// group is null or fails its own IsValidated(), any property has a non-SINGLE
// cardinality, a property name repeats across groups, or the number of
// adjacency lists differs from the number of distinct indexed types (which
// happens for duplicate types). Returns true otherwise.
582  bool is_validated() const noexcept {
583  if (src_type_.empty() || edge_type_.empty() || dst_type_.empty() ||
584  chunk_size_ <= 0 || src_chunk_size_ <= 0 || dst_chunk_size_ <= 0 ||
585  prefix_.empty() || adjacent_lists_.empty()) {
586  return false;
587  }
588 
589  for (const auto& al : adjacent_lists_) {
590  if (!al || !al->IsValidated()) {
591  return false;
592  }
593  }
594 
595  std::unordered_set<std::string> check_property_unique_set;
596  for (const auto& pg : property_groups_) {
597  // check if property group is validated
598  if (!pg || !pg->IsValidated()) {
599  return false;
600  }
601  // check if property name is unique in all property groups
602  for (const auto& p : pg->GetProperties()) {
603  if (p.cardinality != Cardinality::SINGLE) {
604  // edge property only supports single cardinality
// NOTE(review): this validator writes to std::cout, and <iostream> is not
// among the includes visible in this listing — presumably it arrives through
// another header; confirm.
605  std::cout
606  << "Edge property only supports single cardinality, but got: "
607  << CardinalityToString(p.cardinality) << std::endl;
608  return false;
609  }
610  if (check_property_unique_set.find(p.name) !=
611  check_property_unique_set.end()) {
612  return false;
613  } else {
614  check_property_unique_set.insert(p.name);
615  }
616  }
617  }
618  if (adjacent_lists_.size() != adjacent_list_type_to_index_.size()) {
619  return false;
620  }
621  return true;
622  }
623 
624  std::string src_type_;
625  std::string edge_type_;
626  std::string dst_type_;
627  IdType chunk_size_;
628  IdType src_chunk_size_;
629  IdType dst_chunk_size_;
630  bool directed_;
631  std::string prefix_;
632  AdjacentListVector adjacent_lists_;
633  PropertyGroupVector property_groups_;
// Adjacency list type -> index into adjacent_lists_.
634  std::unordered_map<AdjListType, int> adjacent_list_type_to_index_;
// Property name -> index of the owning group in property_groups_.
635  std::unordered_map<std::string, int> property_name_to_index_;
636  std::unordered_map<std::string, bool> property_name_to_primary_;
637  std::unordered_map<std::string, bool> property_name_to_nullable_;
638  std::unordered_map<std::string, std::shared_ptr<DataType>>
639  property_name_to_type_;
640  std::shared_ptr<const InfoVersion> version_;
641 };
642 
643 EdgeInfo::EdgeInfo(const std::string& src_type, const std::string& edge_type,
644  const std::string& dst_type, IdType chunk_size,
645  IdType src_chunk_size, IdType dst_chunk_size, bool directed,
646  const AdjacentListVector& adjacent_lists,
647  const PropertyGroupVector& property_groups,
648  const std::string& prefix,
649  std::shared_ptr<const InfoVersion> version)
650  : impl_(new Impl(src_type, edge_type, dst_type, chunk_size, src_chunk_size,
651  dst_chunk_size, directed, prefix, adjacent_lists,
652  property_groups, version)) {}
653 
654 EdgeInfo::~EdgeInfo() = default;
655 
656 const std::string& EdgeInfo::GetSrcType() const { return impl_->src_type_; }
657 
658 const std::string& EdgeInfo::GetEdgeType() const { return impl_->edge_type_; }
659 
660 const std::string& EdgeInfo::GetDstType() const { return impl_->dst_type_; }
661 
662 IdType EdgeInfo::GetChunkSize() const { return impl_->chunk_size_; }
663 
664 IdType EdgeInfo::GetSrcChunkSize() const { return impl_->src_chunk_size_; }
665 
666 IdType EdgeInfo::GetDstChunkSize() const { return impl_->dst_chunk_size_; }
667 
668 const std::string& EdgeInfo::GetPrefix() const { return impl_->prefix_; }
669 
670 bool EdgeInfo::IsDirected() const { return impl_->directed_; }
671 
672 const std::shared_ptr<const InfoVersion>& EdgeInfo::version() const {
673  return impl_->version_;
674 }
675 
676 bool EdgeInfo::HasAdjacentListType(AdjListType adj_list_type) const {
677  return impl_->adjacent_list_type_to_index_.find(adj_list_type) !=
678  impl_->adjacent_list_type_to_index_.end();
679 }
680 
681 bool EdgeInfo::HasProperty(const std::string& property_name) const {
682  return impl_->property_name_to_index_.find(property_name) !=
683  impl_->property_name_to_index_.end();
684 }
685 
// NOTE(review): the first line of this definition (return type and name) is
// missing from this listing (the embedded numbering jumps from 685 to 687).
// Given its position it is presumably an EdgeInfo member that tests for an
// equal property group — confirm against the original file.
//
// Returns false for a null argument; otherwise returns true if any stored
// property group compares equal (operator== on PropertyGroup) to the argument.
// NOTE(review): `*pg` is dereferenced without a null check, while the Impl
// constructor in this file tolerates null entries in property_groups_; a null
// entry would be dereferenced here.
687  const std::shared_ptr<PropertyGroup>& property_group) const {
688  if (property_group == nullptr) {
689  return false;
690  }
691  for (const auto& pg : impl_->property_groups_) {
692  if (*pg == *property_group) {
693  return true;
694  }
695  }
696  return false;
697 }
698 
699 std::shared_ptr<AdjacentList> EdgeInfo::GetAdjacentList(
700  AdjListType adj_list_type) const {
701  auto it = impl_->adjacent_list_type_to_index_.find(adj_list_type);
702  if (it == impl_->adjacent_list_type_to_index_.end()) {
703  return nullptr;
704  }
705  return impl_->adjacent_lists_[it->second];
706 }
707 
// NOTE(review): the signature line of this definition is missing from this
// listing (the embedded numbering jumps from 707 to 709). Given its position
// it is presumably an EdgeInfo member returning the property group count —
// confirm the name and signature against the original file.
// Returns the number of stored property groups, converted to int.
709  return static_cast<int>(impl_->property_groups_.size());
710 }
711 
712 const PropertyGroupVector& EdgeInfo::GetPropertyGroups() const {
713  return impl_->property_groups_;
714 }
715 
716 std::shared_ptr<PropertyGroup> EdgeInfo::GetPropertyGroup(
717  const std::string& property_name) const {
718  int i = LookupKeyIndex(impl_->property_name_to_index_, property_name);
719  return i == -1 ? nullptr : impl_->property_groups_[i];
720 }
721 
722 std::shared_ptr<PropertyGroup> EdgeInfo::GetPropertyGroupByIndex(
723  int index) const {
724  if (index < 0 || index >= static_cast<int>(impl_->property_groups_.size())) {
725  return nullptr;
726  }
727  return impl_->property_groups_[index];
728 }
729 
// NOTE(review): the first line of this definition (return type and name) is
// missing from this listing (the embedded numbering jumps from 729 to 731).
// It is presumably an EdgeInfo member returning a path — confirm the name and
// signature against the original file.
//
// Returns KeyError (via CHECK_HAS_ADJ_LIST_TYPE) when the adjacency list type
// is not present; otherwise returns
// BuildPath({edge prefix, adjacency list prefix}) + "vertex_count".
731  AdjListType adj_list_type) const {
732  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
733  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
734  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
735  "vertex_count";
736 }
737 
738 Result<std::string> EdgeInfo::GetEdgesNumFilePath(
739  IdType vertex_chunk_index, AdjListType adj_list_type) const {
740  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
741  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
742  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
743  "edge_count" + std::to_string(vertex_chunk_index);
744 }
745 
746 Result<std::string> EdgeInfo::GetAdjListFilePath(
747  IdType vertex_chunk_index, IdType edge_chunk_index,
748  AdjListType adj_list_type) const {
749  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
750  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
751  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
752  "adj_list/part" + std::to_string(vertex_chunk_index) + "/chunk" +
753  std::to_string(edge_chunk_index);
754 }
755 
756 Result<std::string> EdgeInfo::GetAdjListPathPrefix(
757  AdjListType adj_list_type) const {
758  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
759  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
760  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
761  "adj_list/";
762 }
763 
// NOTE(review): the first line of this definition (return type and name) is
// missing from this listing (the embedded numbering jumps from 763 to 765).
// It is presumably an EdgeInfo member returning an offset chunk path — confirm
// the name and signature against the original file.
//
// Returns KeyError (via CHECK_HAS_ADJ_LIST_TYPE) when the adjacency list type
// is not present; otherwise returns
// BuildPath({edge prefix, adjacency list prefix}) + "offset/chunk" +
// <vertex_chunk_index>.
765  IdType vertex_chunk_index, AdjListType adj_list_type) const {
766  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
767  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
768  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
769  "offset/chunk" + std::to_string(vertex_chunk_index);
770 }
771 
772 Result<std::string> EdgeInfo::GetOffsetPathPrefix(
773  AdjListType adj_list_type) const {
774  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
775  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
776  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
777  "offset/";
778 }
779 
780 Result<std::string> EdgeInfo::GetPropertyFilePath(
781  const std::shared_ptr<PropertyGroup>& property_group,
782  AdjListType adj_list_type, IdType vertex_chunk_index,
783  IdType edge_chunk_index) const {
784  if (property_group == nullptr) {
785  return Status::Invalid("property group is nullptr");
786  }
787  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
788  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
789  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix(),
790  property_group->GetPrefix()}) +
791  "part" + std::to_string(vertex_chunk_index) + "/chunk" +
792  std::to_string(edge_chunk_index);
793 }
794 
// NOTE(review): the first line of this definition (return type and name) is
// missing from this listing (the embedded numbering jumps from 794 to 796).
// It is presumably an EdgeInfo member returning a property group directory —
// confirm the name and signature against the original file.
//
// Returns Status::Invalid for a null property group, KeyError (via
// CHECK_HAS_ADJ_LIST_TYPE) when the adjacency list type is not present, and
// otherwise
// BuildPath({edge prefix, adjacency list prefix, property group prefix}).
796  const std::shared_ptr<PropertyGroup>& property_group,
797  AdjListType adj_list_type) const {
798  if (property_group == nullptr) {
799  return Status::Invalid("property group is nullptr");
800  }
801  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
802  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
803  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix(),
804  property_group->GetPrefix()});
805 }
806 
807 Result<std::shared_ptr<DataType>> EdgeInfo::GetPropertyType(
808  const std::string& property_name) const {
809  auto it = impl_->property_name_to_type_.find(property_name);
810  if (it == impl_->property_name_to_type_.end()) {
811  return Status::Invalid("property name not found: ", property_name);
812  }
813  return it->second;
814 }
815 
816 bool EdgeInfo::IsPrimaryKey(const std::string& property_name) const {
817  auto it = impl_->property_name_to_primary_.find(property_name);
818  if (it == impl_->property_name_to_primary_.end()) {
819  return false;
820  }
821  return it->second;
822 }
823 
824 bool EdgeInfo::IsNullableKey(const std::string& property_name) const {
825  auto it = impl_->property_name_to_nullable_.find(property_name);
826  if (it == impl_->property_name_to_nullable_.end()) {
827  return false;
828  }
829  return it->second;
830 }
831 
832 Result<std::shared_ptr<EdgeInfo>> EdgeInfo::AddAdjacentList(
833  std::shared_ptr<AdjacentList> adj_list) const {
834  if (adj_list == nullptr) {
835  return Status::Invalid("adj list is nullptr");
836  }
837  if (HasAdjacentListType(adj_list->GetType())) {
838  return Status::Invalid("adj list type already exists: ",
839  AdjListTypeToString(adj_list->GetType()));
840  }
841  return std::make_shared<EdgeInfo>(
842  impl_->src_type_, impl_->edge_type_, impl_->dst_type_, impl_->chunk_size_,
843  impl_->src_chunk_size_, impl_->dst_chunk_size_, impl_->directed_,
844  AddVectorElement(impl_->adjacent_lists_, adj_list),
845  impl_->property_groups_, impl_->prefix_, impl_->version_);
846 }
847 
848 Result<std::shared_ptr<EdgeInfo>> EdgeInfo::AddPropertyGroup(
849  std::shared_ptr<PropertyGroup> property_group) const {
850  if (property_group == nullptr) {
851  return Status::Invalid("property group is nullptr");
852  }
853  for (const auto& property : property_group->GetProperties()) {
854  if (HasProperty(property.name)) {
855  return Status::Invalid("property in property group already exists: ",
856  property.name);
857  }
858  }
859  return std::make_shared<EdgeInfo>(
860  impl_->src_type_, impl_->edge_type_, impl_->dst_type_, impl_->chunk_size_,
861  impl_->src_chunk_size_, impl_->dst_chunk_size_, impl_->directed_,
862  impl_->adjacent_lists_,
863  AddVectorElement(impl_->property_groups_, property_group), impl_->prefix_,
864  impl_->version_);
865 }
866 
867 bool EdgeInfo::IsValidated() const { return impl_->is_validated(); }
868 
869 std::shared_ptr<EdgeInfo> CreateEdgeInfo(
870  const std::string& src_type, const std::string& edge_type,
871  const std::string& dst_type, IdType chunk_size, IdType src_chunk_size,
872  IdType dst_chunk_size, bool directed,
873  const AdjacentListVector& adjacent_lists,
874  const PropertyGroupVector& property_groups, const std::string& prefix,
875  std::shared_ptr<const InfoVersion> version) {
876  if (src_type.empty() || edge_type.empty() || dst_type.empty() ||
877  chunk_size <= 0 || src_chunk_size <= 0 || dst_chunk_size <= 0 ||
878  adjacent_lists.empty()) {
879  return nullptr;
880  }
881  return std::make_shared<EdgeInfo>(
882  src_type, edge_type, dst_type, chunk_size, src_chunk_size, dst_chunk_size,
883  directed, adjacent_lists, property_groups, prefix, version);
884 }
885 
886 Result<std::shared_ptr<EdgeInfo>> EdgeInfo::Load(std::shared_ptr<Yaml> yaml) {
887  if (yaml == nullptr) {
888  return Status::Invalid("yaml shared pointer is nullptr.");
889  }
890  std::string src_type = yaml->operator[]("src_type").As<std::string>();
891  std::string edge_type = yaml->operator[]("edge_type").As<std::string>();
892  std::string dst_type = yaml->operator[]("dst_type").As<std::string>();
893  IdType chunk_size =
894  static_cast<IdType>(yaml->operator[]("chunk_size").As<int64_t>());
895  IdType src_chunk_size =
896  static_cast<IdType>(yaml->operator[]("src_chunk_size").As<int64_t>());
897  IdType dst_chunk_size =
898  static_cast<IdType>(yaml->operator[]("dst_chunk_size").As<int64_t>());
899  bool directed = yaml->operator[]("directed").As<bool>();
900  std::string prefix;
901  if (!yaml->operator[]("prefix").IsNone()) {
902  prefix = yaml->operator[]("prefix").As<std::string>();
903  }
904  std::shared_ptr<const InfoVersion> version = nullptr;
905  if (!yaml->operator[]("version").IsNone()) {
906  GAR_ASSIGN_OR_RAISE(
907  version,
908  InfoVersion::Parse(yaml->operator[]("version").As<std::string>()));
909  }
910 
911  AdjacentListVector adjacent_lists;
912  PropertyGroupVector property_groups;
913  auto adj_lists_node = yaml->operator[]("adj_lists");
914  if (adj_lists_node.IsSequence()) {
915  for (auto it = adj_lists_node.Begin(); it != adj_lists_node.End(); it++) {
916  auto& node = (*it).second;
917  auto ordered = node["ordered"].As<bool>();
918  auto aligned = node["aligned_by"].As<std::string>();
919  auto adj_list_type = OrderedAlignedToAdjListType(ordered, aligned);
920  auto file_type = StringToFileType(node["file_type"].As<std::string>());
921  std::string adj_list_prefix;
922  if (!node["prefix"].IsNone()) {
923  adj_list_prefix = node["prefix"].As<std::string>();
924  }
925  adjacent_lists.push_back(std::make_shared<AdjacentList>(
926  adj_list_type, file_type, adj_list_prefix));
927  }
928  }
929  auto property_groups_node = yaml->operator[]("property_groups");
930  if (!property_groups_node.IsNone()) { // property_groups exist
931  for (auto pg_it = property_groups_node.Begin();
932  pg_it != property_groups_node.End(); pg_it++) {
933  auto& pg_node = (*pg_it).second;
934  std::string pg_prefix;
935  if (!pg_node["prefix"].IsNone()) {
936  pg_prefix = pg_node["prefix"].As<std::string>();
937  }
938  auto file_type = StringToFileType(pg_node["file_type"].As<std::string>());
939  auto properties = pg_node["properties"];
940  std::vector<Property> property_vec;
941  for (auto p_it = properties.Begin(); p_it != properties.End(); p_it++) {
942  auto& p_node = (*p_it).second;
943  auto property_name = p_node["name"].As<std::string>();
944  auto property_type =
945  DataType::TypeNameToDataType(p_node["data_type"].As<std::string>());
946  if (!p_node["cardinality"].IsNone() &&
947  StringToCardinality(p_node["cardinality"].As<std::string>()) !=
948  Cardinality::SINGLE) {
949  return Status::YamlError(
950  "Unsupported set cardinality for edge property.");
951  }
952  bool is_primary = p_node["is_primary"].As<bool>();
953  bool is_nullable =
954  p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
955  property_vec.emplace_back(property_name, property_type, is_primary,
956  is_nullable);
957  }
958  property_groups.push_back(
959  std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
960  }
961  }
962  return std::make_shared<EdgeInfo>(
963  src_type, edge_type, dst_type, chunk_size, src_chunk_size, dst_chunk_size,
964  directed, adjacent_lists, property_groups, prefix, version);
965 }
966 
967 Result<std::shared_ptr<EdgeInfo>> EdgeInfo::Load(const std::string& input) {
968  GAR_ASSIGN_OR_RAISE(auto yaml, Yaml::Load(input));
969  return EdgeInfo::Load(yaml);
970 }
971 
972 Result<std::string> EdgeInfo::Dump() const noexcept {
973  if (!IsValidated()) {
974  return Status::Invalid("The edge info is not validated.");
975  }
976  std::string dump_string;
977  ::Yaml::Node node;
978  try {
979  node["src_type"] = impl_->src_type_;
980  node["edge_type"] = impl_->edge_type_;
981  node["dst_type"] = impl_->dst_type_;
982  node["chunk_size"] = std::to_string(impl_->chunk_size_);
983  node["src_chunk_size"] = std::to_string(impl_->src_chunk_size_);
984  node["dst_chunk_size"] = std::to_string(impl_->dst_chunk_size_);
985  node["prefix"] = impl_->prefix_;
986  node["directed"] = impl_->directed_ ? "true" : "false";
987  for (const auto& adjacent_list : impl_->adjacent_lists_) {
988  ::Yaml::Node adj_list_node;
989  auto adj_list_type = adjacent_list->GetType();
990  auto pair = AdjListTypeToOrderedAligned(adj_list_type);
991  adj_list_node["ordered"] = pair.first ? "true" : "false";
992  adj_list_node["aligned_by"] = pair.second;
993  adj_list_node["prefix"] = adjacent_list->GetPrefix();
994  adj_list_node["file_type"] =
995  FileTypeToString(adjacent_list->GetFileType());
996  node["adj_lists"].PushBack();
997  node["adj_lists"][node["adj_lists"].Size() - 1] = adj_list_node;
998  }
999  for (const auto& pg : impl_->property_groups_) {
1000  ::Yaml::Node pg_node;
1001  if (!pg->GetPrefix().empty()) {
1002  pg_node["prefix"] = pg->GetPrefix();
1003  }
1004  pg_node["file_type"] = FileTypeToString(pg->GetFileType());
1005  for (const auto& p : pg->GetProperties()) {
1006  ::Yaml::Node p_node;
1007  p_node["name"] = p.name;
1008  p_node["data_type"] = p.type->ToTypeName();
1009  p_node["is_primary"] = p.is_primary ? "true" : "false";
1010  p_node["is_nullable"] = p.is_nullable ? "true" : "false";
1011  pg_node["properties"].PushBack();
1012  pg_node["properties"][pg_node["properties"].Size() - 1] = p_node;
1013  }
1014  node["property_groups"].PushBack();
1015  node["property_groups"][node["property_groups"].Size() - 1] = pg_node;
1016  }
1017  if (impl_->version_ != nullptr) {
1018  node["version"] = impl_->version_->ToString();
1019  }
1020  ::Yaml::Serialize(node, dump_string);
1021  } catch (const std::exception& e) {
1022  return Status::Invalid("Failed to dump edge info: ", e.what());
1023  }
1024  return dump_string;
1025 }
1026 
1027 Status EdgeInfo::Save(const std::string& path) const {
1028  std::string no_url_path;
1029  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(path, &no_url_path));
1030  GAR_ASSIGN_OR_RAISE(auto yaml_content, this->Dump());
1031  return fs->WriteValueToFile(yaml_content, no_url_path);
1032 }
1033 
1034 namespace {
1035 
// Returns the directory part of `path`, including the trailing '/'.
//
// For "s3://" URIs everything from the last '?' onwards is treated as a
// query string: it is stripped before the directory is computed and then
// re-appended to the result. When the relevant part contains no '/', `path`
// is returned unchanged.
static std::string PathToDirectory(const std::string& path) {
  if (path.rfind("s3://", 0) == 0) {
    // Keep the position as size_t: a URI without '?' yields npos, and calling
    // substr(npos) would throw std::out_of_range, so the query part is simply
    // empty in that case.
    const size_t query_pos = path.find_last_of('?');
    const bool has_query = query_pos != std::string::npos;
    const std::string location = path.substr(0, query_pos);
    const std::string query = has_query ? path.substr(query_pos) : "";
    const size_t last_slash_idx = location.rfind('/');
    if (last_slash_idx != std::string::npos) {
      return location.substr(0, last_slash_idx + 1) + query;
    }
    return path;
  }
  const size_t last_slash_idx = path.rfind('/');
  if (last_slash_idx != std::string::npos) {
    return path.substr(0, last_slash_idx + 1);  // +1 to include the slash
  }
  return path;
}
1053 
1054 static Result<std::shared_ptr<GraphInfo>> ConstructGraphInfo(
1055  std::shared_ptr<Yaml> graph_meta, const std::string& default_name,
1056  const std::string& default_prefix, const std::shared_ptr<FileSystem> fs,
1057  const std::string& no_url_path) {
1058  std::string name = default_name;
1059  std::string prefix = default_prefix;
1060  if (!graph_meta->operator[]("name").IsNone()) {
1061  name = graph_meta->operator[]("name").As<std::string>();
1062  }
1063  if (!graph_meta->operator[]("prefix").IsNone()) {
1064  prefix = graph_meta->operator[]("prefix").As<std::string>();
1065  }
1066  std::shared_ptr<const InfoVersion> version = nullptr;
1067  if (!graph_meta->operator[]("version").IsNone()) {
1068  GAR_ASSIGN_OR_RAISE(
1069  version, InfoVersion::Parse(
1070  graph_meta->operator[]("version").As<std::string>()));
1071  }
1072  std::unordered_map<std::string, std::string> extra_info;
1073  if (!graph_meta->operator[]("extra_info").IsNone()) {
1074  auto& extra_info_node = graph_meta->operator[]("extra_info");
1075  for (auto it = extra_info_node.Begin(); it != extra_info_node.End(); it++) {
1076  auto node = (*it).second;
1077  auto key = node["key"].As<std::string>();
1078  auto value = node["value"].As<std::string>();
1079  extra_info.emplace(key, value);
1080  }
1081  }
1082 
1083  VertexInfoVector vertex_infos;
1084  EdgeInfoVector edge_infos;
1085  const auto& vertices = graph_meta->operator[]("vertices");
1086  if (vertices.IsSequence()) {
1087  for (auto it = vertices.Begin(); it != vertices.End(); it++) {
1088  std::string vertex_meta_file =
1089  no_url_path + (*it).second.As<std::string>();
1090  GAR_ASSIGN_OR_RAISE(auto input,
1091  fs->ReadFileToValue<std::string>(vertex_meta_file));
1092  GAR_ASSIGN_OR_RAISE(auto vertex_meta, Yaml::Load(input));
1093  GAR_ASSIGN_OR_RAISE(auto vertex_info, VertexInfo::Load(vertex_meta));
1094  vertex_infos.push_back(vertex_info);
1095  }
1096  }
1097  const auto& edges = graph_meta->operator[]("edges");
1098  if (edges.IsSequence()) {
1099  for (auto it = edges.Begin(); it != edges.End(); it++) {
1100  std::string edge_meta_file = no_url_path + (*it).second.As<std::string>();
1101  GAR_ASSIGN_OR_RAISE(auto input,
1102  fs->ReadFileToValue<std::string>(edge_meta_file));
1103  GAR_ASSIGN_OR_RAISE(auto edge_meta, Yaml::Load(input));
1104  GAR_ASSIGN_OR_RAISE(auto edge_info, EdgeInfo::Load(edge_meta));
1105  edge_infos.push_back(edge_info);
1106  }
1107  }
1108 
1109  std::vector<std::string> labels;
1110  if (!graph_meta->operator[]("labels").IsNone()) {
1111  const auto& labels_node = graph_meta->operator[]("labels");
1112  if (labels_node.IsSequence()) {
1113  for (auto it = labels_node.Begin(); it != labels_node.End(); it++) {
1114  labels.push_back((*it).second.As<std::string>());
1115  }
1116  }
1117  }
1118  return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, labels,
1119  prefix, version, extra_info);
1120 }
1121 
1122 } // namespace
1123 
1125  public:
1126  Impl(const std::string& graph_name, VertexInfoVector vertex_infos,
1127  EdgeInfoVector edge_infos, const std::vector<std::string>& labels,
1128  const std::string& prefix, std::shared_ptr<const InfoVersion> version,
1129  const std::unordered_map<std::string, std::string>& extra_info)
1130  : name_(graph_name),
1131  vertex_infos_(std::move(vertex_infos)),
1132  edge_infos_(std::move(edge_infos)),
1133  labels_(labels),
1134  prefix_(prefix),
1135  version_(std::move(version)),
1136  extra_info_(extra_info) {
1137  for (size_t i = 0; i < vertex_infos_.size(); i++) {
1138  if (vertex_infos_[i] != nullptr) {
1139  vtype_to_index_[vertex_infos_[i]->GetType()] = i;
1140  }
1141  }
1142  for (size_t i = 0; i < edge_infos_.size(); i++) {
1143  if (edge_infos_[i] != nullptr) {
1144  std::string edge_key = ConcatEdgeTriple(edge_infos_[i]->GetSrcType(),
1145  edge_infos_[i]->GetEdgeType(),
1146  edge_infos_[i]->GetDstType());
1147  etype_to_index_[edge_key] = i;
1148  }
1149  }
1150  }
1151 
1152  bool is_validated() const noexcept {
1153  if (name_.empty() || prefix_.empty()) {
1154  return false;
1155  }
1156  for (const auto& v : vertex_infos_) {
1157  if (!v || !v->IsValidated()) {
1158  return false;
1159  }
1160  }
1161  for (const auto& e : edge_infos_) {
1162  if (!e || !e->IsValidated()) {
1163  return false;
1164  }
1165  }
1166  if (vertex_infos_.size() != vtype_to_index_.size() ||
1167  edge_infos_.size() != etype_to_index_.size()) {
1168  return false;
1169  }
1170  return true;
1171  }
1172 
1173  std::string name_;
1174  VertexInfoVector vertex_infos_;
1175  EdgeInfoVector edge_infos_;
1176  std::vector<std::string> labels_;
1177  std::string prefix_;
1178  std::shared_ptr<const InfoVersion> version_;
1179  std::unordered_map<std::string, std::string> extra_info_;
1180  std::unordered_map<std::string, int> vtype_to_index_;
1181  std::unordered_map<std::string, int> etype_to_index_;
1182 };
1183 
1185  const std::string& graph_name, VertexInfoVector vertex_infos,
1186  EdgeInfoVector edge_infos, const std::vector<std::string>& labels,
1187  const std::string& prefix, std::shared_ptr<const InfoVersion> version,
1188  const std::unordered_map<std::string, std::string>& extra_info)
1189  : impl_(new Impl(graph_name, std::move(vertex_infos), std::move(edge_infos),
1190  labels, prefix, version, extra_info)) {}
1191 
1192 GraphInfo::~GraphInfo() = default;
1193 
1194 const std::string& GraphInfo::GetName() const { return impl_->name_; }
1195 
1196 const std::vector<std::string>& GraphInfo::GetLabels() const {
1197  return impl_->labels_;
1198 }
1199 
1200 const std::string& GraphInfo::GetPrefix() const { return impl_->prefix_; }
1201 
1202 const std::shared_ptr<const InfoVersion>& GraphInfo::version() const {
1203  return impl_->version_;
1204 }
1205 
1206 const std::unordered_map<std::string, std::string>& GraphInfo::GetExtraInfo()
1207  const {
1208  return impl_->extra_info_;
1209 }
1210 
1211 std::shared_ptr<VertexInfo> GraphInfo::GetVertexInfo(
1212  const std::string& type) const {
1213  int i = GetVertexInfoIndex(type);
1214  return i == -1 ? nullptr : impl_->vertex_infos_[i];
1215 }
1216 
1217 int GraphInfo::GetVertexInfoIndex(const std::string& type) const {
1218  return LookupKeyIndex(impl_->vtype_to_index_, type);
1219 }
1220 
1221 std::shared_ptr<EdgeInfo> GraphInfo::GetEdgeInfo(
1222  const std::string& src_type, const std::string& edge_type,
1223  const std::string& dst_type) const {
1224  int i = GetEdgeInfoIndex(src_type, edge_type, dst_type);
1225  return i == -1 ? nullptr : impl_->edge_infos_[i];
1226 }
1227 
1228 int GraphInfo::GetEdgeInfoIndex(const std::string& src_type,
1229  const std::string& edge_type,
1230  const std::string& dst_type) const {
1231  std::string edge_key = ConcatEdgeTriple(src_type, edge_type, dst_type);
1232  return LookupKeyIndex(impl_->etype_to_index_, edge_key);
1233 }
1234 
1236  return static_cast<int>(impl_->vertex_infos_.size());
1237 }
1238 
1240  return static_cast<int>(impl_->edge_infos_.size());
1241 }
1242 
1243 const std::shared_ptr<VertexInfo> GraphInfo::GetVertexInfoByIndex(
1244  int index) const {
1245  if (index < 0 || index >= static_cast<int>(impl_->vertex_infos_.size())) {
1246  return nullptr;
1247  }
1248  return impl_->vertex_infos_[index];
1249 }
1250 
1251 const std::shared_ptr<EdgeInfo> GraphInfo::GetEdgeInfoByIndex(int index) const {
1252  if (index < 0 || index >= static_cast<int>(impl_->edge_infos_.size())) {
1253  return nullptr;
1254  }
1255  return impl_->edge_infos_[index];
1256 }
1257 
1258 const VertexInfoVector& GraphInfo::GetVertexInfos() const {
1259  return impl_->vertex_infos_;
1260 }
1261 
1262 const EdgeInfoVector& GraphInfo::GetEdgeInfos() const {
1263  return impl_->edge_infos_;
1264 }
1265 
1266 bool GraphInfo::IsValidated() const { return impl_->is_validated(); }
1267 
1268 Result<std::shared_ptr<GraphInfo>> GraphInfo::AddVertex(
1269  std::shared_ptr<VertexInfo> vertex_info) const {
1270  if (vertex_info == nullptr) {
1271  return Status::Invalid("vertex info is nullptr");
1272  }
1273  if (GetVertexInfoIndex(vertex_info->GetType()) != -1) {
1274  return Status::Invalid("vertex info already exists");
1275  }
1276  return std::make_shared<GraphInfo>(
1277  impl_->name_, AddVectorElement(impl_->vertex_infos_, vertex_info),
1278  impl_->edge_infos_, impl_->labels_, impl_->prefix_, impl_->version_);
1279 }
1280 
1281 Result<std::shared_ptr<GraphInfo>> GraphInfo::AddEdge(
1282  std::shared_ptr<EdgeInfo> edge_info) const {
1283  if (edge_info == nullptr) {
1284  return Status::Invalid("edge info is nullptr");
1285  }
1286  if (GetEdgeInfoIndex(edge_info->GetSrcType(), edge_info->GetEdgeType(),
1287  edge_info->GetDstType()) != -1) {
1288  return Status::Invalid("edge info already exists");
1289  }
1290  return std::make_shared<GraphInfo>(
1291  impl_->name_, impl_->vertex_infos_,
1292  AddVectorElement(impl_->edge_infos_, edge_info), impl_->labels_,
1293  impl_->prefix_, impl_->version_);
1294 }
1295 
1296 std::shared_ptr<GraphInfo> CreateGraphInfo(
1297  const std::string& name, const VertexInfoVector& vertex_infos,
1298  const EdgeInfoVector& edge_infos, const std::vector<std::string>& labels,
1299  const std::string& prefix, std::shared_ptr<const InfoVersion> version,
1300  const std::unordered_map<std::string, std::string>& extra_info) {
1301  if (name.empty()) {
1302  return nullptr;
1303  }
1304  return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, labels,
1305  prefix, version, extra_info);
1306 }
1307 
1308 Result<std::shared_ptr<GraphInfo>> GraphInfo::Load(const std::string& path) {
1309  std::string no_url_path;
1310  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(path, &no_url_path));
1311  GAR_ASSIGN_OR_RAISE(auto yaml_content,
1312  fs->ReadFileToValue<std::string>(no_url_path));
1313  GAR_ASSIGN_OR_RAISE(auto graph_meta, Yaml::Load(yaml_content));
1314  std::string default_name = "graph";
1315  std::string default_prefix = PathToDirectory(path);
1316  no_url_path = PathToDirectory(no_url_path);
1317  return ConstructGraphInfo(graph_meta, default_name, default_prefix, fs,
1318  no_url_path);
1319 }
1320 
1321 Result<std::shared_ptr<GraphInfo>> GraphInfo::Load(
1322  const std::string& input, const std::string& relative_location) {
1323  GAR_ASSIGN_OR_RAISE(auto graph_meta, Yaml::Load(input));
1324  std::string default_name = "graph";
1325  std::string default_prefix =
1326  relative_location; // default chunk file prefix is relative location
1327  std::string no_url_path;
1328  GAR_ASSIGN_OR_RAISE(auto fs,
1329  FileSystemFromUriOrPath(relative_location, &no_url_path));
1330  return ConstructGraphInfo(graph_meta, default_name, default_prefix, fs,
1331  no_url_path);
1332 }
1333 
1334 Result<std::string> GraphInfo::Dump() const {
1335  if (!IsValidated()) {
1336  return Status::Invalid("The graph info is not validated.");
1337  }
1338  ::Yaml::Node node;
1339  std::string dump_string;
1340  try {
1341  node["name"] = impl_->name_;
1342  node["prefix"] = impl_->prefix_;
1343  node["vertices"];
1344  node["edges"];
1345  for (const auto& vertex : GetVertexInfos()) {
1346  node["vertices"].PushBack();
1347  node["vertices"][node["vertices"].Size() - 1] =
1348  vertex->GetType() + ".vertex.yaml";
1349  }
1350  for (const auto& edge : GetEdgeInfos()) {
1351  node["edges"].PushBack();
1352  node["edges"][node["edges"].Size() - 1] =
1353  ConcatEdgeTriple(edge->GetSrcType(), edge->GetEdgeType(),
1354  edge->GetDstType()) +
1355  ".edge.yaml";
1356  }
1357  if (impl_->labels_.size() > 0) {
1358  node["labels"];
1359  for (const auto& label : impl_->labels_) {
1360  node["labels"].PushBack();
1361  node["labels"][node["labels"].Size() - 1] = label;
1362  }
1363  }
1364  if (impl_->version_ != nullptr) {
1365  node["version"] = impl_->version_->ToString();
1366  }
1367  if (impl_->extra_info_.size() > 0) {
1368  node["extra_info"];
1369  for (const auto& pair : impl_->extra_info_) {
1370  ::Yaml::Node extra_info_node;
1371  extra_info_node["key"] = pair.first;
1372  extra_info_node["value"] = pair.second;
1373  node["extra_info"].PushBack();
1374  node["extra_info"][node["extra_info"].Size() - 1] = extra_info_node;
1375  }
1376  }
1377  ::Yaml::Serialize(node, dump_string);
1378  } catch (const std::exception& e) {
1379  return Status::Invalid("Failed to dump graph info: ", e.what());
1380  }
1381  return dump_string;
1382 }
1383 
1384 Status GraphInfo::Save(const std::string& path) const {
1385  std::string no_url_path;
1386  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(path, &no_url_path));
1387  GAR_ASSIGN_OR_RAISE(auto yaml_content, this->Dump());
1388  return fs->WriteValueToFile(yaml_content, no_url_path);
1389 }
1390 
1391 } // namespace graphar
bool IsValidated() const
Definition: graph_info.cc:176
AdjacentList(AdjListType type, FileType file_type, const std::string &prefix="")
Definition: graph_info.cc:168
const std::string & GetEdgeType() const
Definition: graph_info.cc:658
static Result< std::shared_ptr< EdgeInfo > > Load(std::shared_ptr< Yaml > yaml)
Definition: graph_info.cc:886
Result< std::string > GetAdjListFilePath(IdType vertex_chunk_index, IdType edge_chunk_index, AdjListType adj_list_type) const
Get the file path of adj list topology chunk.
Definition: graph_info.cc:746
Status Save(const std::string &file_name) const
Definition: graph_info.cc:1027
Result< std::shared_ptr< DataType > > GetPropertyType(const std::string &property_name) const
Definition: graph_info.cc:807
bool IsValidated() const
Definition: graph_info.cc:867
const std::string & GetPrefix() const
Definition: graph_info.cc:668
Result< std::string > GetEdgesNumFilePath(IdType vertex_chunk_index, AdjListType adj_list_type) const
Definition: graph_info.cc:738
Result< std::string > GetPropertyGroupPathPrefix(const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type) const
Definition: graph_info.cc:795
bool IsPrimaryKey(const std::string &property_name) const
Definition: graph_info.cc:816
Result< std::string > GetAdjListOffsetFilePath(IdType vertex_chunk_index, AdjListType adj_list_type) const
Get the adjacency list offset chunk file path of vertex chunk the offset chunks is aligned with the v...
Definition: graph_info.cc:764
IdType GetChunkSize() const
Definition: graph_info.cc:662
bool HasProperty(const std::string &property_name) const
Returns whether the edge info contains the given property.
Definition: graph_info.cc:681
Result< std::shared_ptr< EdgeInfo > > AddAdjacentList(std::shared_ptr< AdjacentList > adj_list) const
Definition: graph_info.cc:832
Result< std::string > Dump() const noexcept
Definition: graph_info.cc:972
bool IsNullableKey(const std::string &property_name) const
Definition: graph_info.cc:824
std::shared_ptr< PropertyGroup > GetPropertyGroupByIndex(int index) const
Get the property group at the specified index.
Definition: graph_info.cc:722
bool HasPropertyGroup(const std::shared_ptr< PropertyGroup > &property_group) const
Returns whether the edge info contains the given property group.
Definition: graph_info.cc:686
const std::shared_ptr< const InfoVersion > & version() const
Definition: graph_info.cc:672
Result< std::shared_ptr< EdgeInfo > > AddPropertyGroup(std::shared_ptr< PropertyGroup > property_group) const
Definition: graph_info.cc:848
const std::string & GetSrcType() const
Definition: graph_info.cc:656
bool HasAdjacentListType(AdjListType adj_list_type) const
Definition: graph_info.cc:676
Result< std::string > GetOffsetPathPrefix(AdjListType adj_list_type) const
Definition: graph_info.cc:772
bool IsDirected() const
Definition: graph_info.cc:670
Result< std::string > GetAdjListPathPrefix(AdjListType adj_list_type) const
Get the path prefix of the adjacency list topology chunk for the given adjacency list type.
Definition: graph_info.cc:756
EdgeInfo(const std::string &src_type, const std::string &edge_type, const std::string &dst_type, IdType chunk_size, IdType src_chunk_size, IdType dst_chunk_size, bool directed, const AdjacentListVector &adjacent_lists, const PropertyGroupVector &property_groups, const std::string &prefix="", std::shared_ptr< const InfoVersion > version=nullptr)
Construct an EdgeInfo object with the given information and property groups.
Definition: graph_info.cc:643
IdType GetDstChunkSize() const
Definition: graph_info.cc:666
Result< std::string > GetPropertyFilePath(const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type, IdType vertex_chunk_index, IdType edge_chunk_index) const
Get the chunk file path of adj list property group the property group chunks is aligned with the adj ...
Definition: graph_info.cc:780
const std::string & GetDstType() const
Definition: graph_info.cc:660
IdType GetSrcChunkSize() const
Definition: graph_info.cc:664
const PropertyGroupVector & GetPropertyGroups() const
Get the property groups.
Definition: graph_info.cc:712
Result< std::string > GetVerticesNumFilePath(AdjListType adj_list_type) const
Get the file path for the number of vertices.
Definition: graph_info.cc:730
std::shared_ptr< PropertyGroup > GetPropertyGroup(const std::string &property) const
Get the property group containing the given property.
Definition: graph_info.cc:716
int PropertyGroupNum() const
Get the number of property groups.
Definition: graph_info.cc:708
int GetVertexInfoIndex(const std::string &type) const
Get the vertex info index with the given type.
Definition: graph_info.cc:1217
const EdgeInfoVector & GetEdgeInfos() const
Get the edge infos of graph info.
Definition: graph_info.cc:1262
GraphInfo(const std::string &graph_name, VertexInfoVector vertex_infos, EdgeInfoVector edge_infos, const std::vector< std::string > &labels={}, const std::string &prefix="./", std::shared_ptr< const InfoVersion > version=nullptr, const std::unordered_map< std::string, std::string > &extra_info={})
Constructs a GraphInfo instance.
Definition: graph_info.cc:1184
Status Save(const std::string &path) const
Definition: graph_info.cc:1384
const std::shared_ptr< VertexInfo > GetVertexInfoByIndex(int index) const
Get the vertex info at the specified index.
Definition: graph_info.cc:1243
Result< std::shared_ptr< GraphInfo > > AddVertex(std::shared_ptr< VertexInfo > vertex_info) const
Adds a vertex info to the GraphInfo instance and returns a new GraphInfo.
Definition: graph_info.cc:1268
const std::string & GetPrefix() const
Get the absolute path prefix of the chunk files.
Definition: graph_info.cc:1200
static Result< std::shared_ptr< GraphInfo > > Load(const std::string &path)
Loads the input file as a GraphInfo instance.
Definition: graph_info.cc:1308
bool IsValidated() const
Definition: graph_info.cc:1266
Result< std::string > Dump() const
Definition: graph_info.cc:1334
std::shared_ptr< EdgeInfo > GetEdgeInfo(const std::string &src_type, const std::string &edge_type, const std::string &dst_type) const
Get the edge info with the given source vertex type, edge type, and destination vertex type.
Definition: graph_info.cc:1221
int EdgeInfoNum() const
Get the number of edge infos.
Definition: graph_info.cc:1239
std::shared_ptr< VertexInfo > GetVertexInfo(const std::string &type) const
Get the vertex info with the given type.
Definition: graph_info.cc:1211
const std::vector< std::string > & GetLabels() const
Get the vertex labels of the graph.
Definition: graph_info.cc:1196
Result< std::shared_ptr< GraphInfo > > AddEdge(std::shared_ptr< EdgeInfo > edge_info) const
Adds an edge info to the GraphInfo instance and returns a new GraphInfo.
Definition: graph_info.cc:1281
const VertexInfoVector & GetVertexInfos() const
Get the vertex infos of graph info.
Definition: graph_info.cc:1258
const std::unordered_map< std::string, std::string > & GetExtraInfo() const
Get the extra metadata of the graph info object.
Definition: graph_info.cc:1206
int GetEdgeInfoIndex(const std::string &src_type, const std::string &edge_type, const std::string &dst_type) const
Get the edge info index with the given source vertex type, edge type, and destination type.
Definition: graph_info.cc:1228
int VertexInfoNum() const
Get the number of vertex infos.
Definition: graph_info.cc:1235
const std::string & GetName() const
Get the name of the graph.
Definition: graph_info.cc:1194
const std::shared_ptr< const InfoVersion > & version() const
Get the version info of the graph info object.
Definition: graph_info.cc:1202
const std::shared_ptr< EdgeInfo > GetEdgeInfoByIndex(int index) const
Get the edge info at the specified index.
Definition: graph_info.cc:1251
static Result< std::shared_ptr< const InfoVersion > > Parse(const std::string &str) noexcept
bool IsValidated() const
Definition: graph_info.cc:118
PropertyGroup(const std::vector< Property > &properties, FileType file_type, const std::string &prefix="")
Definition: graph_info.cc:94
const std::vector< Property > & GetProperties() const
Definition: graph_info.cc:105
Status outcome object (success or error)
Definition: status.h:123
static Status YamlError(Args &&... args)
Definition: status.h:204
static Status Invalid(Args &&... args)
Definition: status.h:188
const std::string & GetType() const
Definition: graph_info.cc:275
std::shared_ptr< PropertyGroup > GetPropertyGroup(const std::string &property_name) const
Definition: graph_info.cc:314
Result< std::string > GetPathPrefix(std::shared_ptr< PropertyGroup > property_group) const
Definition: graph_info.cc:298
const std::vector< std::string > & GetLabels() const
Definition: graph_info.cc:281
IdType GetChunkSize() const
Definition: graph_info.cc:277
const std::shared_ptr< const InfoVersion > & version() const
Definition: graph_info.cc:285
Result< std::shared_ptr< VertexInfo > > AddPropertyGroup(std::shared_ptr< PropertyGroup > property_group) const
Definition: graph_info.cc:384
bool HasProperty(const std::string &property_name) const
Definition: graph_info.cc:348
bool HasPropertyGroup(const std::shared_ptr< PropertyGroup > &property_group) const
Definition: graph_info.cc:353
Result< std::string > GetFilePath(std::shared_ptr< PropertyGroup > property_group, IdType chunk_index) const
Definition: graph_info.cc:289
bool IsPrimaryKey(const std::string &property_name) const
Definition: graph_info.cc:332
bool IsValidated() const
Definition: graph_info.cc:401
int PropertyGroupNum() const
Definition: graph_info.cc:310
const std::string & GetPrefix() const
Definition: graph_info.cc:279
Result< std::string > GetVerticesNumFilePath() const
Definition: graph_info.cc:306
Result< std::shared_ptr< DataType > > GetPropertyType(const std::string &property_name) const
Definition: graph_info.cc:366
VertexInfo(const std::string &type, IdType chunk_size, const PropertyGroupVector &property_groups, const std::vector< std::string > &labels={}, const std::string &prefix="", std::shared_ptr< const InfoVersion > version=nullptr)
Definition: graph_info.cc:265
bool IsNullableKey(const std::string &property_name) const
Definition: graph_info.cc:340
Result< std::string > Dump() const noexcept
Definition: graph_info.cc:482
static Result< std::shared_ptr< VertexInfo > > Load(std::shared_ptr< Yaml > yaml)
Definition: graph_info.cc:415
std::shared_ptr< PropertyGroup > GetPropertyGroupByIndex(int index) const
Definition: graph_info.cc:320
Status Save(const std::string &file_name) const
Definition: graph_info.cc:530
const PropertyGroupVector & GetPropertyGroups() const
Definition: graph_info.cc:328
static Result< std::shared_ptr< Yaml > > Load(const std::string &input)
Definition: yaml.cc:34