Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
graph_info.cc
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #include <unordered_set>
21 #include <utility>
22 
23 #include "mini-yaml/yaml/Yaml.hpp"
24 
25 #include "graphar/filesystem.h"
26 #include "graphar/graph_info.h"
27 #include "graphar/result.h"
28 #include "graphar/types.h"
29 #include "graphar/version_parser.h"
30 #include "graphar/yaml.h"
31 
32 namespace graphar {
33 
// Returns Status::KeyError from the enclosing function when
// HasAdjacentListType(adj_list_type) is false. The do/while(false) wrapper
// makes the expansion a single statement that takes a trailing semicolon.
#define CHECK_HAS_ADJ_LIST_TYPE(adj_list_type)                         \
  do {                                                                 \
    if (!HasAdjacentListType(adj_list_type)) {                         \
      return Status::KeyError(                                         \
          "Adjacency list type: ", AdjListTypeToString(adj_list_type), \
          " is not found in edge info.");                              \
    }                                                                  \
  } while (false)
42 
43 namespace {
44 
45 std::string ConcatEdgeTriple(const std::string& src_label,
46  const std::string& edge_label,
47  const std::string& dst_label) {
48  return src_label + REGULAR_SEPARATOR + edge_label + REGULAR_SEPARATOR +
49  dst_label;
50 }
51 
// Looks `type` up in `key_to_index` and returns the mapped index, or the
// compile-time NotFoundValue (default -1) when the key is absent.
template <int NotFoundValue = -1>
int LookupKeyIndex(const std::unordered_map<std::string, int>& key_to_index,
                   const std::string& type) {
  const auto entry = key_to_index.find(type);
  return entry != key_to_index.end() ? entry->second : NotFoundValue;
}
61 
// Returns a new vector holding a copy of every element of `values` followed by
// `new_element`. The input vector is left untouched.
template <typename T>
std::vector<T> AddVectorElement(const std::vector<T>& values, T new_element) {
  std::vector<T> extended;
  extended.reserve(values.size() + 1);
  extended.insert(extended.end(), values.begin(), values.end());
  extended.push_back(std::move(new_element));
  return extended;
}
72 
// Concatenates path components into a single directory-style path in which
// every component is terminated by exactly one appended '/' if it does not
// already end with one.
//
// Empty components are skipped: calling back() on an empty std::string is
// undefined behaviour, and an empty component contributes nothing to a path.
// An empty `paths` (or all-empty components) yields an empty string.
std::string BuildPath(const std::vector<std::string>& paths) {
  std::string path;
  for (const auto& p : paths) {
    if (p.empty()) {
      continue;
    }
    path += p;
    if (p.back() != '/') {
      path += '/';
    }
  }
  return path;
}
84 } // namespace
85 
86 bool operator==(const Property& lhs, const Property& rhs) {
87  return (lhs.name == rhs.name) && (lhs.type == rhs.type) &&
88  (lhs.is_primary == rhs.is_primary) &&
89  (lhs.is_nullable == rhs.is_nullable);
90 }
91 
92 PropertyGroup::PropertyGroup(const std::vector<Property>& properties,
93  FileType file_type, const std::string& prefix)
94  : properties_(properties), file_type_(file_type), prefix_(prefix) {
95  if (prefix_.empty() && !properties_.empty()) {
96  for (const auto& p : properties_) {
97  prefix_ += p.name + REGULAR_SEPARATOR;
98  }
99  prefix_.back() = '/';
100  }
101 }
102 
103 const std::vector<Property>& PropertyGroup::GetProperties() const {
104  return properties_;
105 }
106 
107 bool PropertyGroup::HasProperty(const std::string& property_name) const {
108  for (const auto& p : properties_) {
109  if (p.name == property_name) {
110  return true;
111  }
112  }
113  return false;
114 }
115 
117  if (prefix_.empty() ||
118  (file_type_ != FileType::CSV && file_type_ != FileType::PARQUET &&
119  file_type_ != FileType::ORC)) {
120  return false;
121  }
122  if (properties_.empty()) {
123  return false;
124  }
125  std::unordered_set<std::string> check_property_unique_set;
126  for (const auto& p : properties_) {
127  if (p.name.empty() || p.type == nullptr) {
128  return false;
129  }
130  if (check_property_unique_set.find(p.name) !=
131  check_property_unique_set.end()) {
132  return false;
133  } else {
134  check_property_unique_set.insert(p.name);
135  }
136  // TODO(@acezen): support list type in csv file
137  if (p.type->id() == Type::LIST && file_type_ == FileType::CSV) {
138  // list type is not supported in csv file
139  return false;
140  }
141  }
142  return true;
143 }
144 
145 std::shared_ptr<PropertyGroup> CreatePropertyGroup(
146  const std::vector<Property>& properties, FileType file_type,
147  const std::string& prefix) {
148  if (properties.empty()) {
149  // empty property group is not allowed
150  return nullptr;
151  }
152  return std::make_shared<PropertyGroup>(properties, file_type, prefix);
153 }
154 
155 bool operator==(const PropertyGroup& lhs, const PropertyGroup& rhs) {
156  return (lhs.GetPrefix() == rhs.GetPrefix()) &&
157  (lhs.GetFileType() == rhs.GetFileType()) &&
158  (lhs.GetProperties() == rhs.GetProperties());
159 }
160 
161 AdjacentList::AdjacentList(AdjListType type, FileType file_type,
162  const std::string& prefix)
163  : type_(type), file_type_(file_type), prefix_(prefix) {
164  if (prefix_.empty()) {
165  prefix_ = std::string(AdjListTypeToString(type_)) + "/";
166  }
167 }
168 
170  if (type_ != AdjListType::unordered_by_source &&
171  type_ != AdjListType::ordered_by_source &&
172  type_ != AdjListType::unordered_by_dest &&
173  type_ != AdjListType::ordered_by_dest) {
174  return false;
175  }
176  if (prefix_.empty() ||
177  (file_type_ != FileType::CSV && file_type_ != FileType::PARQUET &&
178  file_type_ != FileType::ORC)) {
179  return false;
180  }
181  return true;
182 }
183 
184 std::shared_ptr<AdjacentList> CreateAdjacentList(AdjListType type,
185  FileType file_type,
186  const std::string& prefix) {
187  return std::make_shared<AdjacentList>(type, file_type, prefix);
188 }
189 
191  public:
192  Impl(const std::string& label, IdType chunk_size, const std::string& prefix,
193  const PropertyGroupVector& property_groups,
194  std::shared_ptr<const InfoVersion> version)
195  : label_(label),
196  chunk_size_(chunk_size),
197  property_groups_(std::move(property_groups)),
198  prefix_(prefix),
199  version_(std::move(version)) {
200  if (prefix_.empty()) {
201  prefix_ = label_ + "/"; // default prefix
202  }
203  for (size_t i = 0; i < property_groups_.size(); i++) {
204  const auto& pg = property_groups_[i];
205  if (!pg) {
206  continue;
207  }
208  for (const auto& p : pg->GetProperties()) {
209  property_name_to_index_.emplace(p.name, i);
210  property_name_to_primary_.emplace(p.name, p.is_primary);
211  property_name_to_nullable_.emplace(p.name, p.is_nullable);
212  property_name_to_type_.emplace(p.name, p.type);
213  }
214  }
215  }
216 
217  bool is_validated() const noexcept {
218  if (label_.empty() || chunk_size_ <= 0 || prefix_.empty()) {
219  return false;
220  }
221  std::unordered_set<std::string> check_property_unique_set;
222  for (const auto& pg : property_groups_) {
223  // check if property group is validated
224  if (!pg || !pg->IsValidated()) {
225  return false;
226  }
227  // check if property name is unique in all property groups
228  for (const auto& p : pg->GetProperties()) {
229  if (check_property_unique_set.find(p.name) !=
230  check_property_unique_set.end()) {
231  return false;
232  } else {
233  check_property_unique_set.insert(p.name);
234  }
235  }
236  }
237 
238  return true;
239  }
240 
241  std::string label_;
242  IdType chunk_size_;
243  PropertyGroupVector property_groups_;
244  std::string prefix_;
245  std::shared_ptr<const InfoVersion> version_;
246  std::unordered_map<std::string, int> property_name_to_index_;
247  std::unordered_map<std::string, bool> property_name_to_primary_;
248  std::unordered_map<std::string, bool> property_name_to_nullable_;
249  std::unordered_map<std::string, std::shared_ptr<DataType>>
250  property_name_to_type_;
251 };
252 
253 VertexInfo::VertexInfo(const std::string& label, IdType chunk_size,
254  const PropertyGroupVector& property_groups,
255  const std::string& prefix,
256  std::shared_ptr<const InfoVersion> version)
257  : impl_(new Impl(label, chunk_size, prefix, property_groups, version)) {}
258 
259 VertexInfo::~VertexInfo() = default;
260 
261 const std::string& VertexInfo::GetLabel() const { return impl_->label_; }
262 
263 IdType VertexInfo::GetChunkSize() const { return impl_->chunk_size_; }
264 
265 const std::string& VertexInfo::GetPrefix() const { return impl_->prefix_; }
266 
267 const std::shared_ptr<const InfoVersion>& VertexInfo::version() const {
268  return impl_->version_;
269 }
270 
271 Result<std::string> VertexInfo::GetFilePath(
272  std::shared_ptr<PropertyGroup> property_group, IdType chunk_index) const {
273  if (property_group == nullptr) {
274  return Status::Invalid("property group is nullptr");
275  }
276  return BuildPath({impl_->prefix_, property_group->GetPrefix()}) + "chunk" +
277  std::to_string(chunk_index);
278 }
279 
280 Result<std::string> VertexInfo::GetPathPrefix(
281  std::shared_ptr<PropertyGroup> property_group) const {
282  if (property_group == nullptr) {
283  return Status::Invalid("property group is nullptr");
284  }
285  return BuildPath({impl_->prefix_, property_group->GetPrefix()});
286 }
287 
288 Result<std::string> VertexInfo::GetVerticesNumFilePath() const {
289  return BuildPath({impl_->prefix_}) + "vertex_count";
290 }
291 
293  return static_cast<int>(impl_->property_groups_.size());
294 }
295 
296 std::shared_ptr<PropertyGroup> VertexInfo::GetPropertyGroup(
297  const std::string& property_name) const {
298  int i = LookupKeyIndex(impl_->property_name_to_index_, property_name);
299  return i == -1 ? nullptr : impl_->property_groups_[i];
300 }
301 
302 std::shared_ptr<PropertyGroup> VertexInfo::GetPropertyGroupByIndex(
303  int index) const {
304  if (index < 0 || index >= static_cast<int>(impl_->property_groups_.size())) {
305  return nullptr;
306  }
307  return impl_->property_groups_[index];
308 }
309 
310 const PropertyGroupVector& VertexInfo::GetPropertyGroups() const {
311  return impl_->property_groups_;
312 }
313 
314 bool VertexInfo::IsPrimaryKey(const std::string& property_name) const {
315  auto it = impl_->property_name_to_primary_.find(property_name);
316  if (it == impl_->property_name_to_primary_.end()) {
317  return false;
318  }
319  return it->second;
320 }
321 
322 bool VertexInfo::IsNullableKey(const std::string& property_name) const {
323  auto it = impl_->property_name_to_nullable_.find(property_name);
324  if (it == impl_->property_name_to_nullable_.end()) {
325  return false;
326  }
327  return it->second;
328 }
329 
330 bool VertexInfo::HasProperty(const std::string& property_name) const {
331  return impl_->property_name_to_index_.find(property_name) !=
332  impl_->property_name_to_index_.end();
333 }
334 
336  const std::shared_ptr<PropertyGroup>& property_group) const {
337  if (property_group == nullptr) {
338  return false;
339  }
340  for (const auto& pg : impl_->property_groups_) {
341  if (*pg == *property_group) {
342  return true;
343  }
344  }
345  return false;
346 }
347 
348 Result<std::shared_ptr<DataType>> VertexInfo::GetPropertyType(
349  const std::string& property_name) const {
350  auto it = impl_->property_name_to_type_.find(property_name);
351  if (it == impl_->property_name_to_type_.end()) {
352  return Status::Invalid("property name not found: ", property_name);
353  }
354  return it->second;
355 }
356 
357 Result<std::shared_ptr<VertexInfo>> VertexInfo::AddPropertyGroup(
358  std::shared_ptr<PropertyGroup> property_group) const {
359  if (property_group == nullptr) {
360  return Status::Invalid("property group is nullptr");
361  }
362  for (const auto& property : property_group->GetProperties()) {
363  if (HasProperty(property.name)) {
364  return Status::Invalid("property in the property group already exists: ",
365  property.name);
366  }
367  }
368  return std::make_shared<VertexInfo>(
369  impl_->label_, impl_->chunk_size_,
370  AddVectorElement(impl_->property_groups_, property_group), impl_->prefix_,
371  impl_->version_);
372 }
373 
374 bool VertexInfo::IsValidated() const { return impl_->is_validated(); }
375 
376 std::shared_ptr<VertexInfo> CreateVertexInfo(
377  const std::string& label, IdType chunk_size,
378  const PropertyGroupVector& property_groups, const std::string& prefix,
379  std::shared_ptr<const InfoVersion> version) {
380  if (label.empty() || chunk_size <= 0) {
381  return nullptr;
382  }
383  return std::make_shared<VertexInfo>(label, chunk_size, property_groups,
384  prefix, version);
385 }
386 
387 Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(
388  std::shared_ptr<Yaml> yaml) {
389  if (yaml == nullptr) {
390  return Status::Invalid("yaml shared pointer is nullptr");
391  }
392  std::string label = yaml->operator[]("label").As<std::string>();
393  IdType chunk_size =
394  static_cast<IdType>(yaml->operator[]("chunk_size").As<int64_t>());
395  std::string prefix;
396  if (!yaml->operator[]("prefix").IsNone()) {
397  prefix = yaml->operator[]("prefix").As<std::string>();
398  }
399  std::shared_ptr<const InfoVersion> version = nullptr;
400  if (!yaml->operator[]("version").IsNone()) {
401  GAR_ASSIGN_OR_RAISE(
402  version,
403  InfoVersion::Parse(yaml->operator[]("version").As<std::string>()));
404  }
405  PropertyGroupVector property_groups;
406  auto property_groups_node = yaml->operator[]("property_groups");
407  if (!property_groups_node.IsNone()) { // property_groups exist
408  for (auto it = property_groups_node.Begin();
409  it != property_groups_node.End(); it++) {
410  std::string pg_prefix;
411  auto& node = (*it).second;
412  if (!node["prefix"].IsNone()) {
413  pg_prefix = node["prefix"].As<std::string>();
414  }
415  auto file_type = StringToFileType(node["file_type"].As<std::string>());
416  std::vector<Property> property_vec;
417  auto& properties = node["properties"];
418  for (auto iit = properties.Begin(); iit != properties.End(); iit++) {
419  auto& p_node = (*iit).second;
420  auto property_name = p_node["name"].As<std::string>();
421  auto property_type =
422  DataType::TypeNameToDataType(p_node["data_type"].As<std::string>());
423  bool is_primary = p_node["is_primary"].As<bool>();
424  bool is_nullable =
425  p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
426  property_vec.emplace_back(property_name, property_type, is_primary,
427  is_nullable);
428  }
429  property_groups.push_back(
430  std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
431  }
432  }
433  return std::make_shared<VertexInfo>(label, chunk_size, property_groups,
434  prefix, version);
435 }
436 
437 Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(const std::string& input) {
438  GAR_ASSIGN_OR_RAISE(auto yaml, Yaml::Load(input));
439  return VertexInfo::Load(yaml);
440 }
441 
442 Result<std::string> VertexInfo::Dump() const noexcept {
443  if (!IsValidated()) {
444  return Status::Invalid("The vertex info is not validated");
445  }
446  std::string dump_string;
447  ::Yaml::Node node;
448  try {
449  node["label"] = impl_->label_;
450  node["chunk_size"] = std::to_string(impl_->chunk_size_);
451  node["prefix"] = impl_->prefix_;
452  for (const auto& pg : impl_->property_groups_) {
453  ::Yaml::Node pg_node;
454  if (!pg->GetPrefix().empty()) {
455  pg_node["prefix"] = pg->GetPrefix();
456  }
457  pg_node["file_type"] = FileTypeToString(pg->GetFileType());
458  for (const auto& p : pg->GetProperties()) {
459  ::Yaml::Node p_node;
460  p_node["name"] = p.name;
461  p_node["data_type"] = p.type->ToTypeName();
462  p_node["is_primary"] = p.is_primary ? "true" : "false";
463  p_node["is_nullable"] = p.is_nullable ? "true" : "false";
464  pg_node["properties"].PushBack();
465  pg_node["properties"][pg_node["properties"].Size() - 1] = p_node;
466  }
467  node["property_groups"].PushBack();
468  node["property_groups"][node["property_groups"].Size() - 1] = pg_node;
469  }
470  if (impl_->version_ != nullptr) {
471  node["version"] = impl_->version_->ToString();
472  }
473  ::Yaml::Serialize(node, dump_string);
474  } catch (const std::exception& e) {
475  return Status::Invalid("Failed to dump vertex info: ", e.what());
476  }
477  return dump_string;
478 }
479 
480 Status VertexInfo::Save(const std::string& path) const {
481  std::string no_url_path;
482  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(path, &no_url_path));
483  GAR_ASSIGN_OR_RAISE(auto yaml_content, this->Dump());
484  return fs->WriteValueToFile(yaml_content, no_url_path);
485 }
486 
488  public:
489  Impl(const std::string& src_label, const std::string& edge_label,
490  const std::string& dst_label, IdType chunk_size, IdType src_chunk_size,
491  IdType dst_chunk_size, bool directed, const std::string& prefix,
492  const AdjacentListVector& adjacent_lists,
493  const PropertyGroupVector& property_groups,
494  std::shared_ptr<const InfoVersion> version)
495  : src_label_(src_label),
496  edge_label_(edge_label),
497  dst_label_(dst_label),
498  chunk_size_(chunk_size),
499  src_chunk_size_(src_chunk_size),
500  dst_chunk_size_(dst_chunk_size),
501  directed_(directed),
502  prefix_(prefix),
503  adjacent_lists_(std::move(adjacent_lists)),
504  property_groups_(std::move(property_groups)),
505  version_(std::move(version)) {
506  if (prefix_.empty()) {
507  prefix_ = src_label_ + REGULAR_SEPARATOR + edge_label_ +
508  REGULAR_SEPARATOR + dst_label_ + "/"; // default prefix
509  }
510  for (size_t i = 0; i < adjacent_lists_.size(); i++) {
511  if (!adjacent_lists_[i]) {
512  continue;
513  }
514 
515  auto adj_list_type = adjacent_lists_[i]->GetType();
516  adjacent_list_type_to_index_[adj_list_type] = i;
517  }
518  for (size_t i = 0; i < property_groups_.size(); i++) {
519  const auto& pg = property_groups_[i];
520  if (!pg) {
521  continue;
522  }
523  for (const auto& p : pg->GetProperties()) {
524  property_name_to_index_.emplace(p.name, i);
525  property_name_to_primary_.emplace(p.name, p.is_primary);
526  property_name_to_nullable_.emplace(p.name, p.is_nullable);
527  property_name_to_type_.emplace(p.name, p.type);
528  }
529  }
530  }
531 
532  bool is_validated() const noexcept {
533  if (src_label_.empty() || edge_label_.empty() || dst_label_.empty() ||
534  chunk_size_ <= 0 || src_chunk_size_ <= 0 || dst_chunk_size_ <= 0 ||
535  prefix_.empty() || adjacent_lists_.empty()) {
536  return false;
537  }
538 
539  for (const auto& al : adjacent_lists_) {
540  if (!al || !al->IsValidated()) {
541  return false;
542  }
543  }
544 
545  std::unordered_set<std::string> check_property_unique_set;
546  for (const auto& pg : property_groups_) {
547  // check if property group is validated
548  if (!pg || !pg->IsValidated()) {
549  return false;
550  }
551  // check if property name is unique in all property groups
552  for (const auto& p : pg->GetProperties()) {
553  if (check_property_unique_set.find(p.name) !=
554  check_property_unique_set.end()) {
555  return false;
556  } else {
557  check_property_unique_set.insert(p.name);
558  }
559  }
560  }
561  if (adjacent_lists_.size() != adjacent_list_type_to_index_.size()) {
562  return false;
563  }
564  return true;
565  }
566 
567  std::string src_label_;
568  std::string edge_label_;
569  std::string dst_label_;
570  IdType chunk_size_;
571  IdType src_chunk_size_;
572  IdType dst_chunk_size_;
573  bool directed_;
574  std::string prefix_;
575  AdjacentListVector adjacent_lists_;
576  PropertyGroupVector property_groups_;
577  std::unordered_map<AdjListType, int> adjacent_list_type_to_index_;
578  std::unordered_map<std::string, int> property_name_to_index_;
579  std::unordered_map<std::string, bool> property_name_to_primary_;
580  std::unordered_map<std::string, bool> property_name_to_nullable_;
581  std::unordered_map<std::string, std::shared_ptr<DataType>>
582  property_name_to_type_;
583  std::shared_ptr<const InfoVersion> version_;
584 };
585 
586 EdgeInfo::EdgeInfo(const std::string& src_label, const std::string& edge_label,
587  const std::string& dst_label, IdType chunk_size,
588  IdType src_chunk_size, IdType dst_chunk_size, bool directed,
589  const AdjacentListVector& adjacent_lists,
590  const PropertyGroupVector& property_groups,
591  const std::string& prefix,
592  std::shared_ptr<const InfoVersion> version)
593  : impl_(new Impl(src_label, edge_label, dst_label, chunk_size,
594  src_chunk_size, dst_chunk_size, directed, prefix,
595  adjacent_lists, property_groups, version)) {}
596 
597 EdgeInfo::~EdgeInfo() = default;
598 
599 const std::string& EdgeInfo::GetSrcLabel() const { return impl_->src_label_; }
600 
601 const std::string& EdgeInfo::GetEdgeLabel() const { return impl_->edge_label_; }
602 
603 const std::string& EdgeInfo::GetDstLabel() const { return impl_->dst_label_; }
604 
605 IdType EdgeInfo::GetChunkSize() const { return impl_->chunk_size_; }
606 
607 IdType EdgeInfo::GetSrcChunkSize() const { return impl_->src_chunk_size_; }
608 
609 IdType EdgeInfo::GetDstChunkSize() const { return impl_->dst_chunk_size_; }
610 
611 const std::string& EdgeInfo::GetPrefix() const { return impl_->prefix_; }
612 
613 bool EdgeInfo::IsDirected() const { return impl_->directed_; }
614 
615 const std::shared_ptr<const InfoVersion>& EdgeInfo::version() const {
616  return impl_->version_;
617 }
618 
619 bool EdgeInfo::HasAdjacentListType(AdjListType adj_list_type) const {
620  return impl_->adjacent_list_type_to_index_.find(adj_list_type) !=
621  impl_->adjacent_list_type_to_index_.end();
622 }
623 
624 bool EdgeInfo::HasProperty(const std::string& property_name) const {
625  return impl_->property_name_to_index_.find(property_name) !=
626  impl_->property_name_to_index_.end();
627 }
628 
630  const std::shared_ptr<PropertyGroup>& property_group) const {
631  if (property_group == nullptr) {
632  return false;
633  }
634  for (const auto& pg : impl_->property_groups_) {
635  if (*pg == *property_group) {
636  return true;
637  }
638  }
639  return false;
640 }
641 
642 std::shared_ptr<AdjacentList> EdgeInfo::GetAdjacentList(
643  AdjListType adj_list_type) const {
644  auto it = impl_->adjacent_list_type_to_index_.find(adj_list_type);
645  if (it == impl_->adjacent_list_type_to_index_.end()) {
646  return nullptr;
647  }
648  return impl_->adjacent_lists_[it->second];
649 }
650 
652  return static_cast<int>(impl_->property_groups_.size());
653 }
654 
655 const PropertyGroupVector& EdgeInfo::GetPropertyGroups() const {
656  return impl_->property_groups_;
657 }
658 
659 std::shared_ptr<PropertyGroup> EdgeInfo::GetPropertyGroup(
660  const std::string& property_name) const {
661  int i = LookupKeyIndex(impl_->property_name_to_index_, property_name);
662  return i == -1 ? nullptr : impl_->property_groups_[i];
663 }
664 
665 std::shared_ptr<PropertyGroup> EdgeInfo::GetPropertyGroupByIndex(
666  int index) const {
667  if (index < 0 || index >= static_cast<int>(impl_->property_groups_.size())) {
668  return nullptr;
669  }
670  return impl_->property_groups_[index];
671 }
672 
674  AdjListType adj_list_type) const {
675  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
676  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
677  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
678  "vertex_count";
679 }
680 
681 Result<std::string> EdgeInfo::GetEdgesNumFilePath(
682  IdType vertex_chunk_index, AdjListType adj_list_type) const {
683  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
684  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
685  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
686  "edge_count" + std::to_string(vertex_chunk_index);
687 }
688 
689 Result<std::string> EdgeInfo::GetAdjListFilePath(
690  IdType vertex_chunk_index, IdType edge_chunk_index,
691  AdjListType adj_list_type) const {
692  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
693  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
694  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
695  "adj_list/part" + std::to_string(vertex_chunk_index) + "/chunk" +
696  std::to_string(edge_chunk_index);
697 }
698 
699 Result<std::string> EdgeInfo::GetAdjListPathPrefix(
700  AdjListType adj_list_type) const {
701  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
702  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
703  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
704  "adj_list/";
705 }
706 
708  IdType vertex_chunk_index, AdjListType adj_list_type) const {
709  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
710  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
711  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
712  "offset/chunk" + std::to_string(vertex_chunk_index);
713 }
714 
715 Result<std::string> EdgeInfo::GetOffsetPathPrefix(
716  AdjListType adj_list_type) const {
717  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
718  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
719  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
720  "offset/";
721 }
722 
723 Result<std::string> EdgeInfo::GetPropertyFilePath(
724  const std::shared_ptr<PropertyGroup>& property_group,
725  AdjListType adj_list_type, IdType vertex_chunk_index,
726  IdType edge_chunk_index) const {
727  if (property_group == nullptr) {
728  return Status::Invalid("property group is nullptr");
729  }
730  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
731  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
732  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix(),
733  property_group->GetPrefix()}) +
734  "part" + std::to_string(vertex_chunk_index) + "/chunk" +
735  std::to_string(edge_chunk_index);
736 }
737 
739  const std::shared_ptr<PropertyGroup>& property_group,
740  AdjListType adj_list_type) const {
741  if (property_group == nullptr) {
742  return Status::Invalid("property group is nullptr");
743  }
744  CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
745  int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
746  return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix(),
747  property_group->GetPrefix()});
748 }
749 
750 Result<std::shared_ptr<DataType>> EdgeInfo::GetPropertyType(
751  const std::string& property_name) const {
752  auto it = impl_->property_name_to_type_.find(property_name);
753  if (it == impl_->property_name_to_type_.end()) {
754  return Status::Invalid("property name not found: ", property_name);
755  }
756  return it->second;
757 }
758 
759 bool EdgeInfo::IsPrimaryKey(const std::string& property_name) const {
760  auto it = impl_->property_name_to_primary_.find(property_name);
761  if (it == impl_->property_name_to_primary_.end()) {
762  return false;
763  }
764  return it->second;
765 }
766 
767 bool EdgeInfo::IsNullableKey(const std::string& property_name) const {
768  auto it = impl_->property_name_to_nullable_.find(property_name);
769  if (it == impl_->property_name_to_nullable_.end()) {
770  return false;
771  }
772  return it->second;
773 }
774 
775 Result<std::shared_ptr<EdgeInfo>> EdgeInfo::AddAdjacentList(
776  std::shared_ptr<AdjacentList> adj_list) const {
777  if (adj_list == nullptr) {
778  return Status::Invalid("adj list is nullptr");
779  }
780  if (HasAdjacentListType(adj_list->GetType())) {
781  return Status::Invalid("adj list type already exists: ",
782  AdjListTypeToString(adj_list->GetType()));
783  }
784  return std::make_shared<EdgeInfo>(
785  impl_->src_label_, impl_->edge_label_, impl_->dst_label_,
786  impl_->chunk_size_, impl_->src_chunk_size_, impl_->dst_chunk_size_,
787  impl_->directed_, AddVectorElement(impl_->adjacent_lists_, adj_list),
788  impl_->property_groups_, impl_->prefix_, impl_->version_);
789 }
790 
791 Result<std::shared_ptr<EdgeInfo>> EdgeInfo::AddPropertyGroup(
792  std::shared_ptr<PropertyGroup> property_group) const {
793  if (property_group == nullptr) {
794  return Status::Invalid("property group is nullptr");
795  }
796  for (const auto& property : property_group->GetProperties()) {
797  if (HasProperty(property.name)) {
798  return Status::Invalid("property in property group already exists: ",
799  property.name);
800  }
801  }
802  return std::make_shared<EdgeInfo>(
803  impl_->src_label_, impl_->edge_label_, impl_->dst_label_,
804  impl_->chunk_size_, impl_->src_chunk_size_, impl_->dst_chunk_size_,
805  impl_->directed_, impl_->adjacent_lists_,
806  AddVectorElement(impl_->property_groups_, property_group), impl_->prefix_,
807  impl_->version_);
808 }
809 
810 bool EdgeInfo::IsValidated() const { return impl_->is_validated(); }
811 
812 std::shared_ptr<EdgeInfo> CreateEdgeInfo(
813  const std::string& src_label, const std::string& edge_label,
814  const std::string& dst_label, IdType chunk_size, IdType src_chunk_size,
815  IdType dst_chunk_size, bool directed,
816  const AdjacentListVector& adjacent_lists,
817  const PropertyGroupVector& property_groups, const std::string& prefix,
818  std::shared_ptr<const InfoVersion> version) {
819  if (src_label.empty() || edge_label.empty() || dst_label.empty() ||
820  chunk_size <= 0 || src_chunk_size <= 0 || dst_chunk_size <= 0 ||
821  adjacent_lists.empty()) {
822  return nullptr;
823  }
824  return std::make_shared<EdgeInfo>(src_label, edge_label, dst_label,
825  chunk_size, src_chunk_size, dst_chunk_size,
826  directed, adjacent_lists, property_groups,
827  prefix, version);
828 }
829 
830 Result<std::shared_ptr<EdgeInfo>> EdgeInfo::Load(std::shared_ptr<Yaml> yaml) {
831  if (yaml == nullptr) {
832  return Status::Invalid("yaml shared pointer is nullptr.");
833  }
834  std::string src_label = yaml->operator[]("src_label").As<std::string>();
835  std::string edge_label = yaml->operator[]("edge_label").As<std::string>();
836  std::string dst_label = yaml->operator[]("dst_label").As<std::string>();
837  IdType chunk_size =
838  static_cast<IdType>(yaml->operator[]("chunk_size").As<int64_t>());
839  IdType src_chunk_size =
840  static_cast<IdType>(yaml->operator[]("src_chunk_size").As<int64_t>());
841  IdType dst_chunk_size =
842  static_cast<IdType>(yaml->operator[]("dst_chunk_size").As<int64_t>());
843  bool directed = yaml->operator[]("directed").As<bool>();
844  std::string prefix;
845  if (!yaml->operator[]("prefix").IsNone()) {
846  prefix = yaml->operator[]("prefix").As<std::string>();
847  }
848  std::shared_ptr<const InfoVersion> version = nullptr;
849  if (!yaml->operator[]("version").IsNone()) {
850  GAR_ASSIGN_OR_RAISE(
851  version,
852  InfoVersion::Parse(yaml->operator[]("version").As<std::string>()));
853  }
854 
855  AdjacentListVector adjacent_lists;
856  PropertyGroupVector property_groups;
857  auto adj_lists_node = yaml->operator[]("adj_lists");
858  if (adj_lists_node.IsSequence()) {
859  for (auto it = adj_lists_node.Begin(); it != adj_lists_node.End(); it++) {
860  auto& node = (*it).second;
861  auto ordered = node["ordered"].As<bool>();
862  auto aligned = node["aligned_by"].As<std::string>();
863  auto adj_list_type = OrderedAlignedToAdjListType(ordered, aligned);
864  auto file_type = StringToFileType(node["file_type"].As<std::string>());
865  std::string adj_list_prefix;
866  if (!node["prefix"].IsNone()) {
867  adj_list_prefix = node["prefix"].As<std::string>();
868  }
869  adjacent_lists.push_back(std::make_shared<AdjacentList>(
870  adj_list_type, file_type, adj_list_prefix));
871  }
872  }
873  auto property_groups_node = yaml->operator[]("property_groups");
874  if (!property_groups_node.IsNone()) { // property_groups exist
875  for (auto pg_it = property_groups_node.Begin();
876  pg_it != property_groups_node.End(); pg_it++) {
877  auto& pg_node = (*pg_it).second;
878  std::string pg_prefix;
879  if (!pg_node["prefix"].IsNone()) {
880  pg_prefix = pg_node["prefix"].As<std::string>();
881  }
882  auto file_type = StringToFileType(pg_node["file_type"].As<std::string>());
883  auto properties = pg_node["properties"];
884  std::vector<Property> property_vec;
885  for (auto p_it = properties.Begin(); p_it != properties.End(); p_it++) {
886  auto& p_node = (*p_it).second;
887  auto property_name = p_node["name"].As<std::string>();
888  auto property_type =
889  DataType::TypeNameToDataType(p_node["data_type"].As<std::string>());
890  bool is_primary = p_node["is_primary"].As<bool>();
891  bool is_nullable =
892  p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
893  property_vec.emplace_back(property_name, property_type, is_primary,
894  is_nullable);
895  }
896  property_groups.push_back(
897  std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
898  }
899  }
900  return std::make_shared<EdgeInfo>(src_label, edge_label, dst_label,
901  chunk_size, src_chunk_size, dst_chunk_size,
902  directed, adjacent_lists, property_groups,
903  prefix, version);
904 }
905 
906 Result<std::shared_ptr<EdgeInfo>> EdgeInfo::Load(const std::string& input) {
907  GAR_ASSIGN_OR_RAISE(auto yaml, Yaml::Load(input));
908  return EdgeInfo::Load(yaml);
909 }
910 
911 Result<std::string> EdgeInfo::Dump() const noexcept {
912  if (!IsValidated()) {
913  return Status::Invalid("The edge info is not validated.");
914  }
915  std::string dump_string;
916  ::Yaml::Node node;
917  try {
918  node["src_label"] = impl_->src_label_;
919  node["edge_label"] = impl_->edge_label_;
920  node["dst_label"] = impl_->dst_label_;
921  node["chunk_size"] = std::to_string(impl_->chunk_size_);
922  node["src_chunk_size"] = std::to_string(impl_->src_chunk_size_);
923  node["dst_chunk_size"] = std::to_string(impl_->dst_chunk_size_);
924  node["prefix"] = impl_->prefix_;
925  node["directed"] = impl_->directed_ ? "true" : "false";
926  for (const auto& adjacent_list : impl_->adjacent_lists_) {
927  ::Yaml::Node adj_list_node;
928  auto adj_list_type = adjacent_list->GetType();
929  auto pair = AdjListTypeToOrderedAligned(adj_list_type);
930  adj_list_node["ordered"] = pair.first ? "true" : "false";
931  adj_list_node["aligned_by"] = pair.second;
932  adj_list_node["prefix"] = adjacent_list->GetPrefix();
933  adj_list_node["file_type"] =
934  FileTypeToString(adjacent_list->GetFileType());
935  node["adj_lists"].PushBack();
936  node["adj_lists"][node["adj_lists"].Size() - 1] = adj_list_node;
937  }
938  for (const auto& pg : impl_->property_groups_) {
939  ::Yaml::Node pg_node;
940  if (!pg->GetPrefix().empty()) {
941  pg_node["prefix"] = pg->GetPrefix();
942  }
943  pg_node["file_type"] = FileTypeToString(pg->GetFileType());
944  for (const auto& p : pg->GetProperties()) {
945  ::Yaml::Node p_node;
946  p_node["name"] = p.name;
947  p_node["data_type"] = p.type->ToTypeName();
948  p_node["is_primary"] = p.is_primary ? "true" : "false";
949  p_node["is_nullable"] = p.is_nullable ? "true" : "false";
950  pg_node["properties"].PushBack();
951  pg_node["properties"][pg_node["properties"].Size() - 1] = p_node;
952  }
953  node["property_groups"].PushBack();
954  node["property_groups"][node["property_groups"].Size() - 1] = pg_node;
955  }
956  if (impl_->version_ != nullptr) {
957  node["version"] = impl_->version_->ToString();
958  }
959  ::Yaml::Serialize(node, dump_string);
960  } catch (const std::exception& e) {
961  return Status::Invalid("Failed to dump edge info: ", e.what());
962  }
963  return dump_string;
964 }
965 
966 Status EdgeInfo::Save(const std::string& path) const {
967  std::string no_url_path;
968  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(path, &no_url_path));
969  GAR_ASSIGN_OR_RAISE(auto yaml_content, this->Dump());
970  return fs->WriteValueToFile(yaml_content, no_url_path);
971 }
972 
973 namespace {
974 
// Returns the directory part of `path`, including the trailing '/'.
//
// For "s3://" URIs an optional query string (everything from the last '?')
// is split off before the directory is computed and re-attached afterwards,
// so "s3://bucket/dir/file.yml?opt=1" becomes "s3://bucket/dir/?opt=1".
// If no '/' is found the input is returned unchanged.
static std::string PathToDirectory(const std::string& path) {
  if (path.rfind("s3://", 0) == 0) {
    // Keep the position as size_t and test it against npos: an S3 URI
    // without a query string must not reach substr() with an out-of-range
    // position, which would throw std::out_of_range.
    const std::string::size_type query_idx = path.find_last_of('?');
    const bool has_query = query_idx != std::string::npos;
    const std::string prefix = has_query ? path.substr(0, query_idx) : path;
    const std::string suffix =
        has_query ? path.substr(query_idx) : std::string();
    const std::string::size_type last_slash_idx = prefix.rfind('/');
    if (std::string::npos != last_slash_idx) {
      return prefix.substr(0, last_slash_idx + 1) + suffix;
    }
  } else {
    const std::string::size_type last_slash_idx = path.rfind('/');
    if (std::string::npos != last_slash_idx) {
      return path.substr(0, last_slash_idx + 1);  // +1 to include the slash
    }
  }
  return path;
}
992 
993 static Result<std::shared_ptr<GraphInfo>> ConstructGraphInfo(
994  std::shared_ptr<Yaml> graph_meta, const std::string& default_name,
995  const std::string& default_prefix, const std::shared_ptr<FileSystem> fs,
996  const std::string& no_url_path) {
997  std::string name = default_name;
998  std::string prefix = default_prefix;
999  if (!graph_meta->operator[]("name").IsNone()) {
1000  name = graph_meta->operator[]("name").As<std::string>();
1001  }
1002  if (!graph_meta->operator[]("prefix").IsNone()) {
1003  prefix = graph_meta->operator[]("prefix").As<std::string>();
1004  }
1005  std::shared_ptr<const InfoVersion> version = nullptr;
1006  if (!graph_meta->operator[]("version").IsNone()) {
1007  GAR_ASSIGN_OR_RAISE(
1008  version, InfoVersion::Parse(
1009  graph_meta->operator[]("version").As<std::string>()));
1010  }
1011  std::unordered_map<std::string, std::string> extra_info;
1012  if (!graph_meta->operator[]("extra_info").IsNone()) {
1013  auto& extra_info_node = graph_meta->operator[]("extra_info");
1014  for (auto it = extra_info_node.Begin(); it != extra_info_node.End(); it++) {
1015  auto node = (*it).second;
1016  auto key = node["key"].As<std::string>();
1017  auto value = node["value"].As<std::string>();
1018  extra_info.emplace(key, value);
1019  }
1020  }
1021 
1022  VertexInfoVector vertex_infos;
1023  EdgeInfoVector edge_infos;
1024  const auto& vertices = graph_meta->operator[]("vertices");
1025  if (vertices.IsSequence()) {
1026  for (auto it = vertices.Begin(); it != vertices.End(); it++) {
1027  std::string vertex_meta_file =
1028  no_url_path + (*it).second.As<std::string>();
1029  GAR_ASSIGN_OR_RAISE(auto input,
1030  fs->ReadFileToValue<std::string>(vertex_meta_file));
1031  GAR_ASSIGN_OR_RAISE(auto vertex_meta, Yaml::Load(input));
1032  GAR_ASSIGN_OR_RAISE(auto vertex_info, VertexInfo::Load(vertex_meta));
1033  vertex_infos.push_back(vertex_info);
1034  }
1035  }
1036  const auto& edges = graph_meta->operator[]("edges");
1037  if (edges.IsSequence()) {
1038  for (auto it = edges.Begin(); it != edges.End(); it++) {
1039  std::string edge_meta_file = no_url_path + (*it).second.As<std::string>();
1040  GAR_ASSIGN_OR_RAISE(auto input,
1041  fs->ReadFileToValue<std::string>(edge_meta_file));
1042  GAR_ASSIGN_OR_RAISE(auto edge_meta, Yaml::Load(input));
1043  GAR_ASSIGN_OR_RAISE(auto edge_info, EdgeInfo::Load(edge_meta));
1044  edge_infos.push_back(edge_info);
1045  }
1046  }
1047  return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
1048  version, extra_info);
1049 }
1050 
1051 } // namespace
1052 
1054  public:
1055  Impl(const std::string& graph_name, VertexInfoVector vertex_infos,
1056  EdgeInfoVector edge_infos, const std::string& prefix,
1057  std::shared_ptr<const InfoVersion> version,
1058  const std::unordered_map<std::string, std::string>& extra_info)
1059  : name_(graph_name),
1060  vertex_infos_(std::move(vertex_infos)),
1061  edge_infos_(std::move(edge_infos)),
1062  prefix_(prefix),
1063  version_(std::move(version)),
1064  extra_info_(extra_info) {
1065  for (size_t i = 0; i < vertex_infos_.size(); i++) {
1066  if (vertex_infos_[i] != nullptr) {
1067  vlabel_to_index_[vertex_infos_[i]->GetLabel()] = i;
1068  }
1069  }
1070  for (size_t i = 0; i < edge_infos_.size(); i++) {
1071  if (edge_infos_[i] != nullptr) {
1072  std::string edge_key = ConcatEdgeTriple(edge_infos_[i]->GetSrcLabel(),
1073  edge_infos_[i]->GetEdgeLabel(),
1074  edge_infos_[i]->GetDstLabel());
1075  elabel_to_index_[edge_key] = i;
1076  }
1077  }
1078  }
1079 
1080  bool is_validated() const noexcept {
1081  if (name_.empty() || prefix_.empty()) {
1082  return false;
1083  }
1084  for (const auto& v : vertex_infos_) {
1085  if (!v || !v->IsValidated()) {
1086  return false;
1087  }
1088  }
1089  for (const auto& e : edge_infos_) {
1090  if (!e || !e->IsValidated()) {
1091  return false;
1092  }
1093  }
1094  if (vertex_infos_.size() != vlabel_to_index_.size() ||
1095  edge_infos_.size() != elabel_to_index_.size()) {
1096  return false;
1097  }
1098  return true;
1099  }
1100 
1101  std::string name_;
1102  VertexInfoVector vertex_infos_;
1103  EdgeInfoVector edge_infos_;
1104  std::string prefix_;
1105  std::shared_ptr<const InfoVersion> version_;
1106  std::unordered_map<std::string, std::string> extra_info_;
1107  std::unordered_map<std::string, int> vlabel_to_index_;
1108  std::unordered_map<std::string, int> elabel_to_index_;
1109 };
1110 
1112  const std::string& graph_name, VertexInfoVector vertex_infos,
1113  EdgeInfoVector edge_infos, const std::string& prefix,
1114  std::shared_ptr<const InfoVersion> version,
1115  const std::unordered_map<std::string, std::string>& extra_info)
1116  : impl_(new Impl(graph_name, std::move(vertex_infos), std::move(edge_infos),
1117  prefix, version, extra_info)) {}
1118 
1119 GraphInfo::~GraphInfo() = default;
1120 
1121 const std::string& GraphInfo::GetName() const { return impl_->name_; }
1122 
1123 const std::string& GraphInfo::GetPrefix() const { return impl_->prefix_; }
1124 
1125 const std::shared_ptr<const InfoVersion>& GraphInfo::version() const {
1126  return impl_->version_;
1127 }
1128 
1129 const std::unordered_map<std::string, std::string>& GraphInfo::GetExtraInfo()
1130  const {
1131  return impl_->extra_info_;
1132 }
1133 
1134 std::shared_ptr<VertexInfo> GraphInfo::GetVertexInfo(
1135  const std::string& type) const {
1136  int i = GetVertexInfoIndex(type);
1137  return i == -1 ? nullptr : impl_->vertex_infos_[i];
1138 }
1139 
1140 int GraphInfo::GetVertexInfoIndex(const std::string& type) const {
1141  return LookupKeyIndex(impl_->vlabel_to_index_, type);
1142 }
1143 
1144 std::shared_ptr<EdgeInfo> GraphInfo::GetEdgeInfo(
1145  const std::string& src_label, const std::string& edge_label,
1146  const std::string& dst_label) const {
1147  int i = GetEdgeInfoIndex(src_label, edge_label, dst_label);
1148  return i == -1 ? nullptr : impl_->edge_infos_[i];
1149 }
1150 
1151 int GraphInfo::GetEdgeInfoIndex(const std::string& src_label,
1152  const std::string& edge_label,
1153  const std::string& dst_label) const {
1154  std::string edge_key = ConcatEdgeTriple(src_label, edge_label, dst_label);
1155  return LookupKeyIndex(impl_->elabel_to_index_, edge_key);
1156 }
1157 
1159  return static_cast<int>(impl_->vertex_infos_.size());
1160 }
1161 
1163  return static_cast<int>(impl_->edge_infos_.size());
1164 }
1165 
1166 const std::shared_ptr<VertexInfo> GraphInfo::GetVertexInfoByIndex(
1167  int index) const {
1168  if (index < 0 || index >= static_cast<int>(impl_->vertex_infos_.size())) {
1169  return nullptr;
1170  }
1171  return impl_->vertex_infos_[index];
1172 }
1173 
1174 const std::shared_ptr<EdgeInfo> GraphInfo::GetEdgeInfoByIndex(int index) const {
1175  if (index < 0 || index >= static_cast<int>(impl_->edge_infos_.size())) {
1176  return nullptr;
1177  }
1178  return impl_->edge_infos_[index];
1179 }
1180 
1181 const VertexInfoVector& GraphInfo::GetVertexInfos() const {
1182  return impl_->vertex_infos_;
1183 }
1184 
1185 const EdgeInfoVector& GraphInfo::GetEdgeInfos() const {
1186  return impl_->edge_infos_;
1187 }
1188 
1189 bool GraphInfo::IsValidated() const { return impl_->is_validated(); }
1190 
1191 Result<std::shared_ptr<GraphInfo>> GraphInfo::AddVertex(
1192  std::shared_ptr<VertexInfo> vertex_info) const {
1193  if (vertex_info == nullptr) {
1194  return Status::Invalid("vertex info is nullptr");
1195  }
1196  if (GetVertexInfoIndex(vertex_info->GetLabel()) != -1) {
1197  return Status::Invalid("vertex info already exists");
1198  }
1199  return std::make_shared<GraphInfo>(
1200  impl_->name_, AddVectorElement(impl_->vertex_infos_, vertex_info),
1201  impl_->edge_infos_, impl_->prefix_, impl_->version_);
1202 }
1203 
1204 Result<std::shared_ptr<GraphInfo>> GraphInfo::AddEdge(
1205  std::shared_ptr<EdgeInfo> edge_info) const {
1206  if (edge_info == nullptr) {
1207  return Status::Invalid("edge info is nullptr");
1208  }
1209  if (GetEdgeInfoIndex(edge_info->GetSrcLabel(), edge_info->GetEdgeLabel(),
1210  edge_info->GetDstLabel()) != -1) {
1211  return Status::Invalid("edge info already exists");
1212  }
1213  return std::make_shared<GraphInfo>(
1214  impl_->name_, impl_->vertex_infos_,
1215  AddVectorElement(impl_->edge_infos_, edge_info), impl_->prefix_,
1216  impl_->version_);
1217 }
1218 
1219 std::shared_ptr<GraphInfo> CreateGraphInfo(
1220  const std::string& name, const VertexInfoVector& vertex_infos,
1221  const EdgeInfoVector& edge_infos, const std::string& prefix,
1222  std::shared_ptr<const InfoVersion> version,
1223  const std::unordered_map<std::string, std::string>& extra_info) {
1224  if (name.empty()) {
1225  return nullptr;
1226  }
1227  return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
1228  version, extra_info);
1229 }
1230 
1231 Result<std::shared_ptr<GraphInfo>> GraphInfo::Load(const std::string& path) {
1232  std::string no_url_path;
1233  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(path, &no_url_path));
1234  GAR_ASSIGN_OR_RAISE(auto yaml_content,
1235  fs->ReadFileToValue<std::string>(no_url_path));
1236  GAR_ASSIGN_OR_RAISE(auto graph_meta, Yaml::Load(yaml_content));
1237  std::string default_name = "graph";
1238  std::string default_prefix = PathToDirectory(path);
1239  no_url_path = PathToDirectory(no_url_path);
1240  return ConstructGraphInfo(graph_meta, default_name, default_prefix, fs,
1241  no_url_path);
1242 }
1243 
1244 Result<std::shared_ptr<GraphInfo>> GraphInfo::Load(
1245  const std::string& input, const std::string& relative_location) {
1246  GAR_ASSIGN_OR_RAISE(auto graph_meta, Yaml::Load(input));
1247  std::string default_name = "graph";
1248  std::string default_prefix =
1249  relative_location; // default chunk file prefix is relative location
1250  std::string no_url_path;
1251  GAR_ASSIGN_OR_RAISE(auto fs,
1252  FileSystemFromUriOrPath(relative_location, &no_url_path));
1253  return ConstructGraphInfo(graph_meta, default_name, default_prefix, fs,
1254  no_url_path);
1255 }
1256 
1257 Result<std::string> GraphInfo::Dump() const {
1258  if (!IsValidated()) {
1259  return Status::Invalid("The graph info is not validated.");
1260  }
1261  ::Yaml::Node node;
1262  std::string dump_string;
1263  try {
1264  node["name"] = impl_->name_;
1265  node["prefix"] = impl_->prefix_;
1266  node["vertices"];
1267  node["edges"];
1268  for (const auto& vertex : GetVertexInfos()) {
1269  node["vertices"].PushBack();
1270  node["vertices"][node["vertices"].Size() - 1] =
1271  vertex->GetLabel() + ".vertex.yaml";
1272  }
1273  for (const auto& edge : GetEdgeInfos()) {
1274  node["edges"].PushBack();
1275  node["edges"][node["edges"].Size() - 1] =
1276  ConcatEdgeTriple(edge->GetSrcLabel(), edge->GetEdgeLabel(),
1277  edge->GetDstLabel()) +
1278  ".edge.yaml";
1279  }
1280  if (impl_->version_ != nullptr) {
1281  node["version"] = impl_->version_->ToString();
1282  }
1283  if (impl_->extra_info_.size() > 0) {
1284  node["extra_info"];
1285  for (const auto& pair : impl_->extra_info_) {
1286  ::Yaml::Node extra_info_node;
1287  extra_info_node["key"] = pair.first;
1288  extra_info_node["value"] = pair.second;
1289  node["extra_info"].PushBack();
1290  node["extra_info"][node["extra_info"].Size() - 1] = extra_info_node;
1291  }
1292  }
1293  ::Yaml::Serialize(node, dump_string);
1294  } catch (const std::exception& e) {
1295  return Status::Invalid("Failed to dump graph info: ", e.what());
1296  }
1297  return dump_string;
1298 }
1299 
1300 Status GraphInfo::Save(const std::string& path) const {
1301  std::string no_url_path;
1302  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(path, &no_url_path));
1303  GAR_ASSIGN_OR_RAISE(auto yaml_content, this->Dump());
1304  return fs->WriteValueToFile(yaml_content, no_url_path);
1305 }
1306 
1307 } // namespace graphar
bool IsValidated() const
Definition: graph_info.cc:169
AdjacentList(AdjListType type, FileType file_type, const std::string &prefix="")
Definition: graph_info.cc:161
static Result< std::shared_ptr< EdgeInfo > > Load(std::shared_ptr< Yaml > yaml)
Definition: graph_info.cc:830
Result< std::string > GetAdjListFilePath(IdType vertex_chunk_index, IdType edge_chunk_index, AdjListType adj_list_type) const
Get the file path of adj list topology chunk.
Definition: graph_info.cc:689
Status Save(const std::string &file_name) const
Definition: graph_info.cc:966
Result< std::shared_ptr< DataType > > GetPropertyType(const std::string &property_name) const
Definition: graph_info.cc:750
bool IsValidated() const
Definition: graph_info.cc:810
const std::string & GetPrefix() const
Definition: graph_info.cc:611
Result< std::string > GetEdgesNumFilePath(IdType vertex_chunk_index, AdjListType adj_list_type) const
Definition: graph_info.cc:681
Result< std::string > GetPropertyGroupPathPrefix(const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type) const
Definition: graph_info.cc:738
bool IsPrimaryKey(const std::string &property_name) const
Definition: graph_info.cc:759
Result< std::string > GetAdjListOffsetFilePath(IdType vertex_chunk_index, AdjListType adj_list_type) const
Get the adjacency list offset chunk file path of vertex chunk the offset chunks is aligned with the v...
Definition: graph_info.cc:707
IdType GetChunkSize() const
Definition: graph_info.cc:605
bool HasProperty(const std::string &property_name) const
Returns whether the edge info contains the given property.
Definition: graph_info.cc:624
Result< std::shared_ptr< EdgeInfo > > AddAdjacentList(std::shared_ptr< AdjacentList > adj_list) const
Definition: graph_info.cc:775
Result< std::string > Dump() const noexcept
Definition: graph_info.cc:911
bool IsNullableKey(const std::string &property_name) const
Definition: graph_info.cc:767
std::shared_ptr< PropertyGroup > GetPropertyGroupByIndex(int index) const
Get the property group at the specified index.
Definition: graph_info.cc:665
bool HasPropertyGroup(const std::shared_ptr< PropertyGroup > &property_group) const
Returns whether the edge info contains the given property group.
Definition: graph_info.cc:629
const std::shared_ptr< const InfoVersion > & version() const
Definition: graph_info.cc:615
Result< std::shared_ptr< EdgeInfo > > AddPropertyGroup(std::shared_ptr< PropertyGroup > property_group) const
Definition: graph_info.cc:791
bool HasAdjacentListType(AdjListType adj_list_type) const
Definition: graph_info.cc:619
Result< std::string > GetOffsetPathPrefix(AdjListType adj_list_type) const
Definition: graph_info.cc:715
bool IsDirected() const
Definition: graph_info.cc:613
Result< std::string > GetAdjListPathPrefix(AdjListType adj_list_type) const
Get the path prefix of the adjacency list topology chunk for the given adjacency list type.
Definition: graph_info.cc:699
IdType GetDstChunkSize() const
Definition: graph_info.cc:609
Result< std::string > GetPropertyFilePath(const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type, IdType vertex_chunk_index, IdType edge_chunk_index) const
Get the chunk file path of adj list property group the property group chunks is aligned with the adj ...
Definition: graph_info.cc:723
IdType GetSrcChunkSize() const
Definition: graph_info.cc:607
const std::string & GetSrcLabel() const
Definition: graph_info.cc:599
const PropertyGroupVector & GetPropertyGroups() const
Get the property groups.
Definition: graph_info.cc:655
const std::string & GetDstLabel() const
Definition: graph_info.cc:603
Result< std::string > GetVerticesNumFilePath(AdjListType adj_list_type) const
Get the file path for the number of vertices.
Definition: graph_info.cc:673
std::shared_ptr< PropertyGroup > GetPropertyGroup(const std::string &property) const
Get the property group containing the given property.
Definition: graph_info.cc:659
int PropertyGroupNum() const
Get the number of property groups.
Definition: graph_info.cc:651
const std::string & GetEdgeLabel() const
Definition: graph_info.cc:601
EdgeInfo(const std::string &src_label, const std::string &edge_label, const std::string &dst_label, IdType chunk_size, IdType src_chunk_size, IdType dst_chunk_size, bool directed, const AdjacentListVector &adjacent_lists, const PropertyGroupVector &property_groups, const std::string &prefix="", std::shared_ptr< const InfoVersion > version=nullptr)
Construct an EdgeInfo object with the given information and property groups.
Definition: graph_info.cc:586
int GetVertexInfoIndex(const std::string &label) const
Get the vertex info index with the given label.
Definition: graph_info.cc:1140
const EdgeInfoVector & GetEdgeInfos() const
Get the edge infos of graph info.
Definition: graph_info.cc:1185
std::shared_ptr< EdgeInfo > GetEdgeInfo(const std::string &src_label, const std::string &edge_label, const std::string &dst_label) const
Get the edge info with the given source vertex label, edge label, and destination vertex label.
Definition: graph_info.cc:1144
Status Save(const std::string &path) const
Definition: graph_info.cc:1300
const std::shared_ptr< VertexInfo > GetVertexInfoByIndex(int index) const
Get the vertex info at the specified index.
Definition: graph_info.cc:1166
Result< std::shared_ptr< GraphInfo > > AddVertex(std::shared_ptr< VertexInfo > vertex_info) const
Adds a vertex info to the GraphInfo instance and returns a new GraphInfo.
Definition: graph_info.cc:1191
const std::string & GetPrefix() const
Get the absolute path prefix of the chunk files.
Definition: graph_info.cc:1123
static Result< std::shared_ptr< GraphInfo > > Load(const std::string &path)
Loads the input file as a GraphInfo instance.
Definition: graph_info.cc:1231
int GetEdgeInfoIndex(const std::string &src_label, const std::string &edge_label, const std::string &dst_label) const
Get the edge info index with the given source vertex label, edge label, and destination label.
Definition: graph_info.cc:1151
bool IsValidated() const
Definition: graph_info.cc:1189
Result< std::string > Dump() const
Definition: graph_info.cc:1257
int EdgeInfoNum() const
Get the number of edge infos.
Definition: graph_info.cc:1162
Result< std::shared_ptr< GraphInfo > > AddEdge(std::shared_ptr< EdgeInfo > edge_info) const
Adds an edge info to the GraphInfo instance and returns a new GraphInfo.
Definition: graph_info.cc:1204
const VertexInfoVector & GetVertexInfos() const
Get the vertex infos of graph info.
Definition: graph_info.cc:1181
const std::unordered_map< std::string, std::string > & GetExtraInfo() const
Get the extra metadata of the graph info object.
Definition: graph_info.cc:1129
int VertexInfoNum() const
Get the number of vertex infos.
Definition: graph_info.cc:1158
const std::string & GetName() const
Get the name of the graph.
Definition: graph_info.cc:1121
GraphInfo(const std::string &graph_name, VertexInfoVector vertex_infos, EdgeInfoVector edge_infos, const std::string &prefix="./", std::shared_ptr< const InfoVersion > version=nullptr, const std::unordered_map< std::string, std::string > &extra_info={})
Constructs a GraphInfo instance.
Definition: graph_info.cc:1111
const std::shared_ptr< const InfoVersion > & version() const
Get the version info of the graph info object.
Definition: graph_info.cc:1125
std::shared_ptr< VertexInfo > GetVertexInfo(const std::string &label) const
Get the vertex info with the given label.
Definition: graph_info.cc:1134
const std::shared_ptr< EdgeInfo > GetEdgeInfoByIndex(int index) const
Get the edge info at the specified index.
Definition: graph_info.cc:1174
static Result< std::shared_ptr< const InfoVersion > > Parse(const std::string &str) noexcept
bool IsValidated() const
Definition: graph_info.cc:116
PropertyGroup(const std::vector< Property > &properties, FileType file_type, const std::string &prefix="")
Definition: graph_info.cc:92
const std::vector< Property > & GetProperties() const
Definition: graph_info.cc:103
Status outcome object (success or error)
Definition: status.h:123
static Status Invalid(Args &&... args)
Definition: status.h:188
std::shared_ptr< PropertyGroup > GetPropertyGroup(const std::string &property_name) const
Definition: graph_info.cc:296
Result< std::string > GetPathPrefix(std::shared_ptr< PropertyGroup > property_group) const
Definition: graph_info.cc:280
IdType GetChunkSize() const
Definition: graph_info.cc:263
const std::shared_ptr< const InfoVersion > & version() const
Definition: graph_info.cc:267
Result< std::shared_ptr< VertexInfo > > AddPropertyGroup(std::shared_ptr< PropertyGroup > property_group) const
Definition: graph_info.cc:357
bool HasProperty(const std::string &property_name) const
Definition: graph_info.cc:330
bool HasPropertyGroup(const std::shared_ptr< PropertyGroup > &property_group) const
Definition: graph_info.cc:335
Result< std::string > GetFilePath(std::shared_ptr< PropertyGroup > property_group, IdType chunk_index) const
Definition: graph_info.cc:271
bool IsPrimaryKey(const std::string &property_name) const
Definition: graph_info.cc:314
bool IsValidated() const
Definition: graph_info.cc:374
int PropertyGroupNum() const
Definition: graph_info.cc:292
const std::string & GetPrefix() const
Definition: graph_info.cc:265
const std::string & GetLabel() const
Definition: graph_info.cc:261
Result< std::string > GetVerticesNumFilePath() const
Definition: graph_info.cc:288
VertexInfo(const std::string &label, IdType chunk_size, const PropertyGroupVector &property_groups, const std::string &prefix="", std::shared_ptr< const InfoVersion > version=nullptr)
Definition: graph_info.cc:253
Result< std::shared_ptr< DataType > > GetPropertyType(const std::string &property_name) const
Definition: graph_info.cc:348
bool IsNullableKey(const std::string &property_name) const
Definition: graph_info.cc:322
Result< std::string > Dump() const noexcept
Definition: graph_info.cc:442
static Result< std::shared_ptr< VertexInfo > > Load(std::shared_ptr< Yaml > yaml)
Definition: graph_info.cc:387
std::shared_ptr< PropertyGroup > GetPropertyGroupByIndex(int index) const
Definition: graph_info.cc:302
Status Save(const std::string &file_name) const
Definition: graph_info.cc:480
const PropertyGroupVector & GetPropertyGroups() const
Definition: graph_info.cc:310
static Result< std::shared_ptr< Yaml > > Load(const std::string &input)
Definition: yaml.cc:34