20 #include <unordered_set>
23 #include "mini-yaml/yaml/Yaml.hpp"
25 #include "graphar/filesystem.h"
26 #include "graphar/graph_info.h"
27 #include "graphar/result.h"
28 #include "graphar/types.h"
29 #include "graphar/version_parser.h"
30 #include "graphar/yaml.h"
// Guard macro for member functions that can call HasAdjacentListType()
// unqualified: when the requested adjacency list type is not present, the
// enclosing function returns Status::KeyError naming that type.
// NOTE(review): some lines of the macro body are not visible in this listing;
// consult the complete file before editing the macro.
34 #define CHECK_HAS_ADJ_LIST_TYPE(adj_list_type) \
36 if (!HasAdjacentListType(adj_list_type)) { \
37 return Status::KeyError( \
38 "Adjacency list type: ", AdjListTypeToString(adj_list_type), \
39 " is not found in edge info."); \
45 std::string ConcatEdgeTriple(
const std::string& src_label,
46 const std::string& edge_label,
47 const std::string& dst_label) {
48 return src_label + REGULAR_SEPARATOR + edge_label + REGULAR_SEPARATOR +
// Returns the index stored under `type` in `key_to_index`, or the compile-time
// NotFoundValue (default -1) when the key is absent.
template <int NotFoundValue = -1>
int LookupKeyIndex(const std::unordered_map<std::string, int>& key_to_index,
                   const std::string& type) {
  const auto entry = key_to_index.find(type);
  return entry != key_to_index.end() ? entry->second : NotFoundValue;
}
// Returns a new vector holding a copy of every element of `values` followed by
// `new_element`. `values` itself is left untouched.
template <typename T>
std::vector<T> AddVectorElement(const std::vector<T>& values, T new_element) {
  std::vector<T> out;
  // Reserve up front so neither the bulk copy nor the final append reallocates.
  out.reserve(values.size() + 1);
  out.insert(out.end(), values.begin(), values.end());
  out.emplace_back(std::move(new_element));
  return out;
}
// Joins path components into one directory-style path: every non-empty
// component is appended and followed by exactly one '/' unless it already ends
// with one.
//
// Empty components are skipped. Calling back() on an empty std::string is
// undefined behaviour, so an empty component must never reach the
// trailing-slash check.
std::string BuildPath(const std::vector<std::string>& paths) {
  std::string path;
  for (const auto& p : paths) {
    if (p.empty()) {
      continue;
    }
    path += p;
    if (p.back() != '/') {
      path += '/';
    }
  }
  return path;
}
86 bool operator==(
const Property& lhs,
const Property& rhs) {
87 return (lhs.name == rhs.name) && (lhs.type == rhs.type) &&
88 (lhs.is_primary == rhs.is_primary) &&
89 (lhs.is_nullable == rhs.is_nullable);
93 FileType file_type,
const std::string& prefix)
94 : properties_(properties), file_type_(file_type), prefix_(prefix) {
95 if (prefix_.empty() && !properties_.empty()) {
96 for (
const auto& p : properties_) {
97 prefix_ += p.name + REGULAR_SEPARATOR;
107 bool PropertyGroup::HasProperty(
const std::string& property_name)
const {
108 for (
const auto& p : properties_) {
109 if (p.name == property_name) {
117 if (prefix_.empty() ||
118 (file_type_ != FileType::CSV && file_type_ != FileType::PARQUET &&
119 file_type_ != FileType::ORC)) {
122 if (properties_.empty()) {
125 std::unordered_set<std::string> check_property_unique_set;
126 for (
const auto& p : properties_) {
127 if (p.name.empty() || p.type ==
nullptr) {
130 if (check_property_unique_set.find(p.name) !=
131 check_property_unique_set.end()) {
134 check_property_unique_set.insert(p.name);
137 if (p.type->id() == Type::LIST && file_type_ == FileType::CSV) {
145 std::shared_ptr<PropertyGroup> CreatePropertyGroup(
146 const std::vector<Property>& properties, FileType file_type,
147 const std::string& prefix) {
148 if (properties.empty()) {
152 return std::make_shared<PropertyGroup>(properties, file_type, prefix);
155 bool operator==(
const PropertyGroup& lhs,
const PropertyGroup& rhs) {
156 return (lhs.GetPrefix() == rhs.GetPrefix()) &&
157 (lhs.GetFileType() == rhs.GetFileType()) &&
158 (lhs.GetProperties() == rhs.GetProperties());
162 const std::string& prefix)
163 : type_(type), file_type_(file_type), prefix_(prefix) {
164 if (prefix_.empty()) {
165 prefix_ = std::string(AdjListTypeToString(type_)) +
"/";
170 if (type_ != AdjListType::unordered_by_source &&
171 type_ != AdjListType::ordered_by_source &&
172 type_ != AdjListType::unordered_by_dest &&
173 type_ != AdjListType::ordered_by_dest) {
176 if (prefix_.empty() ||
177 (file_type_ != FileType::CSV && file_type_ != FileType::PARQUET &&
178 file_type_ != FileType::ORC)) {
184 std::shared_ptr<AdjacentList> CreateAdjacentList(AdjListType type,
186 const std::string& prefix) {
187 return std::make_shared<AdjacentList>(type, file_type, prefix);
// Builds the private state behind a vertex info: stores the arguments, falls
// back to "<label>/" when no prefix is given, and indexes every property of
// every property group by name.
// NOTE(review): parts of the initializer list and of the loop body are not
// visible in this listing.
192 Impl(
const std::string& label, IdType chunk_size,
const std::string& prefix,
193 const PropertyGroupVector& property_groups,
194 std::shared_ptr<const InfoVersion>
version)
196 chunk_size_(chunk_size),
// `property_groups` is a const reference, so std::move() here still copies.
197 property_groups_(std::move(property_groups)),
// An empty prefix defaults to the label followed by '/'.
200 if (prefix_.empty()) {
201 prefix_ = label_ +
"/";
// Record, for each property name, the index of its group plus its primary,
// nullable and type attributes.
203 for (
size_t i = 0; i < property_groups_.size(); i++) {
204 const auto& pg = property_groups_[i];
208 for (
const auto& p : pg->GetProperties()) {
// emplace() does not overwrite: for a repeated name the first entry is kept.
209 property_name_to_index_.emplace(p.name, i);
210 property_name_to_primary_.emplace(p.name, p.is_primary);
211 property_name_to_nullable_.emplace(p.name, p.is_nullable);
212 property_name_to_type_.emplace(p.name, p.type);
217 bool is_validated()
const noexcept {
218 if (label_.empty() || chunk_size_ <= 0 || prefix_.empty()) {
221 std::unordered_set<std::string> check_property_unique_set;
222 for (
const auto& pg : property_groups_) {
224 if (!pg || !pg->IsValidated()) {
228 for (
const auto& p : pg->GetProperties()) {
229 if (check_property_unique_set.find(p.name) !=
230 check_property_unique_set.end()) {
233 check_property_unique_set.insert(p.name);
243 PropertyGroupVector property_groups_;
245 std::shared_ptr<const InfoVersion> version_;
246 std::unordered_map<std::string, int> property_name_to_index_;
247 std::unordered_map<std::string, bool> property_name_to_primary_;
248 std::unordered_map<std::string, bool> property_name_to_nullable_;
249 std::unordered_map<std::string, std::shared_ptr<DataType>>
250 property_name_to_type_;
254 const PropertyGroupVector& property_groups,
255 const std::string& prefix,
256 std::shared_ptr<const InfoVersion> version)
257 : impl_(new
Impl(label, chunk_size, prefix, property_groups, version)) {}
// Defaulted destructor, defined in this implementation file.
// NOTE(review): presumably out-of-line because impl_ owns the Impl type, which
// is only complete here -- confirm against the header.
259 VertexInfo::~VertexInfo() =
default;
268 return impl_->version_;
272 std::shared_ptr<PropertyGroup> property_group, IdType chunk_index)
const {
273 if (property_group ==
nullptr) {
276 return BuildPath({impl_->prefix_, property_group->GetPrefix()}) +
"chunk" +
277 std::to_string(chunk_index);
281 std::shared_ptr<PropertyGroup> property_group)
const {
282 if (property_group ==
nullptr) {
285 return BuildPath({impl_->prefix_, property_group->GetPrefix()});
289 return BuildPath({impl_->prefix_}) +
"vertex_count";
293 return static_cast<int>(impl_->property_groups_.size());
297 const std::string& property_name)
const {
298 int i = LookupKeyIndex(impl_->property_name_to_index_, property_name);
299 return i == -1 ? nullptr : impl_->property_groups_[i];
304 if (index < 0 || index >=
static_cast<int>(impl_->property_groups_.size())) {
307 return impl_->property_groups_[index];
311 return impl_->property_groups_;
315 auto it = impl_->property_name_to_primary_.find(property_name);
316 if (it == impl_->property_name_to_primary_.end()) {
323 auto it = impl_->property_name_to_nullable_.find(property_name);
324 if (it == impl_->property_name_to_nullable_.end()) {
331 return impl_->property_name_to_index_.find(property_name) !=
332 impl_->property_name_to_index_.end();
336 const std::shared_ptr<PropertyGroup>& property_group)
const {
337 if (property_group ==
nullptr) {
340 for (
const auto& pg : impl_->property_groups_) {
341 if (*pg == *property_group) {
349 const std::string& property_name)
const {
350 auto it = impl_->property_name_to_type_.find(property_name);
351 if (it == impl_->property_name_to_type_.end()) {
358 std::shared_ptr<PropertyGroup> property_group)
const {
359 if (property_group ==
nullptr) {
362 for (
const auto& property : property_group->GetProperties()) {
364 return Status::Invalid(
"property in the property group already exists: ",
368 return std::make_shared<VertexInfo>(
369 impl_->label_, impl_->chunk_size_,
370 AddVectorElement(impl_->property_groups_, property_group), impl_->prefix_,
376 std::shared_ptr<VertexInfo> CreateVertexInfo(
377 const std::string& label, IdType chunk_size,
378 const PropertyGroupVector& property_groups,
const std::string& prefix,
379 std::shared_ptr<const InfoVersion> version) {
380 if (label.empty() || chunk_size <= 0) {
383 return std::make_shared<VertexInfo>(label, chunk_size, property_groups,
388 std::shared_ptr<Yaml> yaml) {
389 if (yaml ==
nullptr) {
392 std::string label = yaml->operator[](
"label").As<std::string>();
394 static_cast<IdType
>(yaml->operator[](
"chunk_size").As<int64_t>());
396 if (!yaml->operator[](
"prefix").IsNone()) {
397 prefix = yaml->operator[](
"prefix").As<std::string>();
399 std::shared_ptr<const InfoVersion>
version =
nullptr;
400 if (!yaml->operator[](
"version").IsNone()) {
405 PropertyGroupVector property_groups;
406 auto property_groups_node = yaml->operator[](
"property_groups");
407 if (!property_groups_node.IsNone()) {
408 for (
auto it = property_groups_node.Begin();
409 it != property_groups_node.End(); it++) {
410 std::string pg_prefix;
411 auto& node = (*it).second;
412 if (!node[
"prefix"].IsNone()) {
413 pg_prefix = node[
"prefix"].As<std::string>();
415 auto file_type = StringToFileType(node[
"file_type"].As<std::string>());
416 std::vector<Property> property_vec;
417 auto& properties = node[
"properties"];
418 for (
auto iit = properties.Begin(); iit != properties.End(); iit++) {
419 auto& p_node = (*iit).second;
420 auto property_name = p_node[
"name"].As<std::string>();
422 DataType::TypeNameToDataType(p_node[
"data_type"].As<std::string>());
423 bool is_primary = p_node[
"is_primary"].As<
bool>();
425 p_node[
"is_nullable"].IsNone() || p_node[
"is_nullable"].As<
bool>();
426 property_vec.emplace_back(property_name, property_type, is_primary,
429 property_groups.push_back(
430 std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
433 return std::make_shared<VertexInfo>(label, chunk_size, property_groups,
438 GAR_ASSIGN_OR_RAISE(
auto yaml,
Yaml::Load(input));
446 std::string dump_string;
449 node[
"label"] = impl_->label_;
450 node[
"chunk_size"] = std::to_string(impl_->chunk_size_);
451 node[
"prefix"] = impl_->prefix_;
452 for (
const auto& pg : impl_->property_groups_) {
453 ::Yaml::Node pg_node;
454 if (!pg->GetPrefix().empty()) {
455 pg_node[
"prefix"] = pg->GetPrefix();
457 pg_node[
"file_type"] = FileTypeToString(pg->GetFileType());
458 for (
const auto& p : pg->GetProperties()) {
460 p_node[
"name"] = p.name;
461 p_node[
"data_type"] = p.type->ToTypeName();
462 p_node[
"is_primary"] = p.is_primary ?
"true" :
"false";
463 p_node[
"is_nullable"] = p.is_nullable ?
"true" :
"false";
464 pg_node[
"properties"].PushBack();
465 pg_node[
"properties"][pg_node[
"properties"].Size() - 1] = p_node;
467 node[
"property_groups"].PushBack();
468 node[
"property_groups"][node[
"property_groups"].Size() - 1] = pg_node;
470 if (impl_->version_ !=
nullptr) {
471 node[
"version"] = impl_->version_->ToString();
473 ::Yaml::Serialize(node, dump_string);
474 }
catch (
const std::exception& e) {
481 std::string no_url_path;
482 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(path, &no_url_path));
483 GAR_ASSIGN_OR_RAISE(
auto yaml_content, this->
Dump());
484 return fs->WriteValueToFile(yaml_content, no_url_path);
// Builds the private state behind an edge info: stores the arguments, derives
// a default prefix from the three labels when none is given, and builds the
// lookup tables for adjacent lists (by type) and properties (by name).
// NOTE(review): parts of the initializer list and of the loop bodies are not
// visible in this listing.
489 Impl(
const std::string& src_label,
const std::string& edge_label,
490 const std::string& dst_label, IdType chunk_size, IdType src_chunk_size,
491 IdType dst_chunk_size,
bool directed,
const std::string& prefix,
492 const AdjacentListVector& adjacent_lists,
493 const PropertyGroupVector& property_groups,
494 std::shared_ptr<const InfoVersion>
version)
495 : src_label_(src_label),
496 edge_label_(edge_label),
497 dst_label_(dst_label),
498 chunk_size_(chunk_size),
499 src_chunk_size_(src_chunk_size),
500 dst_chunk_size_(dst_chunk_size),
// Both vectors arrive as const references, so std::move() here still copies.
503 adjacent_lists_(std::move(adjacent_lists)),
504 property_groups_(std::move(property_groups)),
// Default prefix: src, edge and dst labels joined by REGULAR_SEPARATOR, plus
// a trailing '/'.
506 if (prefix_.empty()) {
507 prefix_ = src_label_ + REGULAR_SEPARATOR + edge_label_ +
508 REGULAR_SEPARATOR + dst_label_ +
"/";
// Map each adjacency list type to its position in adjacent_lists_.
510 for (
size_t i = 0; i < adjacent_lists_.size(); i++) {
511 if (!adjacent_lists_[i]) {
515 auto adj_list_type = adjacent_lists_[i]->GetType();
// operator[] overwrites: for a repeated type the last index is kept, so the
// map ends up smaller than the vector.
516 adjacent_list_type_to_index_[adj_list_type] = i;
// Record, for each property name, the index of its group plus its primary,
// nullable and type attributes.
518 for (
size_t i = 0; i < property_groups_.size(); i++) {
519 const auto& pg = property_groups_[i];
523 for (
const auto& p : pg->GetProperties()) {
// emplace() does not overwrite: for a repeated name the first entry is kept.
524 property_name_to_index_.emplace(p.name, i);
525 property_name_to_primary_.emplace(p.name, p.is_primary);
526 property_name_to_nullable_.emplace(p.name, p.is_nullable);
527 property_name_to_type_.emplace(p.name, p.type);
532 bool is_validated()
const noexcept {
533 if (src_label_.empty() || edge_label_.empty() || dst_label_.empty() ||
534 chunk_size_ <= 0 || src_chunk_size_ <= 0 || dst_chunk_size_ <= 0 ||
535 prefix_.empty() || adjacent_lists_.empty()) {
539 for (
const auto& al : adjacent_lists_) {
540 if (!al || !al->IsValidated()) {
545 std::unordered_set<std::string> check_property_unique_set;
546 for (
const auto& pg : property_groups_) {
548 if (!pg || !pg->IsValidated()) {
552 for (
const auto& p : pg->GetProperties()) {
553 if (check_property_unique_set.find(p.name) !=
554 check_property_unique_set.end()) {
557 check_property_unique_set.insert(p.name);
561 if (adjacent_lists_.size() != adjacent_list_type_to_index_.size()) {
567 std::string src_label_;
568 std::string edge_label_;
569 std::string dst_label_;
571 IdType src_chunk_size_;
572 IdType dst_chunk_size_;
575 AdjacentListVector adjacent_lists_;
576 PropertyGroupVector property_groups_;
577 std::unordered_map<AdjListType, int> adjacent_list_type_to_index_;
578 std::unordered_map<std::string, int> property_name_to_index_;
579 std::unordered_map<std::string, bool> property_name_to_primary_;
580 std::unordered_map<std::string, bool> property_name_to_nullable_;
581 std::unordered_map<std::string, std::shared_ptr<DataType>>
582 property_name_to_type_;
583 std::shared_ptr<const InfoVersion> version_;
587 const std::string& dst_label, IdType chunk_size,
588 IdType src_chunk_size, IdType dst_chunk_size,
bool directed,
589 const AdjacentListVector& adjacent_lists,
590 const PropertyGroupVector& property_groups,
591 const std::string& prefix,
592 std::shared_ptr<const InfoVersion> version)
593 : impl_(new
Impl(src_label, edge_label, dst_label, chunk_size,
594 src_chunk_size, dst_chunk_size, directed, prefix,
595 adjacent_lists, property_groups, version)) {}
// Defaulted destructor, defined in this implementation file.
// NOTE(review): presumably out-of-line because impl_ owns the Impl type, which
// is only complete here -- confirm against the header.
597 EdgeInfo::~EdgeInfo() =
default;
616 return impl_->version_;
620 return impl_->adjacent_list_type_to_index_.find(adj_list_type) !=
621 impl_->adjacent_list_type_to_index_.end();
625 return impl_->property_name_to_index_.find(property_name) !=
626 impl_->property_name_to_index_.end();
630 const std::shared_ptr<PropertyGroup>& property_group)
const {
631 if (property_group ==
nullptr) {
634 for (
const auto& pg : impl_->property_groups_) {
635 if (*pg == *property_group) {
642 std::shared_ptr<AdjacentList> EdgeInfo::GetAdjacentList(
643 AdjListType adj_list_type)
const {
644 auto it = impl_->adjacent_list_type_to_index_.find(adj_list_type);
645 if (it == impl_->adjacent_list_type_to_index_.end()) {
648 return impl_->adjacent_lists_[it->second];
652 return static_cast<int>(impl_->property_groups_.size());
656 return impl_->property_groups_;
660 const std::string& property_name)
const {
661 int i = LookupKeyIndex(impl_->property_name_to_index_, property_name);
662 return i == -1 ? nullptr : impl_->property_groups_[i];
667 if (index < 0 || index >=
static_cast<int>(impl_->property_groups_.size())) {
670 return impl_->property_groups_[index];
674 AdjListType adj_list_type)
const {
675 CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
676 int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
677 return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
682 IdType vertex_chunk_index, AdjListType adj_list_type)
const {
683 CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
684 int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
685 return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
686 "edge_count" + std::to_string(vertex_chunk_index);
690 IdType vertex_chunk_index, IdType edge_chunk_index,
691 AdjListType adj_list_type)
const {
692 CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
693 int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
694 return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
695 "adj_list/part" + std::to_string(vertex_chunk_index) +
"/chunk" +
696 std::to_string(edge_chunk_index);
700 AdjListType adj_list_type)
const {
701 CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
702 int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
703 return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
708 IdType vertex_chunk_index, AdjListType adj_list_type)
const {
709 CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
710 int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
711 return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
712 "offset/chunk" + std::to_string(vertex_chunk_index);
716 AdjListType adj_list_type)
const {
717 CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
718 int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
719 return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix()}) +
724 const std::shared_ptr<PropertyGroup>& property_group,
725 AdjListType adj_list_type, IdType vertex_chunk_index,
726 IdType edge_chunk_index)
const {
727 if (property_group ==
nullptr) {
730 CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
731 int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
732 return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix(),
733 property_group->GetPrefix()}) +
734 "part" + std::to_string(vertex_chunk_index) +
"/chunk" +
735 std::to_string(edge_chunk_index);
739 const std::shared_ptr<PropertyGroup>& property_group,
740 AdjListType adj_list_type)
const {
741 if (property_group ==
nullptr) {
744 CHECK_HAS_ADJ_LIST_TYPE(adj_list_type);
745 int i = impl_->adjacent_list_type_to_index_.at(adj_list_type);
746 return BuildPath({impl_->prefix_, impl_->adjacent_lists_[i]->GetPrefix(),
747 property_group->GetPrefix()});
751 const std::string& property_name)
const {
752 auto it = impl_->property_name_to_type_.find(property_name);
753 if (it == impl_->property_name_to_type_.end()) {
760 auto it = impl_->property_name_to_primary_.find(property_name);
761 if (it == impl_->property_name_to_primary_.end()) {
768 auto it = impl_->property_name_to_nullable_.find(property_name);
769 if (it == impl_->property_name_to_nullable_.end()) {
776 std::shared_ptr<AdjacentList> adj_list)
const {
777 if (adj_list ==
nullptr) {
782 AdjListTypeToString(adj_list->GetType()));
784 return std::make_shared<EdgeInfo>(
785 impl_->src_label_, impl_->edge_label_, impl_->dst_label_,
786 impl_->chunk_size_, impl_->src_chunk_size_, impl_->dst_chunk_size_,
787 impl_->directed_, AddVectorElement(impl_->adjacent_lists_, adj_list),
788 impl_->property_groups_, impl_->prefix_, impl_->version_);
792 std::shared_ptr<PropertyGroup> property_group)
const {
793 if (property_group ==
nullptr) {
796 for (
const auto& property : property_group->GetProperties()) {
802 return std::make_shared<EdgeInfo>(
803 impl_->src_label_, impl_->edge_label_, impl_->dst_label_,
804 impl_->chunk_size_, impl_->src_chunk_size_, impl_->dst_chunk_size_,
805 impl_->directed_, impl_->adjacent_lists_,
806 AddVectorElement(impl_->property_groups_, property_group), impl_->prefix_,
812 std::shared_ptr<EdgeInfo> CreateEdgeInfo(
813 const std::string& src_label,
const std::string& edge_label,
814 const std::string& dst_label, IdType chunk_size, IdType src_chunk_size,
815 IdType dst_chunk_size,
bool directed,
816 const AdjacentListVector& adjacent_lists,
817 const PropertyGroupVector& property_groups,
const std::string& prefix,
818 std::shared_ptr<const InfoVersion> version) {
819 if (src_label.empty() || edge_label.empty() || dst_label.empty() ||
820 chunk_size <= 0 || src_chunk_size <= 0 || dst_chunk_size <= 0 ||
821 adjacent_lists.empty()) {
824 return std::make_shared<EdgeInfo>(src_label, edge_label, dst_label,
825 chunk_size, src_chunk_size, dst_chunk_size,
826 directed, adjacent_lists, property_groups,
831 if (yaml ==
nullptr) {
834 std::string src_label = yaml->operator[](
"src_label").As<std::string>();
835 std::string edge_label = yaml->operator[](
"edge_label").As<std::string>();
836 std::string dst_label = yaml->operator[](
"dst_label").As<std::string>();
838 static_cast<IdType
>(yaml->operator[](
"chunk_size").As<int64_t>());
839 IdType src_chunk_size =
840 static_cast<IdType
>(yaml->operator[](
"src_chunk_size").As<int64_t>());
841 IdType dst_chunk_size =
842 static_cast<IdType
>(yaml->operator[](
"dst_chunk_size").As<int64_t>());
843 bool directed = yaml->operator[](
"directed").As<bool>();
845 if (!yaml->operator[](
"prefix").IsNone()) {
846 prefix = yaml->operator[](
"prefix").As<std::string>();
848 std::shared_ptr<const InfoVersion>
version =
nullptr;
849 if (!yaml->operator[](
"version").IsNone()) {
855 AdjacentListVector adjacent_lists;
856 PropertyGroupVector property_groups;
857 auto adj_lists_node = yaml->operator[](
"adj_lists");
858 if (adj_lists_node.IsSequence()) {
859 for (
auto it = adj_lists_node.Begin(); it != adj_lists_node.End(); it++) {
860 auto& node = (*it).second;
861 auto ordered = node[
"ordered"].As<
bool>();
862 auto aligned = node[
"aligned_by"].As<std::string>();
863 auto adj_list_type = OrderedAlignedToAdjListType(ordered, aligned);
864 auto file_type = StringToFileType(node[
"file_type"].As<std::string>());
865 std::string adj_list_prefix;
866 if (!node[
"prefix"].IsNone()) {
867 adj_list_prefix = node[
"prefix"].As<std::string>();
869 adjacent_lists.push_back(std::make_shared<AdjacentList>(
870 adj_list_type, file_type, adj_list_prefix));
873 auto property_groups_node = yaml->operator[](
"property_groups");
874 if (!property_groups_node.IsNone()) {
875 for (
auto pg_it = property_groups_node.Begin();
876 pg_it != property_groups_node.End(); pg_it++) {
877 auto& pg_node = (*pg_it).second;
878 std::string pg_prefix;
879 if (!pg_node[
"prefix"].IsNone()) {
880 pg_prefix = pg_node[
"prefix"].As<std::string>();
882 auto file_type = StringToFileType(pg_node[
"file_type"].As<std::string>());
883 auto properties = pg_node[
"properties"];
884 std::vector<Property> property_vec;
885 for (
auto p_it = properties.Begin(); p_it != properties.End(); p_it++) {
886 auto& p_node = (*p_it).second;
887 auto property_name = p_node[
"name"].As<std::string>();
889 DataType::TypeNameToDataType(p_node[
"data_type"].As<std::string>());
890 bool is_primary = p_node[
"is_primary"].As<
bool>();
892 p_node[
"is_nullable"].IsNone() || p_node[
"is_nullable"].As<
bool>();
893 property_vec.emplace_back(property_name, property_type, is_primary,
896 property_groups.push_back(
897 std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
900 return std::make_shared<EdgeInfo>(src_label, edge_label, dst_label,
901 chunk_size, src_chunk_size, dst_chunk_size,
902 directed, adjacent_lists, property_groups,
907 GAR_ASSIGN_OR_RAISE(
auto yaml,
Yaml::Load(input));
915 std::string dump_string;
918 node[
"src_label"] = impl_->src_label_;
919 node[
"edge_label"] = impl_->edge_label_;
920 node[
"dst_label"] = impl_->dst_label_;
921 node[
"chunk_size"] = std::to_string(impl_->chunk_size_);
922 node[
"src_chunk_size"] = std::to_string(impl_->src_chunk_size_);
923 node[
"dst_chunk_size"] = std::to_string(impl_->dst_chunk_size_);
924 node[
"prefix"] = impl_->prefix_;
925 node[
"directed"] = impl_->directed_ ?
"true" :
"false";
926 for (
const auto& adjacent_list : impl_->adjacent_lists_) {
927 ::Yaml::Node adj_list_node;
928 auto adj_list_type = adjacent_list->GetType();
929 auto pair = AdjListTypeToOrderedAligned(adj_list_type);
930 adj_list_node[
"ordered"] = pair.first ?
"true" :
"false";
931 adj_list_node[
"aligned_by"] = pair.second;
932 adj_list_node[
"prefix"] = adjacent_list->GetPrefix();
933 adj_list_node[
"file_type"] =
934 FileTypeToString(adjacent_list->GetFileType());
935 node[
"adj_lists"].PushBack();
936 node[
"adj_lists"][node[
"adj_lists"].Size() - 1] = adj_list_node;
938 for (
const auto& pg : impl_->property_groups_) {
939 ::Yaml::Node pg_node;
940 if (!pg->GetPrefix().empty()) {
941 pg_node[
"prefix"] = pg->GetPrefix();
943 pg_node[
"file_type"] = FileTypeToString(pg->GetFileType());
944 for (
const auto& p : pg->GetProperties()) {
946 p_node[
"name"] = p.name;
947 p_node[
"data_type"] = p.type->ToTypeName();
948 p_node[
"is_primary"] = p.is_primary ?
"true" :
"false";
949 p_node[
"is_nullable"] = p.is_nullable ?
"true" :
"false";
950 pg_node[
"properties"].PushBack();
951 pg_node[
"properties"][pg_node[
"properties"].Size() - 1] = p_node;
953 node[
"property_groups"].PushBack();
954 node[
"property_groups"][node[
"property_groups"].Size() - 1] = pg_node;
956 if (impl_->version_ !=
nullptr) {
957 node[
"version"] = impl_->version_->ToString();
959 ::Yaml::Serialize(node, dump_string);
960 }
catch (
const std::exception& e) {
967 std::string no_url_path;
968 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(path, &no_url_path));
969 GAR_ASSIGN_OR_RAISE(
auto yaml_content, this->
Dump());
970 return fs->WriteValueToFile(yaml_content, no_url_path);
// Returns the directory part of `path`, including the trailing '/'.
//
// For "s3://" URIs a query string (everything from the last '?') is preserved
// and re-attached after the directory, e.g.
//   "s3://bucket/dir/graph.yml?region=x" -> "s3://bucket/dir/?region=x".
// A path that contains no '/' is returned unchanged.
static std::string PathToDirectory(const std::string& path) {
  if (path.rfind("s3://", 0) == 0) {
    // Keep the position as size_t and test it against npos: storing npos in an
    // int and handing it to substr() made URIs without a query string throw
    // std::out_of_range.
    const size_t query_pos = path.find_last_of('?');
    const bool has_query = query_pos != std::string::npos;
    const std::string prefix = has_query ? path.substr(0, query_pos) : path;
    const std::string suffix =
        has_query ? path.substr(query_pos) : std::string();
    const size_t last_slash_idx = prefix.rfind('/');
    if (std::string::npos != last_slash_idx) {
      // +1 keeps the slash itself.
      return prefix.substr(0, last_slash_idx + 1) + suffix;
    }
    return path;
  }
  const size_t last_slash_idx = path.rfind('/');
  if (std::string::npos != last_slash_idx) {
    // +1 keeps the slash itself.
    return path.substr(0, last_slash_idx + 1);
  }
  return path;
}
// Builds a GraphInfo from an already parsed graph-level YAML document.
//
// `name` and `prefix` start from the supplied defaults and are replaced by the
// "name" / "prefix" keys when present. "version" and "extra_info" are optional.
// For every entry of the "vertices" and "edges" sequences, the file at
// `no_url_path` + entry is read through `fs`, parsed as YAML, and the
// resulting info is appended to the corresponding vector.
//
// NOTE(review): `graph_meta` is dereferenced without a null check.
// NOTE(review): several statements (e.g. the ones producing `vertex_info` and
// `edge_info`) are not visible in this listing.
993 static Result<std::shared_ptr<GraphInfo>> ConstructGraphInfo(
994 std::shared_ptr<Yaml> graph_meta,
const std::string& default_name,
995 const std::string& default_prefix,
const std::shared_ptr<FileSystem> fs,
996 const std::string& no_url_path) {
997 std::string name = default_name;
998 std::string prefix = default_prefix;
// Optional overrides of the defaults.
999 if (!graph_meta->operator[](
"name").IsNone()) {
1000 name = graph_meta->operator[](
"name").As<std::string>();
1002 if (!graph_meta->operator[](
"prefix").IsNone()) {
1003 prefix = graph_meta->operator[](
"prefix").As<std::string>();
// The version stays nullptr unless the document carries a "version" key.
1005 std::shared_ptr<const InfoVersion> version =
nullptr;
1006 if (!graph_meta->operator[](
"version").IsNone()) {
1007 GAR_ASSIGN_OR_RAISE(
1009 graph_meta->operator[](
"version").As<std::string>()));
// "extra_info" is a list of nodes, each with a "key" and a "value" entry.
1011 std::unordered_map<std::string, std::string> extra_info;
1012 if (!graph_meta->operator[](
"extra_info").IsNone()) {
1013 auto& extra_info_node = graph_meta->operator[](
"extra_info");
1014 for (
auto it = extra_info_node.Begin(); it != extra_info_node.End(); it++) {
// Bound by value here; other loops in this file bind the node with auto&.
1015 auto node = (*it).second;
1016 auto key = node[
"key"].As<std::string>();
1017 auto value = node[
"value"].As<std::string>();
// emplace() keeps the first value when a key is repeated.
1018 extra_info.emplace(key, value);
1022 VertexInfoVector vertex_infos;
1023 EdgeInfoVector edge_infos;
// Each "vertices" entry is a file name appended to no_url_path.
1024 const auto& vertices = graph_meta->operator[](
"vertices");
1025 if (vertices.IsSequence()) {
1026 for (
auto it = vertices.Begin(); it != vertices.End(); it++) {
1027 std::string vertex_meta_file =
1028 no_url_path + (*it).second.As<std::string>();
1029 GAR_ASSIGN_OR_RAISE(
auto input,
1030 fs->ReadFileToValue<std::string>(vertex_meta_file));
1031 GAR_ASSIGN_OR_RAISE(
auto vertex_meta,
Yaml::Load(input));
1033 vertex_infos.push_back(vertex_info);
// Each "edges" entry is handled the same way as a vertex entry.
1036 const auto& edges = graph_meta->operator[](
"edges");
1037 if (edges.IsSequence()) {
1038 for (
auto it = edges.Begin(); it != edges.End(); it++) {
1039 std::string edge_meta_file = no_url_path + (*it).second.As<std::string>();
1040 GAR_ASSIGN_OR_RAISE(
auto input,
1041 fs->ReadFileToValue<std::string>(edge_meta_file));
1042 GAR_ASSIGN_OR_RAISE(
auto edge_meta,
Yaml::Load(input));
1044 edge_infos.push_back(edge_info);
1047 return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
1048 version, extra_info);
// Builds the private state behind a graph info: takes ownership of the vertex
// and edge info vectors and indexes them by vertex label and by the
// concatenated (src, edge, dst) label key.
// NOTE(review): parts of the initializer list are not visible in this listing.
1055 Impl(
const std::string& graph_name, VertexInfoVector vertex_infos,
1056 EdgeInfoVector edge_infos,
const std::string& prefix,
1057 std::shared_ptr<const InfoVersion>
version,
1058 const std::unordered_map<std::string, std::string>& extra_info)
1059 : name_(graph_name),
// Both vectors are by-value parameters, so these moves avoid a second copy.
1060 vertex_infos_(std::move(vertex_infos)),
1061 edge_infos_(std::move(edge_infos)),
1064 extra_info_(extra_info) {
// Null entries are skipped, so they get no entry in the label index.
1065 for (
size_t i = 0; i < vertex_infos_.size(); i++) {
1066 if (vertex_infos_[i] !=
nullptr) {
// operator[] overwrites: for a repeated label the last index is kept.
1067 vlabel_to_index_[vertex_infos_[i]->GetLabel()] = i;
1070 for (
size_t i = 0; i < edge_infos_.size(); i++) {
1071 if (edge_infos_[i] !=
nullptr) {
// Edges are keyed by the source, edge and destination labels combined.
1072 std::string edge_key = ConcatEdgeTriple(edge_infos_[i]->GetSrcLabel(),
1073 edge_infos_[i]->GetEdgeLabel(),
1074 edge_infos_[i]->GetDstLabel());
1075 elabel_to_index_[edge_key] = i;
1080 bool is_validated()
const noexcept {
1081 if (name_.empty() || prefix_.empty()) {
1084 for (
const auto& v : vertex_infos_) {
1085 if (!v || !v->IsValidated()) {
1089 for (
const auto& e : edge_infos_) {
1090 if (!e || !e->IsValidated()) {
1094 if (vertex_infos_.size() != vlabel_to_index_.size() ||
1095 edge_infos_.size() != elabel_to_index_.size()) {
1102 VertexInfoVector vertex_infos_;
1103 EdgeInfoVector edge_infos_;
1104 std::string prefix_;
1105 std::shared_ptr<const InfoVersion> version_;
1106 std::unordered_map<std::string, std::string> extra_info_;
1107 std::unordered_map<std::string, int> vlabel_to_index_;
1108 std::unordered_map<std::string, int> elabel_to_index_;
1112 const std::string& graph_name, VertexInfoVector vertex_infos,
1113 EdgeInfoVector edge_infos,
const std::string& prefix,
1114 std::shared_ptr<const InfoVersion> version,
1115 const std::unordered_map<std::string, std::string>& extra_info)
1116 : impl_(new
Impl(graph_name, std::move(vertex_infos), std::move(edge_infos),
1117 prefix, version, extra_info)) {}
// Defaulted destructor, defined in this implementation file.
// NOTE(review): presumably out-of-line because impl_ owns the Impl type, which
// is only complete here -- confirm against the header.
1119 GraphInfo::~GraphInfo() =
default;
1126 return impl_->version_;
1131 return impl_->extra_info_;
1135 const std::string& type)
const {
1137 return i == -1 ? nullptr : impl_->vertex_infos_[i];
1141 return LookupKeyIndex(impl_->vlabel_to_index_, type);
1145 const std::string& src_label,
const std::string& edge_label,
1146 const std::string& dst_label)
const {
1148 return i == -1 ? nullptr : impl_->edge_infos_[i];
1152 const std::string& edge_label,
1153 const std::string& dst_label)
const {
1154 std::string edge_key = ConcatEdgeTriple(src_label, edge_label, dst_label);
1155 return LookupKeyIndex(impl_->elabel_to_index_, edge_key);
1159 return static_cast<int>(impl_->vertex_infos_.size());
1163 return static_cast<int>(impl_->edge_infos_.size());
1168 if (index < 0 || index >=
static_cast<int>(impl_->vertex_infos_.size())) {
1171 return impl_->vertex_infos_[index];
1175 if (index < 0 || index >=
static_cast<int>(impl_->edge_infos_.size())) {
1178 return impl_->edge_infos_[index];
1182 return impl_->vertex_infos_;
1186 return impl_->edge_infos_;
1192 std::shared_ptr<VertexInfo> vertex_info)
const {
1193 if (vertex_info ==
nullptr) {
1199 return std::make_shared<GraphInfo>(
1200 impl_->name_, AddVectorElement(impl_->vertex_infos_, vertex_info),
1201 impl_->edge_infos_, impl_->prefix_, impl_->version_);
1205 std::shared_ptr<EdgeInfo> edge_info)
const {
1206 if (edge_info ==
nullptr) {
1210 edge_info->GetDstLabel()) != -1) {
1213 return std::make_shared<GraphInfo>(
1214 impl_->name_, impl_->vertex_infos_,
1215 AddVectorElement(impl_->edge_infos_, edge_info), impl_->prefix_,
// Factory for GraphInfo: the visible return forwards every argument to the
// GraphInfo constructor through std::make_shared.
// NOTE(review): the lines between the signature and the return are not
// visible in this listing; any argument check done there is undocumented here.
1219 std::shared_ptr<GraphInfo> CreateGraphInfo(
1220 const std::string& name,
const VertexInfoVector& vertex_infos,
1221 const EdgeInfoVector& edge_infos,
const std::string& prefix,
1222 std::shared_ptr<const InfoVersion> version,
1223 const std::unordered_map<std::string, std::string>& extra_info) {
1227 return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
1228 version, extra_info);
1232 std::string no_url_path;
1233 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(path, &no_url_path));
1234 GAR_ASSIGN_OR_RAISE(
auto yaml_content,
1235 fs->ReadFileToValue<std::string>(no_url_path));
1236 GAR_ASSIGN_OR_RAISE(
auto graph_meta,
Yaml::Load(yaml_content));
1237 std::string default_name =
"graph";
1238 std::string default_prefix = PathToDirectory(path);
1239 no_url_path = PathToDirectory(no_url_path);
1240 return ConstructGraphInfo(graph_meta, default_name, default_prefix, fs,
// Fragment of the string-based loader: parses YAML text passed in `input`
// and resolves a filesystem from `relative_location`.
// NOTE(review): the first line of the signature is missing from this
// excerpt -- presumably an overload of GraphInfo::Load; confirm.
1245 const std::string& input,
const std::string& relative_location) {
// Parse the in-memory YAML text.
// NOTE(review): GAR_ASSIGN_OR_RAISE presumably returns early on error;
// the macro definition is not visible here.
1246 GAR_ASSIGN_OR_RAISE(
auto graph_meta,
Yaml::Load(input));
// Default name handed to ConstructGraphInfo.
1247 std::string default_name =
"graph";
// NOTE(review): the initializer of default_prefix is on a line missing from
// this excerpt.
1248 std::string default_prefix =
// Resolve the filesystem for `relative_location`; no_url_path receives the
// location with any URI scheme stripped.
1250 std::string no_url_path;
1251 GAR_ASSIGN_OR_RAISE(
auto fs,
1252 FileSystemFromUriOrPath(relative_location, &no_url_path));
// NOTE(review): the trailing arguments of this call are cut off in this
// excerpt.
1253 return ConstructGraphInfo(graph_meta, default_name, default_prefix, fs,
// Fragment of the YAML serializer for the graph info: fills a ::Yaml::Node
// and serializes it into dump_string.
// NOTE(review): the signature, the declaration of `node`, the `try` opener,
// the loop headers and the final return are missing from this excerpt --
// presumably GraphInfo::Dump(); confirm against the full file.
1262 std::string dump_string;
// Top-level scalar fields.
1264 node[
"name"] = impl_->name_;
1265 node[
"prefix"] = impl_->prefix_;
// Per vertex info: append a new element to the "vertices" sequence and set
// it to "<label>.vertex.yaml".
// NOTE(review): the enclosing loop header is not visible here.
1269 node[
"vertices"].PushBack();
1270 node[
"vertices"][node[
"vertices"].Size() - 1] =
1271 vertex->GetLabel() +
".vertex.yaml";
// Per edge info: append a new element to the "edges" sequence, set to the
// src/edge/dst labels joined by ConcatEdgeTriple plus a suffix.
// NOTE(review): the loop header and the suffix literal are not visible here.
1274 node[
"edges"].PushBack();
1275 node[
"edges"][node[
"edges"].Size() - 1] =
1276 ConcatEdgeTriple(edge->GetSrcLabel(), edge->GetEdgeLabel(),
1277 edge->GetDstLabel()) +
// "version" is written only when a version object is present.
1280 if (impl_->version_ !=
nullptr) {
1281 node[
"version"] = impl_->version_->ToString();
// "extra_info" is written only when the map is non-empty, as a sequence of
// nodes each carrying a "key" and a "value" entry.
1283 if (impl_->extra_info_.size() > 0) {
1285 for (
const auto& pair : impl_->extra_info_) {
1286 ::Yaml::Node extra_info_node;
1287 extra_info_node[
"key"] = pair.first;
1288 extra_info_node[
"value"] = pair.second;
1289 node[
"extra_info"].PushBack();
1290 node[
"extra_info"][node[
"extra_info"].Size() - 1] = extra_info_node;
// Render the assembled node tree into dump_string.
1293 ::Yaml::Serialize(node, dump_string);
// Exceptions derived from std::exception thrown above are caught here.
// NOTE(review): the handler body is missing from this excerpt.
1294 }
catch (
const std::exception& e) {
// Fragment of the save routine: serializes this object with Dump() and
// writes the resulting text to `path`.
// NOTE(review): the signature is missing from this excerpt -- presumably
// GraphInfo::Save(const std::string& path); confirm.
// Receives `path` with any URI scheme stripped.
1301 std::string no_url_path;
// Resolve the filesystem for the destination.
// NOTE(review): GAR_ASSIGN_OR_RAISE presumably returns early on error;
// the macro definition is not visible here.
1302 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(path, &no_url_path));
// Produce the YAML text for this graph info.
1303 GAR_ASSIGN_OR_RAISE(
auto yaml_content, this->
Dump());
// The result of the write is returned to the caller.
1304 return fs->WriteValueToFile(yaml_content, no_url_path);
AdjacentList(AdjListType type, FileType file_type, const std::string &prefix="")
static Result< std::shared_ptr< EdgeInfo > > Load(std::shared_ptr< Yaml > yaml)
Result< std::string > GetAdjListFilePath(IdType vertex_chunk_index, IdType edge_chunk_index, AdjListType adj_list_type) const
Get the file path of adj list topology chunk.
Status Save(const std::string &file_name) const
Result< std::shared_ptr< DataType > > GetPropertyType(const std::string &property_name) const
const std::string & GetPrefix() const
Result< std::string > GetEdgesNumFilePath(IdType vertex_chunk_index, AdjListType adj_list_type) const
Result< std::string > GetPropertyGroupPathPrefix(const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type) const
bool IsPrimaryKey(const std::string &property_name) const
Result< std::string > GetAdjListOffsetFilePath(IdType vertex_chunk_index, AdjListType adj_list_type) const
Get the adjacency list offset chunk file path of the vertex chunk; the offset chunks are aligned with the v...
IdType GetChunkSize() const
bool HasProperty(const std::string &property_name) const
Returns whether the edge info contains the given property.
Result< std::shared_ptr< EdgeInfo > > AddAdjacentList(std::shared_ptr< AdjacentList > adj_list) const
Result< std::string > Dump() const noexcept
bool IsNullableKey(const std::string &property_name) const
std::shared_ptr< PropertyGroup > GetPropertyGroupByIndex(int index) const
Get the property group at the specified index.
bool HasPropertyGroup(const std::shared_ptr< PropertyGroup > &property_group) const
Returns whether the edge info contains the given property group.
const std::shared_ptr< const InfoVersion > & version() const
Result< std::shared_ptr< EdgeInfo > > AddPropertyGroup(std::shared_ptr< PropertyGroup > property_group) const
bool HasAdjacentListType(AdjListType adj_list_type) const
Result< std::string > GetOffsetPathPrefix(AdjListType adj_list_type) const
Result< std::string > GetAdjListPathPrefix(AdjListType adj_list_type) const
Get the path prefix of the adjacency list topology chunk for the given adjacency list type.
IdType GetDstChunkSize() const
Result< std::string > GetPropertyFilePath(const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type, IdType vertex_chunk_index, IdType edge_chunk_index) const
Get the chunk file path of the adj list property group; the property group chunks are aligned with the adj ...
IdType GetSrcChunkSize() const
const std::string & GetSrcLabel() const
const PropertyGroupVector & GetPropertyGroups() const
Get the property groups.
const std::string & GetDstLabel() const
Result< std::string > GetVerticesNumFilePath(AdjListType adj_list_type) const
Get the file path for the number of vertices.
std::shared_ptr< PropertyGroup > GetPropertyGroup(const std::string &property) const
Get the property group containing the given property.
int PropertyGroupNum() const
Get the number of property groups.
const std::string & GetEdgeLabel() const
EdgeInfo(const std::string &src_label, const std::string &edge_label, const std::string &dst_label, IdType chunk_size, IdType src_chunk_size, IdType dst_chunk_size, bool directed, const AdjacentListVector &adjacent_lists, const PropertyGroupVector &property_groups, const std::string &prefix="", std::shared_ptr< const InfoVersion > version=nullptr)
Construct an EdgeInfo object with the given information and property groups.
int GetVertexInfoIndex(const std::string &label) const
Get the vertex info index with the given label.
const EdgeInfoVector & GetEdgeInfos() const
Get the edge infos of graph info.
std::shared_ptr< EdgeInfo > GetEdgeInfo(const std::string &src_label, const std::string &edge_label, const std::string &dst_label) const
Get the edge info with the given source vertex label, edge label, and destination vertex label.
Status Save(const std::string &path) const
const std::shared_ptr< VertexInfo > GetVertexInfoByIndex(int index) const
Get the vertex info at the specified index.
Result< std::shared_ptr< GraphInfo > > AddVertex(std::shared_ptr< VertexInfo > vertex_info) const
Adds a vertex info to the GraphInfo instance and returns a new GraphInfo.
const std::string & GetPrefix() const
Get the absolute path prefix of the chunk files.
static Result< std::shared_ptr< GraphInfo > > Load(const std::string &path)
Loads the input file as a GraphInfo instance.
int GetEdgeInfoIndex(const std::string &src_label, const std::string &edge_label, const std::string &dst_label) const
Get the edge info index with the given source vertex label, edge label, and destination label.
Result< std::string > Dump() const
int EdgeInfoNum() const
Get the number of edge infos.
Result< std::shared_ptr< GraphInfo > > AddEdge(std::shared_ptr< EdgeInfo > edge_info) const
Adds an edge info to the GraphInfo instance and returns a new GraphInfo.
const VertexInfoVector & GetVertexInfos() const
Get the vertex infos of graph info.
const std::unordered_map< std::string, std::string > & GetExtraInfo() const
Get the extra metadata of the graph info object.
int VertexInfoNum() const
Get the number of vertex infos.
const std::string & GetName() const
Get the name of the graph.
GraphInfo(const std::string &graph_name, VertexInfoVector vertex_infos, EdgeInfoVector edge_infos, const std::string &prefix="./", std::shared_ptr< const InfoVersion > version=nullptr, const std::unordered_map< std::string, std::string > &extra_info={})
Constructs a GraphInfo instance.
const std::shared_ptr< const InfoVersion > & version() const
Get the version info of the graph info object.
std::shared_ptr< VertexInfo > GetVertexInfo(const std::string &label) const
Get the vertex info with the given label.
const std::shared_ptr< EdgeInfo > GetEdgeInfoByIndex(int index) const
Get the edge info at the specified index.
static Result< std::shared_ptr< const InfoVersion > > Parse(const std::string &str) noexcept
PropertyGroup(const std::vector< Property > &properties, FileType file_type, const std::string &prefix="")
const std::vector< Property > & GetProperties() const
Status outcome object (success or error)
static Status Invalid(Args &&... args)
std::shared_ptr< PropertyGroup > GetPropertyGroup(const std::string &property_name) const
Result< std::string > GetPathPrefix(std::shared_ptr< PropertyGroup > property_group) const
IdType GetChunkSize() const
const std::shared_ptr< const InfoVersion > & version() const
Result< std::shared_ptr< VertexInfo > > AddPropertyGroup(std::shared_ptr< PropertyGroup > property_group) const
bool HasProperty(const std::string &property_name) const
bool HasPropertyGroup(const std::shared_ptr< PropertyGroup > &property_group) const
Result< std::string > GetFilePath(std::shared_ptr< PropertyGroup > property_group, IdType chunk_index) const
bool IsPrimaryKey(const std::string &property_name) const
int PropertyGroupNum() const
const std::string & GetPrefix() const
const std::string & GetLabel() const
Result< std::string > GetVerticesNumFilePath() const
VertexInfo(const std::string &label, IdType chunk_size, const PropertyGroupVector &property_groups, const std::string &prefix="", std::shared_ptr< const InfoVersion > version=nullptr)
Result< std::shared_ptr< DataType > > GetPropertyType(const std::string &property_name) const
bool IsNullableKey(const std::string &property_name) const
Result< std::string > Dump() const noexcept
static Result< std::shared_ptr< VertexInfo > > Load(std::shared_ptr< Yaml > yaml)
std::shared_ptr< PropertyGroup > GetPropertyGroupByIndex(int index) const
Status Save(const std::string &file_name) const
const PropertyGroupVector & GetPropertyGroups() const
static Result< std::shared_ptr< Yaml > > Load(const std::string &input)