21 #include "arrow/adapters/orc/adapter.h"
23 #include "arrow/api.h"
24 #include "arrow/csv/api.h"
25 #include "arrow/filesystem/api.h"
26 #include "arrow/io/api.h"
27 #include "parquet/arrow/reader.h"
29 #include "graphar/expression.h"
30 #include "graphar/filesystem.h"
31 #include "graphar/graph_info.h"
32 #include "graphar/reader_util.h"
33 #include "graphar/types.h"
35 namespace graphar::util {
// Verifies that the names referenced by `filter_options` are properties of
// `property_group`.
//
// Two groups of names are tested with PropertyGroup::HasProperty:
//   1. every field found by arrow::compute::FieldsInExpression in the value
//      produced by `filter_options.filter->Evaluate()`, when a filter is set;
//   2. every entry of `filter_options.columns`, when it holds a value.
//
// NOTE(review): in this excerpt the statements that consume the message
// arguments below, the closing braces and the final return are not visible,
// so the exact status returned on mismatch / success is not documented here.
45 Status CheckFilterOptions(
46 const FilterOptions& filter_options,
47 const std::shared_ptr<PropertyGroup>& property_group) noexcept {
// The filter is optional; nothing is checked for it when it is unset.
48 if (filter_options.filter) {
// Evaluate() yields the expression that is scanned for field references.
// Presumably GAR_ASSIGN_OR_RAISE returns early when Evaluate() fails - confirm
// against the macro definition.
49 GAR_ASSIGN_OR_RAISE(
auto filter, filter_options.filter->Evaluate());
50 for (
const auto& field : arrow::compute::FieldsInExpression(filter)) {
// NOTE(review): name() is dereferenced without a null check; this assumes
// every field reference in the filter is a plain name - TODO confirm.
51 auto property_name = *field.name();
52 if (!property_group->HasProperty(property_name)) {
// Pieces of the mismatch message for a filter field the group does not have.
54 property_name,
" in the filter does not match the property group: ",
// Column selection is optional as well; `columns` is only read when set.
59 if (filter_options.columns.has_value()) {
60 for (
const auto& col : filter_options.columns.value().get()) {
61 if (!property_group->HasProperty(col)) {
// Pieces of the mismatch message for a requested column the group lacks.
63 col,
" in the columns does not match the property group: ",
// Reads the pair of consecutive offset values stored for vertex `vid` in the
// adjacency-list offset file described by `edge_info`.
//
// Parameters:
//   edge_info     - supplies the source/destination chunk sizes, the offset
//                   file path and the adjacent-list descriptor.
//   prefix        - URI or path passed to FileSystemFromUriOrPath; the
//                   resulting `out_prefix` is prepended to the offset path.
//   adj_list_type - ordered_by_source selects the source chunk size,
//                   ordered_by_dest the destination chunk size.
//   vid           - vertex id, split below into a chunk index and a position
//                   inside that chunk.
//
// Returns the two values taken from column 0 of the offset table, starting at
// row `offset_in_file`, each cast to IdType. Returns Status::Invalid when no
// adjacent list is registered for `adj_list_type`.
//
// NOTE(review): several statements are not visible in this excerpt (the start
// of the rejection branch for other adj list types, one macro opener, closing
// braces), so that branch is only partially documented.
82 Result<std::pair<IdType, IdType>> GetAdjListOffsetOfVertex(
83 const std::shared_ptr<EdgeInfo>& edge_info,
const std::string& prefix,
84 AdjListType adj_list_type, IdType vid) noexcept {
// Pick the chunk size that matches the side the adjacency list is ordered by.
86 IdType vertex_chunk_size;
87 if (adj_list_type == AdjListType::ordered_by_source) {
88 vertex_chunk_size = edge_info->GetSrcChunkSize();
89 }
else if (adj_list_type == AdjListType::ordered_by_dest) {
90 vertex_chunk_size = edge_info->GetDstChunkSize();
// Message pieces used when the adj list type is neither of the two above.
93 "The adj list type has to be ordered_by_source or ordered_by_dest, but "
95 std::string(AdjListTypeToString(adj_list_type)));
// Quotient: which offset chunk file holds the vertex.
// Remainder: the row of the vertex inside that file.
// NOTE(review): there is no visible guard against vertex_chunk_size == 0 or a
// negative vid before this division - TODO confirm callers guarantee both.
98 IdType offset_chunk_index = vid / vertex_chunk_size;
99 IdType offset_in_file = vid % vertex_chunk_size;
101 auto offset_file_path,
102 edge_info->GetAdjListOffsetFilePath(offset_chunk_index, adj_list_type));
103 std::string out_prefix;
104 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
// The adjacent list descriptor provides the file type used to read the table.
105 auto adjacent_list = edge_info->GetAdjacentList(adj_list_type);
106 if (adjacent_list ==
nullptr) {
107 return Status::Invalid(
"The adjacent list is not set for adj list type ",
108 std::string(AdjListTypeToString(adj_list_type)));
110 auto file_type = adjacent_list->GetFileType();
111 std::string path = out_prefix + offset_file_path;
112 GAR_ASSIGN_OR_RAISE(
auto table, fs->ReadFileToTable(path, file_type));
// Take two rows of column 0 starting at offset_in_file and treat the first
// chunk of that slice as an Int64Array.
// NOTE(review): this assumes column 0 exists, holds int64 values, and that both
// rows land in the first chunk of the slice; none of that is checked here -
// TODO confirm for multi-chunk tables and short files.
113 auto array = std::static_pointer_cast<arrow::Int64Array>(
114 table->column(0)->Slice(offset_in_file, 2)->chunk(0));
// Value(0) and Value(1) are the two consecutive entries read for the vertex.
115 return std::make_pair(
static_cast<IdType
>(array->Value(0)),
116 static_cast<IdType
>(array->Value(1)));
119 Result<IdType> GetVertexChunkNum(
120 const std::string& prefix,
121 const std::shared_ptr<VertexInfo>& vertex_info) noexcept {
122 std::string out_prefix;
123 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
124 GAR_ASSIGN_OR_RAISE(
auto vertex_num_file_suffix,
125 vertex_info->GetVerticesNumFilePath());
126 std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
127 GAR_ASSIGN_OR_RAISE(
auto vertex_num,
128 fs->ReadFileToValue<IdType>(vertex_num_file_path));
129 return (vertex_num + vertex_info->GetChunkSize() - 1) /
130 vertex_info->GetChunkSize();
133 Result<IdType> GetVertexNum(
134 const std::string& prefix,
135 const std::shared_ptr<VertexInfo>& vertex_info) noexcept {
136 std::string out_prefix;
137 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
138 GAR_ASSIGN_OR_RAISE(
auto vertex_num_file_suffix,
139 vertex_info->GetVerticesNumFilePath());
140 std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
141 GAR_ASSIGN_OR_RAISE(
auto vertex_num,
142 fs->ReadFileToValue<IdType>(vertex_num_file_path));
146 Result<IdType> GetVertexChunkNum(
const std::string& prefix,
147 const std::shared_ptr<EdgeInfo>& edge_info,
148 AdjListType adj_list_type) noexcept {
149 std::string out_prefix;
150 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
151 GAR_ASSIGN_OR_RAISE(
auto vertex_num_file_suffix,
152 edge_info->GetVerticesNumFilePath(adj_list_type));
153 std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
154 GAR_ASSIGN_OR_RAISE(
auto vertex_num,
155 fs->ReadFileToValue<IdType>(vertex_num_file_path));
157 if (adj_list_type == AdjListType::ordered_by_source ||
158 adj_list_type == AdjListType::unordered_by_source) {
159 chunk_size = edge_info->GetSrcChunkSize();
161 chunk_size = edge_info->GetDstChunkSize();
163 return (vertex_num + chunk_size - 1) / chunk_size;
166 Result<IdType> GetVertexNum(
const std::string& prefix,
167 const std::shared_ptr<EdgeInfo>& edge_info,
168 AdjListType adj_list_type) noexcept {
169 std::string out_prefix;
170 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
171 GAR_ASSIGN_OR_RAISE(
auto vertex_num_file_suffix,
172 edge_info->GetVerticesNumFilePath(adj_list_type));
173 std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
174 GAR_ASSIGN_OR_RAISE(
auto vertex_num,
175 fs->ReadFileToValue<IdType>(vertex_num_file_path));
179 Result<IdType> GetEdgeChunkNum(
const std::string& prefix,
180 const std::shared_ptr<EdgeInfo>& edge_info,
181 AdjListType adj_list_type,
182 IdType vertex_chunk_index) noexcept {
183 std::string out_prefix;
184 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
186 auto edge_num_file_suffix,
187 edge_info->GetEdgesNumFilePath(vertex_chunk_index, adj_list_type));
188 std::string edge_num_file_path = out_prefix + edge_num_file_suffix;
189 GAR_ASSIGN_OR_RAISE(
auto edge_num,
190 fs->ReadFileToValue<IdType>(edge_num_file_path));
191 return (edge_num + edge_info->GetChunkSize() - 1) / edge_info->GetChunkSize();
194 Result<IdType> GetEdgeNum(
const std::string& prefix,
195 const std::shared_ptr<EdgeInfo>& edge_info,
196 AdjListType adj_list_type,
197 IdType vertex_chunk_index) noexcept {
198 std::string out_prefix;
199 GAR_ASSIGN_OR_RAISE(
auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
201 auto edge_num_file_suffix,
202 edge_info->GetEdgesNumFilePath(vertex_chunk_index, adj_list_type));
203 std::string edge_num_file_path = out_prefix + edge_num_file_suffix;
204 GAR_ASSIGN_OR_RAISE(
auto edge_num,
205 fs->ReadFileToValue<IdType>(edge_num_file_path));
static Status Invalid(Args &&... args)