Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
reader_util.cc
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifdef ARROW_ORC
21 #include "arrow/adapters/orc/adapter.h"
22 #endif
23 #include "arrow/api.h"
24 #include "arrow/csv/api.h"
25 #include "arrow/filesystem/api.h"
26 #include "arrow/io/api.h"
27 #include "parquet/arrow/reader.h"
28 
29 #include "graphar/expression.h"
30 #include "graphar/filesystem.h"
31 #include "graphar/graph_info.h"
32 #include "graphar/reader_util.h"
33 #include "graphar/types.h"
34 
35 namespace graphar::util {
36 
45 Status CheckFilterOptions(
46  const FilterOptions& filter_options,
47  const std::shared_ptr<PropertyGroup>& property_group) noexcept {
48  if (filter_options.filter) {
49  GAR_ASSIGN_OR_RAISE(auto filter, filter_options.filter->Evaluate());
50  for (const auto& field : arrow::compute::FieldsInExpression(filter)) {
51  auto property_name = *field.name();
52  if (!property_group->HasProperty(property_name)) {
53  return Status::Invalid(
54  property_name, " in the filter does not match the property group: ",
55  property_group);
56  }
57  }
58  }
59  if (filter_options.columns.has_value()) {
60  for (const auto& col : filter_options.columns.value().get()) {
61  if (!property_group->HasProperty(col)) {
62  return Status::Invalid(
63  col, " in the columns does not match the property group: ",
64  property_group);
65  }
66  }
67  }
68  return Status::OK();
69 }
70 
82 Result<std::pair<IdType, IdType>> GetAdjListOffsetOfVertex(
83  const std::shared_ptr<EdgeInfo>& edge_info, const std::string& prefix,
84  AdjListType adj_list_type, IdType vid) noexcept {
85  // get the adj list offset of id
86  IdType vertex_chunk_size;
87  if (adj_list_type == AdjListType::ordered_by_source) {
88  vertex_chunk_size = edge_info->GetSrcChunkSize();
89  } else if (adj_list_type == AdjListType::ordered_by_dest) {
90  vertex_chunk_size = edge_info->GetDstChunkSize();
91  } else {
92  return Status::Invalid(
93  "The adj list type has to be ordered_by_source or ordered_by_dest, but "
94  "got ",
95  std::string(AdjListTypeToString(adj_list_type)));
96  }
97 
98  IdType offset_chunk_index = vid / vertex_chunk_size;
99  IdType offset_in_file = vid % vertex_chunk_size;
100  GAR_ASSIGN_OR_RAISE(
101  auto offset_file_path,
102  edge_info->GetAdjListOffsetFilePath(offset_chunk_index, adj_list_type));
103  std::string out_prefix;
104  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
105  auto adjacent_list = edge_info->GetAdjacentList(adj_list_type);
106  if (adjacent_list == nullptr) {
107  return Status::Invalid("The adjacent list is not set for adj list type ",
108  std::string(AdjListTypeToString(adj_list_type)));
109  }
110  auto file_type = adjacent_list->GetFileType();
111  std::string path = out_prefix + offset_file_path;
112  GAR_ASSIGN_OR_RAISE(auto table, fs->ReadFileToTable(path, file_type));
113  auto array = std::static_pointer_cast<arrow::Int64Array>(
114  table->column(0)->Slice(offset_in_file, 2)->chunk(0));
115  return std::make_pair(static_cast<IdType>(array->Value(0)),
116  static_cast<IdType>(array->Value(1)));
117 }
118 
119 Result<IdType> GetVertexChunkNum(
120  const std::string& prefix,
121  const std::shared_ptr<VertexInfo>& vertex_info) noexcept {
122  std::string out_prefix;
123  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
124  GAR_ASSIGN_OR_RAISE(auto vertex_num_file_suffix,
125  vertex_info->GetVerticesNumFilePath());
126  std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
127  GAR_ASSIGN_OR_RAISE(auto vertex_num,
128  fs->ReadFileToValue<IdType>(vertex_num_file_path));
129  return (vertex_num + vertex_info->GetChunkSize() - 1) /
130  vertex_info->GetChunkSize();
131 }
132 
133 Result<IdType> GetVertexNum(
134  const std::string& prefix,
135  const std::shared_ptr<VertexInfo>& vertex_info) noexcept {
136  std::string out_prefix;
137  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
138  GAR_ASSIGN_OR_RAISE(auto vertex_num_file_suffix,
139  vertex_info->GetVerticesNumFilePath());
140  std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
141  GAR_ASSIGN_OR_RAISE(auto vertex_num,
142  fs->ReadFileToValue<IdType>(vertex_num_file_path));
143  return vertex_num;
144 }
145 
146 Result<IdType> GetVertexChunkNum(const std::string& prefix,
147  const std::shared_ptr<EdgeInfo>& edge_info,
148  AdjListType adj_list_type) noexcept {
149  std::string out_prefix;
150  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
151  GAR_ASSIGN_OR_RAISE(auto vertex_num_file_suffix,
152  edge_info->GetVerticesNumFilePath(adj_list_type));
153  std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
154  GAR_ASSIGN_OR_RAISE(auto vertex_num,
155  fs->ReadFileToValue<IdType>(vertex_num_file_path));
156  IdType chunk_size;
157  if (adj_list_type == AdjListType::ordered_by_source ||
158  adj_list_type == AdjListType::unordered_by_source) {
159  chunk_size = edge_info->GetSrcChunkSize();
160  } else {
161  chunk_size = edge_info->GetDstChunkSize();
162  }
163  return (vertex_num + chunk_size - 1) / chunk_size;
164 }
165 
166 Result<IdType> GetVertexNum(const std::string& prefix,
167  const std::shared_ptr<EdgeInfo>& edge_info,
168  AdjListType adj_list_type) noexcept {
169  std::string out_prefix;
170  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
171  GAR_ASSIGN_OR_RAISE(auto vertex_num_file_suffix,
172  edge_info->GetVerticesNumFilePath(adj_list_type));
173  std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
174  GAR_ASSIGN_OR_RAISE(auto vertex_num,
175  fs->ReadFileToValue<IdType>(vertex_num_file_path));
176  return vertex_num;
177 }
178 
179 Result<IdType> GetEdgeChunkNum(const std::string& prefix,
180  const std::shared_ptr<EdgeInfo>& edge_info,
181  AdjListType adj_list_type,
182  IdType vertex_chunk_index) noexcept {
183  std::string out_prefix;
184  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
185  GAR_ASSIGN_OR_RAISE(
186  auto edge_num_file_suffix,
187  edge_info->GetEdgesNumFilePath(vertex_chunk_index, adj_list_type));
188  std::string edge_num_file_path = out_prefix + edge_num_file_suffix;
189  GAR_ASSIGN_OR_RAISE(auto edge_num,
190  fs->ReadFileToValue<IdType>(edge_num_file_path));
191  return (edge_num + edge_info->GetChunkSize() - 1) / edge_info->GetChunkSize();
192 }
193 
194 Result<IdType> GetEdgeNum(const std::string& prefix,
195  const std::shared_ptr<EdgeInfo>& edge_info,
196  AdjListType adj_list_type,
197  IdType vertex_chunk_index) noexcept {
198  std::string out_prefix;
199  GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
200  GAR_ASSIGN_OR_RAISE(
201  auto edge_num_file_suffix,
202  edge_info->GetEdgesNumFilePath(vertex_chunk_index, adj_list_type));
203  std::string edge_num_file_path = out_prefix + edge_num_file_suffix;
204  GAR_ASSIGN_OR_RAISE(auto edge_num,
205  fs->ReadFileToValue<IdType>(edge_num_file_path));
206  return edge_num;
207 }
208 
209 } // namespace graphar::util
static Status Invalid(Args &&... args)
Definition: status.h:188
static Status OK()
Definition: status.h:157