Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
chunk_reader.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #pragma once
21 
22 #include <memory>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "graphar/fwd.h"
28 #include "graphar/reader_util.h"
29 
30 // forward declaration
31 namespace arrow {
32 class Array;
33 class Schema;
34 class Table;
35 } // namespace arrow
36 
37 namespace graphar {
38 
43  public:
// Parameter list of the VertexPropertyArrowChunkReader constructor ("Initialize the
// VertexPropertyArrowChunkReader"). The line naming the constructor is missing from
// this listing. `options` defaults to a value-initialized util::FilterOptions ({}).
52  const std::shared_ptr<VertexInfo>& vertex_info,
53  const std::shared_ptr<PropertyGroup>& property_group,
54  const std::string& prefix, const util::FilterOptions& options = {});
55 
/// @brief Sets the chunk position indicator for the reader by internal vertex id.
/// @param id Internal vertex id to seek to.
/// @return Status outcome object (success or error).
/// NOTE(review): the behavior when the id is not found is truncated in the generated
/// docs — confirm against the implementation.
64  Status seek(IdType id);
65 
/// @brief Returns the current arrow chunk table of the chunk position indicator.
/// @return A Result wrapping a shared pointer to the arrow::Table.
69  Result<std::shared_ptr<arrow::Table>> GetChunk();
70 
77 
/// @brief Gets the chunk number of the current vertex property group.
/// @return The value of chunk_num_; the accessor is const and noexcept.
81  IdType GetChunkNum() const noexcept { return chunk_num_; }
82 
/// @brief Applies the row filter to the table.
/// @param filter Row filter to apply; calling Filter() with no argument (default
///        nullptr) clears the filter.
89  void Filter(util::Filter filter = nullptr);
90 
/// @brief Applies the projection to the table to be read.
/// @param column_names Columns to project; calling Select() with no argument
///        (default std::nullopt) clears the projection.
97  void Select(util::ColumnNames column_names = std::nullopt);
98 
/// @brief Creates a VertexPropertyArrowChunkReader instance from vertex info.
/// Takes the same parameter list as the constructor; `options` defaults to {}.
/// @return A Result wrapping a shared pointer to the new reader.
107  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
108  const std::shared_ptr<VertexInfo>& vertex_info,
109  const std::shared_ptr<PropertyGroup>& property_group,
110  const std::string& prefix, const util::FilterOptions& options = {});
111 
/// @brief Factory overload taking a GraphInfo and a vertex label in place of a
/// VertexInfo and prefix.
/// NOTE(review): presumably resolves the vertex info and path prefix from
/// `graph_info` using `label` — confirm against the implementation.
/// @return A Result wrapping a shared pointer to the new reader.
121  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
122  const std::shared_ptr<GraphInfo>& graph_info, const std::string& label,
123  const std::shared_ptr<PropertyGroup>& property_group,
124  const util::FilterOptions& options = {});
125 
/// @brief Factory overload taking a GraphInfo, a vertex label and a property name.
/// NOTE(review): presumably locates the property group that contains
/// `property_name` — confirm against the implementation.
/// @return A Result wrapping a shared pointer to the new reader.
136  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
137  const std::shared_ptr<GraphInfo>& graph_info, const std::string& label,
138  const std::string& property_name,
139  const util::FilterOptions& options = {});
140 
141  private:
// Reader state. The member functions are defined outside this header, so how each
// field is initialized and updated is not visible here.
// NOTE(review): vertex_info_, property_group_, prefix_ and filter_options_ presumably
// hold the corresponding constructor arguments — confirm.
142  std::shared_ptr<VertexInfo> vertex_info_;
143  std::shared_ptr<PropertyGroup> property_group_;
144  std::string prefix_;
145  IdType chunk_index_;
146  IdType seek_id_;
147  IdType chunk_num_;  // value returned by GetChunkNum()
148  IdType vertex_num_;
149  std::shared_ptr<arrow::Schema> schema_;
150  std::shared_ptr<arrow::Table> chunk_table_;
151  util::FilterOptions filter_options_;
152  std::shared_ptr<FileSystem> fs_;
153 };
154 
159  public:
/// Alias for a pair of IdType values.
/// NOTE(review): not referenced by any declaration visible in this listing.
160  using range_t = std::pair<IdType, IdType>;
/// @brief Initializes the AdjListArrowChunkReader (the arrow chunk reader for adj
/// list topology chunks).
/// @param edge_info Shared pointer to the edge info.
/// @param adj_list_type The adjacency list type.
/// @param prefix Path prefix string (NOTE(review): exact meaning not shown here).
168  AdjListArrowChunkReader(const std::shared_ptr<EdgeInfo>& edge_info,
169  AdjListType adj_list_type, const std::string& prefix);
170 
175 
/// @brief Sets the chunk position indicator for the reader by source vertex id.
/// @param id Source vertex id.
/// @return Status outcome object (success or error).
181  Status seek_src(IdType id);
182 
/// @brief Sets the chunk position indicator for the reader by destination vertex id.
/// @param offset Destination vertex id.
/// @return Status outcome object (success or error).
/// NOTE(review): the parameter is named `offset` although it is documented as a
/// vertex id; seek_src() and AdjListPropertyArrowChunkReader::seek_dst() name it `id`.
188  Status seek_dst(IdType offset);
189 
/// @brief Sets the chunk position indicator for the reader by edge index.
/// @param offset Edge index to seek to.
/// @return Status outcome object (success or error).
197  Status seek(IdType offset);
198 
/// @brief Returns the current chunk of the chunk position indicator as arrow::Table.
/// NOTE(review): the behavior for an empty chunk is truncated in the generated
/// docs — confirm against the implementation.
203  Result<std::shared_ptr<arrow::Table>> GetChunk();
204 
/// @brief Gets the number of rows of the current chunk table.
/// @return A Result wrapping the row count as IdType.
208  Result<IdType> GetRowNumOfChunk();
209 
/// @brief Sets the chunk position indicator to the next chunk.
/// @return Status outcome object (success or error).
217  Status next_chunk();
218 
/// @brief Sets the chunk position to the specific vertex chunk and edge chunk.
/// @param vertex_chunk_index Index of the vertex chunk.
/// @param chunk_index Index of the edge chunk; defaults to 0.
/// @return Status outcome object (success or error).
226  Status seek_chunk_index(IdType vertex_chunk_index, IdType chunk_index = 0);
227 
/// @brief Creates an AdjListArrowChunkReader instance from edge info.
/// Takes the same parameter list as the constructor.
/// @return A Result wrapping a shared pointer to the new reader.
235  static Result<std::shared_ptr<AdjListArrowChunkReader>> Make(
236  const std::shared_ptr<EdgeInfo>& edge_info, AdjListType adj_list_type,
237  const std::string& prefix);
238 
/// @brief Factory overload taking a GraphInfo plus source, edge and destination
/// labels in place of an EdgeInfo and prefix.
/// NOTE(review): presumably resolves the edge info and path prefix from
/// `graph_info` using the three labels — confirm against the implementation.
/// @return A Result wrapping a shared pointer to the new reader.
248  static Result<std::shared_ptr<AdjListArrowChunkReader>> Make(
249  const std::shared_ptr<GraphInfo>& graph_info,
250  const std::string& src_label, const std::string& edge_label,
251  const std::string& dst_label, AdjListType adj_list_type);
252 
253  private:
// Private helper returning a Status.
// NOTE(review): judging by the name it initializes or refreshes the edge chunk count
// (chunk_num_) — confirm against the implementation.
254  Status initOrUpdateEdgeChunkNum();
255 
256  private:
// Reader state. The member functions are defined outside this header, so how each
// field is initialized and updated is not visible here.
// NOTE(review): edge_info_, adj_list_type_ and prefix_ presumably hold the
// corresponding constructor arguments — confirm.
257  std::shared_ptr<EdgeInfo> edge_info_;
258  AdjListType adj_list_type_;
259  std::string prefix_;
260  IdType vertex_chunk_index_, chunk_index_;
261  IdType seek_offset_;
262  std::shared_ptr<arrow::Table> chunk_table_;
263  IdType vertex_chunk_num_, chunk_num_;
264  std::string base_dir_;
265  std::shared_ptr<FileSystem> fs_;
266 };
267 
272  public:
/// Alias for a pair of IdType values.
/// NOTE(review): not referenced by any declaration visible in this listing.
273  using range_t = std::pair<IdType, IdType>;
/// @brief Initializes the AdjListOffsetArrowChunkReader (the arrow chunk reader for
/// edge offset).
/// @param edge_info Shared pointer to the edge info.
/// @param adj_list_type The adjacency list type.
/// @param prefix Path prefix string (NOTE(review): exact meaning not shown here).
283  AdjListOffsetArrowChunkReader(const std::shared_ptr<EdgeInfo>& edge_info,
284  AdjListType adj_list_type,
285  const std::string& prefix);
286 
/// @brief Sets the chunk position indicator for the reader by internal vertex id.
/// @param id Internal vertex id to seek to.
/// @return Status outcome object (success or error).
/// NOTE(review): the behavior when the id is not found is truncated in the generated
/// docs — confirm against the implementation.
295  Status seek(IdType id);
296 
/// @brief Gets the current offset chunk as arrow::Array.
/// @return A Result wrapping a shared pointer to the arrow::Array.
300  Result<std::shared_ptr<arrow::Array>> GetChunk();
301 
/// @brief Sets the chunk position indicator to the next chunk.
/// @return Status outcome object (success or error).
/// NOTE(review): the status returned when the current chunk is the last one is
/// truncated in the generated docs — confirm against the implementation.
307  Status next_chunk();
308 
/// @brief Gets the current vertex chunk index.
/// @return The value of chunk_index_; the accessor is const and noexcept.
312  IdType GetChunkIndex() const noexcept { return chunk_index_; }
313 
/// @brief Creates an AdjListOffsetArrowChunkReader instance from edge info.
/// Takes the same parameter list as the constructor.
/// @return A Result wrapping a shared pointer to the new reader.
321  static Result<std::shared_ptr<AdjListOffsetArrowChunkReader>> Make(
322  const std::shared_ptr<EdgeInfo>& edge_info, AdjListType adj_list_type,
323  const std::string& prefix);
324 
/// @brief Factory overload taking a GraphInfo plus source, edge and destination
/// labels in place of an EdgeInfo and prefix.
/// NOTE(review): presumably resolves the edge info and path prefix from
/// `graph_info` using the three labels — confirm against the implementation.
/// @return A Result wrapping a shared pointer to the new reader.
334  static Result<std::shared_ptr<AdjListOffsetArrowChunkReader>> Make(
335  const std::shared_ptr<GraphInfo>& graph_info,
336  const std::string& src_label, const std::string& edge_label,
337  const std::string& dst_label, AdjListType adj_list_type);
338 
339  private:
// Reader state. The member functions are defined outside this header, so how each
// field is initialized and updated is not visible here.
// NOTE(review): edge_info_, adj_list_type_ and prefix_ presumably hold the
// corresponding constructor arguments — confirm.
340  std::shared_ptr<EdgeInfo> edge_info_;
341  AdjListType adj_list_type_;
342  std::string prefix_;
343  IdType chunk_index_;  // value returned by GetChunkIndex()
344  IdType seek_id_;
// NOTE(review): typed as arrow::Table although GetChunk() returns arrow::Array; how
// one is derived from the other is not visible here.
345  std::shared_ptr<arrow::Table> chunk_table_;
346  IdType vertex_chunk_num_;
347  IdType vertex_chunk_size_;
348  std::string base_dir_;
349  std::shared_ptr<FileSystem> fs_;
350 };
351 
356  public:
/// Alias for a pair of IdType values.
/// NOTE(review): not referenced by any declaration visible in this listing.
357  using range_t = std::pair<IdType, IdType>;
// Parameter list of the AdjListPropertyArrowChunkReader constructor ("Initialize the
// AdjListPropertyArrowChunkReader"). The line naming the constructor is missing from
// this listing. `options` defaults to a value-initialized util::FilterOptions ({}).
// NOTE(review): `prefix` is taken as `const std::string` by value here, whereas the
// matching Make() overload and the other readers' constructors take
// `const std::string&`. Switching to a reference would avoid a copy, but the
// out-of-line definition must be changed together with this declaration.
368  const std::shared_ptr<EdgeInfo>& edge_info,
369  const std::shared_ptr<PropertyGroup>& property_group,
370  AdjListType adj_list_type, const std::string prefix,
371  const util::FilterOptions& options = {});
372 
377 
/// @brief Sets the chunk position indicator for the reader by source vertex id.
/// @param id Source vertex id.
/// @return Status outcome object (success or error).
383  Status seek_src(IdType id);
384 
/// @brief Sets the chunk position indicator for the reader by destination vertex id.
/// @param id Destination vertex id.
/// @return Status outcome object (success or error).
390  Status seek_dst(IdType id);
391 
/// @brief Sets the chunk position indicator for the reader by edge index.
/// @param offset Edge index to seek to.
/// @return Status outcome object (success or error).
399  Status seek(IdType offset);
400 
/// @brief Returns the current chunk of the chunk position indicator as arrow::Table.
/// NOTE(review): the behavior for an empty chunk is truncated in the generated
/// docs — confirm against the implementation.
405  Result<std::shared_ptr<arrow::Table>> GetChunk();
406 
/// @brief Sets the chunk position indicator to the next chunk.
/// @return Status outcome object (success or error).
414  Status next_chunk();
415 
/// @brief Sets the chunk position to the specific vertex chunk and edge chunk.
/// @param vertex_chunk_index Index of the vertex chunk.
/// @param chunk_index Index of the edge chunk; defaults to 0.
/// @return Status outcome object (success or error).
423  Status seek_chunk_index(IdType vertex_chunk_index, IdType chunk_index = 0);
424 
/// @brief Applies the row filter to the table.
/// @param filter Row filter to apply; calling Filter() with no argument (default
///        nullptr) clears the filter.
431  void Filter(util::Filter filter = nullptr);
432 
/// @brief Applies the projection to the table to be read.
/// @param column_names Columns to project; calling Select() with no argument
///        (default std::nullopt) clears the projection.
439  void Select(util::ColumnNames column_names = std::nullopt);
440 
/// @brief Creates an AdjListPropertyArrowChunkReader instance from edge info.
/// `prefix` is taken by const reference here; `options` defaults to {}.
/// @return A Result wrapping a shared pointer to the new reader.
450  static Result<std::shared_ptr<AdjListPropertyArrowChunkReader>> Make(
451  const std::shared_ptr<EdgeInfo>& edge_info,
452  const std::shared_ptr<PropertyGroup>& property_group,
453  AdjListType adj_list_type, const std::string& prefix,
454  const util::FilterOptions& options = {});
455 
/// @brief Factory overload taking a GraphInfo plus source, edge and destination
/// labels in place of an EdgeInfo and prefix.
/// NOTE(review): presumably resolves the edge info and path prefix from
/// `graph_info` using the three labels — confirm against the implementation.
/// @return A Result wrapping a shared pointer to the new reader.
469  static Result<std::shared_ptr<AdjListPropertyArrowChunkReader>> Make(
470  const std::shared_ptr<GraphInfo>& graph_info,
471  const std::string& src_label, const std::string& edge_label,
472  const std::string& dst_label,
473  const std::shared_ptr<PropertyGroup>& property_group,
474  AdjListType adj_list_type, const util::FilterOptions& options = {});
475 
/// @brief Factory overload taking a GraphInfo, the three labels and a property name
/// in place of a PropertyGroup.
/// NOTE(review): presumably locates the property group that contains
/// `property_name` — confirm against the implementation.
/// @return A Result wrapping a shared pointer to the new reader.
489  static Result<std::shared_ptr<AdjListPropertyArrowChunkReader>> Make(
490  const std::shared_ptr<GraphInfo>& graph_info,
491  const std::string& src_label, const std::string& edge_label,
492  const std::string& dst_label, const std::string& property_name,
493  AdjListType adj_list_type, const util::FilterOptions& options = {});
494 
495  private:
// Private helper returning a Status.
// NOTE(review): judging by the name it initializes or refreshes the edge chunk count
// (chunk_num_) — confirm against the implementation.
496  Status initOrUpdateEdgeChunkNum();
497 
498  private:
// Reader state. The member functions are defined outside this header, so how each
// field is initialized and updated is not visible here.
// NOTE(review): edge_info_, property_group_, adj_list_type_, prefix_ and
// filter_options_ presumably hold the corresponding constructor arguments — confirm.
499  std::shared_ptr<EdgeInfo> edge_info_;
500  std::shared_ptr<PropertyGroup> property_group_;
501  AdjListType adj_list_type_;
502  std::string prefix_;
503  IdType vertex_chunk_index_, chunk_index_;
504  IdType seek_offset_;
505  std::shared_ptr<arrow::Schema> schema_;
506  std::shared_ptr<arrow::Table> chunk_table_;
507  util::FilterOptions filter_options_;
508  IdType vertex_chunk_num_, chunk_num_;
509  std::string base_dir_;
510  std::shared_ptr<FileSystem> fs_;
511 };
512 } // namespace graphar
The arrow chunk reader for adj list topology chunk.
Definition: chunk_reader.h:158
Status seek_src(IdType id)
Sets chunk position indicator for reader by source vertex id.
Status seek(IdType offset)
Sets chunk position indicator for reader by edge index.
AdjListArrowChunkReader(const std::shared_ptr< EdgeInfo > &edge_info, AdjListType adj_list_type, const std::string &prefix)
Initialize the AdjListArrowChunkReader.
Result< IdType > GetRowNumOfChunk()
Get the number of rows of the current chunk table.
Status seek_chunk_index(IdType vertex_chunk_index, IdType chunk_index=0)
Sets chunk position to the specific vertex chunk and edge chunk.
Result< std::shared_ptr< arrow::Table > > GetChunk()
Return the current chunk of chunk position indicator as arrow::Table, if the chunk is empty,...
static Result< std::shared_ptr< AdjListArrowChunkReader > > Make(const std::shared_ptr< EdgeInfo > &edge_info, AdjListType adj_list_type, const std::string &prefix)
Create an AdjListArrowChunkReader instance from edge info.
Status next_chunk()
Sets chunk position indicator to next chunk.
Status seek_dst(IdType offset)
Sets chunk position indicator for reader by destination vertex id.
The arrow chunk reader for edge offset.
Definition: chunk_reader.h:271
Status seek(IdType id)
Sets chunk position indicator for reader by internal vertex id. If internal vertex id is not found,...
Result< std::shared_ptr< arrow::Array > > GetChunk()
Get the current offset chunk as arrow::Array.
AdjListOffsetArrowChunkReader(const std::shared_ptr< EdgeInfo > &edge_info, AdjListType adj_list_type, const std::string &prefix)
Initialize the AdjListOffsetArrowChunkReader.
static Result< std::shared_ptr< AdjListOffsetArrowChunkReader > > Make(const std::shared_ptr< EdgeInfo > &edge_info, AdjListType adj_list_type, const std::string &prefix)
Create an AdjListOffsetArrowChunkReader instance from edge info.
Status next_chunk()
Sets chunk position indicator to next chunk. If current chunk is the last chunk, will return Status::...
IdType GetChunkIndex() const noexcept
Get current vertex chunk index.
Definition: chunk_reader.h:312
The arrow chunk reader for edge property group chunks.
Definition: chunk_reader.h:355
Status seek_src(IdType id)
Sets chunk position indicator for reader by source vertex id.
Status seek_dst(IdType id)
Sets chunk position indicator for reader by destination vertex id.
Status seek_chunk_index(IdType vertex_chunk_index, IdType chunk_index=0)
Sets chunk position to the specific vertex chunk and edge chunk.
static Result< std::shared_ptr< AdjListPropertyArrowChunkReader > > Make(const std::shared_ptr< EdgeInfo > &edge_info, const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type, const std::string &prefix, const util::FilterOptions &options={})
Create an AdjListPropertyArrowChunkReader instance from edge info.
AdjListPropertyArrowChunkReader(const std::shared_ptr< EdgeInfo > &edge_info, const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type, const std::string prefix, const util::FilterOptions &options={})
Initialize the AdjListPropertyArrowChunkReader.
void Filter(util::Filter filter=nullptr)
Apply the row filter to the table. No parameter call Filter() will clear the filter.
Result< std::shared_ptr< arrow::Table > > GetChunk()
Return the current chunk of chunk position indicator as arrow::Table, if the chunk is empty,...
Status next_chunk()
Sets chunk position indicator to next chunk.
Status seek(IdType offset)
Sets chunk position indicator for reader by edge index.
void Select(util::ColumnNames column_names=std::nullopt)
Apply the projection to the table to be read. No parameter call Select() will clear the projection.
Status outcome object (success or error)
Definition: status.h:123
The arrow chunk reader for vertex property group.
Definition: chunk_reader.h:42
IdType GetChunkNum() const noexcept
Get the chunk number of current vertex property group.
Definition: chunk_reader.h:81
Result< std::shared_ptr< arrow::Table > > GetChunk()
Return the current arrow chunk table of chunk position indicator.
VertexPropertyArrowChunkReader(const std::shared_ptr< VertexInfo > &vertex_info, const std::shared_ptr< PropertyGroup > &property_group, const std::string &prefix, const util::FilterOptions &options={})
Initialize the VertexPropertyArrowChunkReader.
void Filter(util::Filter filter=nullptr)
Apply the row filter to the table. No parameter call Filter() will clear the filter.
Status next_chunk()
Sets chunk position indicator to next chunk.
Status seek(IdType id)
Sets chunk position indicator for reader by internal vertex id. If internal vertex id is not found,...
void Select(util::ColumnNames column_names=std::nullopt)
Apply the projection to the table to be read. No parameter call Select() will clear the projection.
static Result< std::shared_ptr< VertexPropertyArrowChunkReader > > Make(const std::shared_ptr< VertexInfo > &vertex_info, const std::shared_ptr< PropertyGroup > &property_group, const std::string &prefix, const util::FilterOptions &options={})
Create a VertexPropertyArrowChunkReader instance from vertex info.