Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
chunk_reader.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #pragma once
21 
22 #include <memory>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "graphar/fwd.h"
28 #include "graphar/reader_util.h"
29 #include "graphar/status.h"
30 
31 // forward declaration
// Only the names of these Arrow classes are declared here: the declarations
// visible in this listing use them solely as std::shared_ptr template
// arguments.
32 namespace arrow {
33 class Array;
34 class Schema;
35 class Table;
36 } // namespace arrow
37 
38 namespace graphar {
39 
44  public:
53  const std::shared_ptr<VertexInfo>& vertex_info,
54  const std::shared_ptr<PropertyGroup>& property_group,
55  const std::string& prefix, const util::FilterOptions& options = {});
65  const std::shared_ptr<VertexInfo>& vertex_info,
66  const std::shared_ptr<PropertyGroup>& property_group,
67  const std::vector<std::string>& property_names, const std::string& prefix,
68  const util::FilterOptions& options = {});
69 
/// @brief Default constructor.
///
/// Creates a reader with no vertex info and an empty prefix. The position and
/// size counters are zero-initialized so that accessors such as GetChunkNum()
/// return a well-defined value on a default-constructed reader instead of
/// reading an indeterminate one.
70  VertexPropertyArrowChunkReader()
      : vertex_info_(nullptr),
        prefix_(""),
        chunk_index_(0),
        seek_id_(0),
        chunk_num_(0),
        vertex_num_(0) {}
71 
/// @brief Initialize the VertexPropertyArrowChunkReader from vertex info and a
///        list of labels.
/// @param vertex_info The vertex info.
/// @param labels The label names (presumably the labels whose chunks are to
///        be read — TODO confirm against the implementation).
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
79  VertexPropertyArrowChunkReader(const std::shared_ptr<VertexInfo>& vertex_info,
80  const std::vector<std::string>& labels,
81  const std::string& prefix,
82  const util::FilterOptions& options = {});
/// @brief Sets chunk position indicator for reader by internal vertex id.
/// @param id The internal vertex id.
/// @return Status of the operation. NOTE(review): the handling of an id that
///         is not found is defined by the implementation, which is not
///         visible in this listing.
91  Status seek(IdType id);
92 
/// @brief Return the current arrow chunk table of chunk position indicator.
/// @param version Which GetChunk variant to use; defaults to
///        GetChunkVersion::AUTO. Presumably selects between the private
///        GetChunkV1()/GetChunkV2() helpers — TODO confirm in the .cc file.
/// @return Result holding the chunk as arrow::Table.
96  Result<std::shared_ptr<arrow::Table>> GetChunk(
97  GetChunkVersion version = GetChunkVersion::AUTO);
/// @brief Return the current arrow label chunk table of chunk position
///        indicator.
/// @return Result holding the label chunk as arrow::Table.
102  Result<std::shared_ptr<arrow::Table>> GetLabelChunk();
/// @brief Sets chunk position indicator to next chunk.
/// @return Status of the operation. NOTE(review): the result when the reader
///         is already at the last chunk is not visible in this listing.
108  Status next_chunk();
109 
/// @brief Get the chunk number of current vertex property group.
/// @return The stored chunk_num_ value.
113  IdType GetChunkNum() const noexcept { return chunk_num_; }
114 
/// @brief Apply the row filter to the table.
///
/// Calling Filter() with no argument (the default is nullptr) clears the
/// filter.
/// @param filter The row filter to apply.
121  void Filter(util::Filter filter = nullptr);
122 
/// @brief Apply the projection to the table to be read.
///
/// Calling Select() with no argument (the default is std::nullopt) clears the
/// projection.
/// @param column_names The names of the columns to select.
129  void Select(util::ColumnNames column_names = std::nullopt);
130 
/// @brief Create a VertexPropertyArrowChunkReader instance from vertex info.
/// @param vertex_info The vertex info.
/// @param property_group The property group of the vertex.
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
139  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
140  const std::shared_ptr<VertexInfo>& vertex_info,
141  const std::shared_ptr<PropertyGroup>& property_group,
142  const std::string& prefix, const util::FilterOptions& options = {});
143 
/// @brief Create a VertexPropertyArrowChunkReader instance from vertex info,
///        additionally taking a list of property names.
/// @param vertex_info The vertex info.
/// @param property_group The property group of the vertex.
/// @param property_names Property names (presumably the subset of the group's
///        properties to read — TODO confirm against the implementation).
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
153  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
154  const std::shared_ptr<VertexInfo>& vertex_info,
155  const std::shared_ptr<PropertyGroup>& property_group,
156  const std::vector<std::string>& property_names, const std::string& prefix,
157  const util::FilterOptions& options = {});
158 
/// @brief Create a VertexPropertyArrowChunkReader instance from graph info
///        and a property group.
/// @param graph_info The graph info.
/// @param type The vertex type (presumably used to look up the vertex info in
///        graph_info — TODO confirm).
/// @param property_group The property group of the vertex.
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
168  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
169  const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
170  const std::shared_ptr<PropertyGroup>& property_group,
171  const util::FilterOptions& options = {});
172 
/// @brief Create a VertexPropertyArrowChunkReader instance from graph info
///        and a single property name.
/// @param graph_info The graph info.
/// @param type The vertex type (presumably used to look up the vertex info in
///        graph_info — TODO confirm).
/// @param property_name The name of a property (presumably used to locate the
///        property group that contains it — TODO confirm).
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
183  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
184  const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
185  const std::string& property_name,
186  const util::FilterOptions& options = {});
/// @brief Create a VertexPropertyArrowChunkReader instance from graph info
///        and a list of property names or labels.
/// @param graph_info The graph info.
/// @param type The vertex type (presumably used to look up the vertex info in
///        graph_info — TODO confirm).
/// @param property_names_or_labels Names interpreted either as property names
///        or as labels.
/// @param select_type Presumably chooses whether the names are treated as
///        properties or as labels — TODO confirm against SelectType.
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
198  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
199  const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
200  const std::vector<std::string>& property_names_or_labels,
201  const SelectType select_type, const util::FilterOptions& options = {});
202 
/// @brief Create a VertexPropertyArrowChunkReader instance from vertex info
///        and a list of labels.
/// @param vertex_info The vertex info.
/// @param labels The label names.
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
212  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
213  const std::shared_ptr<VertexInfo>& vertex_info,
214  const std::vector<std::string>& labels, const std::string& prefix,
215  const util::FilterOptions& options = {});
216 
/// @brief Create a VertexPropertyArrowChunkReader instance from graph info
///        for properties.
/// @param graph_info The graph info.
/// @param type The vertex type.
/// @param property_names The names of the properties.
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
227  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>>
228  MakeForProperties(const std::shared_ptr<GraphInfo>& graph_info,
229  const std::string& type,
230  const std::vector<std::string>& property_names,
231  const util::FilterOptions& options = {});
232 
/// @brief Create a VertexPropertyArrowChunkReader instance from graph info
///        for labels.
/// @param graph_info The graph info.
/// @param type The vertex type.
/// @param labels The label names.
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
243  static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> MakeForLabels(
244  const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
245  const std::vector<std::string>& labels,
246  const util::FilterOptions& options = {});
247 
248  private:
/// @brief Private variant of GetChunk().
/// NOTE(review): presumably the implementation chosen for one of the
/// GetChunkVersion values — confirm against the .cc file.
252  Result<std::shared_ptr<arrow::Table>> GetChunkV1();
/// @brief Private variant of GetChunk().
/// NOTE(review): presumably the implementation chosen for one of the
/// GetChunkVersion values — confirm against the .cc file.
256  Result<std::shared_ptr<arrow::Table>> GetChunkV2();
257 
258  private:
// Data members of the vertex property chunk reader.
// Vertex info handle; the default constructor sets it to nullptr.
259  std::shared_ptr<VertexInfo> vertex_info_;
// Presumably the property group / property names given at construction —
// TODO confirm against the .cc file.
260  std::shared_ptr<PropertyGroup> property_group_;
261  std::vector<std::string> property_names_;
// Path prefix; the default constructor sets it to the empty string.
262  std::string prefix_;
// Presumably the labels given to the label-based constructor — TODO confirm.
263  std::vector<std::string> labels_;
// Presumably the position state moved by seek()/next_chunk() — TODO confirm.
264  IdType chunk_index_;
265  IdType seek_id_;
// Value returned by GetChunkNum().
266  IdType chunk_num_;
// Presumably the total number of vertices — TODO confirm.
267  IdType vertex_num_;
// Presumably the cached schema and table of the current chunk — TODO confirm.
268  std::shared_ptr<arrow::Schema> schema_;
269  std::shared_ptr<arrow::Table> chunk_table_;
// Presumably holds the settings changed by Filter()/Select() — TODO confirm.
270  util::FilterOptions filter_options_;
// File system handle, presumably used to read the chunk files — TODO confirm.
271  std::shared_ptr<FileSystem> fs_;
272 };
273 
278  public:
/// Alias for a pair of IdType values. It is not referenced by any other
/// declaration visible in this listing.
279  using range_t = std::pair<IdType, IdType>;
/// @brief Initialize the AdjListArrowChunkReader.
/// @param edge_info The edge info.
/// @param adj_list_type The adjacency list type of the edge.
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
287  AdjListArrowChunkReader(const std::shared_ptr<EdgeInfo>& edge_info,
288  AdjListType adj_list_type, const std::string& prefix);
289 
294 
/// @brief Sets chunk position indicator for reader by source vertex id.
/// @param id The source vertex id.
/// @return Status of the operation (success or error).
300  Status seek_src(IdType id);
301 
/// @brief Sets chunk position indicator for reader by destination vertex id.
/// @param id The destination vertex id.
/// @return Status of the operation (success or error).
307  Status seek_dst(IdType id);
308 
/// @brief Sets chunk position indicator for reader by edge index.
/// @param offset The edge index.
/// @return Status of the operation (success or error).
316  Status seek(IdType offset);
317 
/// @brief Return the current chunk of chunk position indicator as
///        arrow::Table.
/// NOTE(review): the behaviour for an empty chunk is defined by the
/// implementation, which is not visible in this listing.
/// @return Result holding the chunk as arrow::Table.
322  Result<std::shared_ptr<arrow::Table>> GetChunk();
323 
/// @brief Get the number of rows of the current chunk table.
/// @return Result holding the row count.
327  Result<IdType> GetRowNumOfChunk();
328 
/// @brief Sets chunk position indicator to next chunk.
/// @return Status of the operation. NOTE(review): the result when the reader
///         is already at the last chunk is not visible in this listing.
336  Status next_chunk();
337 
/// @brief Sets chunk position to the specific vertex chunk and edge chunk.
/// @param vertex_chunk_index The index of the vertex chunk.
/// @param chunk_index The index of the edge chunk; defaults to 0.
/// @return Status of the operation (success or error).
345  Status seek_chunk_index(IdType vertex_chunk_index, IdType chunk_index = 0);
346 
/// @brief Create an AdjListArrowChunkReader instance from edge info.
/// @param edge_info The edge info.
/// @param adj_list_type The adjacency list type of the edge.
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
/// @return Result holding a shared pointer to the new reader.
354  static Result<std::shared_ptr<AdjListArrowChunkReader>> Make(
355  const std::shared_ptr<EdgeInfo>& edge_info, AdjListType adj_list_type,
356  const std::string& prefix);
357 
/// @brief Create an AdjListArrowChunkReader instance from graph info.
/// @param graph_info The graph info.
/// @param src_type The source vertex type of the edge.
/// @param edge_type The edge type.
/// @param dst_type The destination vertex type of the edge.
/// @param adj_list_type The adjacency list type of the edge.
/// @return Result holding a shared pointer to the new reader. Presumably the
///         edge info is looked up in graph_info from the three type names —
///         TODO confirm against the implementation.
367  static Result<std::shared_ptr<AdjListArrowChunkReader>> Make(
368  const std::shared_ptr<GraphInfo>& graph_info, const std::string& src_type,
369  const std::string& edge_type, const std::string& dst_type,
370  AdjListType adj_list_type);
371 
372  private:
/// Private helper. NOTE(review): judging by its name it presumably
/// (re)computes the edge chunk count for the current vertex chunk — confirm
/// against the implementation.
373  Status initOrUpdateEdgeChunkNum();
374 
375  private:
// Data members of the adjacency list topology chunk reader.
// Presumably the constructor arguments of the same names — TODO confirm.
376  std::shared_ptr<EdgeInfo> edge_info_;
377  AdjListType adj_list_type_;
378  std::string prefix_;
// Presumably the current vertex chunk / edge chunk position, as set by
// seek_chunk_index() — TODO confirm.
379  IdType vertex_chunk_index_, chunk_index_;
// Presumably the edge index last passed to seek() — TODO confirm.
380  IdType seek_offset_;
// Presumably the cached table of the current chunk — TODO confirm.
381  std::shared_ptr<arrow::Table> chunk_table_;
// Presumably the number of vertex chunks and of edge chunks — TODO confirm.
382  IdType vertex_chunk_num_, chunk_num_;
// Presumably the directory holding the chunk files — TODO confirm.
383  std::string base_dir_;
// File system handle, presumably used to read the chunk files — TODO confirm.
384  std::shared_ptr<FileSystem> fs_;
385 };
386 
391  public:
/// Alias for a pair of IdType values. It is not referenced by any other
/// declaration visible in this listing.
392  using range_t = std::pair<IdType, IdType>;
/// @brief Initialize the AdjListOffsetArrowChunkReader.
/// @param edge_info The edge info.
/// @param adj_list_type The adjacency list type of the edge.
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
402  AdjListOffsetArrowChunkReader(const std::shared_ptr<EdgeInfo>& edge_info,
403  AdjListType adj_list_type,
404  const std::string& prefix);
405 
/// @brief Sets chunk position indicator for reader by internal vertex id.
/// @param id The internal vertex id.
/// @return Status of the operation. NOTE(review): the handling of an id that
///         is not found is defined by the implementation, which is not
///         visible in this listing.
414  Status seek(IdType id);
415 
/// @brief Get the current offset chunk as arrow::Array.
/// @return Result holding the offset chunk.
419  Result<std::shared_ptr<arrow::Array>> GetChunk();
420 
/// @brief Sets chunk position indicator to next chunk.
/// @return Status of the operation. NOTE(review): a specific Status is
///         returned when the current chunk is the last one; which one is not
///         visible in this listing.
426  Status next_chunk();
427 
/// @brief Get current vertex chunk index.
/// @return The stored chunk_index_ value.
431  IdType GetChunkIndex() const noexcept { return chunk_index_; }
432 
/// @brief Create an AdjListOffsetArrowChunkReader instance from edge info.
/// @param edge_info The edge info.
/// @param adj_list_type The adjacency list type of the edge.
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
/// @return Result holding a shared pointer to the new reader.
440  static Result<std::shared_ptr<AdjListOffsetArrowChunkReader>> Make(
441  const std::shared_ptr<EdgeInfo>& edge_info, AdjListType adj_list_type,
442  const std::string& prefix);
443 
/// @brief Create an AdjListOffsetArrowChunkReader instance from graph info.
/// @param graph_info The graph info.
/// @param src_type The source vertex type of the edge.
/// @param edge_type The edge type.
/// @param dst_type The destination vertex type of the edge.
/// @param adj_list_type The adjacency list type of the edge.
/// @return Result holding a shared pointer to the new reader. Presumably the
///         edge info is looked up in graph_info from the three type names —
///         TODO confirm against the implementation.
453  static Result<std::shared_ptr<AdjListOffsetArrowChunkReader>> Make(
454  const std::shared_ptr<GraphInfo>& graph_info, const std::string& src_type,
455  const std::string& edge_type, const std::string& dst_type,
456  AdjListType adj_list_type);
457 
458  private:
// Data members of the edge offset chunk reader.
// Presumably the constructor arguments of the same names — TODO confirm.
459  std::shared_ptr<EdgeInfo> edge_info_;
460  AdjListType adj_list_type_;
461  std::string prefix_;
// Value returned by GetChunkIndex().
462  IdType chunk_index_;
// Presumably the vertex id last passed to seek() — TODO confirm.
463  IdType seek_id_;
// NOTE(review): stored as arrow::Table although GetChunk() returns
// arrow::Array; the conversion is not visible in this listing.
464  std::shared_ptr<arrow::Table> chunk_table_;
// Presumably the number of vertex chunks and the vertices per chunk —
// TODO confirm.
465  IdType vertex_chunk_num_;
466  IdType vertex_chunk_size_;
// Presumably the directory holding the chunk files — TODO confirm.
467  std::string base_dir_;
// File system handle, presumably used to read the chunk files — TODO confirm.
468  std::shared_ptr<FileSystem> fs_;
469 };
470 
475  public:
/// Alias for a pair of IdType values. It is not referenced by any other
/// declaration visible in this listing.
476  using range_t = std::pair<IdType, IdType>;
487  const std::shared_ptr<EdgeInfo>& edge_info,
488  const std::shared_ptr<PropertyGroup>& property_group,
489  AdjListType adj_list_type, const std::string prefix,
490  const util::FilterOptions& options = {});
491 
496 
/// @brief Sets chunk position indicator for reader by source vertex id.
/// @param id The source vertex id.
/// @return Status of the operation (success or error).
502  Status seek_src(IdType id);
503 
/// @brief Sets chunk position indicator for reader by destination vertex id.
/// @param id The destination vertex id.
/// @return Status of the operation (success or error).
509  Status seek_dst(IdType id);
510 
/// @brief Sets chunk position indicator for reader by edge index.
/// @param offset The edge index.
/// @return Status of the operation (success or error).
518  Status seek(IdType offset);
519 
/// @brief Return the current chunk of chunk position indicator as
///        arrow::Table.
/// NOTE(review): the behaviour for an empty chunk is defined by the
/// implementation, which is not visible in this listing.
/// @return Result holding the chunk as arrow::Table.
524  Result<std::shared_ptr<arrow::Table>> GetChunk();
525 
/// @brief Sets chunk position indicator to next chunk.
/// @return Status of the operation. NOTE(review): the result when the reader
///         is already at the last chunk is not visible in this listing.
533  Status next_chunk();
534 
/// @brief Sets chunk position to the specific vertex chunk and edge chunk.
/// @param vertex_chunk_index The index of the vertex chunk.
/// @param chunk_index The index of the edge chunk; defaults to 0.
/// @return Status of the operation (success or error).
542  Status seek_chunk_index(IdType vertex_chunk_index, IdType chunk_index = 0);
543 
/// @brief Apply the row filter to the table.
///
/// Calling Filter() with no argument (the default is nullptr) clears the
/// filter.
/// @param filter The row filter to apply.
550  void Filter(util::Filter filter = nullptr);
551 
/// @brief Apply the projection to the table to be read.
///
/// Calling Select() with no argument (the default is std::nullopt) clears the
/// projection.
/// @param column_names The names of the columns to select.
558  void Select(util::ColumnNames column_names = std::nullopt);
559 
/// @brief Create an AdjListPropertyArrowChunkReader instance from edge info.
/// @param edge_info The edge info.
/// @param property_group The property group of the edge.
/// @param adj_list_type The adjacency list type of the edge.
/// @param prefix The path prefix (presumably of the graph's chunk files —
///        TODO confirm).
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
569  static Result<std::shared_ptr<AdjListPropertyArrowChunkReader>> Make(
570  const std::shared_ptr<EdgeInfo>& edge_info,
571  const std::shared_ptr<PropertyGroup>& property_group,
572  AdjListType adj_list_type, const std::string& prefix,
573  const util::FilterOptions& options = {});
574 
/// @brief Create an AdjListPropertyArrowChunkReader instance from graph info
///        and a property group.
/// @param graph_info The graph info.
/// @param src_type The source vertex type of the edge.
/// @param edge_type The edge type.
/// @param dst_type The destination vertex type of the edge.
/// @param property_group The property group of the edge.
/// @param adj_list_type The adjacency list type of the edge.
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader. Presumably the
///         edge info is looked up in graph_info from the three type names —
///         TODO confirm against the implementation.
588  static Result<std::shared_ptr<AdjListPropertyArrowChunkReader>> Make(
589  const std::shared_ptr<GraphInfo>& graph_info, const std::string& src_type,
590  const std::string& edge_type, const std::string& dst_type,
591  const std::shared_ptr<PropertyGroup>& property_group,
592  AdjListType adj_list_type, const util::FilterOptions& options = {});
593 
/// @brief Create an AdjListPropertyArrowChunkReader instance from graph info
///        and a single property name.
/// @param graph_info The graph info.
/// @param src_type The source vertex type of the edge.
/// @param edge_type The edge type.
/// @param dst_type The destination vertex type of the edge.
/// @param property_name The name of a property (presumably used to locate the
///        property group that contains it — TODO confirm).
/// @param adj_list_type The adjacency list type of the edge.
/// @param options Filter options; defaults to a value-initialized
///        util::FilterOptions.
/// @return Result holding a shared pointer to the new reader.
607  static Result<std::shared_ptr<AdjListPropertyArrowChunkReader>> Make(
608  const std::shared_ptr<GraphInfo>& graph_info, const std::string& src_type,
609  const std::string& edge_type, const std::string& dst_type,
610  const std::string& property_name, AdjListType adj_list_type,
611  const util::FilterOptions& options = {});
612 
613  private:
/// Private helper. NOTE(review): judging by its name it presumably
/// (re)computes the edge chunk count for the current vertex chunk — confirm
/// against the implementation.
614  Status initOrUpdateEdgeChunkNum();
615 
616  private:
// Data members of the edge property group chunk reader.
// Presumably the constructor arguments of the same names — TODO confirm.
617  std::shared_ptr<EdgeInfo> edge_info_;
618  std::shared_ptr<PropertyGroup> property_group_;
619  AdjListType adj_list_type_;
620  std::string prefix_;
// Presumably the current vertex chunk / edge chunk position, as set by
// seek_chunk_index() — TODO confirm.
621  IdType vertex_chunk_index_, chunk_index_;
// Presumably the edge index last passed to seek() — TODO confirm.
622  IdType seek_offset_;
// Presumably the cached schema and table of the current chunk — TODO confirm.
623  std::shared_ptr<arrow::Schema> schema_;
624  std::shared_ptr<arrow::Table> chunk_table_;
// Presumably holds the settings changed by Filter()/Select() — TODO confirm.
625  util::FilterOptions filter_options_;
// Presumably the number of vertex chunks and of edge chunks — TODO confirm.
626  IdType vertex_chunk_num_, chunk_num_;
// Presumably the directory holding the chunk files — TODO confirm.
627  std::string base_dir_;
// File system handle, presumably used to read the chunk files — TODO confirm.
628  std::shared_ptr<FileSystem> fs_;
629 };
630 } // namespace graphar
The arrow chunk reader for adj list topology chunk.
Definition: chunk_reader.h:277
Status seek_src(IdType id)
Sets chunk position indicator for reader by source vertex id.
Status seek(IdType offset)
Sets chunk position indicator for reader by edge index.
AdjListArrowChunkReader(const std::shared_ptr< EdgeInfo > &edge_info, AdjListType adj_list_type, const std::string &prefix)
Initialize the AdjListArrowChunkReader.
Result< IdType > GetRowNumOfChunk()
Get the number of rows of the current chunk table.
Status seek_chunk_index(IdType vertex_chunk_index, IdType chunk_index=0)
Sets chunk position to the specific vertex chunk and edge chunk.
Result< std::shared_ptr< arrow::Table > > GetChunk()
Return the current chunk at the chunk position indicator as an arrow::Table; if the chunk is empty,...
static Result< std::shared_ptr< AdjListArrowChunkReader > > Make(const std::shared_ptr< EdgeInfo > &edge_info, AdjListType adj_list_type, const std::string &prefix)
Create an AdjListArrowChunkReader instance from edge info.
Status next_chunk()
Sets chunk position indicator to next chunk.
Status seek_dst(IdType offset)
Sets chunk position indicator for reader by destination vertex id.
The arrow chunk reader for edge offset.
Definition: chunk_reader.h:390
Status seek(IdType id)
Sets chunk position indicator for reader by internal vertex id. If internal vertex id is not found,...
Result< std::shared_ptr< arrow::Array > > GetChunk()
Get the current offset chunk as arrow::Array.
AdjListOffsetArrowChunkReader(const std::shared_ptr< EdgeInfo > &edge_info, AdjListType adj_list_type, const std::string &prefix)
Initialize the AdjListOffsetArrowChunkReader.
static Result< std::shared_ptr< AdjListOffsetArrowChunkReader > > Make(const std::shared_ptr< EdgeInfo > &edge_info, AdjListType adj_list_type, const std::string &prefix)
Create an AdjListOffsetArrowChunkReader instance from edge info.
Status next_chunk()
Sets chunk position indicator to next chunk. If the current chunk is the last chunk, will return Status::...
IdType GetChunkIndex() const noexcept
Get current vertex chunk index.
Definition: chunk_reader.h:431
The arrow chunk reader for edge property group chunks.
Definition: chunk_reader.h:474
Status seek_src(IdType id)
Sets chunk position indicator for reader by source vertex id.
Status seek_dst(IdType id)
Sets chunk position indicator for reader by destination vertex id.
Status seek_chunk_index(IdType vertex_chunk_index, IdType chunk_index=0)
Sets chunk position to the specific vertex chunk and edge chunk.
static Result< std::shared_ptr< AdjListPropertyArrowChunkReader > > Make(const std::shared_ptr< EdgeInfo > &edge_info, const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type, const std::string &prefix, const util::FilterOptions &options={})
Create an AdjListPropertyArrowChunkReader instance from edge info.
AdjListPropertyArrowChunkReader(const std::shared_ptr< EdgeInfo > &edge_info, const std::shared_ptr< PropertyGroup > &property_group, AdjListType adj_list_type, const std::string prefix, const util::FilterOptions &options={})
Initialize the AdjListPropertyArrowChunkReader.
void Filter(util::Filter filter=nullptr)
Apply the row filter to the table. Calling Filter() with no argument clears the filter.
Result< std::shared_ptr< arrow::Table > > GetChunk()
Return the current chunk at the chunk position indicator as an arrow::Table; if the chunk is empty,...
Status next_chunk()
Sets chunk position indicator to next chunk.
Status seek(IdType offset)
Sets chunk position indicator for reader by edge index.
void Select(util::ColumnNames column_names=std::nullopt)
Apply the projection to the table to be read. Calling Select() with no argument clears the projection.
Status outcome object (success or error)
Definition: status.h:123
The arrow chunk reader for vertex property group.
Definition: chunk_reader.h:43
IdType GetChunkNum() const noexcept
Get the chunk number of current vertex property group.
Definition: chunk_reader.h:113
Result< std::shared_ptr< arrow::Table > > GetLabelChunk()
Return the current arrow label chunk table of chunk position indicator.
void Filter(util::Filter filter=nullptr)
Apply the row filter to the table. Calling Filter() with no argument clears the filter.
static Result< std::shared_ptr< VertexPropertyArrowChunkReader > > MakeForLabels(const std::shared_ptr< GraphInfo > &graph_info, const std::string &type, const std::vector< std::string > &labels, const util::FilterOptions &options={})
Create a VertexPropertyArrowChunkReader instance from graph info for labels.
Status next_chunk()
Sets chunk position indicator to next chunk.
Status seek(IdType id)
Sets chunk position indicator for reader by internal vertex id. If internal vertex id is not found,...
Result< std::shared_ptr< arrow::Table > > GetChunk(GetChunkVersion version=GetChunkVersion::AUTO)
Return the current arrow chunk table of chunk position indicator.
static Result< std::shared_ptr< VertexPropertyArrowChunkReader > > MakeForProperties(const std::shared_ptr< GraphInfo > &graph_info, const std::string &type, const std::vector< std::string > &property_names, const util::FilterOptions &options={})
Create a VertexPropertyArrowChunkReader instance from graph info for properties.
void Select(util::ColumnNames column_names=std::nullopt)
Apply the projection to the table to be read. Calling Select() with no argument clears the projection.
static Result< std::shared_ptr< VertexPropertyArrowChunkReader > > Make(const std::shared_ptr< VertexInfo > &vertex_info, const std::shared_ptr< PropertyGroup > &property_group, const std::string &prefix, const util::FilterOptions &options={})
Create a VertexPropertyArrowChunkReader instance from vertex info.