Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
chunk_writer.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #pragma once
21 
22 #include <memory>
23 #include <string>
24 #include <vector>
25 
26 #include "graphar/fwd.h"
27 #include "graphar/writer_util.h"
28 
29 // forward declaration
30 namespace arrow {
31 class Table;
32 }
33 
34 namespace graphar {
35 
58  public:
/**
 * @brief Initialize the VertexPropertyWriter.
 *
 * @param vertex_info Shared pointer to the vertex info.
 * @param prefix Path prefix string (presumably the location the chunks are
 *        written under -- TODO confirm against the implementation).
 * @param options Writer options; defaults to
 *        WriterOptions::DefaultWriterOption().
 * @param validate_level Validate level stored by the writer; defaults to
 *        ValidateLevel::no_validate.
 */
69  explicit VertexPropertyWriter(
70  const std::shared_ptr<VertexInfo>& vertex_info, const std::string& prefix,
71  const std::shared_ptr<WriterOptions>& options =
72  WriterOptions::DefaultWriterOption(),
73  const ValidateLevel& validate_level = ValidateLevel::no_validate);
74 
/**
 * @brief Set the validate level.
 *
 * @param validate_level The new level. ValidateLevel::default_validate is
 *        ignored: the previously stored level is left unchanged.
 */
80  inline void SetValidateLevel(const ValidateLevel& validate_level) {
// default_validate is not a storable level; keep the current one.
81  if (validate_level == ValidateLevel::default_validate) {
82  return;
83  }
84  validate_level_ = validate_level;
85  }
86 
/**
 * @brief Get the validate level.
 *
 * @return The value currently held in validate_level_.
 */
92  inline ValidateLevel GetValidateLevel() const { return validate_level_; }
93 
103  const IdType& count,
104  ValidateLevel validate_level = ValidateLevel::default_validate) const;
105 
118  const std::shared_ptr<arrow::Table>& input_table,
119  const std::shared_ptr<PropertyGroup>& property_group, IdType chunk_index,
120  ValidateLevel validate_level = ValidateLevel::default_validate) const;
121 
133  const std::shared_ptr<arrow::Table>& input_table, IdType chunk_index,
134  FileType file_type,
135  ValidateLevel validate_level = ValidateLevel::default_validate) const;
136 
148  const std::shared_ptr<arrow::Table>& input_table, IdType chunk_index,
149  ValidateLevel validate_level = ValidateLevel::default_validate) const;
150 
163  const std::shared_ptr<arrow::Table>& input_table,
164  const std::shared_ptr<PropertyGroup>& property_group,
165  IdType start_chunk_index,
166  ValidateLevel validate_level = ValidateLevel::default_validate) const;
167 
179  const std::shared_ptr<arrow::Table>& input_table,
180  IdType start_chunk_index,
181  ValidateLevel validate_level = ValidateLevel::default_validate) const;
182 
194  const std::shared_ptr<arrow::Table>& input_table,
195  IdType start_chunk_index, FileType file_type,
196  ValidateLevel validate_level = ValidateLevel::default_validate) const;
197 
/**
 * @brief Get label column from table to formulate label table.
 *
 * @param input_table The table to read from.
 * @param labels The label names.
 * @return The resulting table, or an error.
 */
204  Result<std::shared_ptr<arrow::Table>> GetLabelTable(
205  const std::shared_ptr<arrow::Table>& input_table,
206  const std::vector<std::string>& labels) const;
207 
/**
 * @brief Variant of GetLabelTable with the same parameters and return type.
 *
 * NOTE(review): the original doc comment is not present in this listing.
 * Judging by the name only, this presumably also assigns labels at random
 * (e.g. for generating test data) -- confirm in the implementation before
 * relying on it.
 *
 * @param input_table The table to read from.
 * @param labels The label names.
 * @return The resulting table, or an error.
 */
208  Result<std::shared_ptr<arrow::Table>> GetLabelTableAndRandomlyAddLabels(
209  const std::shared_ptr<arrow::Table>& input_table,
210  const std::vector<std::string>& labels) const;
211 
/**
 * @brief Construct a VertexPropertyWriter from vertex info.
 *
 * @param vertex_info Shared pointer to the vertex info.
 * @param prefix Path prefix string.
 * @param options Writer options (no default in this overload).
 * @param validate_level Validate level; defaults to
 *        ValidateLevel::no_validate.
 * @return A shared pointer to the new writer, or an error.
 */
221  static Result<std::shared_ptr<VertexPropertyWriter>> Make(
222  const std::shared_ptr<VertexInfo>& vertex_info, const std::string& prefix,
223  const std::shared_ptr<WriterOptions>& options,
224  const ValidateLevel& validate_level = ValidateLevel::no_validate);
225 
/**
 * @brief Construct a VertexPropertyWriter from vertex info, without an
 * explicit WriterOptions argument.
 *
 * NOTE(review): presumably uses the default writer options -- confirm in the
 * implementation.
 *
 * @param vertex_info Shared pointer to the vertex info.
 * @param prefix Path prefix string.
 * @param validate_level Validate level; defaults to
 *        ValidateLevel::no_validate.
 * @return A shared pointer to the new writer, or an error.
 */
226  static Result<std::shared_ptr<VertexPropertyWriter>> Make(
227  const std::shared_ptr<VertexInfo>& vertex_info, const std::string& prefix,
228  const ValidateLevel& validate_level = ValidateLevel::no_validate);
229 
/**
 * @brief Construct a VertexPropertyWriter from graph info and a vertex type
 * name.
 *
 * @param graph_info Shared pointer to the graph info.
 * @param type The vertex type string (presumably used to look up the vertex
 *        info inside graph_info -- TODO confirm).
 * @param options Writer options (no default in this overload).
 * @param validate_level Validate level; defaults to
 *        ValidateLevel::no_validate.
 * @return A shared pointer to the new writer, or an error.
 */
239  static Result<std::shared_ptr<VertexPropertyWriter>> Make(
240  const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
241  const std::shared_ptr<WriterOptions>& options,
242  const ValidateLevel& validate_level = ValidateLevel::no_validate);
243 
/**
 * @brief Construct a VertexPropertyWriter from graph info and a vertex type
 * name, without an explicit WriterOptions argument.
 *
 * NOTE(review): presumably uses the default writer options -- confirm in the
 * implementation.
 *
 * @param graph_info Shared pointer to the graph info.
 * @param type The vertex type string.
 * @param validate_level Validate level; defaults to
 *        ValidateLevel::no_validate.
 * @return A shared pointer to the new writer, or an error.
 */
244  static Result<std::shared_ptr<VertexPropertyWriter>> Make(
245  const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
246  const ValidateLevel& validate_level = ValidateLevel::no_validate);
247 
/**
 * @brief Replace the writer options held by this writer.
 *
 * The pointer is stored as-is; no null check is performed here.
 *
 * @param options The new writer options.
 */
248  void setWriterOptions(const std::shared_ptr<WriterOptions>& options) {
249  options_ = options;
250  }
251 
/**
 * @brief Produce a table derived from the given table, a chunk index and a
 * chunk size.
 *
 * NOTE(review): the original doc comment is not present in this listing.
 * Judging by the name, this presumably adds a vertex index column whose
 * values are derived from chunk_index and chunk_size -- confirm in the
 * implementation.
 *
 * @param table The input table.
 * @param chunk_index Index of the chunk.
 * @param chunk_size Size of a chunk.
 * @return The resulting table, or an error.
 */
252  Result<std::shared_ptr<arrow::Table>> AddIndexColumn(
253  const std::shared_ptr<arrow::Table>& table, IdType chunk_index,
254  IdType chunk_size) const;
255 
256  private:
/**
 * @brief Private validation overload taking a count.
 *
 * NOTE(review): presumably backs WriteVerticesNum -- confirm in the
 * implementation.
 *
 * @param count The count to check.
 * @param validate_level The validate level to apply.
 * @return Status of the check.
 */
264  Status validate(const IdType& count, ValidateLevel validate_level) const;
265 
/**
 * @brief Private validation overload taking a property group and a chunk
 * index.
 *
 * @param property_group The property group to check.
 * @param chunk_index Index of the vertex chunk.
 * @param validate_level The validate level to apply.
 * @return Status of the check.
 */
274  Status validate(const std::shared_ptr<PropertyGroup>& property_group,
275  IdType chunk_index, ValidateLevel validate_level) const;
276 
/**
 * @brief Private validation overload taking an input table, a property group
 * and a chunk index.
 *
 * @param input_table The table to check.
 * @param property_group The property group to check.
 * @param chunk_index Index of the vertex chunk.
 * @param validate_level The validate level to apply.
 * @return Status of the check.
 */
286  Status validate(const std::shared_ptr<arrow::Table>& input_table,
287  const std::shared_ptr<PropertyGroup>& property_group,
288  IdType chunk_index, ValidateLevel validate_level) const;
289 
290  private:
// Vertex info this writer works with.
291  std::shared_ptr<VertexInfo> vertex_info_;
// Path prefix string.
292  std::string prefix_;
// File system handle (presumably resolved from prefix_ -- TODO confirm).
293  std::shared_ptr<FileSystem> fs_;
// Stored validate level; read by GetValidateLevel, set by SetValidateLevel.
294  ValidateLevel validate_level_;
// Writer options; replaced by setWriterOptions.
295  std::shared_ptr<WriterOptions> options_;
296 };
297 
321  public:
/**
 * @brief Initialize the EdgeChunkWriter.
 *
 * @param edge_info Shared pointer to the edge info.
 * @param prefix Path prefix string (presumably the location the chunks are
 *        written under -- TODO confirm against the implementation).
 * @param adj_list_type The adj list type of the edges.
 * @param options Writer options; defaults to
 *        WriterOptions::DefaultWriterOption().
 * @param validate_level Validate level stored by the writer; defaults to
 *        ValidateLevel::no_validate.
 */
333  explicit EdgeChunkWriter(
334  const std::shared_ptr<EdgeInfo>& edge_info, const std::string& prefix,
335  AdjListType adj_list_type,
336  const std::shared_ptr<WriterOptions>& options =
337  WriterOptions::DefaultWriterOption(),
338  const ValidateLevel& validate_level = ValidateLevel::no_validate);
339 
/**
 * @brief Set the validate level.
 *
 * @param validate_level The new level. ValidateLevel::default_validate is
 *        ignored: the previously stored level is left unchanged.
 */
345  void SetValidateLevel(const ValidateLevel& validate_level) {
// default_validate is not a storable level; keep the current one.
346  if (validate_level == ValidateLevel::default_validate) {
347  return;
348  }
349  validate_level_ = validate_level;
350  }
351 
/**
 * @brief Get the validate level.
 *
 * @return The value currently held in validate_level_.
 */
357  inline ValidateLevel GetValidateLevel() const { return validate_level_; }
358 
369  IdType vertex_chunk_index, const IdType& count,
370  ValidateLevel validate_level = ValidateLevel::default_validate) const;
371 
381  const IdType& count,
382  ValidateLevel validate_level = ValidateLevel::default_validate) const;
383 
394  const std::shared_ptr<arrow::Table>& input_table,
395  IdType vertex_chunk_index,
396  ValidateLevel validate_level = ValidateLevel::default_validate) const;
397 
409  const std::shared_ptr<arrow::Table>& input_table,
410  IdType vertex_chunk_index, IdType chunk_index,
411  ValidateLevel validate_level = ValidateLevel::default_validate) const;
412 
425  const std::shared_ptr<arrow::Table>& input_table,
426  const std::shared_ptr<PropertyGroup>& property_group,
427  IdType vertex_chunk_index, IdType chunk_index,
428  ValidateLevel validate_level = ValidateLevel::default_validate) const;
429 
441  const std::shared_ptr<arrow::Table>& input_table,
442  IdType vertex_chunk_index, IdType chunk_index,
443  ValidateLevel validate_level = ValidateLevel::default_validate) const;
444 
456  const std::shared_ptr<arrow::Table>& input_table,
457  IdType vertex_chunk_index, IdType chunk_index,
458  ValidateLevel validate_level = ValidateLevel::default_validate) const;
459 
472  const std::shared_ptr<arrow::Table>& input_table,
473  IdType vertex_chunk_index, IdType start_chunk_index = 0,
474  ValidateLevel validate_level = ValidateLevel::default_validate) const;
475 
490  const std::shared_ptr<arrow::Table>& input_table,
491  const std::shared_ptr<PropertyGroup>& property_group,
492  IdType vertex_chunk_index, IdType start_chunk_index = 0,
493  ValidateLevel validate_level = ValidateLevel::default_validate) const;
494 
508  const std::shared_ptr<arrow::Table>& input_table,
509  IdType vertex_chunk_index, IdType start_chunk_index = 0,
510  ValidateLevel validate_level = ValidateLevel::default_validate) const;
511 
525  const std::shared_ptr<arrow::Table>& input_table,
526  IdType vertex_chunk_index, IdType start_chunk_index = 0,
527  ValidateLevel validate_level = ValidateLevel::default_validate) const;
528 
542  const std::shared_ptr<arrow::Table>& input_table,
543  IdType vertex_chunk_index, IdType start_chunk_index = 0,
544  ValidateLevel validate_level = ValidateLevel::default_validate) const;
545 
560  const std::shared_ptr<arrow::Table>& input_table,
561  const std::shared_ptr<PropertyGroup>& property_group,
562  IdType vertex_chunk_index, IdType start_chunk_index = 0,
563  ValidateLevel validate_level = ValidateLevel::default_validate) const;
564 
578  const std::shared_ptr<arrow::Table>& input_table,
579  IdType vertex_chunk_index, IdType start_chunk_index = 0,
580  ValidateLevel validate_level = ValidateLevel::default_validate) const;
581 
595  const std::shared_ptr<arrow::Table>& input_table,
596  IdType vertex_chunk_index, IdType start_chunk_index = 0,
597  ValidateLevel validate_level = ValidateLevel::default_validate) const;
598 
/**
 * @brief Construct an EdgeChunkWriter from edge info.
 *
 * @param edge_info Shared pointer to the edge info.
 * @param prefix Path prefix string.
 * @param adj_list_type The adj list type of the edges.
 * @param options Writer options (no default in this overload).
 * @param validate_level Validate level; defaults to
 *        ValidateLevel::no_validate.
 * @return A shared pointer to the new writer, or an error.
 */
608  static Result<std::shared_ptr<EdgeChunkWriter>> Make(
609  const std::shared_ptr<EdgeInfo>& edge_info, const std::string& prefix,
610  AdjListType adj_list_type, const std::shared_ptr<WriterOptions>& options,
611  const ValidateLevel& validate_level = ValidateLevel::no_validate);
612 
/**
 * @brief Construct an EdgeChunkWriter from edge info, without an explicit
 * WriterOptions argument.
 *
 * NOTE(review): presumably uses the default writer options -- confirm in the
 * implementation.
 *
 * @param edge_info Shared pointer to the edge info.
 * @param prefix Path prefix string.
 * @param adj_list_type The adj list type of the edges.
 * @param validate_level Validate level; defaults to
 *        ValidateLevel::no_validate.
 * @return A shared pointer to the new writer, or an error.
 */
613  static Result<std::shared_ptr<EdgeChunkWriter>> Make(
614  const std::shared_ptr<EdgeInfo>& edge_info, const std::string& prefix,
615  AdjListType adj_list_type,
616  const ValidateLevel& validate_level = ValidateLevel::no_validate);
617 
/**
 * @brief Construct an EdgeChunkWriter from graph info and the
 * source/edge/destination type names.
 *
 * @param graph_info Shared pointer to the graph info.
 * @param src_type The source vertex type string.
 * @param edge_type The edge type string.
 * @param dst_type The destination vertex type string.
 * @param adj_list_type The adj list type of the edges.
 * @param options Writer options (no default in this overload).
 * @param validate_level Validate level; defaults to
 *        ValidateLevel::no_validate.
 * @return A shared pointer to the new writer, or an error.
 */
629  static Result<std::shared_ptr<EdgeChunkWriter>> Make(
630  const std::shared_ptr<GraphInfo>& graph_info, const std::string& src_type,
631  const std::string& edge_type, const std::string& dst_type,
632  AdjListType adj_list_type, const std::shared_ptr<WriterOptions>& options,
633  const ValidateLevel& validate_level = ValidateLevel::no_validate);
634 
/**
 * @brief Construct an EdgeChunkWriter from graph info and the
 * source/edge/destination type names, without an explicit WriterOptions
 * argument.
 *
 * NOTE(review): presumably uses the default writer options -- confirm in the
 * implementation.
 *
 * @param graph_info Shared pointer to the graph info.
 * @param src_type The source vertex type string.
 * @param edge_type The edge type string.
 * @param dst_type The destination vertex type string.
 * @param adj_list_type The adj list type of the edges.
 * @param validate_level Validate level; defaults to
 *        ValidateLevel::no_validate.
 * @return A shared pointer to the new writer, or an error.
 */
635  static Result<std::shared_ptr<EdgeChunkWriter>> Make(
636  const std::shared_ptr<GraphInfo>& graph_info, const std::string& src_type,
637  const std::string& edge_type, const std::string& dst_type,
638  AdjListType adj_list_type,
639  const ValidateLevel& validate_level = ValidateLevel::no_validate);
640 
641  private:
/**
 * @brief Private validation overload taking two integer values.
 *
 * As the parameter names indicate, each value may be either a count or an
 * index depending on the caller.
 *
 * @param count_or_index1 First count or index to check.
 * @param count_or_index2 Second count or index to check.
 * @param validate_level The validate level to apply.
 * @return Status of the check.
 */
651  Status validate(IdType count_or_index1, IdType count_or_index2,
652  ValidateLevel validate_level) const;
653 
/**
 * @brief Private validation overload taking a property group and the
 * vertex-chunk / edge-chunk indices.
 *
 * @param property_group The property group to check.
 * @param vertex_chunk_index Index of the vertex chunk.
 * @param chunk_index Index of the edge chunk.
 * @param validate_level The validate level to apply.
 * @return Status of the check.
 */
664  Status validate(const std::shared_ptr<PropertyGroup>& property_group,
665  IdType vertex_chunk_index, IdType chunk_index,
666  ValidateLevel validate_level) const;
667 
/**
 * @brief Private validation overload taking an input table and a vertex
 * chunk index.
 *
 * NOTE(review): presumably backs WriteOffsetChunk, which has the same
 * parameter shape -- confirm in the implementation.
 *
 * @param input_table The table to check.
 * @param vertex_chunk_index Index of the vertex chunk.
 * @param validate_level The validate level to apply.
 * @return Status of the check.
 */
677  Status validate(const std::shared_ptr<arrow::Table>& input_table,
678  IdType vertex_chunk_index,
679  ValidateLevel validate_level) const;
680 
/**
 * @brief Private validation overload taking an input table and the
 * vertex-chunk / edge-chunk indices.
 *
 * @param input_table The table to check.
 * @param vertex_chunk_index Index of the vertex chunk.
 * @param chunk_index Index of the edge chunk.
 * @param validate_level The validate level to apply.
 * @return Status of the check.
 */
691  Status validate(const std::shared_ptr<arrow::Table>& input_table,
692  IdType vertex_chunk_index, IdType chunk_index,
693  ValidateLevel validate_level) const;
694 
/**
 * @brief Private validation overload taking an input table, a property group
 * and the vertex-chunk / edge-chunk indices.
 *
 * @param input_table The table to check.
 * @param property_group The property group to check.
 * @param vertex_chunk_index Index of the vertex chunk.
 * @param chunk_index Index of the edge chunk.
 * @param validate_level The validate level to apply.
 * @return Status of the check.
 */
706  Status validate(const std::shared_ptr<arrow::Table>& input_table,
707  const std::shared_ptr<PropertyGroup>& property_group,
708  IdType vertex_chunk_index, IdType chunk_index,
709  ValidateLevel validate_level) const;
710 
/**
 * @brief Private helper that derives a table from an input table, a column
 * name and a vertex chunk index.
 *
 * NOTE(review): the original doc comment is not present in this listing.
 * Judging by the name, this presumably builds the offset table for the given
 * vertex chunk from the named column -- confirm in the implementation.
 *
 * @param input_table The input table.
 * @param column_name Name of the column to use.
 * @param vertex_chunk_index Index of the vertex chunk.
 * @return The resulting table, or an error.
 */
718  Result<std::shared_ptr<arrow::Table>> getOffsetTable(
719  const std::shared_ptr<arrow::Table>& input_table,
720  const std::string& column_name, IdType vertex_chunk_index) const;
721 
/**
 * @brief Private static helper mapping an adj list type to a column name.
 *
 * NOTE(review): presumably the name of the column that edges are sorted by
 * for that adj list type -- confirm in the implementation.
 *
 * @param adj_list_type The adj list type.
 * @return The column name.
 */
728  static std::string getSortColumnName(AdjListType adj_list_type);
729 
/**
 * @brief Private static helper that takes a table and a column name and
 * returns a table.
 *
 * NOTE(review): presumably returns the input sorted by the named column --
 * sort order and stability are not visible here; confirm in the
 * implementation.
 *
 * @param input_table The input table.
 * @param column_name Name of the column to use.
 * @return The resulting table, or an error.
 */
737  static Result<std::shared_ptr<arrow::Table>> sortTable(
738  const std::shared_ptr<arrow::Table>& input_table,
739  const std::string& column_name);
740 
741  private:
// Edge info this writer works with.
742  std::shared_ptr<EdgeInfo> edge_info_;
// Vertex chunk size (presumably taken from edge_info_ -- TODO confirm).
743  IdType vertex_chunk_size_;
// Edge chunk size (presumably taken from edge_info_ -- TODO confirm).
744  IdType chunk_size_;
// Adj list type passed at construction.
745  AdjListType adj_list_type_;
// Path prefix string.
746  std::string prefix_;
// File system handle (presumably resolved from prefix_ -- TODO confirm).
747  std::shared_ptr<FileSystem> fs_;
// Stored validate level; read by GetValidateLevel, set by SetValidateLevel.
748  ValidateLevel validate_level_;
// Writer options.
749  std::shared_ptr<WriterOptions> options_;
750 };
751 
752 } // namespace graphar
The writer for edge (adj list, offset and property group) chunks.
Definition: chunk_writer.h:320
void SetValidateLevel(const ValidateLevel &validate_level)
Set the validate level.
Definition: chunk_writer.h:345
Status WritePropertyChunk(const std::shared_ptr< arrow::Table > &input_table, const std::shared_ptr< PropertyGroup > &property_group, IdType vertex_chunk_index, IdType chunk_index, ValidateLevel validate_level=ValidateLevel::default_validate) const
Validate and write a single edge property group for an edge chunk.
Status SortAndWriteAdjListTable(const std::shared_ptr< arrow::Table > &input_table, IdType vertex_chunk_index, IdType start_chunk_index=0, ValidateLevel validate_level=ValidateLevel::default_validate) const
Sort the edges, and write the adj list chunks for the edges of a vertex chunk.
Status WriteAdjListTable(const std::shared_ptr< arrow::Table > &input_table, IdType vertex_chunk_index, IdType start_chunk_index=0, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write the adj list chunks for the edges of a vertex chunk.
Status WritePropertyTable(const std::shared_ptr< arrow::Table > &input_table, const std::shared_ptr< PropertyGroup > &property_group, IdType vertex_chunk_index, IdType start_chunk_index=0, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write chunks of a single property group for the edges of a vertex chunk.
static Result< std::shared_ptr< EdgeChunkWriter > > Make(const std::shared_ptr< EdgeInfo > &edge_info, const std::string &prefix, AdjListType adj_list_type, const std::shared_ptr< WriterOptions > &options, const ValidateLevel &validate_level=ValidateLevel::no_validate)
Construct an EdgeChunkWriter from edge info.
Status WriteOffsetChunk(const std::shared_ptr< arrow::Table > &input_table, IdType vertex_chunk_index, ValidateLevel validate_level=ValidateLevel::default_validate) const
Validate and write the offset chunk for a vertex chunk.
Status WriteChunk(const std::shared_ptr< arrow::Table > &input_table, IdType vertex_chunk_index, IdType chunk_index, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write the adj list and all property groups for an edge chunk.
Status SortAndWriteTable(const std::shared_ptr< arrow::Table > &input_table, IdType vertex_chunk_index, IdType start_chunk_index=0, ValidateLevel validate_level=ValidateLevel::default_validate) const
Sort the edges, and write chunks of the adj list and all property groups for the edges of a vertex chunk.
Status SortAndWritePropertyTable(const std::shared_ptr< arrow::Table > &input_table, const std::shared_ptr< PropertyGroup > &property_group, IdType vertex_chunk_index, IdType start_chunk_index=0, ValidateLevel validate_level=ValidateLevel::default_validate) const
Sort the edges, and write chunks of a single property group for the edges of a vertex chunk.
Status WriteAdjListChunk(const std::shared_ptr< arrow::Table > &input_table, IdType vertex_chunk_index, IdType chunk_index, ValidateLevel validate_level=ValidateLevel::default_validate) const
Validate and write the adj list chunk for an edge chunk.
Status WriteEdgesNum(IdType vertex_chunk_index, const IdType &count, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write the number of edges into the file.
EdgeChunkWriter(const std::shared_ptr< EdgeInfo > &edge_info, const std::string &prefix, AdjListType adj_list_type, const std::shared_ptr< WriterOptions > &options=WriterOptions::DefaultWriterOption(), const ValidateLevel &validate_level=ValidateLevel::no_validate)
Initialize the EdgeChunkWriter.
Status WriteTable(const std::shared_ptr< arrow::Table > &input_table, IdType vertex_chunk_index, IdType start_chunk_index=0, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write chunks of the adj list and all property groups for the edges of a vertex chunk.
ValidateLevel GetValidateLevel() const
Get the validate level.
Definition: chunk_writer.h:357
Status WriteVerticesNum(const IdType &count, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write the number of vertices into the file.
Status outcome object (success or error)
Definition: status.h:123
The writer for vertex property group chunks.
Definition: chunk_writer.h:57
Status WriteLabelTable(const std::shared_ptr< arrow::Table > &input_table, IdType start_chunk_index, FileType file_type, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write all labels for multiple vertex chunks to corresponding files.
ValidateLevel GetValidateLevel() const
Get the validate level.
Definition: chunk_writer.h:92
VertexPropertyWriter(const std::shared_ptr< VertexInfo > &vertex_info, const std::string &prefix, const std::shared_ptr< WriterOptions > &options=WriterOptions::DefaultWriterOption(), const ValidateLevel &validate_level=ValidateLevel::no_validate)
Initialize the VertexPropertyWriter.
Status WriteChunk(const std::shared_ptr< arrow::Table > &input_table, const std::shared_ptr< PropertyGroup > &property_group, IdType chunk_index, ValidateLevel validate_level=ValidateLevel::default_validate) const
Validate and write a single property group for a single vertex chunk.
Result< std::shared_ptr< arrow::Table > > GetLabelTable(const std::shared_ptr< arrow::Table > &input_table, const std::vector< std::string > &labels) const
Get label column from table to formulate label table.
void SetValidateLevel(const ValidateLevel &validate_level)
Set the validate level.
Definition: chunk_writer.h:80
static Result< std::shared_ptr< VertexPropertyWriter > > Make(const std::shared_ptr< VertexInfo > &vertex_info, const std::string &prefix, const std::shared_ptr< WriterOptions > &options, const ValidateLevel &validate_level=ValidateLevel::no_validate)
Construct a VertexPropertyWriter from vertex info.
Status WriteLabelChunk(const std::shared_ptr< arrow::Table > &input_table, IdType chunk_index, FileType file_type, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write all labels of a single vertex chunk to corresponding files.
Status WriteVerticesNum(const IdType &count, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write the number of vertices into the file.
Status WriteTable(const std::shared_ptr< arrow::Table > &input_table, const std::shared_ptr< PropertyGroup > &property_group, IdType start_chunk_index, ValidateLevel validate_level=ValidateLevel::default_validate) const
Write a single property group for multiple vertex chunks to corresponding files.