Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
writer_util.cc
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #include "graphar/writer_util.h"
21 namespace graphar {
22 arrow::csv::WriteOptions WriterOptions::getCsvOption() const {
23  if (csvOption_) {
24  arrow::csv::WriteOptions csvWriteOptions;
25  csvWriteOptions.include_header = csvOption_->include_header;
26  csvWriteOptions.batch_size = csvOption_->batch_size;
27  csvWriteOptions.delimiter = csvOption_->delimiter;
28  csvWriteOptions.null_string = csvOption_->null_string;
29  csvWriteOptions.io_context = csvOption_->io_context;
30  csvWriteOptions.eol = csvOption_->eol;
31  csvWriteOptions.quoting_style = csvOption_->quoting_style;
32  return csvWriteOptions;
33  } else {
34  return arrow::csv::WriteOptions::Defaults();
35  }
36 }
37 
38 std::shared_ptr<parquet::WriterProperties>
39 WriterOptions::getParquetWriterProperties() const {
40  parquet::WriterProperties::Builder builder;
41  if (parquetOption_) {
42  builder
43  .dictionary_pagesize_limit(parquetOption_->dictionary_pagesize_limit)
44  ->write_batch_size(parquetOption_->write_batch_size)
45  ->max_row_group_length(parquetOption_->max_row_group_length)
46  ->data_pagesize(parquetOption_->data_pagesize)
47  ->data_page_version(parquetOption_->data_page_version)
48  ->version(parquetOption_->version)
49  ->encoding(parquetOption_->encoding)
50  ->max_statistics_size(parquetOption_->max_statistics_size)
51  ->compression(parquetOption_->compression)
52  ->compression_level(parquetOption_->compression_level);
53  for (const auto& kv : parquetOption_->column_encoding) {
54  builder.encoding(kv.first, kv.second);
55  }
56  for (const auto& kv : parquetOption_->column_compression) {
57  builder.compression(kv.first, kv.second);
58  }
59  for (const auto& kv : parquetOption_->column_compression_level) {
60  builder.compression_level(kv.first, kv.second);
61  }
62  if (!parquetOption_->enable_dictionary) {
63  builder.disable_dictionary();
64  }
65  if (parquetOption_->encryption_properties) {
66  builder.encryption(parquetOption_->encryption_properties);
67  }
68  if (!parquetOption_->enable_statistics) {
69  builder.disable_statistics();
70  }
71  for (const auto& path_st : parquetOption_->column_statistics) {
72  if (!path_st.second) {
73  builder.disable_statistics(path_st.first);
74  }
75  }
76  if (!parquetOption_->sorting_columns.empty()) {
77  builder.set_sorting_columns(parquetOption_->sorting_columns);
78  }
79  if (parquetOption_->enable_store_decimal_as_integer) {
80  builder.enable_store_decimal_as_integer();
81  }
82  if (parquetOption_->enable_write_page_index) {
83  builder.enable_write_page_index();
84  }
85  }
86  return builder.build();
87 }
88 
89 std::shared_ptr<parquet::ArrowWriterProperties>
90 WriterOptions::getArrowWriterProperties() const {
91  parquet::ArrowWriterProperties::Builder builder;
92  if (parquetOption_) {
93  if (!parquetOption_->compliant_nested_types) {
94  builder.disable_compliant_nested_types();
95  }
96  builder.set_use_threads(parquetOption_->use_threads);
97  if (parquetOption_->enable_deprecated_int96_timestamps) {
98  builder.enable_deprecated_int96_timestamps();
99  }
100  builder.coerce_timestamps(parquetOption_->coerce_timestamps);
101  if (parquetOption_->allow_truncated_timestamps) {
102  builder.allow_truncated_timestamps();
103  }
104  if (parquetOption_->store_schema) {
105  builder.store_schema();
106  }
107  if (parquetOption_->executor) {
108  builder.set_executor(parquetOption_->executor);
109  }
110  }
111  return builder.build();
112 }
113 
114 #ifdef ARROW_ORC
115 arrow::adapters::orc::WriteOptions WriterOptions::getOrcOption() const {
116  auto writer_options = arrow::adapters::orc::WriteOptions();
117  writer_options.compression = arrow::Compression::ZSTD;
118  if (orcOption_) {
119  writer_options.batch_size = orcOption_->batch_size;
120  writer_options.compression = orcOption_->compression;
121  writer_options.stripe_size = orcOption_->stripe_size;
122  writer_options.file_version = orcOption_->file_version;
123  writer_options.bloom_filter_columns = orcOption_->bloom_filter_columns;
124  writer_options.bloom_filter_fpp = orcOption_->bloom_filter_fpp;
125  }
126  return writer_options;
127 }
128 #endif
129 } // namespace graphar