20 #include "graphar/writer_util.h"
22 arrow::csv::WriteOptions WriterOptions::getCsvOption()
const {
24 arrow::csv::WriteOptions csvWriteOptions;
25 csvWriteOptions.include_header = csvOption_->include_header;
26 csvWriteOptions.batch_size = csvOption_->batch_size;
27 csvWriteOptions.delimiter = csvOption_->delimiter;
28 csvWriteOptions.null_string = csvOption_->null_string;
29 csvWriteOptions.io_context = csvOption_->io_context;
30 csvWriteOptions.eol = csvOption_->eol;
31 csvWriteOptions.quoting_style = csvOption_->quoting_style;
32 return csvWriteOptions;
34 return arrow::csv::WriteOptions::Defaults();
38 std::shared_ptr<parquet::WriterProperties>
39 WriterOptions::getParquetWriterProperties()
const {
40 parquet::WriterProperties::Builder builder;
43 .dictionary_pagesize_limit(parquetOption_->dictionary_pagesize_limit)
44 ->write_batch_size(parquetOption_->write_batch_size)
45 ->max_row_group_length(parquetOption_->max_row_group_length)
46 ->data_pagesize(parquetOption_->data_pagesize)
47 ->data_page_version(parquetOption_->data_page_version)
48 ->version(parquetOption_->version)
49 ->encoding(parquetOption_->encoding)
50 ->max_statistics_size(parquetOption_->max_statistics_size)
51 ->compression(parquetOption_->compression)
52 ->compression_level(parquetOption_->compression_level);
53 for (
const auto& kv : parquetOption_->column_encoding) {
54 builder.encoding(kv.first, kv.second);
56 for (
const auto& kv : parquetOption_->column_compression) {
57 builder.compression(kv.first, kv.second);
59 for (
const auto& kv : parquetOption_->column_compression_level) {
60 builder.compression_level(kv.first, kv.second);
62 if (!parquetOption_->enable_dictionary) {
63 builder.disable_dictionary();
65 if (parquetOption_->encryption_properties) {
66 builder.encryption(parquetOption_->encryption_properties);
68 if (!parquetOption_->enable_statistics) {
69 builder.disable_statistics();
71 for (
const auto& path_st : parquetOption_->column_statistics) {
72 if (!path_st.second) {
73 builder.disable_statistics(path_st.first);
76 if (!parquetOption_->sorting_columns.empty()) {
77 builder.set_sorting_columns(parquetOption_->sorting_columns);
79 if (parquetOption_->enable_store_decimal_as_integer) {
80 builder.enable_store_decimal_as_integer();
82 if (parquetOption_->enable_write_page_index) {
83 builder.enable_write_page_index();
86 return builder.build();
89 std::shared_ptr<parquet::ArrowWriterProperties>
90 WriterOptions::getArrowWriterProperties()
const {
91 parquet::ArrowWriterProperties::Builder builder;
93 if (!parquetOption_->compliant_nested_types) {
94 builder.disable_compliant_nested_types();
96 builder.set_use_threads(parquetOption_->use_threads);
97 if (parquetOption_->enable_deprecated_int96_timestamps) {
98 builder.enable_deprecated_int96_timestamps();
100 builder.coerce_timestamps(parquetOption_->coerce_timestamps);
101 if (parquetOption_->allow_truncated_timestamps) {
102 builder.allow_truncated_timestamps();
104 if (parquetOption_->store_schema) {
105 builder.store_schema();
107 if (parquetOption_->executor) {
108 builder.set_executor(parquetOption_->executor);
111 return builder.build();
115 arrow::adapters::orc::WriteOptions WriterOptions::getOrcOption()
const {
116 auto writer_options = arrow::adapters::orc::WriteOptions();
117 writer_options.compression = arrow::Compression::ZSTD;
119 writer_options.batch_size = orcOption_->batch_size;
120 writer_options.compression = orcOption_->compression;
121 writer_options.stripe_size = orcOption_->stripe_size;
122 writer_options.file_version = orcOption_->file_version;
123 writer_options.bloom_filter_columns = orcOption_->bloom_filter_columns;
124 writer_options.bloom_filter_fpp = orcOption_->bloom_filter_fpp;
126 return writer_options;