Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
util.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #pragma once
21 
#include <cstddef>
#include <cstdint>
#include <memory>
#include <numeric>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
27 
28 #include "graphar/result.h"
29 
// Separator string literal "_".
// NOTE(review): its intended use is not visible in this header - presumably
// it joins name components; confirm at the call sites.
30 #define REGULAR_SEPARATOR "_"
31 
// Forward declarations of the Arrow types that appear in the signatures
// below, so this header does not have to include any Arrow header itself.
namespace arrow {
class Array;
class ChunkedArray;
class Table;
}  // namespace arrow
38 
39 namespace graphar {
40 
// Read-only view over a contiguous run of `T` values.
//
// The view holds only a pointer and an element count. It never allocates or
// releases the elements (the destructor is defaulted), so the buffer it points
// at has to stay alive for as long as the view is used.
template <typename T>
class Array final {
 public:
  using ValueType = T;

  // Creates an empty view: null data pointer and size 0.
  Array() : data_(nullptr), size_(0) {}

  // Creates a view over `size` elements starting at `data`.
  Array(const T* data, size_t size) : data_(data), size_(size) {}

  // Copies and moves duplicate the pointer/size pair only.
  Array(const Array& other) = default;
  Array(Array&& other) = default;
  Array& operator=(const Array& other) = default;
  Array& operator=(Array&& other) = default;
  ~Array() = default;

  // Returns the element at `index`. No bounds check is performed.
  const T& operator[](size_t index) const { return data_[index]; }

  // Pointer to the first element (nullptr for a default-constructed view).
  const T* data() const { return data_; }

  // Number of elements in the view.
  size_t size() const { return size_; }

  // Resets the view to the empty state; the elements are left untouched.
  void clear() {
    data_ = nullptr;
    size_ = 0;
  }

  // True when the view contains no elements.
  bool empty() const { return size_ == 0; }

  // Exchanges the pointer/size pairs of two views. Swapping a raw pointer and
  // an integer cannot throw, hence noexcept.
  void swap(Array& other) noexcept {
    std::swap(data_, other.data_);
    std::swap(size_, other.size_);
  }

  // Iteration support: the raw pointers act as iterators.
  const T* begin() const { return data_; }

  const T* end() const { return data_ + size_; }

 private:
  const T* data_;
  size_t size_;
};

// Specialization for variable-length strings.
//
// Element `i` is the byte range [offsets[i], offsets[i + 1]) of `data`,
// exposed as a std::string_view. Reading element `i` therefore touches both
// offsets[i] and offsets[i + 1]. As with the primary template, nothing is
// owned or copied.
template <>
class Array<std::string_view> final {
 public:
  using ValueType = std::string_view;

  // Iterator that yields one std::string_view per element, by value.
  class iterator {
   private:
    const int32_t* offsets_;
    const uint8_t* data_;
    size_t index_;

   public:
    explicit iterator(const int32_t* offsets, const uint8_t* data, size_t index)
        : offsets_(offsets), data_(data), index_(index) {}

    // Builds the view of the current element from its two bounding offsets.
    const std::string_view operator*() const {
      return std::string_view(
          reinterpret_cast<const char*>(data_ + offsets_[index_]),
          offsets_[index_ + 1] - offsets_[index_]);
    }

    // Pre-increment: advance to the next element.
    iterator& operator++() {
      ++index_;
      return *this;
    }

    // Post-increment: advance, returning an iterator to the previous element.
    iterator operator++(int) { return iterator(offsets_, data_, index_++); }

    // Returns an iterator `n` elements further on. This is a const member so
    // that it can be applied to the `const iterator` values returned by
    // begin()/end(), e.g. `arr.begin() + 2`.
    iterator operator+(size_t n) const {
      return iterator(offsets_, data_, index_ + n);
    }

    // Iterators compare by element index only.
    bool operator==(const iterator& other) const {
      return index_ == other.index_;
    }
    bool operator!=(const iterator& other) const {
      return index_ != other.index_;
    }
  };

  // Creates an empty view: null pointers and size 0.
  Array() : offsets_(nullptr), data_(nullptr), size_(0) {}

  // Creates a view over `size` strings described by `offsets` into `data`.
  explicit Array(const int32_t* offsets, const uint8_t* data, size_t size)
      : offsets_(offsets), data_(data), size_(size) {}

  // Returns the string at `index`. No bounds check is performed.
  const std::string_view operator[](size_t index) const {
    return std::string_view(
        reinterpret_cast<const char*>(data_ + offsets_[index]),
        offsets_[index + 1] - offsets_[index]);
  }

  // Raw access to the offsets table and the character data.
  const int32_t* offsets() const { return offsets_; }
  const uint8_t* data() const { return data_; }

  // Number of strings in the view.
  size_t size() const { return size_; }

  // Resets the view to the empty state; the buffers are left untouched.
  void clear() {
    offsets_ = nullptr;
    data_ = nullptr;
    size_ = 0;
  }

  // True when the view contains no strings.
  bool empty() const { return size_ == 0; }

  // Exchanges the state of two views. Only raw pointers and an integer are
  // swapped, hence noexcept.
  void swap(Array& other) noexcept {
    std::swap(offsets_, other.offsets_);
    std::swap(data_, other.data_);
    std::swap(size_, other.size_);
  }

  // Iteration support: positions 0 and size() respectively.
  const iterator begin() const { return iterator(offsets_, data_, 0); }
  const iterator end() const { return iterator(offsets_, data_, size_); }

 private:
  const int32_t* offsets_;
  const uint8_t* data_;
  size_t size_;
};

// Convenience aliases for the element types used with Array.
using Int32Array = Array<int32_t>;
using Int64Array = Array<int64_t>;
using FloatArray = Array<float>;
using DoubleArray = Array<double>;
using StringArray = Array<std::string_view>;
162 
163 } // namespace graphar
164 
165 namespace graphar::util {
166 
// Builds a converter from the number of edge chunks of each vertex chunk
// (entry i belongs to vertex chunk i). The vector is moved into the converter,
// so the caller's vector is left in a moved-from state.
168  explicit IndexConverter(std::vector<IdType>&& edge_chunk_nums)
169  : edge_chunk_nums_(std::move(edge_chunk_nums)) {}
170  IdType IndexPairToGlobalChunkIndex(IdType vertex_chunk_index,
171  IdType edge_chunk_index) {
172  IdType global_edge_chunk_index = 0;
173  for (IdType i = 0; i < vertex_chunk_index; ++i) {
174  global_edge_chunk_index += edge_chunk_nums_[i];
175  }
176  return global_edge_chunk_index + edge_chunk_index;
177  }
178 
179  // covert edge global chunk index to <vertex_chunk_index, edge_chunk_index>
180  std::pair<IdType, IdType> GlobalChunkIndexToIndexPair(IdType global_index) {
181  std::pair<IdType, IdType> index_pair(0, 0);
182  for (size_t i = 0; i < edge_chunk_nums_.size(); ++i) {
183  if (global_index < edge_chunk_nums_[i]) {
184  index_pair.first = static_cast<IdType>(i);
185  index_pair.second = global_index;
186  break;
187  }
188  global_index -= edge_chunk_nums_[i];
189  }
190  return index_pair;
191  }
192 
193  private:
194  std::vector<IdType> edge_chunk_nums_;
195 };
196 
197 static inline IdType IndexPairToGlobalChunkIndex(
198  const std::vector<IdType>& edge_chunk_nums, IdType vertex_chunk_index,
199  IdType edge_chunk_index) {
200  IdType global_edge_chunk_index = 0;
201  for (IdType i = 0; i < vertex_chunk_index; ++i) {
202  global_edge_chunk_index += edge_chunk_nums[i];
203  }
204  return global_edge_chunk_index + edge_chunk_index;
205 }
206 
207 // covert edge global chunk index to <vertex_chunk_index, edge_chunk_index>
208 static inline std::pair<IdType, IdType> GlobalChunkIndexToIndexPair(
209  const std::vector<IdType>& edge_chunk_nums, IdType global_index) {
210  std::pair<IdType, IdType> index_pair(0, 0);
211  for (size_t i = 0; i < edge_chunk_nums.size(); ++i) {
212  if (global_index < edge_chunk_nums[i]) {
213  index_pair.first = static_cast<IdType>(i);
214  index_pair.second = global_index;
215  break;
216  }
217  global_index -= edge_chunk_nums[i];
218  }
219  return index_pair;
220 }
221 
222 std::shared_ptr<arrow::ChunkedArray> GetArrowColumnByName(
223  std::shared_ptr<arrow::Table> const& table, const std::string& name);
224 
225 std::shared_ptr<arrow::Array> GetArrowArrayByChunkIndex(
226  std::shared_ptr<arrow::ChunkedArray> const& chunk_array,
227  int64_t chunk_index);
228 
229 Result<const void*> GetArrowArrayData(
230  std::shared_ptr<arrow::Array> const& array);
231 
// Joins the strings of `str_vec` into a single string, writing `delimiter`
// between consecutive pieces.
//
// The delimiter is only written once the result built so far is non-empty, so
// leading empty strings do not produce leading delimiters: {"", "a", "b"}
// joined with "_" gives "a_b", while {"a", "", "b"} gives "a__b". An empty
// `str_vec` gives an empty string.
//
// The result is assembled in one pre-sized buffer with in-place appends, so
// no temporary string is created per element.
static inline std::string ConcatStringWithDelimiter(
    const std::vector<std::string>& str_vec, const std::string& delimiter) {
  // Upper bound on the final length, so the appends below never reallocate.
  size_t capacity = 0;
  for (const std::string& piece : str_vec) {
    capacity += piece.size() + delimiter.size();
  }

  std::string joined;
  joined.reserve(capacity);
  for (const std::string& piece : str_vec) {
    if (!joined.empty()) {
      joined += delimiter;
    }
    joined += piece;
  }
  return joined;
}
240 
// Reads a typed value out of an untyped buffer.
template <typename T>
struct ValueGetter {
  // Treats `data` as an array of T and returns a copy of element `offset`.
  // No bounds or type check is performed.
  static T Value(const void* data, int64_t offset) {
    // Converting const void* to const T* is a plain static_cast.
    return static_cast<const T*>(data)[offset];
  }
};

// Specialization for std::string. Only the declaration is given here; the
// definition is not part of this header.
// NOTE(review): presumably strings are not stored as a flat array of
// std::string, which is why the generic version is not reused - confirm in
// the implementation.
template <>
struct ValueGetter<std::string> {
  static std::string Value(const void* data, int64_t offset);
};
252 
253 } // namespace graphar::util