Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
vertices_builder.cc
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #include "graphar/high-level/vertices_builder.h"
21 #include <any>
22 #include <iterator>
23 #include <vector>
24 #include "graphar/convert_to_arrow_type.h"
25 #include "graphar/fwd.h"
26 #include "graphar/graph_info.h"
27 #include "graphar/label.h"
28 #include "graphar/status.h"
29 
30 namespace graphar::builder {
31 
32 Status VerticesBuilder::validate(const Vertex& v, IdType index,
33  ValidateLevel validate_level) const {
34  // use the builder's validate level
35  if (validate_level == ValidateLevel::default_validate)
36  validate_level = validate_level_;
37  // no validate
38  if (validate_level == ValidateLevel::no_validate)
39  return Status::OK();
40 
41  // weak validate
42  // can not add new vertices after dumping
43  if (is_saved_) {
44  return Status::Invalid(
45  "The vertices builder has been saved, can not add "
46  "new vertices any more");
47  }
48  // the start vertex index must be aligned with the chunk size
49  if (start_vertex_index_ % vertex_info_->GetChunkSize() != 0) {
50  return Status::IndexError("The start vertex index ", start_vertex_index_,
51  " is not aligned with the chunk size ",
52  vertex_info_->GetChunkSize());
53  }
54  // the vertex index must larger than start index
55  if (index != -1 && index < start_vertex_index_) {
56  return Status::IndexError("The vertex index ", index,
57  " is smaller than the start index ",
58  start_vertex_index_);
59  }
60 
61  // strong validate
62  if (validate_level == ValidateLevel::strong_validate) {
63  for (auto& property : v.GetProperties()) {
64  // check if the property is contained
65  if (!vertex_info_->HasProperty(property.first)) {
66  return Status::KeyError("Property with name ", property.first,
67  " is not contained in the ",
68  vertex_info_->GetType(), " vertex info.");
69  }
70  // check if the property type is correct
71  auto type = vertex_info_->GetPropertyType(property.first).value();
72  bool invalid_type = false;
73  switch (type->id()) {
74  case Type::BOOL:
75  GAR_RETURN_NOT_OK(
76  v.ValidatePropertyType<typename TypeToArrowType<Type::BOOL>::CType>(
77  property.first,
78  vertex_info_->GetPropertyCardinality(property.first).value()));
79  break;
80  case Type::INT32:
81  GAR_RETURN_NOT_OK(v.ValidatePropertyType<
82  typename TypeToArrowType<Type::INT32>::CType>(
83  property.first,
84  vertex_info_->GetPropertyCardinality(property.first).value()));
85  break;
86  case Type::INT64:
87  GAR_RETURN_NOT_OK(v.ValidatePropertyType<
88  typename TypeToArrowType<Type::INT64>::CType>(
89  property.first,
90  vertex_info_->GetPropertyCardinality(property.first).value()));
91  break;
92  case Type::FLOAT:
93  GAR_RETURN_NOT_OK(v.ValidatePropertyType<
94  typename TypeToArrowType<Type::FLOAT>::CType>(
95  property.first,
96  vertex_info_->GetPropertyCardinality(property.first).value()));
97  break;
98  case Type::DOUBLE:
99  GAR_RETURN_NOT_OK(v.ValidatePropertyType<
100  typename TypeToArrowType<Type::DOUBLE>::CType>(
101  property.first,
102  vertex_info_->GetPropertyCardinality(property.first).value()));
103  break;
104  case Type::STRING:
105  GAR_RETURN_NOT_OK(v.ValidatePropertyType<
106  typename TypeToArrowType<Type::STRING>::CType>(
107  property.first,
108  vertex_info_->GetPropertyCardinality(property.first).value()));
109  break;
110  case Type::DATE:
111  // date is stored as int32_t
112  GAR_RETURN_NOT_OK(v.ValidatePropertyType<
113  typename TypeToArrowType<Type::DATE>::CType::c_type>(
114  property.first,
115  vertex_info_->GetPropertyCardinality(property.first).value()));
116  break;
117  case Type::TIMESTAMP:
118  // timestamp is stored as int64_t
119  GAR_RETURN_NOT_OK(
120  v.ValidatePropertyType<
121  typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type>(
122  property.first,
123  vertex_info_->GetPropertyCardinality(property.first).value()));
124  break;
125  default:
126  return Status::TypeError("Unsupported property type.");
127  }
128  if (invalid_type &&
129  Cardinality::SINGLE ==
130  vertex_info_->GetPropertyCardinality(property.first).value()) {
131  return Status::TypeError(
132  "Invalid data type for property ", property.first + ", defined as ",
133  type->ToTypeName(), ", but got ", property.second.type().name());
134  }
135  }
136  }
137  return Status::OK();
138 }
139 
140 template <Type type>
141 Status VerticesBuilder::tryToAppend(
142  const std::string& property_name,
143  std::shared_ptr<arrow::Array>& array) { // NOLINT
144  using CType = typename TypeToArrowType<type>::CType;
145  arrow::MemoryPool* pool = arrow::default_memory_pool();
146  auto builder =
147  std::make_shared<typename TypeToArrowType<type>::BuilderType>(pool);
148  auto cardinality =
149  vertex_info_->GetPropertyCardinality(property_name).value();
150  if (cardinality != Cardinality::SINGLE) {
151  arrow::ListBuilder list_builder(pool, builder);
152  for (auto& v : vertices_) {
153  RETURN_NOT_ARROW_OK(list_builder.Append());
154  if (v.Empty() || !v.ContainProperty(property_name)) {
155  RETURN_NOT_ARROW_OK(builder->AppendNull());
156  } else {
157  if (!v.IsMultiProperty(property_name)) {
158  RETURN_NOT_ARROW_OK(builder->Append(
159  std::any_cast<CType>(v.GetProperty(property_name))));
160  } else {
161  auto property_value_list = std::any_cast<std::vector<std::any>>(
162  v.GetProperty(property_name));
163  for (auto& value : property_value_list) {
164  RETURN_NOT_ARROW_OK(builder->Append(std::any_cast<CType>(value)));
165  }
166  }
167  }
168  }
169  array = list_builder.Finish().ValueOrDie();
170  } else {
171  for (auto& v : vertices_) {
172  if (v.Empty() || !v.ContainProperty(property_name)) {
173  RETURN_NOT_ARROW_OK(builder->AppendNull());
174  } else {
175  RETURN_NOT_ARROW_OK(builder->Append(
176  std::any_cast<CType>(v.GetProperty(property_name))));
177  }
178  }
179  array = builder->Finish().ValueOrDie();
180  }
181  return Status::OK();
182 }
183 
184 template <>
185 Status VerticesBuilder::tryToAppend<Type::TIMESTAMP>(
186  const std::string& property_name,
187  std::shared_ptr<arrow::Array>& array) { // NOLINT
188  using CType = typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type;
189  arrow::MemoryPool* pool = arrow::default_memory_pool();
190  typename TypeToArrowType<Type::TIMESTAMP>::BuilderType builder(
191  arrow::timestamp(arrow::TimeUnit::MILLI), pool);
192  for (auto& v : vertices_) {
193  if (v.Empty() || !v.ContainProperty(property_name)) {
194  RETURN_NOT_ARROW_OK(builder.AppendNull());
195  } else {
196  RETURN_NOT_ARROW_OK(
197  builder.Append(std::any_cast<CType>(v.GetProperty(property_name))));
198  }
199  }
200  array = builder.Finish().ValueOrDie();
201  return Status::OK();
202 }
203 
204 template <>
205 Status VerticesBuilder::tryToAppend<Type::DATE>(
206  const std::string& property_name,
207  std::shared_ptr<arrow::Array>& array) { // NOLINT
208  using CType = typename TypeToArrowType<Type::DATE>::CType::c_type;
209  arrow::MemoryPool* pool = arrow::default_memory_pool();
210  typename TypeToArrowType<Type::DATE>::BuilderType builder(pool);
211  for (auto& v : vertices_) {
212  if (v.Empty() || !v.ContainProperty(property_name)) {
213  RETURN_NOT_ARROW_OK(builder.AppendNull());
214  } else {
215  RETURN_NOT_ARROW_OK(
216  builder.Append(std::any_cast<CType>(v.GetProperty(property_name))));
217  }
218  }
219  array = builder.Finish().ValueOrDie();
220  return Status::OK();
221 }
222 
223 Status VerticesBuilder::appendToArray(
224  const std::shared_ptr<DataType>& type, const std::string& property_name,
225  std::shared_ptr<arrow::Array>& array) { // NOLINT
226  switch (type->id()) {
227  case Type::BOOL:
228  return tryToAppend<Type::BOOL>(property_name, array);
229  case Type::INT32:
230  return tryToAppend<Type::INT32>(property_name, array);
231  case Type::INT64:
232  return tryToAppend<Type::INT64>(property_name, array);
233  case Type::FLOAT:
234  return tryToAppend<Type::FLOAT>(property_name, array);
235  case Type::DOUBLE:
236  return tryToAppend<Type::DOUBLE>(property_name, array);
237  case Type::STRING:
238  return tryToAppend<Type::STRING>(property_name, array);
239  case Type::DATE:
240  return tryToAppend<Type::DATE>(property_name, array);
241  case Type::TIMESTAMP:
242  return tryToAppend<Type::TIMESTAMP>(property_name, array);
243  default:
244  return Status::TypeError("Unsupported property type.");
245  }
246  return Status::OK();
247 }
248 
249 Result<std::shared_ptr<arrow::Table>> VerticesBuilder::convertToTable() {
250  const auto& property_groups = vertex_info_->GetPropertyGroups();
251  std::vector<std::shared_ptr<arrow::Array>> arrays;
252  std::vector<std::shared_ptr<arrow::Field>> schema_vector;
253  for (auto& property_group : property_groups) {
254  for (auto& property : property_group->GetProperties()) {
255  // add a column to schema
256  if (vertex_info_->GetPropertyCardinality(property.name).value() !=
257  Cardinality::SINGLE) {
258  schema_vector.push_back(arrow::field(
259  property.name,
260  arrow::list(DataType::DataTypeToArrowDataType(property.type))));
261  } else {
262  schema_vector.push_back(arrow::field(
263  property.name, DataType::DataTypeToArrowDataType(property.type)));
264  }
265  // add a column to data
266  std::shared_ptr<arrow::Array> array;
267  GAR_RETURN_NOT_OK(appendToArray(property.type, property.name, array));
268  arrays.push_back(array);
269  }
270  }
271  auto schema = std::make_shared<arrow::Schema>(schema_vector);
272  return arrow::Table::Make(schema, arrays);
273 }
274 
275 } // namespace graphar::builder
static Status IndexError(Args &&... args)
Definition: status.h:197
static Status TypeError(Args &&... args)
Definition: status.h:178
static Status KeyError(Args &&... args)
Definition: status.h:172
static Status Invalid(Args &&... args)
Definition: status.h:188
static Status OK()
Definition: status.h:157