Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
vertices_builder.cc
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #include "graphar/high-level/vertices_builder.h"
21 #include "graphar/convert_to_arrow_type.h"
22 #include "graphar/graph_info.h"
23 
24 namespace graphar::builder {
25 
26 Status VerticesBuilder::validate(const Vertex& v, IdType index,
27  ValidateLevel validate_level) const {
28  // use the builder's validate level
29  if (validate_level == ValidateLevel::default_validate)
30  validate_level = validate_level_;
31  // no validate
32  if (validate_level == ValidateLevel::no_validate)
33  return Status::OK();
34 
35  // weak validate
36  // can not add new vertices after dumping
37  if (is_saved_) {
38  return Status::Invalid(
39  "The vertices builder has been saved, can not add "
40  "new vertices any more");
41  }
42  // the start vertex index must be aligned with the chunk size
43  if (start_vertex_index_ % vertex_info_->GetChunkSize() != 0) {
44  return Status::IndexError("The start vertex index ", start_vertex_index_,
45  " is not aligned with the chunk size ",
46  vertex_info_->GetChunkSize());
47  }
48  // the vertex index must larger than start index
49  if (index != -1 && index < start_vertex_index_) {
50  return Status::IndexError("The vertex index ", index,
51  " is smaller than the start index ",
52  start_vertex_index_);
53  }
54 
55  // strong validate
56  if (validate_level == ValidateLevel::strong_validate) {
57  for (auto& property : v.GetProperties()) {
58  // check if the property is contained
59  if (!vertex_info_->HasProperty(property.first)) {
60  return Status::KeyError("Property with name ", property.first,
61  " is not contained in the ",
62  vertex_info_->GetLabel(), " vertex info.");
63  }
64  // check if the property type is correct
65  auto type = vertex_info_->GetPropertyType(property.first).value();
66  bool invalid_type = false;
67  switch (type->id()) {
68  case Type::BOOL:
69  if (property.second.type() !=
70  typeid(typename TypeToArrowType<Type::BOOL>::CType)) {
71  invalid_type = true;
72  }
73  break;
74  case Type::INT32:
75  if (property.second.type() !=
76  typeid(typename TypeToArrowType<Type::INT32>::CType)) {
77  invalid_type = true;
78  }
79  break;
80  case Type::INT64:
81  if (property.second.type() !=
82  typeid(typename TypeToArrowType<Type::INT64>::CType)) {
83  invalid_type = true;
84  }
85  break;
86  case Type::FLOAT:
87  if (property.second.type() !=
88  typeid(typename TypeToArrowType<Type::FLOAT>::CType)) {
89  invalid_type = true;
90  }
91  break;
92  case Type::DOUBLE:
93  if (property.second.type() !=
94  typeid(typename TypeToArrowType<Type::DOUBLE>::CType)) {
95  invalid_type = true;
96  }
97  break;
98  case Type::STRING:
99  if (property.second.type() !=
100  typeid(typename TypeToArrowType<Type::STRING>::CType)) {
101  invalid_type = true;
102  }
103  break;
104  case Type::DATE:
105  // date is stored as int32_t
106  if (property.second.type() !=
107  typeid(typename TypeToArrowType<Type::DATE>::CType::c_type)) {
108  invalid_type = true;
109  }
110  break;
111  case Type::TIMESTAMP:
112  // timestamp is stored as int64_t
113  if (property.second.type() !=
114  typeid(typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type)) {
115  invalid_type = true;
116  }
117  break;
118  default:
119  return Status::TypeError("Unsupported property type.");
120  }
121  if (invalid_type) {
122  return Status::TypeError(
123  "Invalid data type for property ", property.first + ", defined as ",
124  type->ToTypeName(), ", but got ", property.second.type().name());
125  }
126  }
127  }
128  return Status::OK();
129 }
130 
131 template <Type type>
132 Status VerticesBuilder::tryToAppend(
133  const std::string& property_name,
134  std::shared_ptr<arrow::Array>& array) { // NOLINT
135  using CType = typename TypeToArrowType<type>::CType;
136  arrow::MemoryPool* pool = arrow::default_memory_pool();
137  typename TypeToArrowType<type>::BuilderType builder(pool);
138  for (auto& v : vertices_) {
139  if (v.Empty() || !v.ContainProperty(property_name)) {
140  RETURN_NOT_ARROW_OK(builder.AppendNull());
141  } else {
142  RETURN_NOT_ARROW_OK(
143  builder.Append(std::any_cast<CType>(v.GetProperty(property_name))));
144  }
145  }
146  array = builder.Finish().ValueOrDie();
147  return Status::OK();
148 }
149 
150 template <>
151 Status VerticesBuilder::tryToAppend<Type::TIMESTAMP>(
152  const std::string& property_name,
153  std::shared_ptr<arrow::Array>& array) { // NOLINT
154  using CType = typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type;
155  arrow::MemoryPool* pool = arrow::default_memory_pool();
156  typename TypeToArrowType<Type::TIMESTAMP>::BuilderType builder(
157  arrow::timestamp(arrow::TimeUnit::MILLI), pool);
158  for (auto& v : vertices_) {
159  if (v.Empty() || !v.ContainProperty(property_name)) {
160  RETURN_NOT_ARROW_OK(builder.AppendNull());
161  } else {
162  RETURN_NOT_ARROW_OK(
163  builder.Append(std::any_cast<CType>(v.GetProperty(property_name))));
164  }
165  }
166  array = builder.Finish().ValueOrDie();
167  return Status::OK();
168 }
169 
170 template <>
171 Status VerticesBuilder::tryToAppend<Type::DATE>(
172  const std::string& property_name,
173  std::shared_ptr<arrow::Array>& array) { // NOLINT
174  using CType = typename TypeToArrowType<Type::DATE>::CType::c_type;
175  arrow::MemoryPool* pool = arrow::default_memory_pool();
176  typename TypeToArrowType<Type::DATE>::BuilderType builder(pool);
177  for (auto& v : vertices_) {
178  if (v.Empty() || !v.ContainProperty(property_name)) {
179  RETURN_NOT_ARROW_OK(builder.AppendNull());
180  } else {
181  RETURN_NOT_ARROW_OK(
182  builder.Append(std::any_cast<CType>(v.GetProperty(property_name))));
183  }
184  }
185  array = builder.Finish().ValueOrDie();
186  return Status::OK();
187 }
188 
189 Status VerticesBuilder::appendToArray(
190  const std::shared_ptr<DataType>& type, const std::string& property_name,
191  std::shared_ptr<arrow::Array>& array) { // NOLINT
192  switch (type->id()) {
193  case Type::BOOL:
194  return tryToAppend<Type::BOOL>(property_name, array);
195  case Type::INT32:
196  return tryToAppend<Type::INT32>(property_name, array);
197  case Type::INT64:
198  return tryToAppend<Type::INT64>(property_name, array);
199  case Type::FLOAT:
200  return tryToAppend<Type::FLOAT>(property_name, array);
201  case Type::DOUBLE:
202  return tryToAppend<Type::DOUBLE>(property_name, array);
203  case Type::STRING:
204  return tryToAppend<Type::STRING>(property_name, array);
205  case Type::DATE:
206  return tryToAppend<Type::DATE>(property_name, array);
207  case Type::TIMESTAMP:
208  return tryToAppend<Type::TIMESTAMP>(property_name, array);
209  default:
210  return Status::TypeError("Unsupported property type.");
211  }
212  return Status::OK();
213 }
214 
215 Result<std::shared_ptr<arrow::Table>> VerticesBuilder::convertToTable() {
216  const auto& property_groups = vertex_info_->GetPropertyGroups();
217  std::vector<std::shared_ptr<arrow::Array>> arrays;
218  std::vector<std::shared_ptr<arrow::Field>> schema_vector;
219  for (auto& property_group : property_groups) {
220  for (auto& property : property_group->GetProperties()) {
221  // add a column to schema
222  schema_vector.push_back(arrow::field(
223  property.name, DataType::DataTypeToArrowDataType(property.type)));
224  // add a column to data
225  std::shared_ptr<arrow::Array> array;
226  appendToArray(property.type, property.name, array);
227  arrays.push_back(array);
228  }
229  }
230  auto schema = std::make_shared<arrow::Schema>(schema_vector);
231  return arrow::Table::Make(schema, arrays);
232 }
233 
234 } // namespace graphar::builder
static Status IndexError(Args &&... args)
Definition: status.h:197
static Status TypeError(Args &&... args)
Definition: status.h:178
static Status KeyError(Args &&... args)
Definition: status.h:172
static Status Invalid(Args &&... args)
Definition: status.h:188
static Status OK()
Definition: status.h:157