20 #include "graphar/high-level/vertices_builder.h"
24 #include "graphar/convert_to_arrow_type.h"
25 #include "graphar/fwd.h"
26 #include "graphar/graph_info.h"
27 #include "graphar/label.h"
28 #include "graphar/status.h"
30 namespace graphar::builder {
// Checks whether vertex `v` may be added to this builder at `index`.
// NOTE(review): this excerpt is missing some source lines (several `return`
// statements, `case` labels and closing braces are not visible), so the
// comments below describe only what the visible lines show.
//
// @param v The vertex to check.
// @param index The requested vertex index; the value -1 is exempt from the
//        start-index check below.
// @param validate_level The requested level; default_validate is replaced by
//        the builder's own validate_level_.
32 Status VerticesBuilder::validate(
const Vertex& v, IdType index,
33 ValidateLevel validate_level)
const {
// Fall back to the level stored on the builder.
35 if (validate_level == ValidateLevel::default_validate)
36 validate_level = validate_level_;
// The statement guarded by this check is not visible here; presumably it
// returns early without validating -- TODO confirm.
38 if (validate_level == ValidateLevel::no_validate)
// Error message stating that no vertices can be added after the builder was
// saved (the guarding condition is not visible in this excerpt).
45 "The vertices builder has been saved, can not add "
46 "new vertices any more");
// The start vertex index has to be a multiple of the chunk size.
49 if (start_vertex_index_ % vertex_info_->GetChunkSize() != 0) {
51 " is not aligned with the chunk size ",
52 vertex_info_->GetChunkSize());
// An explicit index (anything other than -1) must not be below the start
// index of this builder.
55 if (index != -1 && index < start_vertex_index_) {
57 " is smaller than the start index ",
// strong_validate additionally checks each property carried by the vertex
// against vertex_info_.
62 if (validate_level == ValidateLevel::strong_validate) {
63 for (
auto& property : v.GetProperties()) {
// The property name (property.first) must be declared in the vertex info.
65 if (!vertex_info_->HasProperty(property.first)) {
67 " is not contained in the ",
68 vertex_info_->GetType(),
" vertex info.");
// .value() is taken without inspecting the result first; presumably safe
// because HasProperty succeeded above -- TODO confirm.
71 auto type = vertex_info_->GetPropertyType(property.first).value();
72 bool invalid_type =
false;
// Per-type checks: Vertex::ValidatePropertyType is instantiated with the C
// type that TypeToArrowType maps each Type enumerator to, and receives the
// property's declared cardinality. Most `case` labels and the leading
// arguments of each call are not visible in this excerpt.
76 v.ValidatePropertyType<
typename TypeToArrowType<Type::BOOL>::CType>(
78 vertex_info_->GetPropertyCardinality(property.first).value()));
81 GAR_RETURN_NOT_OK(v.ValidatePropertyType<
82 typename TypeToArrowType<Type::INT32>::CType>(
84 vertex_info_->GetPropertyCardinality(property.first).value()));
87 GAR_RETURN_NOT_OK(v.ValidatePropertyType<
88 typename TypeToArrowType<Type::INT64>::CType>(
90 vertex_info_->GetPropertyCardinality(property.first).value()));
93 GAR_RETURN_NOT_OK(v.ValidatePropertyType<
94 typename TypeToArrowType<Type::FLOAT>::CType>(
96 vertex_info_->GetPropertyCardinality(property.first).value()));
99 GAR_RETURN_NOT_OK(v.ValidatePropertyType<
100 typename TypeToArrowType<Type::DOUBLE>::CType>(
102 vertex_info_->GetPropertyCardinality(property.first).value()));
105 GAR_RETURN_NOT_OK(v.ValidatePropertyType<
106 typename TypeToArrowType<Type::STRING>::CType>(
108 vertex_info_->GetPropertyCardinality(property.first).value()));
// DATE and TIMESTAMP use the nested CType::c_type rather than CType itself.
112 GAR_RETURN_NOT_OK(v.ValidatePropertyType<
113 typename TypeToArrowType<Type::DATE>::CType::c_type>(
115 vertex_info_->GetPropertyCardinality(property.first).value()));
117 case Type::TIMESTAMP:
120 v.ValidatePropertyType<
121 typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type>(
123 vertex_info_->GetPropertyCardinality(property.first).value()));
// Tail of a condition whose first line is not visible: it requires the
// declared cardinality of the property to be SINGLE.
129 Cardinality::SINGLE ==
130 vertex_info_->GetPropertyCardinality(property.first).value()) {
// Error text naming the property, its declared type and the runtime type
// held by the std::any value (property.second.type().name()).
132 "Invalid data type for property ", property.first +
", defined as ",
133 type->ToTypeName(),
", but got ", property.second.type().name());
// Builds one Arrow array holding the values of `property_name` for every
// vertex in vertices_.
// NOTE(review): `type` is presumably the template parameter of this member
// template; the `template` header, the declarations of `builder` and
// `cardinality`, the `else` lines and the final return are not visible in
// this excerpt.
//
// @param property_name Name of the property to collect.
// @param array Output parameter; assigned the finished array.
141 Status VerticesBuilder::tryToAppend(
142 const std::string& property_name,
143 std::shared_ptr<arrow::Array>& array) {
144 using CType =
typename TypeToArrowType<type>::CType;
145 arrow::MemoryPool* pool = arrow::default_memory_pool();
// Value builder of the Arrow builder type mapped from `type`.
147 std::make_shared<typename TypeToArrowType<type>::BuilderType>(pool);
149 vertex_info_->GetPropertyCardinality(property_name).value();
// Any cardinality other than SINGLE is stored as a list column: the value
// builder is wrapped in an arrow::ListBuilder.
150 if (cardinality != Cardinality::SINGLE) {
151 arrow::ListBuilder list_builder(pool, builder);
152 for (
auto& v : vertices_) {
// One list_builder.Append() call per vertex.
153 RETURN_NOT_ARROW_OK(list_builder.Append());
// Vertex is empty or lacks the property: a null is appended to the value
// builder (not to list_builder).
154 if (v.Empty() || !v.ContainProperty(property_name)) {
155 RETURN_NOT_ARROW_OK(builder->AppendNull());
// The property holds a single value: append it after casting the std::any
// to CType.
157 if (!v.IsMultiProperty(property_name)) {
158 RETURN_NOT_ARROW_OK(builder->Append(
159 std::any_cast<CType>(v.GetProperty(property_name))));
// Otherwise the property is read as std::vector<std::any> and each element
// is appended. The any_cast is to a value type, so the vector is copied.
161 auto property_value_list = std::any_cast<std::vector<std::any>>(
162 v.GetProperty(property_name));
163 for (
auto& value : property_value_list) {
164 RETURN_NOT_ARROW_OK(builder->Append(std::any_cast<CType>(value)));
// NOTE(review): ValueOrDie() aborts on a failed Finish() instead of
// returning a Status to the caller -- consider propagating the error.
169 array = list_builder.Finish().ValueOrDie();
// SINGLE cardinality: a flat column with one value or null per vertex.
171 for (
auto& v : vertices_) {
172 if (v.Empty() || !v.ContainProperty(property_name)) {
173 RETURN_NOT_ARROW_OK(builder->AppendNull());
175 RETURN_NOT_ARROW_OK(builder->Append(
176 std::any_cast<CType>(v.GetProperty(property_name))));
// NOTE(review): same ValueOrDie() concern as for the list branch.
179 array = builder->Finish().ValueOrDie();
// TIMESTAMP version of tryToAppend: builds a flat Arrow timestamp array with
// one value or null per vertex in vertices_.
// NOTE(review): the `template <>` line, the `else` branch opener and the
// final return are not visible in this excerpt. No list/cardinality handling
// is visible in this version.
//
// @param property_name Name of the property to collect.
// @param array Output parameter; assigned the finished array.
185 Status VerticesBuilder::tryToAppend<Type::TIMESTAMP>(
186 const std::string& property_name,
187 std::shared_ptr<arrow::Array>& array) {
// Values are extracted as the nested c_type of the mapped Arrow type.
188 using CType =
typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type;
189 arrow::MemoryPool* pool = arrow::default_memory_pool();
// The builder is constructed with a millisecond-unit timestamp type.
190 typename TypeToArrowType<Type::TIMESTAMP>::BuilderType builder(
191 arrow::timestamp(arrow::TimeUnit::MILLI), pool);
192 for (
auto& v : vertices_) {
// Empty vertex or missing property becomes a null entry.
193 if (v.Empty() || !v.ContainProperty(property_name)) {
194 RETURN_NOT_ARROW_OK(builder.AppendNull());
197 builder.Append(std::any_cast<CType>(v.GetProperty(property_name))));
// NOTE(review): ValueOrDie() aborts on a failed Finish() instead of
// returning a Status to the caller.
200 array = builder.Finish().ValueOrDie();
// DATE version of tryToAppend: builds a flat Arrow array with one value or
// null per vertex in vertices_.
// NOTE(review): the `template <>` line, the `else` branch opener and the
// final return are not visible in this excerpt. No list/cardinality handling
// is visible in this version.
//
// @param property_name Name of the property to collect.
// @param array Output parameter; assigned the finished array.
205 Status VerticesBuilder::tryToAppend<Type::DATE>(
206 const std::string& property_name,
207 std::shared_ptr<arrow::Array>& array) {
// Values are extracted as the nested c_type of the mapped Arrow type.
208 using CType =
typename TypeToArrowType<Type::DATE>::CType::c_type;
209 arrow::MemoryPool* pool = arrow::default_memory_pool();
210 typename TypeToArrowType<Type::DATE>::BuilderType builder(pool);
211 for (
auto& v : vertices_) {
// Empty vertex or missing property becomes a null entry.
212 if (v.Empty() || !v.ContainProperty(property_name)) {
213 RETURN_NOT_ARROW_OK(builder.AppendNull());
216 builder.Append(std::any_cast<CType>(v.GetProperty(property_name))));
// NOTE(review): ValueOrDie() aborts on a failed Finish() instead of
// returning a Status to the caller.
219 array = builder.Finish().ValueOrDie();
// Dispatches on the runtime type id to the matching tryToAppend<...>
// instantiation, forwarding `property_name` and `array` unchanged.
// NOTE(review): most `case` labels and whatever follows the last case
// (e.g. a default branch) are not visible in this excerpt.
//
// @param type Data type of the property; only type->id() is read here.
// @param property_name Name of the property to collect.
// @param array Output parameter passed through to tryToAppend.
223 Status VerticesBuilder::appendToArray(
224 const std::shared_ptr<DataType>& type,
const std::string& property_name,
225 std::shared_ptr<arrow::Array>& array) {
226 switch (type->id()) {
228 return tryToAppend<Type::BOOL>(property_name, array);
230 return tryToAppend<Type::INT32>(property_name, array);
232 return tryToAppend<Type::INT64>(property_name, array);
234 return tryToAppend<Type::FLOAT>(property_name, array);
236 return tryToAppend<Type::DOUBLE>(property_name, array);
238 return tryToAppend<Type::STRING>(property_name, array);
240 return tryToAppend<Type::DATE>(property_name, array);
241 case Type::TIMESTAMP:
242 return tryToAppend<Type::TIMESTAMP>(property_name, array);
// Builds an arrow::Table with one column per property of every property
// group in vertex_info_. Columns and schema fields are appended in the
// iteration order of the groups and their properties.
// NOTE(review): some lines (e.g. the `else` opener and closing braces) are
// not visible in this excerpt.
//
// @return The table made from the collected schema and arrays; an error from
//         appendToArray is propagated through GAR_RETURN_NOT_OK.
249 Result<std::shared_ptr<arrow::Table>> VerticesBuilder::convertToTable() {
250 const auto& property_groups = vertex_info_->GetPropertyGroups();
251 std::vector<std::shared_ptr<arrow::Array>> arrays;
252 std::vector<std::shared_ptr<arrow::Field>> schema_vector;
253 for (
auto& property_group : property_groups) {
254 for (
auto& property : property_group->GetProperties()) {
// A property whose cardinality is not SINGLE gets a list field of the
// mapped Arrow type.
256 if (vertex_info_->GetPropertyCardinality(property.name).value() !=
257 Cardinality::SINGLE) {
258 schema_vector.push_back(arrow::field(
260 arrow::list(DataType::DataTypeToArrowDataType(property.type))));
// SINGLE cardinality: the field uses the mapped Arrow type directly.
262 schema_vector.push_back(arrow::field(
263 property.name, DataType::DataTypeToArrowDataType(property.type)));
// Fill the column for this property from the buffered vertices.
266 std::shared_ptr<arrow::Array> array;
267 GAR_RETURN_NOT_OK(appendToArray(property.type, property.name, array));
268 arrays.push_back(array);
271 auto schema = std::make_shared<arrow::Schema>(schema_vector);
272 return arrow::Table::Make(schema, arrays);
// Signature fragments of four static functions that each return a Status and
// take a variadic pack of forwarding references (Args&&...).
// NOTE(review): the `template` headers and the function bodies are not
// visible in this excerpt; these lines look like they come from a different
// file than the definitions above -- TODO confirm.
static Status IndexError(Args &&... args)
static Status TypeError(Args &&... args)
static Status KeyError(Args &&... args)
static Status Invalid(Args &&... args)