Apache GraphAr C++ Library
The C++ Library for Apache GraphAr
label.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef CPP_SRC_GRAPHAR_LABEL_H_
21 #define CPP_SRC_GRAPHAR_LABEL_H_
22 
23 #include <arrow/io/file.h>
24 #include <arrow/util/logging.h>
25 #include <parquet/api/reader.h>
26 #include <parquet/api/writer.h>
27 #include <parquet/properties.h>
28 
29 #include <iostream>
30 #include <set>
31 #include <vector>
32 
33 using parquet::ConvertedType;
34 using parquet::Encoding;
35 using parquet::Repetition;
36 using parquet::Type;
37 using parquet::schema::GroupNode;
38 using parquet::schema::PrimitiveNode;
39 
// The batch size (1024), exposed to every translation unit including this
// header. NOTE(review): the code that consumes it is not visible here --
// presumably the number of values fetched per Parquet column read; confirm
// against the implementation.
constexpr int BATCH_SIZE = 1024;
41 
/// Selects which form of result a label query produces.
///
/// Kept as an unscoped enum so the enumerators stay visible at namespace
/// scope (e.g. `COUNT` is used unqualified as a default argument below).
enum QUERY_TYPE {
  COUNT,    ///< return the number of valid vertices
  INDEX,    ///< return the indices of valid vertices
  BITMAP,   ///< return the bitmap of valid vertices
  ADAPTIVE  ///< adaptively return indices or bitmap
};
49 
/// Sets bit `index` in a bitmap stored as an array of 64-bit words.
///
/// Bit `index` lives in word `index >> 6`, at bit position `index & 63`
/// within that word. Bits that are already set are left untouched.
///
/// No bounds checking is performed: the caller must pass a non-negative
/// `index` and a `bitmap` holding at least `(index >> 6) + 1` words.
///
/// @param bitmap  word array to modify in place.
/// @param index   zero-based position of the bit to set.
static inline void SetBitmap(uint64_t* bitmap, const int index) {
  bitmap[index >> 6] |= (1ULL << (index & 63));
}
54 
55 int read_parquet_file_and_get_valid_indices(
56  const char* parquet_filename, const int row_num, const int tot_label_num,
57  const int tested_label_num, std::vector<int> tested_label_ids,
58  const std::function<bool(bool*, int)>& IsValid, int chunk_idx,
59  int chunk_size, std::vector<int>* indices = nullptr,
60  uint64_t* bitmap = nullptr, const QUERY_TYPE query_type = COUNT);
61 
62 #endif // CPP_SRC_GRAPHAR_LABEL_H_