graphar_pyspark.util
Bindings to org.apache.graphar.util.
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Bindings to org.apache.graphar.util."""

from __future__ import annotations

from typing import Optional

from pyspark.sql import DataFrame

from graphar_pyspark import GraphArSession, _check_session


class IndexGenerator:
    """Helpers for generating the indices of vertex/edge DataFrames.

    Every method is a thin static wrapper: it calls ``_check_session()``,
    forwards the underlying ``_jdf`` objects to the JVM-side
    ``graphar.util.IndexGenerator`` and wraps the JVM result in a PySpark
    ``DataFrame`` bound to ``GraphArSession.ss``.
    """

    @staticmethod
    def construct_vertex_index_mapping(
        vertex_df: DataFrame,
        primary_key: str,
    ) -> DataFrame:
        """Generate a vertex index mapping from the primary key.

        The resulting DataFrame contains two columns: vertex index & primary key.

        :param vertex_df: input vertex DataFrame.
        :param primary_key: the primary key of the vertex.
        :returns: a DataFrame that contains two columns: vertex index & primary key.
        """
        _check_session()
        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.constructVertexIndexMapping(
                vertex_df._jdf,
                primary_key,
            ),
            GraphArSession.ss,
        )

    @staticmethod
    def generate_vertex_index_column(vertex_df: DataFrame) -> DataFrame:
        """Add a column containing the vertex index to the DataFrame.

        :param vertex_df: the input vertex DataFrame.
        :returns: DataFrame that contains a new vertex index column.
        """
        _check_session()
        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.generateVertexIndexColumn(
                vertex_df._jdf,
            ),
            GraphArSession.ss,
        )

    @staticmethod
    def generate_vertex_index_column_and_index_mapping(
        vertex_df: DataFrame,
        primary_key: str = "",
    ) -> tuple[DataFrame, DataFrame]:
        """Add an index column and generate a new index mapping.

        :param vertex_df: the input vertex DataFrame.
        :param primary_key: the primary key of the vertex (defaults to ``""``).
        :returns: a 2-tuple ``(vertex DataFrame with index, mapping DataFrame)``.
        """
        _check_session()
        jvm_res = GraphArSession.graphar.util.IndexGenerator.generateVertexIndexColumnAndIndexMapping(
            vertex_df._jdf,
            primary_key,
        )

        # The JVM call returns a pair; _1()/_2() read its two elements.
        return (
            DataFrame(jvm_res._1(), GraphArSession.ss),
            DataFrame(jvm_res._2(), GraphArSession.ss),
        )

    @staticmethod
    def generate_edge_index_column(edge_df: DataFrame) -> DataFrame:
        """Add a column containing the edge index to the input edge DataFrame.

        :param edge_df: DataFrame with edges.
        :returns: DataFrame with edges and index.
        """
        _check_session()
        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.generateEdgeIndexColumn(
                edge_df._jdf,
            ),
            GraphArSession.ss,
        )

    @staticmethod
    def generate_src_index_for_edges_from_mapping(
        edge_df: DataFrame,
        src_column_name: str,
        src_index_mapping: DataFrame,
    ) -> DataFrame:
        """Join the edge table with the vertex index mapping for the source column.

        :param edge_df: edges DataFrame.
        :param src_column_name: join-column.
        :param src_index_mapping: mapping DataFrame.
        :returns: DataFrame with index.
        """
        _check_session()
        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.generateSrcIndexForEdgesFromMapping(
                edge_df._jdf,
                src_column_name,
                src_index_mapping._jdf,
            ),
            GraphArSession.ss,
        )

    @staticmethod
    def generate_dst_index_for_edges_from_mapping(
        edge_df: DataFrame,
        dst_column_name: str,
        dst_index_mapping: DataFrame,
    ) -> DataFrame:
        """Join the edge table with the vertex index mapping for the destination column.

        :param edge_df: edges DataFrame.
        :param dst_column_name: join-column.
        :param dst_index_mapping: mapping DataFrame.
        :returns: DataFrame with index.
        """
        _check_session()
        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.generateDstIndexForEdgesFromMapping(
                edge_df._jdf,
                dst_column_name,
                dst_index_mapping._jdf,
            ),
            GraphArSession.ss,
        )

    @staticmethod
    def generate_src_and_dst_index_for_edges_from_mapping(
        edge_df: DataFrame,
        src_column_name: Optional[str],
        dst_column_name: Optional[str],
        src_index_mapping: DataFrame,
        dst_index_mapping: DataFrame,
    ) -> DataFrame:
        """Join the edge table with the vertex index mapping for source & destination columns.

        Assumes that the first and second columns are the src and dst columns if they are None.

        :param edge_df: edge DataFrame.
        :param src_column_name: src column, optional (the first col from edge_df will be used if None).
        :param dst_column_name: dst column, optional (the second col from edge_df will be used if None).
        :param src_index_mapping: source mapping DataFrame.
        :param dst_index_mapping: dest mapping DataFrame.
        :returns: DataFrame with indices.
        """
        _check_session()
        if src_column_name is None:
            src_column_name = edge_df.columns[0]

        if dst_column_name is None:
            dst_column_name = edge_df.columns[1]

        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.generateSrcAndDstIndexForEdgesFromMapping(
                edge_df._jdf,
                src_column_name,
                dst_column_name,
                src_index_mapping._jdf,
                dst_index_mapping._jdf,
            ),
            GraphArSession.ss,
        )

    @staticmethod
    def generate_src_index_for_edges(
        edge_df: DataFrame,
        src_column_name: str,
    ) -> DataFrame:
        """Construct vertex index for the source column.

        :param edge_df: edge DataFrame.
        :param src_column_name: source column.
        :returns: DataFrame with index.
        """
        _check_session()
        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.generateSrcIndexForEdges(
                edge_df._jdf,
                src_column_name,
            ),
            GraphArSession.ss,
        )

    @staticmethod
    def generate_scr_index_for_edges(
        edge_df: DataFrame,
        src_column_name: str,
    ) -> DataFrame:
        """Construct vertex index for the source column (misspelled legacy name).

        Kept so existing callers keep working; it simply delegates to
        :meth:`generate_src_index_for_edges`.

        :param edge_df: edge DataFrame.
        :param src_column_name: source column.
        :returns: DataFrame with index.
        """
        return IndexGenerator.generate_src_index_for_edges(edge_df, src_column_name)

    @staticmethod
    def generate_dst_index_for_edges(
        edge_df: DataFrame,
        dst_column_name: str,
    ) -> DataFrame:
        """Construct vertex index for the destination column.

        :param edge_df: edge DataFrame.
        :param dst_column_name: destination column.
        :returns: DataFrame with index.
        """
        _check_session()
        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.generateDstIndexForEdges(
                edge_df._jdf,
                dst_column_name,
            ),
            GraphArSession.ss,
        )

    @staticmethod
    def generate_src_and_dst_index_unitedly_for_edges(
        edge_df: DataFrame,
        src_column_name: str,
        dst_column_name: str,
    ) -> DataFrame:
        """Union and construct vertex index for source & destination columns.

        :param edge_df: edge DataFrame.
        :param src_column_name: source column name.
        :param dst_column_name: destination column name.
        :returns: DataFrame with index.
        """
        _check_session()
        return DataFrame(
            GraphArSession.graphar.util.IndexGenerator.generateSrcAndDstIndexUnitedlyForEdges(
                edge_df._jdf,
                src_column_name,
                dst_column_name,
            ),
            GraphArSession.ss,
        )
class IndexGenerator:
    """Static helpers that build index columns and index mappings for vertex/edge DataFrames.

    Each helper calls ``_check_session()``, hands the wrapped ``_jdf``
    objects to the JVM-side ``graphar.util.IndexGenerator`` and returns the
    JVM result re-wrapped as a PySpark ``DataFrame`` on ``GraphArSession.ss``.
    """

    @staticmethod
    def construct_vertex_index_mapping(
        vertex_df: DataFrame,
        primary_key: str,
    ) -> DataFrame:
        """Build the vertex index mapping for the given primary key.

        The resulting DataFrame contains two columns: vertex index & primary key.

        :param vertex_df: input vertex DataFrame.
        :param primary_key: the primary key of the vertex.
        :returns: a DataFrame with two columns: vertex index & primary key.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_mapping = jvm_generator.constructVertexIndexMapping(
            vertex_df._jdf,
            primary_key,
        )
        return DataFrame(jvm_mapping, GraphArSession.ss)

    @staticmethod
    def generate_vertex_index_column(vertex_df: DataFrame) -> DataFrame:
        """Return the vertex DataFrame extended with a vertex index column.

        :param vertex_df: the input vertex DataFrame.
        :returns: DataFrame that contains a new vertex index column.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_indexed = jvm_generator.generateVertexIndexColumn(vertex_df._jdf)
        return DataFrame(jvm_indexed, GraphArSession.ss)

    @staticmethod
    def generate_vertex_index_column_and_index_mapping(
        vertex_df: DataFrame,
        primary_key: str = "",
    ) -> (DataFrame, DataFrame):
        """Add an index column and produce the matching index mapping.

        :param vertex_df: the input vertex DataFrame.
        :param primary_key: the primary key of the vertex.
        :returns: the new vertex DataFrame and the mapping DataFrame, as a pair.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_pair = jvm_generator.generateVertexIndexColumnAndIndexMapping(
            vertex_df._jdf,
            primary_key,
        )

        # _1()/_2() read the first and second element of the JVM pair.
        indexed_vertices = DataFrame(jvm_pair._1(), GraphArSession.ss)
        index_mapping = DataFrame(jvm_pair._2(), GraphArSession.ss)
        return (indexed_vertices, index_mapping)

    @staticmethod
    def generate_edge_index_column(edge_df: DataFrame) -> DataFrame:
        """Return the edge DataFrame extended with an edge index column.

        :param edge_df: DataFrame with edges.
        :returns: DataFrame with edges and index.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_indexed = jvm_generator.generateEdgeIndexColumn(edge_df._jdf)
        return DataFrame(jvm_indexed, GraphArSession.ss)

    @staticmethod
    def generate_src_index_for_edges_from_mapping(
        edge_df: DataFrame,
        src_column_name: str,
        src_index_mapping: DataFrame,
    ) -> DataFrame:
        """Join the edge table with the vertex index mapping on the source column.

        :param edge_df: edges DataFrame.
        :param src_column_name: join-column.
        :param src_index_mapping: mapping DataFrame.
        :returns: DataFrame with index.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_joined = jvm_generator.generateSrcIndexForEdgesFromMapping(
            edge_df._jdf,
            src_column_name,
            src_index_mapping._jdf,
        )
        return DataFrame(jvm_joined, GraphArSession.ss)

    @staticmethod
    def generate_dst_index_for_edges_from_mapping(
        edge_df: DataFrame,
        dst_column_name: str,
        dst_index_mapping: DataFrame,
    ) -> DataFrame:
        """Join the edge table with the vertex index mapping on the destination column.

        :param edge_df: edges DataFrame.
        :param dst_column_name: join-column.
        :param dst_index_mapping: mapping DataFrame.
        :returns: DataFrame with index.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_joined = jvm_generator.generateDstIndexForEdgesFromMapping(
            edge_df._jdf,
            dst_column_name,
            dst_index_mapping._jdf,
        )
        return DataFrame(jvm_joined, GraphArSession.ss)

    @staticmethod
    def generate_src_and_dst_index_for_edges_from_mapping(
        edge_df: DataFrame,
        src_column_name: Optional[str],
        dst_column_name: Optional[str],
        src_index_mapping: DataFrame,
        dst_index_mapping: DataFrame,
    ) -> DataFrame:
        """Join the edge table with the vertex index mappings on both end columns.

        When a column name is None, the first (src) or second (dst) column of
        ``edge_df`` is used instead.

        :param edge_df: edge DataFrame.
        :param src_column_name: src column, optional (the first col from edge_df will be used if None).
        :param dst_column_name: dst column, optional (the second col from edge_df will be used if None).
        :param src_index_mapping: source mapping DataFrame.
        :param dst_index_mapping: dest mapping DataFrame.
        :returns: DataFrame with indices.
        """
        _check_session()
        # edge_df.columns is only consulted for the names that were not given.
        src_name = edge_df.columns[0] if src_column_name is None else src_column_name
        dst_name = edge_df.columns[1] if dst_column_name is None else dst_column_name

        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_joined = jvm_generator.generateSrcAndDstIndexForEdgesFromMapping(
            edge_df._jdf,
            src_name,
            dst_name,
            src_index_mapping._jdf,
            dst_index_mapping._jdf,
        )
        return DataFrame(jvm_joined, GraphArSession.ss)

    @staticmethod
    def generate_scr_index_for_edges(
        edge_df: DataFrame,
        src_column_name: str,
    ) -> DataFrame:
        """Construct the vertex index for the source column.

        :param edge_df: edge DataFrame.
        :param src_column_name: source column.
        :returns: DataFrame with index.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_indexed = jvm_generator.generateSrcIndexForEdges(
            edge_df._jdf,
            src_column_name,
        )
        return DataFrame(jvm_indexed, GraphArSession.ss)

    @staticmethod
    def generate_dst_index_for_edges(
        edge_df: DataFrame,
        dst_column_name: str,
    ) -> DataFrame:
        """Construct the vertex index for the destination column.

        :param edge_df: edge DataFrame.
        :param dst_column_name: destination column.
        :returns: DataFrame with index.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_indexed = jvm_generator.generateDstIndexForEdges(
            edge_df._jdf,
            dst_column_name,
        )
        return DataFrame(jvm_indexed, GraphArSession.ss)

    @staticmethod
    def generate_src_and_dst_index_unitedly_for_edges(
        edge_df: DataFrame,
        src_column_name: str,
        dst_column_name: str,
    ) -> DataFrame:
        """Union and construct the vertex index for source & destination columns.

        :param edge_df: edge DataFrame.
        :param src_column_name: source column name.
        :param dst_column_name: destination column name.
        :returns: DataFrame with index.
        """
        _check_session()
        jvm_generator = GraphArSession.graphar.util.IndexGenerator
        jvm_indexed = jvm_generator.generateSrcAndDstIndexUnitedlyForEdges(
            edge_df._jdf,
            src_column_name,
            dst_column_name,
        )
        return DataFrame(jvm_indexed, GraphArSession.ss)
IndexGenerator is an object that helps generate the indices for vertex/edge DataFrames.
@staticmethod
def construct_vertex_index_mapping(
    vertex_df: DataFrame,
    primary_key: str,
) -> DataFrame:
    """Build the vertex index mapping for the given primary key.

    The resulting DataFrame contains two columns: vertex index & primary key.

    :param vertex_df: input vertex DataFrame.
    :param primary_key: the primary key of the vertex.
    :returns: a DataFrame with two columns: vertex index & primary key.
    """
    _check_session()
    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    # Hand the underlying JVM DataFrame to the JVM-side implementation.
    jvm_mapping = jvm_generator.constructVertexIndexMapping(
        vertex_df._jdf,
        primary_key,
    )
    return DataFrame(jvm_mapping, GraphArSession.ss)
Generate a vertex index mapping from the primary key.
The resulting DataFrame contains two columns: vertex index & primary key.
Parameters
- vertex_df: input vertex DataFrame.
- primary_key: the primary key of the vertex.
Returns
- a DataFrame that contains two columns: vertex index & primary key.
@staticmethod
def generate_vertex_index_column(vertex_df: DataFrame) -> DataFrame:
    """Return the vertex DataFrame extended with a vertex index column.

    :param vertex_df: the input vertex DataFrame.
    :returns: DataFrame that contains a new vertex index column.
    """
    _check_session()
    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    jvm_indexed = jvm_generator.generateVertexIndexColumn(vertex_df._jdf)
    return DataFrame(jvm_indexed, GraphArSession.ss)
Add a column containing the vertex index to the DataFrame.
Parameters
- vertex_df: the input vertex DataFrame.
Returns
- DataFrame that contains a new vertex index column.
@staticmethod
def generate_vertex_index_column_and_index_mapping(
    vertex_df: DataFrame,
    primary_key: str = "",
) -> tuple[DataFrame, DataFrame]:
    """Add an index column and generate a new index mapping.

    :param vertex_df: the input vertex DataFrame.
    :param primary_key: the primary key of the vertex (defaults to ``""``).
    :returns: a 2-tuple ``(vertex DataFrame with index, mapping DataFrame)``.
    """
    _check_session()
    jvm_res = GraphArSession.graphar.util.IndexGenerator.generateVertexIndexColumnAndIndexMapping(
        vertex_df._jdf,
        primary_key,
    )

    # The JVM call returns a pair; _1()/_2() read its two elements.
    return (
        DataFrame(jvm_res._1(), GraphArSession.ss),
        DataFrame(jvm_res._2(), GraphArSession.ss),
    )
Add an index column and generate a new index mapping.
Parameters
- vertex_df: the input vertex DataFrame.
- primary_key: the primary key of the vertex.
Returns
- the new vertex DataFrame and the mapping DataFrame.
@staticmethod
def generate_edge_index_column(edge_df: DataFrame) -> DataFrame:
    """Return the edge DataFrame extended with an edge index column.

    :param edge_df: DataFrame with edges.
    :returns: DataFrame with edges and index.
    """
    _check_session()
    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    jvm_indexed = jvm_generator.generateEdgeIndexColumn(edge_df._jdf)
    return DataFrame(jvm_indexed, GraphArSession.ss)
Add a column containing the edge index to the input edge DataFrame.
Parameters
- edge_df: DataFrame with edges.
Returns
- DataFrame with edges and index.
@staticmethod
def generate_src_index_for_edges_from_mapping(
    edge_df: DataFrame,
    src_column_name: str,
    src_index_mapping: DataFrame,
) -> DataFrame:
    """Join the edge table with the vertex index mapping on the source column.

    :param edge_df: edges DataFrame.
    :param src_column_name: join-column.
    :param src_index_mapping: mapping DataFrame.
    :returns: DataFrame with index.
    """
    _check_session()
    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    jvm_joined = jvm_generator.generateSrcIndexForEdgesFromMapping(
        edge_df._jdf,
        src_column_name,
        src_index_mapping._jdf,
    )
    return DataFrame(jvm_joined, GraphArSession.ss)
Join the edge table with the vertex index mapping for source column.
Parameters
- edge_df: edges DataFrame
- src_column_name: join-column
- src_index_mapping: mapping DataFrame
Returns
- DataFrame with index
@staticmethod
def generate_dst_index_for_edges_from_mapping(
    edge_df: DataFrame,
    dst_column_name: str,
    dst_index_mapping: DataFrame,
) -> DataFrame:
    """Join the edge table with the vertex index mapping on the destination column.

    :param edge_df: edges DataFrame.
    :param dst_column_name: join-column.
    :param dst_index_mapping: mapping DataFrame.
    :returns: DataFrame with index.
    """
    _check_session()
    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    jvm_joined = jvm_generator.generateDstIndexForEdgesFromMapping(
        edge_df._jdf,
        dst_column_name,
        dst_index_mapping._jdf,
    )
    return DataFrame(jvm_joined, GraphArSession.ss)
Join the edge table with the vertex index mapping for destination column.
Parameters
- edge_df: edges DataFrame
- dst_column_name: join-column
- dst_index_mapping: mapping DataFrame
Returns
- DataFrame with index
@staticmethod
def generate_src_and_dst_index_for_edges_from_mapping(
    edge_df: DataFrame,
    src_column_name: Optional[str],
    dst_column_name: Optional[str],
    src_index_mapping: DataFrame,
    dst_index_mapping: DataFrame,
) -> DataFrame:
    """Join the edge table with the vertex index mappings on both end columns.

    When a column name is None, the first (src) or second (dst) column of
    ``edge_df`` is used instead.

    :param edge_df: edge DataFrame.
    :param src_column_name: src column, optional (the first col from edge_df will be used if None).
    :param dst_column_name: dst column, optional (the second col from edge_df will be used if None).
    :param src_index_mapping: source mapping DataFrame.
    :param dst_index_mapping: dest mapping DataFrame.
    :returns: DataFrame with indices.
    """
    _check_session()
    # edge_df.columns is only consulted for the names that were not given.
    src_name = edge_df.columns[0] if src_column_name is None else src_column_name
    dst_name = edge_df.columns[1] if dst_column_name is None else dst_column_name

    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    jvm_joined = jvm_generator.generateSrcAndDstIndexForEdgesFromMapping(
        edge_df._jdf,
        src_name,
        dst_name,
        src_index_mapping._jdf,
        dst_index_mapping._jdf,
    )
    return DataFrame(jvm_joined, GraphArSession.ss)
Join the edge table with the vertex index mapping for source & destination columns.
Assumes that the first and second columns are the src and dst columns if they are None.
Parameters
- edge_df: edge DataFrame
- src_column_name: src column, optional (the first col from edge_df will be used if None)
- dst_column_name: dst column, optional (the second col from edge_df will be used if None)
- src_index_mapping: source mapping DataFrame
- dst_index_mapping: dest mapping DataFrame
Returns
- DataFrame with indices
@staticmethod
def generate_scr_index_for_edges(
    edge_df: DataFrame,
    src_column_name: str,
) -> DataFrame:
    """Construct the vertex index for the source column.

    :param edge_df: edge DataFrame.
    :param src_column_name: source column.
    :returns: DataFrame with index.
    """
    _check_session()
    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    jvm_indexed = jvm_generator.generateSrcIndexForEdges(
        edge_df._jdf,
        src_column_name,
    )
    return DataFrame(jvm_indexed, GraphArSession.ss)
Construct vertex index for source column.
Parameters
- edge_df: edge DataFrame
- src_column_name: source column
Returns
- DataFrame with index
@staticmethod
def generate_dst_index_for_edges(
    edge_df: DataFrame,
    dst_column_name: str,
) -> DataFrame:
    """Construct the vertex index for the destination column.

    :param edge_df: edge DataFrame.
    :param dst_column_name: destination column.
    :returns: DataFrame with index.
    """
    _check_session()
    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    jvm_indexed = jvm_generator.generateDstIndexForEdges(
        edge_df._jdf,
        dst_column_name,
    )
    return DataFrame(jvm_indexed, GraphArSession.ss)
Construct vertex index for destination column.
Parameters
- edge_df: edge DataFrame
- dst_column_name: destination column
Returns
- DataFrame with index
@staticmethod
def generate_src_and_dst_index_unitedly_for_edges(
    edge_df: DataFrame,
    src_column_name: str,
    dst_column_name: str,
) -> DataFrame:
    """Union and construct the vertex index for source & destination columns.

    :param edge_df: edge DataFrame.
    :param src_column_name: source column name.
    :param dst_column_name: destination column name.
    :returns: DataFrame with index.
    """
    _check_session()
    jvm_generator = GraphArSession.graphar.util.IndexGenerator
    jvm_indexed = jvm_generator.generateSrcAndDstIndexUnitedlyForEdges(
        edge_df._jdf,
        src_column_name,
        dst_column_name,
    )
    return DataFrame(jvm_indexed, GraphArSession.ss)
Union and construct vertex index for source & destination columns.
Parameters
- edge_df: edge DataFrame
- src_column_name: source column name
- dst_column_name: destination column name
Returns
- DataFrame with index