graphar_pyspark.reader

Bindings to org.apache.graphar.graph.

  1# Licensed to the Apache Software Foundation (ASF) under one
  2# or more contributor license agreements.  See the NOTICE file
  3# distributed with this work for additional information
  4# regarding copyright ownership.  The ASF licenses this file
  5# to you under the Apache License, Version 2.0 (the
  6# "License"); you may not use this file except in compliance
  7# with the License.  You may obtain a copy of the License at
  8#
  9#   http://www.apache.org/licenses/LICENSE-2.0
 10#
 11# Unless required by applicable law or agreed to in writing,
 12# software distributed under the License is distributed on an
 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14# KIND, either express or implied.  See the License for the
 15# specific language governing permissions and limitations
 16# under the License.
 17
 18"""Bindings to org.apache.graphar.graph."""
 19
 20from __future__ import annotations
 21
 22import os
 23from typing import Optional
 24
 25from py4j.java_gateway import JavaObject
 26from pyspark.sql import DataFrame
 27
 28from graphar_pyspark import GraphArSession, _check_session
 29from graphar_pyspark.enums import AdjListType
 30from graphar_pyspark.info import EdgeInfo, PropertyGroup, VertexInfo
 31
 32
 33class VertexReader:
 34    """Reader for vertex chunks."""
 35
 36    def __init__(
 37        self,
 38        prefix: Optional[str],
 39        vertex_info: Optional[VertexInfo],
 40        jvm_obj: Optional[JavaObject],
 41    ) -> None:
 42        """One should not use this constructor directly, please use `from_scala` or `from_python`."""
 43        _check_session()
 44        if jvm_obj is not None:
 45            self._jvm_vertex_reader_obj = jvm_obj
 46        else:
 47            self._jvm_vertex_reader_obj = GraphArSession.graphar.reader.VertexReader(
 48                prefix,
 49                vertex_info.to_scala(),
 50                GraphArSession.jss,
 51            )
 52
 53    def to_scala(self) -> JavaObject:
 54        """Transform object to JVM representation.
 55
 56        :returns: JavaObject
 57        """
 58        return self._jvm_vertex_reader_obj
 59
 60    @staticmethod
 61    def from_scala(jvm_obj: JavaObject) -> "VertexReader":
 62        """Create an instance of the Class from the corresponding JVM object.
 63
 64        :param jvm_obj: scala object in JVM.
 65        :returns: instance of Python Class.
 66        """
 67        return VertexReader(None, None, jvm_obj)
 68
 69    @staticmethod
 70    def from_python(prefix: str, vertex_info: VertexInfo) -> "VertexReader":
 71        """Create an instance of the Class from Python arguments.
 72
 73        :param prefix: the absolute prefix.
 74        :param vertex_info: the vertex info that describes the vertex type.
 75        """
 76        if not prefix.endswith(os.sep):
 77            prefix += os.sep
 78        return VertexReader(prefix, vertex_info, None)
 79
 80    def read_vertices_number(self) -> int:
 81        """Load the total number of vertices for this vertex type.
 82
 83        :returns: total number of vertices.
 84        """
 85        return self._jvm_vertex_reader_obj.readVerticesNumber()
 86
 87    def read_vertex_property_chunk(
 88        self,
 89        property_group: PropertyGroup,
 90        chunk_index: int,
 91    ) -> DataFrame:
 92        """Load a single vertex property chunk as a DataFrame.
 93
 94        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
 95
 96        :param property_group: property group.
 97        :param chunk_index: index of vertex chunk.
 98        :returns: vertex property chunk DataFrame. Raise IllegalArgumentException if the property group not contained.
 99        """
100        return DataFrame(
101            self._jvm_vertex_reader_obj.readVertexPropertyChunk(
102                property_group.to_scala(),
103                chunk_index,
104            ),
105            GraphArSession.ss,
106        )
107
108    def read_vertex_property_group(self, property_group: PropertyGroup) -> DataFrame:
109        """Load all chunks for a property group as a DataFrame.
110
111        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
112
113        :param property_group: property group.
114        :returns: DataFrame that contains all chunks of property group. Raise IllegalArgumentException if the property group not contained.
115        """
116        return DataFrame(
117            self._jvm_vertex_reader_obj.readVertexPropertyGroup(
118                property_group.to_scala(),
119            ),
120            GraphArSession.ss,
121        )
122
123    def read_multiple_vertex_property_groups(
124        self,
125        property_groups: list[PropertyGroup],
126    ) -> DataFrame:
127        """Load the chunks for multiple property groups as a DataFrame.
128
129        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
130
131        :param property_groups: list of property groups.
132        :returns: DataFrame that contains all chunks of property group. Raise IllegalArgumentException if the property group not contained.
133        """
134        return DataFrame(
135            self._jvm_vertex_reader_obj.readMultipleVertexPropertyGroups(
136                [py_property_group.to_scala() for py_property_group in property_groups],
137            ),
138            GraphArSession.ss,
139        )
140
141    def read_all_vertex_property_groups(self) -> DataFrame:
142        """Load the chunks for all property groups as a DataFrame.
143
144        :returns: DataFrame that contains all property group chunks of vertex.
145        """
146        return DataFrame(
147            self._jvm_vertex_reader_obj.readAllVertexPropertyGroups(),
148            GraphArSession.ss,
149        )
150
151
class EdgeReader:
    """Reader for edge chunks.

    Thin Python wrapper around the JVM ``EdgeReader``: each method forwards to the
    wrapped JVM object, and every DataFrame-returning method wraps the JVM result in a
    PySpark ``DataFrame`` bound to ``GraphArSession.ss``.
    """

    def __init__(
        self,
        prefix: Optional[str],
        edge_info: Optional[EdgeInfo],
        adj_list_type: Optional[AdjListType],
        jvm_obj: Optional[JavaObject],
    ) -> None:
        """One should not use this constructor directly, please use `from_scala` or `from_python`.

        :param prefix: path prefix passed to the JVM reader; only used when `jvm_obj` is None.
        :param edge_info: edge info converted with `to_scala()`; only used when `jvm_obj` is None.
        :param adj_list_type: adj list type converted with `to_scala()`; only used when `jvm_obj` is None.
        :param jvm_obj: existing JVM reader to wrap, or None to create a new one.
        """
        _check_session()
        if jvm_obj is not None:
            self._jvm_edge_reader_obj = jvm_obj
        else:
            self._jvm_edge_reader_obj = GraphArSession.graphar.reader.EdgeReader(
                prefix,
                edge_info.to_scala(),
                adj_list_type.to_scala(),
                GraphArSession.jss,
            )

    def to_scala(self) -> JavaObject:
        """Transform object to JVM representation.

        :returns: JavaObject
        """
        return self._jvm_edge_reader_obj

    @staticmethod
    def from_scala(jvm_obj: JavaObject) -> "EdgeReader":
        """Create an instance of the Class from the corresponding JVM object.

        :param jvm_obj: scala object in JVM.
        :returns: instance of Python Class.
        """
        return EdgeReader(None, None, None, jvm_obj)

    @staticmethod
    def from_python(
        prefix: str,
        edge_info: EdgeInfo,
        adj_list_type: AdjListType,
    ) -> "EdgeReader":
        """Create an instance of the Class from Python arguments.

        Note that constructor would raise IllegalArgumentException if edge info does not support given adjListType.

        :param prefix: the absolute prefix; `os.sep` is appended if it is not already the suffix.
        :param edge_info: the edge info that describes the edge type.
        :param adj_list_type: the adj list type for the edge.
        :returns: instance of Python Class.
        """
        if not prefix.endswith(os.sep):
            prefix += os.sep
        return EdgeReader(prefix, edge_info, adj_list_type, None)

    def read_vertices_number(self) -> int:
        """Load the total number of src/dst vertices for this edge type.

        :returns: total number of vertices.
        """
        return self._jvm_edge_reader_obj.readVerticesNumber()

    def read_vertex_chunk_number(self) -> int:
        """Load the chunk number of src/dst vertices.

        :returns: chunk number of vertices.
        """
        return self._jvm_edge_reader_obj.readVertexChunkNumber()

    def read_edges_number(self, chunk_index: Optional[int] = None) -> int:
        """Load the number of edges for the vertex chunk or for this edge type.

        :param chunk_index: index of vertex chunk (optional, default is None).
            If not provided, returns the number of edges for this edge type;
            if provided, returns the number of edges for the vertex chunk.
        :returns: the number of edges.
        """
        # `is None` (not truthiness) so that chunk index 0 selects the per-chunk overload.
        if chunk_index is None:
            return self._jvm_edge_reader_obj.readEdgesNumber()
        return self._jvm_edge_reader_obj.readEdgesNumber(chunk_index)

    def read_offset(self, chunk_index: int) -> DataFrame:
        """Load a single offset chunk as a DataFrame.

        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

        :param chunk_index: index of offset chunk.
        :returns: offset chunk DataFrame. Raise IllegalArgumentException if adjListType is
            not AdjListType.ordered_by_source or AdjListType.ordered_by_dest.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readOffset(chunk_index),
            GraphArSession.ss,
        )

    def read_adj_list_chunk(
        self,
        vertex_chunk_index: int,
        chunk_index: int,
    ) -> DataFrame:
        """Load a single AdjList chunk as a DataFrame.

        :param vertex_chunk_index: index of vertex chunk.
        :param chunk_index: index of AdjList chunk.
        :returns: AdjList chunk DataFrame.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readAdjListChunk(vertex_chunk_index, chunk_index),
            GraphArSession.ss,
        )

    def read_adj_list_for_vertex_chunk(
        self,
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load all AdjList chunks for a vertex chunk as a DataFrame.

        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame of all AdjList chunks of vertices in given vertex chunk.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readAdjListForVertexChunk(
                vertex_chunk_index,
                add_index,
            ),
            GraphArSession.ss,
        )

    def read_all_adj_list(self, add_index: bool = True) -> DataFrame:
        """Load all AdjList chunks for this edge type as a DataFrame.

        :param add_index: flag that add index column or not in the final DataFrame.
        :returns: DataFrame of all AdjList chunks.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readAllAdjList(add_index),
            GraphArSession.ss,
        )

    def read_edge_property_chunk(
        self,
        property_group: PropertyGroup,
        vertex_chunk_index: int,
        chunk_index: int,
    ) -> DataFrame:
        """Load a single edge property chunk as a DataFrame.

        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

        :param property_group: property group.
        :param vertex_chunk_index: index of vertex chunk.
        :param chunk_index: index of property group chunk.
        :returns: property group chunk DataFrame. If edge info does not contain the
            property group, raise an IllegalArgumentException error.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readEdgePropertyChunk(
                property_group.to_scala(),
                vertex_chunk_index,
                chunk_index,
            ),
            # The session is the second DataFrame constructor argument, as in every other
            # reader method; it was previously omitted here.
            GraphArSession.ss,
        )

    def read_edge_property_group_for_vertex_chunk(
        self,
        property_group: PropertyGroup,
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for a property group of a vertex chunk as a DataFrame.

        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

        :param property_group: property group.
        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property group chunks of vertices in given
            vertex chunk. If edge info does not contain the property group, raise an
            IllegalArgumentException error.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readEdgePropertyGroupForVertexChunk(
                property_group.to_scala(),
                vertex_chunk_index,
                add_index,
            ),
            GraphArSession.ss,
        )

    def read_edge_property_group(
        self,
        property_group: PropertyGroup,
        add_index: bool = True,
    ) -> DataFrame:
        """Load all chunks for a property group as a DataFrame.

        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

        :param property_group: property group.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all chunks of property group. If edge info does
            not contain the property group, raise an IllegalArgumentException error.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readEdgePropertyGroup(
                property_group.to_scala(),
                add_index,
            ),
            GraphArSession.ss,
        )

    def read_multiple_edge_property_groups_for_vertex_chunk(
        self,
        property_groups: list[PropertyGroup],
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for multiple property groups of a vertex chunk as a DataFrame.

        :param property_groups: list of property groups.
        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property groups chunks of a vertex chunk.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readMultipleEdgePropertyGroupsForVertexChunk(
                # Convert each Python wrapper to its JVM object before the call.
                [py_property_group.to_scala() for py_property_group in property_groups],
                vertex_chunk_index,
                add_index,
            ),
            GraphArSession.ss,
        )

    def read_multiple_edge_property_groups(
        self,
        property_groups: list[PropertyGroup],
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for multiple property groups as a DataFrame.

        :param property_groups: list of property groups.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property groups chunks of edge.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readMultipleEdgePropertyGroups(
                # Convert each Python wrapper to its JVM object before the call.
                [py_property_group.to_scala() for py_property_group in property_groups],
                add_index,
            ),
            GraphArSession.ss,
        )

    def read_all_edge_property_groups_for_vertex_chunk(
        self,
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for all property groups of a vertex chunk as a DataFrame.

        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property groups chunks of a vertex chunk.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readAllEdgePropertyGroupsForVertexChunk(
                vertex_chunk_index,
                add_index,
            ),
            GraphArSession.ss,
        )

    def read_all_edge_property_groups(self, add_index: bool = True) -> DataFrame:
        """Load the chunks for all property groups as a DataFrame.

        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property groups chunks of edge.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readAllEdgePropertyGroups(add_index),
            GraphArSession.ss,
        )

    def read_edges_for_vertex_chunk(
        self,
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for the AdjList and all property groups for a vertex chunk as a DataFrame.

        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all chunks of AdjList and property groups of
            vertices in given vertex chunk.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readEdgesForVertexChunk(
                vertex_chunk_index,
                add_index,
            ),
            GraphArSession.ss,
        )

    def read_edges(self, add_index: bool = True) -> DataFrame:
        """Load the chunks for the AdjList and all property groups as a DataFrame.

        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all chunks of AdjList and property groups of edges.
        """
        return DataFrame(
            self._jvm_edge_reader_obj.readEdges(add_index),
            GraphArSession.ss,
        )
class VertexReader:
 34class VertexReader:
 35    """Reader for vertex chunks."""
 36
 37    def __init__(
 38        self,
 39        prefix: Optional[str],
 40        vertex_info: Optional[VertexInfo],
 41        jvm_obj: Optional[JavaObject],
 42    ) -> None:
 43        """One should not use this constructor directly, please use `from_scala` or `from_python`."""
 44        _check_session()
 45        if jvm_obj is not None:
 46            self._jvm_vertex_reader_obj = jvm_obj
 47        else:
 48            self._jvm_vertex_reader_obj = GraphArSession.graphar.reader.VertexReader(
 49                prefix,
 50                vertex_info.to_scala(),
 51                GraphArSession.jss,
 52            )
 53
 54    def to_scala(self) -> JavaObject:
 55        """Transform object to JVM representation.
 56
 57        :returns: JavaObject
 58        """
 59        return self._jvm_vertex_reader_obj
 60
 61    @staticmethod
 62    def from_scala(jvm_obj: JavaObject) -> "VertexReader":
 63        """Create an instance of the Class from the corresponding JVM object.
 64
 65        :param jvm_obj: scala object in JVM.
 66        :returns: instance of Python Class.
 67        """
 68        return VertexReader(None, None, jvm_obj)
 69
 70    @staticmethod
 71    def from_python(prefix: str, vertex_info: VertexInfo) -> "VertexReader":
 72        """Create an instance of the Class from Python arguments.
 73
 74        :param prefix: the absolute prefix.
 75        :param vertex_info: the vertex info that describes the vertex type.
 76        """
 77        if not prefix.endswith(os.sep):
 78            prefix += os.sep
 79        return VertexReader(prefix, vertex_info, None)
 80
 81    def read_vertices_number(self) -> int:
 82        """Load the total number of vertices for this vertex type.
 83
 84        :returns: total number of vertices.
 85        """
 86        return self._jvm_vertex_reader_obj.readVerticesNumber()
 87
 88    def read_vertex_property_chunk(
 89        self,
 90        property_group: PropertyGroup,
 91        chunk_index: int,
 92    ) -> DataFrame:
 93        """Load a single vertex property chunk as a DataFrame.
 94
 95        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
 96
 97        :param property_group: property group.
 98        :param chunk_index: index of vertex chunk.
 99        :returns: vertex property chunk DataFrame. Raise IllegalArgumentException if the property group not contained.
100        """
101        return DataFrame(
102            self._jvm_vertex_reader_obj.readVertexPropertyChunk(
103                property_group.to_scala(),
104                chunk_index,
105            ),
106            GraphArSession.ss,
107        )
108
109    def read_vertex_property_group(self, property_group: PropertyGroup) -> DataFrame:
110        """Load all chunks for a property group as a DataFrame.
111
112        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
113
114        :param property_group: property group.
115        :returns: DataFrame that contains all chunks of property group. Raise IllegalArgumentException if the property group not contained.
116        """
117        return DataFrame(
118            self._jvm_vertex_reader_obj.readVertexPropertyGroup(
119                property_group.to_scala(),
120            ),
121            GraphArSession.ss,
122        )
123
124    def read_multiple_vertex_property_groups(
125        self,
126        property_groups: list[PropertyGroup],
127    ) -> DataFrame:
128        """Load the chunks for multiple property groups as a DataFrame.
129
130        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
131
132        :param property_groups: list of property groups.
133        :returns: DataFrame that contains all chunks of property group. Raise IllegalArgumentException if the property group not contained.
134        """
135        return DataFrame(
136            self._jvm_vertex_reader_obj.readMultipleVertexPropertyGroups(
137                [py_property_group.to_scala() for py_property_group in property_groups],
138            ),
139            GraphArSession.ss,
140        )
141
142    def read_all_vertex_property_groups(self) -> DataFrame:
143        """Load the chunks for all property groups as a DataFrame.
144
145        :returns: DataFrame that contains all property group chunks of vertex.
146        """
147        return DataFrame(
148            self._jvm_vertex_reader_obj.readAllVertexPropertyGroups(),
149            GraphArSession.ss,
150        )

Reader for vertex chunks.

VertexReader( prefix: Optional[str], vertex_info: Optional[graphar_pyspark.info.VertexInfo], jvm_obj: Optional[py4j.java_gateway.JavaObject])
37    def __init__(
38        self,
39        prefix: Optional[str],
40        vertex_info: Optional[VertexInfo],
41        jvm_obj: Optional[JavaObject],
42    ) -> None:
43        """One should not use this constructor directly, please use `from_scala` or `from_python`."""
44        _check_session()
45        if jvm_obj is not None:
46            self._jvm_vertex_reader_obj = jvm_obj
47        else:
48            self._jvm_vertex_reader_obj = GraphArSession.graphar.reader.VertexReader(
49                prefix,
50                vertex_info.to_scala(),
51                GraphArSession.jss,
52            )

One should not use this constructor directly, please use from_scala or from_python.

def to_scala(self) -> py4j.java_gateway.JavaObject:
54    def to_scala(self) -> JavaObject:
55        """Transform object to JVM representation.
56
57        :returns: JavaObject
58        """
59        return self._jvm_vertex_reader_obj

Transform object to JVM representation.

:returns: JavaObject

@staticmethod
def from_scala( jvm_obj: py4j.java_gateway.JavaObject) -> VertexReader:
61    @staticmethod
62    def from_scala(jvm_obj: JavaObject) -> "VertexReader":
63        """Create an instance of the Class from the corresponding JVM object.
64
65        :param jvm_obj: scala object in JVM.
66        :returns: instance of Python Class.
67        """
68        return VertexReader(None, None, jvm_obj)

Create an instance of the Class from the corresponding JVM object.

Parameters
  • jvm_obj: scala object in JVM.
Returns
  • instance of Python Class.
@staticmethod
def from_python( prefix: str, vertex_info: graphar_pyspark.info.VertexInfo) -> VertexReader:
70    @staticmethod
71    def from_python(prefix: str, vertex_info: VertexInfo) -> "VertexReader":
72        """Create an instance of the Class from Python arguments.
73
74        :param prefix: the absolute prefix.
75        :param vertex_info: the vertex info that describes the vertex type.
76        """
77        if not prefix.endswith(os.sep):
78            prefix += os.sep
79        return VertexReader(prefix, vertex_info, None)

Create an instance of the Class from Python arguments.

Parameters
  • prefix: the absolute prefix.
  • vertex_info: the vertex info that describes the vertex type.
def read_vertices_number(self) -> int:
81    def read_vertices_number(self) -> int:
82        """Load the total number of vertices for this vertex type.
83
84        :returns: total number of vertices.
85        """
86        return self._jvm_vertex_reader_obj.readVerticesNumber()

Load the total number of vertices for this vertex type.

:returns: total number of vertices.

def read_vertex_property_chunk( self, property_group: graphar_pyspark.info.PropertyGroup, chunk_index: int) -> pyspark.sql.dataframe.DataFrame:
 88    def read_vertex_property_chunk(
 89        self,
 90        property_group: PropertyGroup,
 91        chunk_index: int,
 92    ) -> DataFrame:
 93        """Load a single vertex property chunk as a DataFrame.
 94
 95        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
 96
 97        :param property_group: property group.
 98        :param chunk_index: index of vertex chunk.
 99        :returns: vertex property chunk DataFrame. Raise IllegalArgumentException if the property group not contained.
100        """
101        return DataFrame(
102            self._jvm_vertex_reader_obj.readVertexPropertyChunk(
103                property_group.to_scala(),
104                chunk_index,
105            ),
106            GraphArSession.ss,
107        )

Load a single vertex property chunk as a DataFrame.

WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

Parameters
  • property_group: property group.
  • chunk_index: index of vertex chunk.
Returns
  • vertex property chunk DataFrame. Raises IllegalArgumentException if the property group is not contained.
def read_vertex_property_group( self, property_group: graphar_pyspark.info.PropertyGroup) -> pyspark.sql.dataframe.DataFrame:
109    def read_vertex_property_group(self, property_group: PropertyGroup) -> DataFrame:
110        """Load all chunks for a property group as a DataFrame.
111
112        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
113
114        :param property_group: property group.
115        :returns: DataFrame that contains all chunks of property group. Raise IllegalArgumentException if the property group not contained.
116        """
117        return DataFrame(
118            self._jvm_vertex_reader_obj.readVertexPropertyGroup(
119                property_group.to_scala(),
120            ),
121            GraphArSession.ss,
122        )

Load all chunks for a property group as a DataFrame.

WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

Parameters
  • property_group: property group.
Returns
  • DataFrame that contains all chunks of property group. Raises IllegalArgumentException if the property group is not contained.
def read_multiple_vertex_property_groups( self, property_groups: list[graphar_pyspark.info.PropertyGroup]) -> pyspark.sql.dataframe.DataFrame:
124    def read_multiple_vertex_property_groups(
125        self,
126        property_groups: list[PropertyGroup],
127    ) -> DataFrame:
128        """Load the chunks for multiple property groups as a DataFrame.
129
130        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
131
132        :param property_groups: list of property groups.
133        :returns: DataFrame that contains all chunks of property group. Raise IllegalArgumentException if the property group not contained.
134        """
135        return DataFrame(
136            self._jvm_vertex_reader_obj.readMultipleVertexPropertyGroups(
137                [py_property_group.to_scala() for py_property_group in property_groups],
138            ),
139            GraphArSession.ss,
140        )

Load the chunks for multiple property groups as a DataFrame.

WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

Parameters
  • property_groups: list of property groups.
Returns
  • DataFrame that contains all chunks of property group. Raises IllegalArgumentException if the property group is not contained.
def read_all_vertex_property_groups(self) -> pyspark.sql.dataframe.DataFrame:
142    def read_all_vertex_property_groups(self) -> DataFrame:
143        """Load the chunks for all property groups as a DataFrame.
144
145        :returns: DataFrame that contains all property group chunks of vertex.
146        """
147        return DataFrame(
148            self._jvm_vertex_reader_obj.readAllVertexPropertyGroups(),
149            GraphArSession.ss,
150        )

Load the chunks for all property groups as a DataFrame.

:returns: DataFrame that contains all property group chunks of vertex.

class EdgeReader:
class EdgeReader:
    """Reader for edge chunks.

    Python-side proxy around a JVM edge reader object. Every ``read_*`` method
    forwards to the JVM method with the corresponding camelCase name; methods
    that return data wrap the JVM result in a PySpark ``DataFrame``.
    """

    def __init__(
        self,
        prefix: Optional[str],
        edge_info: Optional[EdgeInfo],
        adj_list_type: Optional[AdjListType],
        jvm_obj: Optional[JavaObject],
    ) -> None:
        """One should not use this constructor directly, please use `from_scala` or `from_python`.

        :param prefix: the absolute prefix; only used when ``jvm_obj`` is None.
        :param edge_info: the edge info; only used when ``jvm_obj`` is None.
        :param adj_list_type: the adj list type; only used when ``jvm_obj`` is None.
        :param jvm_obj: an existing JVM reader to wrap; when given, the other arguments are ignored.
        """
        _check_session()
        if jvm_obj is not None:
            self._jvm_edge_reader_obj = jvm_obj
        else:
            self._jvm_edge_reader_obj = GraphArSession.graphar.reader.EdgeReader(
                prefix,
                edge_info.to_scala(),
                adj_list_type.to_scala(),
                GraphArSession.jss,
            )

    @staticmethod
    def _to_dataframe(jvm_df: JavaObject) -> DataFrame:
        """Wrap a JVM DataFrame handle into a PySpark DataFrame.

        Centralised so that no reader method can forget the session argument
        (``GraphArSession.ss``) that the ``DataFrame`` constructor requires.

        :param jvm_df: the object returned by a JVM reader call.
        :returns: PySpark DataFrame built from ``jvm_df`` and ``GraphArSession.ss``.
        """
        return DataFrame(jvm_df, GraphArSession.ss)

    def to_scala(self) -> JavaObject:
        """Transform object to JVM representation.

        :returns: JavaObject
        """
        return self._jvm_edge_reader_obj

    @staticmethod
    def from_scala(jvm_obj: JavaObject) -> "EdgeReader":
        """Create an instance of the Class from the corresponding JVM object.

        :param jvm_obj: scala object in JVM.
        :returns: instance of Python Class.
        """
        return EdgeReader(None, None, None, jvm_obj)

    @staticmethod
    def from_python(
        prefix: str,
        edge_info: EdgeInfo,
        adj_list_type: AdjListType,
    ) -> "EdgeReader":
        """Create an instance of the Class from Python arguments.

        Note that constructor would raise IllegalArgumentException if edge info does not support given adjListType.

        :param prefix: the absolute prefix; ``os.sep`` is appended when it is not already the suffix.
        :param edge_info: the edge info that describes the edge type.
        :param adj_list_type: the adj list type for the edge.
        :returns: instance of Python Class.
        """
        if not prefix.endswith(os.sep):
            prefix += os.sep
        return EdgeReader(prefix, edge_info, adj_list_type, None)

    def read_vertices_number(self) -> int:
        """Load the total number of src/dst vertices for this edge type.

        :returns: total number of vertices.
        """
        return self._jvm_edge_reader_obj.readVerticesNumber()

    def read_vertex_chunk_number(self) -> int:
        """Load the chunk number of src/dst vertices.

        :returns: chunk number of vertices.
        """
        return self._jvm_edge_reader_obj.readVertexChunkNumber()

    def read_edges_number(self, chunk_index: Optional[int] = None) -> int:
        """Load the number of edges for the vertex chunk or for this edge type.

        :param chunk_index: index of vertex chunk (optional, default is None).
            If not provided, the number of edges for this edge type is returned;
            if provided, the number of edges for that vertex chunk is returned.
        :returns: the number of edges.
        """
        # The JVM method is overloaded: no argument vs. a vertex chunk index.
        if chunk_index is None:
            return self._jvm_edge_reader_obj.readEdgesNumber()
        return self._jvm_edge_reader_obj.readEdgesNumber(chunk_index)

    def read_offset(self, chunk_index: int) -> DataFrame:
        """Load a single offset chunk as a DataFrame.

        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

        :param chunk_index: index of offset chunk.
        :returns: offset chunk DataFrame. Raise IllegalArgumentException if adjListType is
            not AdjListType.ordered_by_source or AdjListType.ordered_by_dest.
        """
        return self._to_dataframe(self._jvm_edge_reader_obj.readOffset(chunk_index))

    def read_adj_list_chunk(
        self,
        vertex_chunk_index: int,
        chunk_index: int,
    ) -> DataFrame:
        """Load a single AdjList chunk as a DataFrame.

        :param vertex_chunk_index: index of vertex chunk.
        :param chunk_index: index of AdjList chunk.
        :returns: AdjList chunk DataFrame.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readAdjListChunk(vertex_chunk_index, chunk_index),
        )

    def read_adj_list_for_vertex_chunk(
        self,
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load all AdjList chunks for a vertex chunk as a DataFrame.

        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame of all AdjList chunks of vertices in given vertex chunk.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readAdjListForVertexChunk(
                vertex_chunk_index,
                add_index,
            ),
        )

    def read_all_adj_list(self, add_index: bool = True) -> DataFrame:
        """Load all AdjList chunks for this edge type as a DataFrame.

        :param add_index: flag that add index column or not in the final DataFrame.
        :returns: DataFrame of all AdjList chunks.
        """
        return self._to_dataframe(self._jvm_edge_reader_obj.readAllAdjList(add_index))

    def read_edge_property_chunk(
        self,
        property_group: PropertyGroup,
        vertex_chunk_index: int,
        chunk_index: int,
    ) -> DataFrame:
        """Load a single edge property chunk as a DataFrame.

        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

        :param property_group: property group.
        :param vertex_chunk_index: index of vertex chunk.
        :param chunk_index: index of property group chunk.
        :returns: property group chunk DataFrame. If edge info does not contain the
            property group, raise an IllegalArgumentException error.
        """
        # Previously the DataFrame was built without the session argument,
        # unlike every other reader method; the shared helper supplies it.
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readEdgePropertyChunk(
                property_group.to_scala(),
                vertex_chunk_index,
                chunk_index,
            ),
        )

    def read_edge_property_group_for_vertex_chunk(
        self,
        property_group: PropertyGroup,
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for a property group of a vertex chunk as a DataFrame.

        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

        :param property_group: property group.
        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property group chunks of vertices in given
            vertex chunk. If edge info does not contain the property group, raise an
            IllegalArgumentException error.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readEdgePropertyGroupForVertexChunk(
                property_group.to_scala(),
                vertex_chunk_index,
                add_index,
            ),
        )

    def read_edge_property_group(
        self,
        property_group: PropertyGroup,
        add_index: bool = True,
    ) -> DataFrame:
        """Load all chunks for a property group as a DataFrame.

        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

        :param property_group: property group.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all chunks of property group. If edge info does
            not contain the property group, raise an IllegalArgumentException error.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readEdgePropertyGroup(
                property_group.to_scala(),
                add_index,
            ),
        )

    def read_multiple_edge_property_groups_for_vertex_chunk(
        self,
        property_groups: list[PropertyGroup],
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for multiple property groups of a vertex chunk as a DataFrame.

        :param property_groups: list of property groups.
        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property groups chunks of a vertex chunk.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readMultipleEdgePropertyGroupsForVertexChunk(
                [py_property_group.to_scala() for py_property_group in property_groups],
                vertex_chunk_index,
                add_index,
            ),
        )

    def read_multiple_edge_property_groups(
        self,
        property_groups: list[PropertyGroup],
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for multiple property groups as a DataFrame.

        :param property_groups: list of property groups.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property groups chunks of edge.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readMultipleEdgePropertyGroups(
                [py_property_group.to_scala() for py_property_group in property_groups],
                add_index,
            ),
        )

    def read_all_edge_property_groups_for_vertex_chunk(
        self,
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for all property groups of a vertex chunk as a DataFrame.

        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property groups chunks of a vertex chunk.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readAllEdgePropertyGroupsForVertexChunk(
                vertex_chunk_index,
                add_index,
            ),
        )

    def read_all_edge_property_groups(self, add_index: bool = True) -> DataFrame:
        """Load the chunks for all property groups as a DataFrame.

        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all property groups chunks of edge.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readAllEdgePropertyGroups(add_index),
        )

    def read_edges_for_vertex_chunk(
        self,
        vertex_chunk_index: int,
        add_index: bool = True,
    ) -> DataFrame:
        """Load the chunks for the AdjList and all property groups for a vertex chunk as a DataFrame.

        :param vertex_chunk_index: index of vertex chunk.
        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all chunks of AdjList and property groups of vertices in given vertex chunk.
        """
        return self._to_dataframe(
            self._jvm_edge_reader_obj.readEdgesForVertexChunk(
                vertex_chunk_index,
                add_index,
            ),
        )

    def read_edges(self, add_index: bool = True) -> DataFrame:
        """Load the chunks for the AdjList and all property groups as a DataFrame.

        :param add_index: flag that add edge index column or not in the final DataFrame.
        :returns: DataFrame that contains all chunks of AdjList and property groups of edges.
        """
        return self._to_dataframe(self._jvm_edge_reader_obj.readEdges(add_index))

Reader for edge chunks.

EdgeReader( prefix: Optional[str], edge_info: Optional[graphar_pyspark.info.EdgeInfo], adj_list_type: Optional[graphar_pyspark.enums.AdjListType], jvm_obj: Optional[py4j.java_gateway.JavaObject])
156    def __init__(
157        self,
158        prefix: Optional[str],
159        edge_info: Optional[EdgeInfo],
160        adj_list_type: Optional[AdjListType],
161        jvm_obj: Optional[JavaObject],
162    ) -> None:
163        """One should not use this constructor directly, please use `from_scala` or `from_python`."""
164        _check_session()
165        if jvm_obj is not None:
166            self._jvm_edge_reader_obj = jvm_obj
167        else:
168            self._jvm_edge_reader_obj = GraphArSession.graphar.reader.EdgeReader(
169                prefix,
170                edge_info.to_scala(),
171                adj_list_type.to_scala(),
172                GraphArSession.jss,
173            )

One should not use this constructor directly, please use from_scala or from_python.

def to_scala(self) -> py4j.java_gateway.JavaObject:
175    def to_scala(self) -> JavaObject:
176        """Transform object to JVM representation.
177
178        :returns: JavaObject
179        """
180        return self._jvm_edge_reader_obj

Transform object to JVM representation.

:returns: JavaObject

@staticmethod
def from_scala( jvm_obj: py4j.java_gateway.JavaObject) -> EdgeReader:
182    @staticmethod
183    def from_scala(jvm_obj: JavaObject) -> "EdgeReader":
184        """Create an instance of the Class from the corresponding JVM object.
185
186        :param jvm_obj: scala object in JVM.
187        :returns: instance of Python Class.
188        """
189        return EdgeReader(None, None, None, jvm_obj)

Create an instance of the Class from the corresponding JVM object.

Parameters
  • jvm_obj: scala object in JVM.
Returns: instance of Python Class.
@staticmethod
def from_python( prefix: str, edge_info: graphar_pyspark.info.EdgeInfo, adj_list_type: graphar_pyspark.enums.AdjListType) -> EdgeReader:
191    @staticmethod
192    def from_python(
193        prefix: str,
194        edge_info: EdgeInfo,
195        adj_list_type: AdjListType,
196    ) -> "EdgeReader":
197        """Create an instance of the Class from Python arguments.
198
199        Note that constructor would raise IllegalArgumentException if edge info does not support given adjListType.
200
201        :param prefix: the absolute prefix.
202        :param edge_info: the edge info that describes the edge type.
203        :param adj_list_type: the adj list type for the edge.
204        """
205        if not prefix.endswith(os.sep):
206            prefix += os.sep
207        return EdgeReader(prefix, edge_info, adj_list_type, None)

Create an instance of the Class from Python arguments.

Note that constructor would raise IllegalArgumentException if edge info does not support given adjListType.

Parameters
  • prefix: the absolute prefix.
  • edge_info: the edge info that describes the edge type.
  • adj_list_type: the adj list type for the edge.
def read_vertices_number(self) -> int:
209    def read_vertices_number(self) -> int:
210        """Load the total number of src/dst vertices for this edge type.
211
212        :returns: total number of vertices.
213        """
214        return self._jvm_edge_reader_obj.readVerticesNumber()

Load the total number of src/dst vertices for this edge type.

:returns: total number of vertices.

def read_vertex_chunk_number(self) -> int:
216    def read_vertex_chunk_number(self) -> int:
217        """Load the chunk number of src/dst vertices.
218
219        :returns: chunk number of vertices.
220        """
221        return self._jvm_edge_reader_obj.readVertexChunkNumber()

Load the chunk number of src/dst vertices.

:returns: chunk number of vertices.

def read_edges_number(self, chunk_index: Optional[int] = None) -> int:
223    def read_edges_number(self, chunk_index: Optional[int] = None) -> int:
224        """Load the number of edges for the vertex chunk or for this edge type.
225
226        :param chunk_index: index of vertex chunk (optional, default is None)
227        if not provided, returns the number of edges for this edge type
228        if provided, returns the number of edges for the vertex chunk
229        :returns: the number of edges
230        """
231        if chunk_index is None:
232            return self._jvm_edge_reader_obj.readEdgesNumber()
233        return self._jvm_edge_reader_obj.readEdgesNumber(chunk_index)

Load the number of edges for the vertex chunk or for this edge type.

Parameters
  • chunk_index: index of vertex chunk (optional, default is None). If not provided, the number of edges for this edge type is returned; if provided, the number of edges for the vertex chunk is returned.
Returns: the number of edges.
def read_offset(self, chunk_index: int) -> pyspark.sql.dataframe.DataFrame:
235    def read_offset(self, chunk_index: int) -> DataFrame:
236        """Load a single offset chunk as a DataFrame.
237
238        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
239
240        :param chunk_index: index of offset chunk
241        :returns: offset chunk DataFrame. Raise IllegalArgumentException if adjListType is
242        not AdjListType.ordered_by_source or AdjListType.ordered_by_dest.
243        """
244        return DataFrame(
245            self._jvm_edge_reader_obj.readOffset(chunk_index),
246            GraphArSession.ss,
247        )

Load a single offset chunk as a DataFrame.

WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

Parameters
  • chunk_index: index of offset chunk.
Returns: offset chunk DataFrame. Raises IllegalArgumentException if adjListType is neither AdjListType.ordered_by_source nor AdjListType.ordered_by_dest.
def read_adj_list_chunk( self, vertex_chunk_index: int, chunk_index: int) -> pyspark.sql.dataframe.DataFrame:
249    def read_adj_list_chunk(
250        self,
251        vertex_chunk_index: int,
252        chunk_index: int,
253    ) -> DataFrame:
254        """Load a single AdjList chunk as a DataFrame.
255
256        :param vertex_chunk_index: index of vertex chunk
257        :param chunk_index: index of AdjList chunk.
258        :returns: AdjList chunk DataFrame
259        """
260        return DataFrame(
261            self._jvm_edge_reader_obj.readAdjListChunk(vertex_chunk_index, chunk_index),
262            GraphArSession.ss,
263        )

Load a single AdjList chunk as a DataFrame.

Parameters
  • vertex_chunk_index: index of vertex chunk
  • chunk_index: index of AdjList chunk.
Returns: AdjList chunk DataFrame.
def read_adj_list_for_vertex_chunk( self, vertex_chunk_index: int, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
265    def read_adj_list_for_vertex_chunk(
266        self,
267        vertex_chunk_index: int,
268        add_index: bool = True,
269    ) -> DataFrame:
270        """Load all AdjList chunks for a vertex chunk as a DataFrame.
271
272        :param vertex_chunk_index: index of vertex chunk.
273        :param add_index: flag that add edge index column or not in the final DataFrame.
274        :returns: DataFrame of all AdjList chunks of vertices in given vertex chunk.
275        """
276        return DataFrame(
277            self._jvm_edge_reader_obj.readAdjListForVertexChunk(
278                vertex_chunk_index,
279                add_index,
280            ),
281            GraphArSession.ss,
282        )

Load all AdjList chunks for a vertex chunk as a DataFrame.

Parameters
  • vertex_chunk_index: index of vertex chunk.
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame of all AdjList chunks of vertices in the given vertex chunk.
def read_all_adj_list(self, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
284    def read_all_adj_list(self, add_index: bool = True) -> DataFrame:
285        """Load all AdjList chunks for this edge type as a DataFrame.
286
287        :param add_index: flag that add index column or not in the final DataFrame.
288        :returns: DataFrame of all AdjList chunks.
289        """
290        return DataFrame(
291            self._jvm_edge_reader_obj.readAllAdjList(add_index),
292            GraphArSession.ss,
293        )

Load all AdjList chunks for this edge type as a DataFrame.

Parameters
  • add_index: flag that controls whether the index column is added to the final DataFrame.
Returns: DataFrame of all AdjList chunks.
def read_edge_property_chunk( self, property_group: graphar_pyspark.info.PropertyGroup, vertex_chunk_index: int, chunk_index: int) -> pyspark.sql.dataframe.DataFrame:
295    def read_edge_property_chunk(
296        self,
297        property_group: PropertyGroup,
298        vertex_chunk_index: int,
299        chunk_index: int,
300    ) -> DataFrame:
301        """Load a single edge property chunk as a DataFrame.
302
303        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
304
305        :param property_group: property group.
306        :param vertex_chunk_index: index of vertex chunk.
307        :param chunk_index: index of property group chunk.
308        :returns: property group chunk DataFrame. If edge info does not contain the
309        property group, raise an IllegalArgumentException error.
310        """
311        return DataFrame(
312            self._jvm_edge_reader_obj.readEdgePropertyChunk(
313                property_group.to_scala(),
314                vertex_chunk_index,
315                chunk_index,
316            ),
317        )

Load a single edge property chunk as a DataFrame.

WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

Parameters
  • property_group: property group.
  • vertex_chunk_index: index of vertex chunk.
  • chunk_index: index of property group chunk.
Returns: property group chunk DataFrame. Raises IllegalArgumentException if the edge info does not contain the property group.
def read_edge_property_group_for_vertex_chunk( self, property_group: graphar_pyspark.info.PropertyGroup, vertex_chunk_index: int, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
319    def read_edge_property_group_for_vertex_chunk(
320        self,
321        property_group: PropertyGroup,
322        vertex_chunk_index: int,
323        add_index: bool = True,
324    ) -> DataFrame:
325        """Load the chunks for a property group of a vertex chunk as a DataFrame.
326
327        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
328
329        :param property_group: property group.
330        :param vertex_chunk_index: index of vertex chunk.
331        :param add_index: flag that add edge index column or not in the final DataFrame.
332        :returns: DataFrame that contains all property group chunks of vertices in given
333        vertex chunk. If edge info does not contain the property group, raise an IllegalArgumentException error.
334        """
335        return DataFrame(
336            self._jvm_edge_reader_obj.readEdgePropertyGroupForVertexChunk(
337                property_group.to_scala(),
338                vertex_chunk_index,
339                add_index,
340            ),
341            GraphArSession.ss,
342        )

Load the chunks for a property group of a vertex chunk as a DataFrame.

WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

Parameters
  • property_group: property group.
  • vertex_chunk_index: index of vertex chunk.
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame that contains all property group chunks of vertices in the given vertex chunk. Raises IllegalArgumentException if the edge info does not contain the property group.
def read_edge_property_group( self, property_group: graphar_pyspark.info.PropertyGroup, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
344    def read_edge_property_group(
345        self,
346        property_group: PropertyGroup,
347        add_index: bool = True,
348    ) -> DataFrame:
349        """Load all chunks for a property group as a DataFrame.
350
351        WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!
352
353        :param property_group: property group.
354        :param add_index: flag that add edge index column or not in the final DataFrame.
355        :returns: DataFrame that contains all chunks of property group. If edge info does
356        not contain the property group, raise an IllegalArgumentException error.
357        """
358        return DataFrame(
359            self._jvm_edge_reader_obj.readEdgePropertyGroup(
360                property_group.to_scala(),
361                add_index,
362            ),
363            GraphArSession.ss,
364        )

Load all chunks for a property group as a DataFrame.

WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method!

Parameters
  • property_group: property group.
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame that contains all chunks of the property group. Raises IllegalArgumentException if the edge info does not contain the property group.
def read_multiple_edge_property_groups_for_vertex_chunk( self, property_groups: list[graphar_pyspark.info.PropertyGroup], vertex_chunk_index: int, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
366    def read_multiple_edge_property_groups_for_vertex_chunk(
367        self,
368        property_groups: list[PropertyGroup],
369        vertex_chunk_index: int,
370        add_index: bool = True,
371    ) -> DataFrame:
372        """Load the chunks for multiple property groups of a vertex chunk as a DataFrame.
373
374        :param property_groups: list of property groups.
375        :param vertex_chunk_index: index of vertex chunk.
376        :param add_index: flag that add edge index column or not in the final DataFrame.
377        :returns: DataFrame that contains all property groups chunks of a vertex chunk.
378        """
379        return DataFrame(
380            self._jvm_edge_reader_obj.readMultipleEdgePropertyGroupsForVertexChunk(
381                [py_property_group.to_scala() for py_property_group in property_groups],
382                vertex_chunk_index,
383                add_index,
384            ),
385            GraphArSession.ss,
386        )

Load the chunks for multiple property groups of a vertex chunk as a DataFrame.

Parameters
  • property_groups: list of property groups.
  • vertex_chunk_index: index of vertex chunk.
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame that contains all property group chunks of a vertex chunk.
def read_multiple_edge_property_groups( self, property_groups: list[graphar_pyspark.info.PropertyGroup], add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
388    def read_multiple_edge_property_groups(
389        self,
390        property_groups: list[PropertyGroup],
391        add_index: bool = True,
392    ) -> DataFrame:
393        """Load the chunks for multiple property groups as a DataFrame.
394
395        :param property_groups: list of property groups.
396        :param add_index: flag that add edge index column or not in the final DataFrame.
397        :returns: DataFrame tha contains all property groups chunks of edge.
398        """
399        return DataFrame(
400            self._jvm_edge_reader_obj.readMultipleEdgePropertyGroups(
401                [py_property_group.to_scala() for py_property_group in property_groups],
402                add_index,
403            ),
404            GraphArSession.ss,
405        )

Load the chunks for multiple property groups as a DataFrame.

Parameters
  • property_groups: list of property groups.
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame that contains all property group chunks of the edge.
def read_all_edge_property_groups_for_vertex_chunk( self, vertex_chunk_index: int, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
407    def read_all_edge_property_groups_for_vertex_chunk(
408        self,
409        vertex_chunk_index: int,
410        add_index: bool = True,
411    ) -> DataFrame:
412        """Load the chunks for all property groups of a vertex chunk as a DataFrame.
413
414        :param vertex_chunk_index: index of vertex chunk.
415        :param add_index: flag that add edge index column or not in the final DataFrame.
416        :returns: DataFrame that contains all property groups chunks of a vertex chunk.
417        """
418        return DataFrame(
419            self._jvm_edge_reader_obj.readAllEdgePropertyGroupsForVertexChunk(
420                vertex_chunk_index,
421                add_index,
422            ),
423            GraphArSession.ss,
424        )

Load the chunks for all property groups of a vertex chunk as a DataFrame.

Parameters
  • vertex_chunk_index: index of vertex chunk.
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame that contains all property group chunks of a vertex chunk.
def read_all_edge_property_groups(self, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
426    def read_all_edge_property_groups(self, add_index: bool = True) -> DataFrame:
427        """Load the chunks for all property groups as a DataFrame.
428
429        :param add_index: flag that add edge index column or not in the final DataFrame.
430        :returns: DataFrame tha contains all property groups chunks of edge.
431        """
432        return DataFrame(
433            self._jvm_edge_reader_obj.readAllEdgePropertyGroups(add_index),
434            GraphArSession.ss,
435        )

Load the chunks for all property groups as a DataFrame.

Parameters
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame that contains all property group chunks of the edge.
def read_edges_for_vertex_chunk( self, vertex_chunk_index: int, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
437    def read_edges_for_vertex_chunk(
438        self,
439        vertex_chunk_index: int,
440        add_index: bool = True,
441    ) -> DataFrame:
442        """Load the chunks for the AdjList and all property groups for a vertex chunk as a DataFrame.
443
444        :param vertex_chunk_index: index of vertex chunk
445        :param add_index: flag that add edge index column or not in the final DataFrame.
446        :returns: DataFrame that contains all chunks of AdjList and property groups of vertices in given vertex chunk.
447        """
448        return DataFrame(
449            self._jvm_edge_reader_obj.readEdgesForVertexChunk(
450                vertex_chunk_index,
451                add_index,
452            ),
453            GraphArSession.ss,
454        )

Load the chunks for the AdjList and all property groups for a vertex chunk as a DataFrame.

Parameters
  • vertex_chunk_index: index of vertex chunk
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame that contains all chunks of AdjList and property groups of vertices in the given vertex chunk.
def read_edges(self, add_index: bool = True) -> pyspark.sql.dataframe.DataFrame:
456    def read_edges(self, add_index: bool = True) -> DataFrame:
457        """Load the chunks for the AdjList and all property groups as a DataFrame.
458
459        :param add_index: flag that add edge index column or not in the final DataFrame.
460        :returns: DataFrame that contains all chunks of AdjList and property groups of edges.
461        """
462        return DataFrame(
463            self._jvm_edge_reader_obj.readEdges(add_index),
464            GraphArSession.ss,
465        )

Load the chunks for the AdjList and all property groups as a DataFrame.

Parameters
  • add_index: flag that controls whether the edge index column is added to the final DataFrame.
Returns: DataFrame that contains all chunks of AdjList and property groups of edges.