graphar_pyspark

GraphArSession and initialization.

 1# Licensed to the Apache Software Foundation (ASF) under one
 2# or more contributor license agreements.  See the NOTICE file
 3# distributed with this work for additional information
 4# regarding copyright ownership.  The ASF licenses this file
 5# to you under the Apache License, Version 2.0 (the
 6# "License"); you may not use this file except in compliance
 7# with the License.  You may obtain a copy of the License at
 8#
 9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""GraphSession and initialization."""
19
20from pyspark.sql import SparkSession
21
22from graphar_pyspark.errors import GraphArIsNotInitializedError
23
24__version__ = "0.13.0.dev"
25
26class _GraphArSession:
27    """Singleton GraphAr helper object, that contains SparkSession and JVM.
28
29    It is implemented as a module-level instance of the class.
30    """
31
32    def __init__(self) -> None:
33        self.ss = None
34        self.sc = None
35        self.jvm = None
36        self.graphar = None
37        self.jsc = None
38        self.jss = None
39
40    def set_spark_session(self, spark_session: SparkSession) -> None:
41        self.ss = spark_session  # Python SparkSession
42        self.sc = spark_session.sparkContext  # Python SparkContext
43        self.jvm = spark_session._jvm  # JVM
44        self.graphar = spark_session._jvm.org.apache.graphar  # Alias to scala graphar
45        self.jsc = spark_session._jsc  # Java SparkContext
46        self.jss = spark_session._jsparkSession  # Java SparkSession
47
48    def is_initialized(self) -> bool:
49        return self.ss is not None
50
51
# Module-level singleton instance; `initialize` attaches a SparkSession to it.
GraphArSession = _GraphArSession()
53
54
def initialize(spark: SparkSession) -> None:
    """Initialize GraphAr session.

    Hands the given session to the module-level ``GraphArSession`` object,
    which is modified in place; nothing is returned.

    :param spark: pyspark SparkSession object.
    """
    # Mutates the global GraphArSession singleton.
    GraphArSession.set_spark_session(spark)
63
64
def _check_session() -> None:
    """Verify that the global GraphArSession has a SparkSession attached.

    :raises GraphArIsNotInitializedError: if ``GraphArSession.is_initialized()``
        returns False.
    """
    if GraphArSession.is_initialized():
        return
    # The package is imported as ``graphar_pyspark``, so the hint must use
    # that name for the suggested call to be usable as written.
    msg = "GraphArSession is not initialized. Call `graphar_pyspark.initialize` first!"
    raise GraphArIsNotInitializedError(msg)
GraphArSession = <graphar_pyspark._GraphArSession object>
def initialize(spark: pyspark.sql.session.SparkSession) -> None:
def initialize(spark: SparkSession) -> None:
    """Initialize GraphAr session.

    Hands the given session to the module-level ``GraphArSession`` object,
    which is modified in place; nothing is returned.

    :param spark: pyspark SparkSession object.
    """
    # Mutates the global GraphArSession singleton.
    GraphArSession.set_spark_session(spark)

Initialize GraphAr session.

Parameters
  • spark: pyspark SparkSession object.