From 70366dd7627a4eb98dbd24e9dabacec03c45b8e6 Mon Sep 17 00:00:00 2001 From: Vatsal Mevada Date: Wed, 4 Dec 2019 13:11:31 +0530 Subject: [PATCH] [SNAP-3165] Instantiating snappy session only when catalogImplementation is in-memory while running pyspark shell. --- python/pyspark/shell.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index bb6929749f6b0..9d82f400c3a2c 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -47,6 +47,8 @@ import py4j import pyspark + +from pyspark import SparkConf from pyspark.context import SparkContext from pyspark.sql import SparkSession, SQLContext from pyspark.sql.snappy import SnappySession @@ -57,12 +59,17 @@ SparkContext._ensure_initialized() +conf = SparkConf() +catalogImplementation = conf.get('spark.sql.catalogImplementation', 'hive').lower() try: - # Try to access HiveConf, it will raise exception if Hive is not added - SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf() - spark = SparkSession.builder\ - .enableHiveSupport()\ - .getOrCreate() + if catalogImplementation == 'hive': + # Try to access HiveConf, it will raise exception if Hive is not added + SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf() + spark = SparkSession.builder\ + .enableHiveSupport()\ + .getOrCreate() + else: + spark = SparkSession.builder.getOrCreate() except py4j.protocol.Py4JError: spark = SparkSession.builder.getOrCreate() except TypeError: @@ -70,12 +77,18 @@ sc = spark.sparkContext -snappy = SnappySession(sc) -sql = snappy.sql +if catalogImplementation == 'in-memory': + snappy = SnappySession(sc) + sql = snappy.sql +else: + sql = spark.sql atexit.register(lambda: sc.stop()) # for compatibility -sqlContext = snappy._wrapped +if catalogImplementation == 'in-memory': + sqlContext = snappy._wrapped +else: + sqlContext = spark._wrapped sqlCtx = sqlContext print("""Welcome to @@ -90,7 +103,8 @@ platform.python_build()[0], 
platform.python_build()[1])) print("SparkSession available as 'spark'.") -print("SnappySession available as 'snappy'.") +if catalogImplementation == 'in-memory': + print("SnappySession available as 'snappy'.") # The ./bin/pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP, # which allows us to execute the user's PYTHONSTARTUP file: