Skip to content

Commit

Permalink
[SEDONA-226] Replace AnalysisException with IllegalArgumentException in GeoParquet reader since AnalysisException has breaking ABI changes between Spark 3.0 and 3.3 (#771)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kontinuation authored and zzs-wherobots committed Feb 19, 2023
1 parent 9fdab1e commit d7b3bc2
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
8 changes: 1 addition & 7 deletions python/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,9 @@
# under the License.

import os
from os import path

from tests.tools import tests_resource

# data_path = path.abspath(path.dirname(__file__))
#
#
# def create_data_path(relative_path: str) -> str:
# return os.path.join(data_path, relative_path)


mixed_wkb_geometry_input_location = os.path.join(tests_resource, "county_small_wkb.tsv")
mixed_wkt_geometry_input_location = os.path.join(tests_resource, "county_small.tsv")
Expand All @@ -45,3 +38,4 @@
csv_point2_input_location = os.path.join(tests_resource, "equalitycheckfiles/testequals_point2.csv")
geojson_id_input_location = os.path.join(tests_resource, "testContainsId.json")
geoparquet_input_location = os.path.join(tests_resource, "geoparquet/example1.parquet")
plain_parquet_input_location = os.path.join(tests_resource, "geoparquet/plain.parquet")
7 changes: 7 additions & 0 deletions python/tests/sql/test_geoparquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

import pytest
import os.path

from shapely.geometry import Point
Expand All @@ -25,6 +26,7 @@

from tests.test_base import TestBase
from tests import geoparquet_input_location
from tests import plain_parquet_input_location


class TestGeoParquet(TestBase):
Expand Down Expand Up @@ -61,3 +63,8 @@ def test_load_geoparquet_with_spatial_filter(self):
rows = df.collect()
assert len(rows) == 1
assert rows[0]['name'] == 'Tanzania'

def test_load_plain_parquet_file(self):
    """Loading a plain (non-geo) Parquet file through the geoparquet
    reader must raise an error mentioning the missing geo metadata.

    NOTE(review): the JVM side throws IllegalArgumentException, which
    surfaces through py4j as a generic wrapper — hence the broad
    ``Exception`` match here.
    """
    reader = self.spark.read.format("geoparquet")
    with pytest.raises(Exception) as exc_info:
        reader.load(plain_parquet_input_location)
    assert "does not contain valid geo metadata" in str(exc_info.value)
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import org.apache.parquet.schema._
import org.apache.parquet.schema.OriginalType._
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
import org.apache.parquet.schema.Type.Repetition._
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter.checkConversionRequirement
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
Expand All @@ -48,7 +47,7 @@ class GeoParquetToSparkSchemaConverter(
assumeInt96IsTimestamp: Boolean = SQLConf.PARQUET_INT96_AS_TIMESTAMP.defaultValue.get) {

private val geoParquetMetaData: GeoParquetMetaData = GeoParquetMetaData.parseKeyValueMetaData(keyValueMetaData).getOrElse {
throw new AnalysisException("GeoParquet file does not contain valid geo metadata")
throw new IllegalArgumentException("GeoParquet file does not contain valid geo metadata")
}

def this(keyValueMetaData: java.util.Map[String, String], conf: SQLConf) = this(
Expand Down Expand Up @@ -106,13 +105,13 @@ class GeoParquetToSparkSchemaConverter(
if (originalType == null) s"$typeName" else s"$typeName ($originalType)"

def typeNotSupported() =
throw new AnalysisException(s"Parquet type not supported: $typeString")
throw new IllegalArgumentException(s"Parquet type not supported: $typeString")

def typeNotImplemented() =
throw new AnalysisException(s"Parquet type not yet supported: $typeString")
throw new IllegalArgumentException(s"Parquet type not yet supported: $typeString")

def illegalType() =
throw new AnalysisException(s"Illegal Parquet type: $typeString")
throw new IllegalArgumentException(s"Illegal Parquet type: $typeString")

// When maxPrecision = -1, we skip precision range check, and always respect the precision
// specified in field.getDecimalMetadata. This is useful when interpreting decimal types stored
Expand Down Expand Up @@ -242,7 +241,7 @@ class GeoParquetToSparkSchemaConverter(
valueContainsNull = valueOptional)

case _ =>
throw new AnalysisException(s"Unrecognized Parquet type: $field")
throw new IllegalArgumentException(s"Unrecognized Parquet type: $field")
}
}

Expand Down Expand Up @@ -560,7 +559,7 @@ extends SparkToParquetSchemaConverter(writeLegacyParquetFormat, outputTimestampT
convertField(field.copy(dataType = udt.sqlType))

case _ =>
throw new AnalysisException(s"Unsupported data type ${field.dataType.catalogString}")
throw new IllegalArgumentException(s"Unsupported data type ${field.dataType.catalogString}")
}
}
}
Expand Down

0 comments on commit d7b3bc2

Please sign in to comment.