Skip to content

Commit

Permalink
[SEDONA-226] Replace AnalysisException with IllegalArgumentException in GeoParquet reader since AnalysisException has breaking ABI changes between Spark 3.0 and 3.3 (#771)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kontinuation authored and zzs-wherobots committed Feb 19, 2023
1 parent 9fdab1e commit d7b3bc2
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
8 changes: 1 addition & 7 deletions python/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,9 @@
# under the License.

import os
from os import path

from tests.tools import tests_resource

# data_path = path.abspath(path.dirname(__file__))
#
#
# def create_data_path(relative_path: str) -> str:
# return os.path.join(data_path, relative_path)


mixed_wkb_geometry_input_location = os.path.join(tests_resource, "county_small_wkb.tsv")
mixed_wkt_geometry_input_location = os.path.join(tests_resource, "county_small.tsv")
Expand All @@ -45,3 +38,4 @@
csv_point2_input_location = os.path.join(tests_resource, "equalitycheckfiles/testequals_point2.csv")
geojson_id_input_location = os.path.join(tests_resource, "testContainsId.json")
geoparquet_input_location = os.path.join(tests_resource, "geoparquet/example1.parquet")
plain_parquet_input_location = os.path.join(tests_resource, "geoparquet/plain.parquet")
7 changes: 7 additions & 0 deletions python/tests/sql/test_geoparquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

import pytest
import os.path

from shapely.geometry import Point
Expand All @@ -25,6 +26,7 @@

from tests.test_base import TestBase
from tests import geoparquet_input_location
from tests import plain_parquet_input_location


class TestGeoParquet(TestBase):
Expand Down Expand Up @@ -61,3 +63,8 @@ def test_load_geoparquet_with_spatial_filter(self):
rows = df.collect()
assert len(rows) == 1
assert rows[0]['name'] == 'Tanzania'

def test_load_plain_parquet_file(self):
    """Loading a plain (non-geo) Parquet file through the geoparquet
    reader must raise an error mentioning the missing geo metadata.

    NOTE(review): the JVM side throws IllegalArgumentException, which
    surfaces through py4j as a generic wrapper — hence the broad
    ``Exception`` match here.
    """
    reader = self.spark.read.format("geoparquet")
    with pytest.raises(Exception) as exc_info:
        reader.load(plain_parquet_input_location)
    assert "does not contain valid geo metadata" in str(exc_info.value)
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import org.apache.parquet.schema._
import org.apache.parquet.schema.OriginalType._
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
import org.apache.parquet.schema.Type.Repetition._
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter.checkConversionRequirement
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
Expand All @@ -48,7 +47,7 @@ class GeoParquetToSparkSchemaConverter(
assumeInt96IsTimestamp: Boolean = SQLConf.PARQUET_INT96_AS_TIMESTAMP.defaultValue.get) {

private val geoParquetMetaData: GeoParquetMetaData = GeoParquetMetaData.parseKeyValueMetaData(keyValueMetaData).getOrElse {
throw new AnalysisException("GeoParquet file does not contain valid geo metadata")
throw new IllegalArgumentException("GeoParquet file does not contain valid geo metadata")
}

def this(keyValueMetaData: java.util.Map[String, String], conf: SQLConf) = this(
Expand Down Expand Up @@ -106,13 +105,13 @@ class GeoParquetToSparkSchemaConverter(
if (originalType == null) s"$typeName" else s"$typeName ($originalType)"

def typeNotSupported() =
throw new AnalysisException(s"Parquet type not supported: $typeString")
throw new IllegalArgumentException(s"Parquet type not supported: $typeString")

def typeNotImplemented() =
throw new AnalysisException(s"Parquet type not yet supported: $typeString")
throw new IllegalArgumentException(s"Parquet type not yet supported: $typeString")

def illegalType() =
throw new AnalysisException(s"Illegal Parquet type: $typeString")
throw new IllegalArgumentException(s"Illegal Parquet type: $typeString")

// When maxPrecision = -1, we skip precision range check, and always respect the precision
// specified in field.getDecimalMetadata. This is useful when interpreting decimal types stored
Expand Down Expand Up @@ -242,7 +241,7 @@ class GeoParquetToSparkSchemaConverter(
valueContainsNull = valueOptional)

case _ =>
throw new AnalysisException(s"Unrecognized Parquet type: $field")
throw new IllegalArgumentException(s"Unrecognized Parquet type: $field")
}
}

Expand Down Expand Up @@ -560,7 +559,7 @@ extends SparkToParquetSchemaConverter(writeLegacyParquetFormat, outputTimestampT
convertField(field.copy(dataType = udt.sqlType))

case _ =>
throw new AnalysisException(s"Unsupported data type ${field.dataType.catalogString}")
throw new IllegalArgumentException(s"Unsupported data type ${field.dataType.catalogString}")
}
}
}
Expand Down

0 comments on commit d7b3bc2

Please sign in to comment.