diff --git a/common/src/main/java/org/apache/sedona/common/Constructors.java b/common/src/main/java/org/apache/sedona/common/Constructors.java index c391d77f1c..b82ba014e7 100644 --- a/common/src/main/java/org/apache/sedona/common/Constructors.java +++ b/common/src/main/java/org/apache/sedona/common/Constructors.java @@ -43,6 +43,28 @@ public static Geometry geomFromWKT(String wkt, int srid) throws ParseException { return new WKTReader(geometryFactory).read(wkt); } + public static Geometry geomFromEWKT(String ewkt) throws ParseException { + if (ewkt == null) { + return null; + } + int SRID = 0; + String wkt = ewkt; + + int index = ewkt.indexOf("SRID="); + if (index != -1) { + int semicolonIndex = ewkt.indexOf(';', index); + if (semicolonIndex != -1) { + SRID = Integer.parseInt(ewkt.substring(index + 5, semicolonIndex)); + wkt = ewkt.substring(semicolonIndex + 1); + } + else { + throw new ParseException("Invalid EWKT string"); + } + } + GeometryFactory geometryFactory = new GeometryFactory(new PrecisionModel(), SRID); + return new WKTReader(geometryFactory).read(wkt); + } + public static Geometry geomFromWKB(byte[] wkb) throws ParseException { return new WKBReader().read(wkb); } diff --git a/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java b/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java index 1caa93aa68..0e2da3fca1 100644 --- a/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java +++ b/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java @@ -37,6 +37,27 @@ public void geomFromWKT() throws ParseException { ParseException invalid = assertThrows(ParseException.class, () -> Constructors.geomFromWKT("not valid", 0)); assertEquals("Unknown geometry type: NOT (line 1)", invalid.getMessage()); } + + @Test + public void geomFromEWKT() throws ParseException { + assertNull(Constructors.geomFromEWKT(null)); + + Geometry geom = Constructors.geomFromEWKT("POINT (1 1)"); + assertEquals(0, geom.getSRID()); + assertEquals("POINT (1 1)", geom.toText()); + + geom = Constructors.geomFromEWKT("SRID=4269; POINT (1 1)"); + assertEquals(4269, geom.getSRID()); + assertEquals("POINT (1 1)", geom.toText()); + + geom = Constructors.geomFromEWKT("SRID=4269;POINT (1 1)"); + assertEquals(4269, geom.getSRID()); + assertEquals("POINT (1 1)", geom.toText()); + + ParseException invalid = assertThrows(ParseException.class, () -> Constructors.geomFromEWKT("not valid")); + assertEquals("Unknown geometry type: NOT (line 1)", invalid.getMessage()); + } + @Test public void mLineFromWKT() throws ParseException { assertNull(Constructors.mLineFromText(null, 0)); diff --git a/docs/api/flink/Constructor.md b/docs/api/flink/Constructor.md index d76fe44cde..2015ffc3f8 100644 --- a/docs/api/flink/Constructor.md +++ b/docs/api/flink/Constructor.md @@ -230,6 +230,26 @@ Output: POINT(40.7128 -74.006) ``` +## ST_GeomFromEWKT + +Introduction: Construct a Geometry from OGC Extended WKT + +Format: +`ST_GeomFromEWKT (EWkt:string)` + +Since: `v1.5.0` + +SQL example: +```sql +SELECT ST_AsText(ST_GeomFromEWKT('SRID=4269;POINT(40.7128 -74.0060)')) +``` + +Output: + +``` +POINT(40.7128 -74.006) +``` + ## ST_LineFromText Introduction: Construct a LineString from Text diff --git a/docs/api/sql/Constructor.md b/docs/api/sql/Constructor.md index 92bc14d82e..d023ec2fcc 100644 --- a/docs/api/sql/Constructor.md +++ b/docs/api/sql/Constructor.md @@ -274,6 +274,26 @@ Output: POINT(40.7128 -74.006) ``` +## ST_GeomFromEWKT + +Introduction: Construct a Geometry from OGC Extended WKT + +Format: +`ST_GeomFromEWKT (EWkt:string)` + +Since: `v1.5.0` + +SQL example: +```sql +SELECT ST_AsText(ST_GeomFromEWKT('SRID=4269;POINT(40.7128 -74.0060)')) +``` + +Output: + +``` +POINT(40.7128 -74.006) +``` + ## ST_LineFromText Introduction: Construct a Line from Wkt text diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java b/flink/src/main/java/org/apache/sedona/flink/Catalog.java index 89c350e4f8..7cc1f1f9a2 100644 --- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java +++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java @@ -32,6 +32,7 @@ public static UserDefinedFunction[] getFuncs() { new Constructors.ST_PolygonFromText(), new Constructors.ST_PolygonFromEnvelope(), new Constructors.ST_GeomFromWKT(), + new Constructors.ST_GeomFromEWKT(), new Constructors.ST_GeomFromText(), new Constructors.ST_GeomFromWKB(), new Constructors.ST_GeomFromGeoJSON(), diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java index fcc426a0a2..ec9f87665d 100644 --- a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java @@ -134,6 +134,13 @@ public Geometry eval(@DataTypeHint("String") String wktString) throws ParseExcep } } + public static class ST_GeomFromEWKT extends ScalarFunction { + @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) + public Geometry eval(@DataTypeHint("String") String wktString) throws ParseException { + return org.apache.sedona.common.Constructors.geomFromEWKT(wktString); + } + } + public static class ST_GeomFromText extends ScalarFunction { @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) public Geometry eval(@DataTypeHint("String") String wktString) throws ParseException { diff --git a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java index a39a8a0422..12b33aa441 100644 --- a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java @@ -148,6 +148,23 @@ public void testGeomFromWKT() { assertEquals(data.get(data.size() - 1).getField(0).toString(), result.getField(0).toString()); } + @Test + public void testGeomFromEWKT() { + List data = new ArrayList<>(); + data.add(Row.of("SRID=123;MULTILINESTRING((1 2, 3 4), (4 5, 6 7))", "multiline", 0L)); + + Table geomTable = createTextTable(data, multilinestringColNames); + geomTable = geomTable + .select(call(Constructors.ST_GeomFromEWKT.class.getSimpleName(), + $(multilinestringColNames[0])) + .as(multilinestringColNames[0]), $(multilinestringColNames[1])); + String result = first(geomTable) + .getFieldAs(0) + .toString(); + String expectedGeom = "MULTILINESTRING ((1 2, 3 4), (4 5, 6 7))"; + assertEquals(expectedGeom, result); + } + @Test public void testGeomFromText() { List data = createPolygonWKT(testDataSize); diff --git a/python/sedona/sql/st_constructors.py b/python/sedona/sql/st_constructors.py index 4981894e70..87725103c3 100644 --- a/python/sedona/sql/st_constructors.py +++ b/python/sedona/sql/st_constructors.py @@ -31,6 +31,7 @@ "ST_GeomFromText", "ST_GeomFromWKB", "ST_GeomFromWKT", + "ST_GeomFromEWKT", "ST_LineFromText", "ST_LineStringFromText", "ST_Point", @@ -132,6 +133,17 @@ def ST_GeomFromWKT(wkt: ColumnOrName) -> Column: """ return _call_constructor_function("ST_GeomFromWKT", wkt) +@validate_argument_types +def ST_GeomFromEWKT(ewkt: ColumnOrName) -> Column: + """Generate a geometry column from a OGC Extended Well-Known Text (WKT) string column. + + :param ewkt: OGC Extended WKT string column to generate from. + :type ewkt: ColumnOrName + :return: Geometry column representing the EWKT string. + :rtype: Column + """ + return _call_constructor_function("ST_GeomFromEWKT", ewkt) + @validate_argument_types def ST_LineFromText(wkt: ColumnOrName) -> Column: diff --git a/python/tests/sql/test_constructor_test.py b/python/tests/sql/test_constructor_test.py index 2ed0d6ad31..93af3d7b03 100644 --- a/python/tests/sql/test_constructor_test.py +++ b/python/tests/sql/test_constructor_test.py @@ -60,6 +60,12 @@ def test_st_geom_from_wkt(self): polygon_df.show(10) assert polygon_df.count() == 100 + def test_st_geom_from_ewkt(self): + input_df = self.spark.createDataFrame([("SRID=4269;LineString(1 2, 3 4)",)], ["ewkt"]) + input_df.createOrReplaceTempView("input_ewkt") + line_df = self.spark.sql("select ST_GeomFromEWKT(ewkt) as geom from input_ewkt") + assert line_df.count() == 1 + def test_st_geom_from_wkt_3d(self): input_df = self.spark.createDataFrame([ ("Point(21 52 87)",), diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index 2c84e147fe..82aff41340 100644 --- a/python/tests/sql/test_dataframe_api.py +++ b/python/tests/sql/test_dataframe_api.py @@ -40,6 +40,7 @@ (stc.ST_GeomFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_GeomFromWKB, ("wkb",), "constructor", "ST_ReducePrecision(geom, 2)", "LINESTRING (-2.1 -0.35, -1.5 -0.67)"), (stc.ST_GeomFromWKT, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), + (stc.ST_GeomFromEWKT, ("ewkt",), "linestring_ewkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_LineFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_LineStringFromText, ("multiple_point", lambda: f.lit(',')), "constructor", "", "LINESTRING (0 0, 1 0, 1 1, 0 0)"), (stc.ST_Point, ("x", "y"), "constructor", "", "POINT (0 1)"), @@ -364,6 +365,8 @@ def base_df(self, request): return TestDataFrameAPI.spark.sql("SELECT ST_GeomFromWKT('LINESTRING (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)') AS line") elif request.param == "linestring_wkt": return TestDataFrameAPI.spark.sql("SELECT 'LINESTRING (1 2, 3 4)' AS wkt") + elif request.param == "linestring_ewkt": + return TestDataFrameAPI.spark.sql("SELECT 'SRID=4269;LINESTRING (1 2, 3 4)' AS ewkt") elif request.param == "min_max_x_y": return TestDataFrameAPI.spark.sql("SELECT 0.0 AS minx, 1.0 AS miny, 2.0 AS maxx, 3.0 AS maxy") elif request.param == "multipoint_geom": diff --git a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index 8ed7d61cd0..8f26dcd62a 100644 --- a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -43,6 +43,7 @@ object Catalog { function[ST_GeomFromText](0), function[ST_LineFromText](), function[ST_GeomFromWKT](0), + function[ST_GeomFromEWKT](), function[ST_GeomFromWKB](), function[ST_GeomFromGeoJSON](), function[ST_GeomFromGML](), diff --git a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala index ccc7ffe96c..79d1fb9263 100644 --- a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala +++ b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala @@ -91,6 +91,19 @@ case class ST_GeomFromWKT(inputExpressions: Seq[Expression]) } } +/** + * Return a Geometry from a OGC Extended WKT string + * + * @param inputExpressions This function takes a geometry string. The string format must be OGC Extended Well-Known text (EWKT) representation. + */ +case class ST_GeomFromEWKT(inputExpressions: Seq[Expression]) + extends InferredExpression(Constructors.geomFromEWKT _) { + + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} + /** * Return a Geometry from a WKT string diff --git a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala index aa9eada87b..900c6f97c7 100644 --- a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala +++ b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala @@ -55,6 +55,9 @@ object st_constructors extends DataFrameAPI { def ST_GeomFromWKT(wkt: String, srid: Int): Column = wrapExpression[ST_GeomFromWKT](wkt, srid) + def ST_GeomFromEWKT(wkt: Column): Column = wrapExpression[ST_GeomFromEWKT](wkt) + def ST_GeomFromEWKT(wkt: String): Column = wrapExpression[ST_GeomFromEWKT](wkt) + def ST_LineFromText(wkt: Column): Column = wrapExpression[ST_LineFromText](wkt) def ST_LineFromText(wkt: String): Column = wrapExpression[ST_LineFromText](wkt) diff --git a/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala b/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala index 69b1a16668..66c92da09d 100644 --- a/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala +++ b/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala @@ -93,6 +93,29 @@ class constructorTestScala extends TestBaseScala { assert(thrown.getMessage == "Unknown geometry type: NOT (line 1)") } + it("Passed ST_GeomFromEWKT") { + var polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation) + polygonWktDf.createOrReplaceTempView("polygontable") + var polygonDf = sparkSession.sql("select ST_GeomFromEWKT(polygontable._c0) as countyshape from polygontable") + assert(polygonDf.count() == 100) + val nullGeom = sparkSession.sql("select ST_GeomFromEWKT(null)") + assert(nullGeom.first().isNullAt(0)) + val pointDf = sparkSession.sql("select ST_GeomFromEWKT('SRID=4269;POINT(-71.064544 42.28787)')") + assert(pointDf.count() == 1) + // Fail on wrong input type + intercept[Exception] { + sparkSession.sql("SELECT ST_GeomFromEWKT(0)").collect() + } + } + + it("Passed ST_GeomFromEWKT invalid input") { + // Fail on non wkt strings + val thrown = intercept[Exception] { + sparkSession.sql("SELECT ST_GeomFromEWKT('not wkt')").collect() + } + assert(thrown.getMessage == "Unknown geometry type: NOT (line 1)") + } + it("Passed ST_LineFromText") { val geometryDf = Seq("Linestring(1 2, 3 4)").map(wkt => Tuple1(wkt)).toDF("geom") geometryDf.createOrReplaceTempView("linetable") diff --git a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 446645291a..2651c33a0e 100644 --- a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -94,6 +94,13 @@ class dataFrameAPITestScala extends TestBaseScala { assert(actualResult.getSRID == 4326) } + it("passed st_geomfromewkt") { + val df = sparkSession.sql("SELECT 'SRID=4269;POINT(0.0 1.0)' AS wkt").select(ST_GeomFromEWKT("wkt")) + val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry] + assert(actualResult.toText == "POINT (0 1)") + assert(actualResult.getSRID == 4269) + } + it("passed st_geomfromtext") { val df = sparkSession.sql("SELECT 'POINT(0.0 1.0)' AS wkt").select(ST_GeomFromText("wkt")) val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry].toText()