Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SEDONA-333] Implement ST_GeomFromEWKT #937

Merged
merged 10 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions common/src/main/java/org/apache/sedona/common/Constructors.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,28 @@ public static Geometry geomFromWKT(String wkt, int srid) throws ParseException {
return new WKTReader(geometryFactory).read(wkt);
}

/**
 * Builds a geometry from an Extended WKT (EWKT) string, i.e. a WKT string that may be
 * prefixed with {@code SRID=<integer>;}.
 *
 * When no {@code SRID=} prefix is present the input is parsed as plain WKT and the
 * resulting geometry carries SRID 0.
 *
 * @param ewkt the EWKT (or plain WKT) text; may be null
 * @return the parsed geometry with the SRID taken from the prefix, or null when {@code ewkt} is null
 * @throws ParseException if the SRID prefix is malformed (unexpected leading text, missing
 *         semicolon, non-integer SRID) or the WKT body cannot be parsed
 */
public static Geometry geomFromEWKT(String ewkt) throws ParseException {
    if (ewkt == null) {
        return null;
    }
    int SRID = 0;
    String wkt = ewkt;

    int index = ewkt.indexOf("SRID=");
    if (index != -1) {
        // Anything other than whitespace in front of the prefix would otherwise be silently dropped.
        if (!ewkt.substring(0, index).trim().isEmpty()) {
            throw new ParseException("Invalid EWKT string");
        }
        int semicolonIndex = ewkt.indexOf(';', index);
        if (semicolonIndex == -1) {
            throw new ParseException("Invalid EWKT string");
        }
        String sridText = ewkt.substring(index + 5, semicolonIndex).trim();
        try {
            SRID = Integer.parseInt(sridText);
        } catch (NumberFormatException e) {
            // Report a bad SRID through the declared checked exception rather than leaking
            // an unchecked NumberFormatException to callers.
            throw new ParseException("Invalid SRID in EWKT string: " + sridText);
        }
        wkt = ewkt.substring(semicolonIndex + 1);
    }
    GeometryFactory geometryFactory = new GeometryFactory(new PrecisionModel(), SRID);
    return new WKTReader(geometryFactory).read(wkt);
}

/**
 * Decodes a geometry from its WKB byte representation using a fresh {@link WKBReader}.
 *
 * @param wkb the WKB bytes to decode
 * @return the geometry produced by the reader
 * @throws ParseException if the reader cannot parse the bytes
 */
public static Geometry geomFromWKB(byte[] wkb) throws ParseException {
    WKBReader reader = new WKBReader();
    return reader.read(wkb);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,23 @@ public void geomFromWKT() throws ParseException {
ParseException invalid = assertThrows(ParseException.class, () -> Constructors.geomFromWKT("not valid", 0));
assertEquals("Unknown geometry type: NOT (line 1)", invalid.getMessage());
}

@Test
public void geomFromEWKT() throws ParseException {
assertNull(Constructors.geomFromEWKT(null));

Geometry geom = Constructors.geomFromEWKT("POINT (1 1)");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add one more test case SRID=4269;POINT (1 1) (no space between semicolon and the wkt string)

assertEquals(0, geom.getSRID());
assertEquals("POINT (1 1)", geom.toText());

geom = Constructors.geomFromEWKT("SRID=4269; POINT (1 1)");
assertEquals(4269, geom.getSRID());
assertEquals("POINT (1 1)", geom.toText());

ParseException invalid = assertThrows(ParseException.class, () -> Constructors.geomFromEWKT("not valid"));
assertEquals("Unknown geometry type: NOT (line 1)", invalid.getMessage());
}

@Test
public void mLineFromWKT() throws ParseException {
assertNull(Constructors.mLineFromText(null, 0));
Expand Down
20 changes: 20 additions & 0 deletions docs/api/flink/Constructor.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,26 @@ Output:
POINT(40.7128 -74.006)
```

## ST_GeomFromEWKT

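Introduction: Construct a Geometry from an Extended WKT (EWKT) string, i.e. a WKT string optionally prefixed with `SRID=<srid>;`. If the prefix is absent, the SRID of the geometry is 0.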

Format:
`ST_GeomFromEWKT (EWkt:string)`

Since: `v1.5.0`

SQL example:
```sql
SELECT ST_AsEWKT(ST_GeomFromEWKT('SRID=4269;POINT(40.7128 -74.0060)'))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please print the result using ST_AsEWKT or ST_AsText . This will print SRID

```

Output:

```
SRID=4269;POINT (40.7128 -74.006)
```

## ST_LineFromText

Introduction: Construct a LineString from Text
Expand Down
20 changes: 20 additions & 0 deletions docs/api/sql/Constructor.md
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,26 @@ Output:
POINT(40.7128 -74.006)
```

## ST_GeomFromEWKT

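Introduction: Construct a Geometry from an Extended WKT (EWKT) string, i.e. a WKT string optionally prefixed with `SRID=<srid>;`. If the prefix is absent, the SRID of the geometry is 0.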

Format:
`ST_GeomFromEWKT (EWkt:string)`

Since: `v1.5.0`

SQL example:
```sql
SELECT ST_AsEWKT(ST_GeomFromEWKT('SRID=4269;POINT(40.7128 -74.0060)'))
```

Output:

```
SRID=4269;POINT (40.7128 -74.006)
```

## ST_LineFromText

Introduction: Construct a Line from Wkt text
Expand Down
1 change: 1 addition & 0 deletions flink/src/main/java/org/apache/sedona/flink/Catalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public static UserDefinedFunction[] getFuncs() {
new Constructors.ST_PolygonFromText(),
new Constructors.ST_PolygonFromEnvelope(),
new Constructors.ST_GeomFromWKT(),
new Constructors.ST_GeomFromEWKT(),
new Constructors.ST_GeomFromText(),
new Constructors.ST_GeomFromWKB(),
new Constructors.ST_GeomFromGeoJSON(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ public Geometry eval(@DataTypeHint("String") String wktString) throws ParseExcep
}
}

/**
 * Flink scalar function exposing EWKT parsing; the work is delegated to
 * {@code org.apache.sedona.common.Constructors.geomFromEWKT}.
 */
public static class ST_GeomFromEWKT extends ScalarFunction {
    @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class)
    public Geometry eval(@DataTypeHint("String") String wktString) throws ParseException {
        Geometry parsed = org.apache.sedona.common.Constructors.geomFromEWKT(wktString);
        return parsed;
    }
}

public static class ST_GeomFromText extends ScalarFunction {
@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class)
public Geometry eval(@DataTypeHint("String") String wktString) throws ParseException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,23 @@ public void testGeomFromWKT() {
assertEquals(data.get(data.size() - 1).getField(0).toString(), result.getField(0).toString());
}

@Test
public void testGeomFromEWKT() {
    List<Row> data = new ArrayList<>();
    data.add(Row.of("SRID=123;MULTILINESTRING((1 2, 3 4), (4 5, 6 7))", "multiline", 0L));

    Table geomTable = createTextTable(data, multilinestringColNames);
    geomTable = geomTable
            .select(call(Constructors.ST_GeomFromEWKT.class.getSimpleName(),
                    $(multilinestringColNames[0]))
                    .as(multilinestringColNames[0]), $(multilinestringColNames[1]));
    // Keep the geometry itself (not just its text) so the SRID prefix can be verified too.
    org.locationtech.jts.geom.Geometry result = first(geomTable).getFieldAs(0);
    assertEquals(123, result.getSRID());
    String expectedGeom = "MULTILINESTRING ((1 2, 3 4), (4 5, 6 7))";
    assertEquals(expectedGeom, result.toString());
}

@Test
public void testGeomFromText() {
List<Row> data = createPolygonWKT(testDataSize);
Expand Down
12 changes: 12 additions & 0 deletions python/sedona/sql/st_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"ST_GeomFromText",
"ST_GeomFromWKB",
"ST_GeomFromWKT",
"ST_GeomFromEWKT",
"ST_LineFromText",
"ST_LineStringFromText",
"ST_Point",
Expand Down Expand Up @@ -132,6 +133,17 @@ def ST_GeomFromWKT(wkt: ColumnOrName) -> Column:
"""
return _call_constructor_function("ST_GeomFromWKT", wkt)

@validate_argument_types
def ST_GeomFromEWKT(ewkt: ColumnOrName) -> Column:
    """Generate a geometry column from an Extended Well-Known Text (EWKT) string column.

    :param ewkt: EWKT string column to generate from.
    :type ewkt: ColumnOrName
    :return: Geometry column representing the EWKT string.
    :rtype: Column
    """
    # NOTE(review): a reviewer flagged that this wrapper has no dedicated test yet.
    return _call_constructor_function("ST_GeomFromEWKT", ewkt)


@validate_argument_types
def ST_LineFromText(wkt: ColumnOrName) -> Column:
Expand Down
6 changes: 6 additions & 0 deletions python/tests/sql/test_constructor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ def test_st_geom_from_wkt(self):
polygon_df.show(10)
assert polygon_df.count() == 100

def test_st_geom_from_ewkt(self):
    """ST_GeomFromEWKT parses an EWKT string and applies its SRID prefix."""
    input_df = self.spark.createDataFrame([("SRID=4269;LineString(1 2, 3 4)",)], ["ewkt"])
    input_df.createOrReplaceTempView("input_ewkt")
    # Call the function under test; the query previously used ST_LineFromText.
    line_df = self.spark.sql("select ST_GeomFromEWKT(ewkt) as geom from input_ewkt")
    assert line_df.count() == 1
    # Reading the SRID forces the expression to be evaluated and checks the prefix was applied.
    assert line_df.selectExpr("ST_SRID(geom)").first()[0] == 4269

def test_st_geom_from_wkt_3d(self):
input_df = self.spark.createDataFrame([
("Point(21 52 87)",),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ object Catalog {
function[ST_GeomFromText](0),
function[ST_LineFromText](),
function[ST_GeomFromWKT](0),
function[ST_GeomFromEWKT](),
function[ST_GeomFromWKB](),
function[ST_GeomFromGeoJSON](),
function[ST_GeomFromGML](),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,19 @@ case class ST_GeomFromWKT(inputExpressions: Seq[Expression])
}
}

/**
  * Builds a Geometry from an Extended WKT (EWKT) string by delegating to
  * `Constructors.geomFromEWKT`.
  *
  * @param inputExpressions the child expressions; the geometry string must be in
  *                         Extended Well-Known Text (EWKT) form
  */
case class ST_GeomFromEWKT(inputExpressions: Seq[Expression])
  extends InferredExpression(Constructors.geomFromEWKT _) {

  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) =
    copy(inputExpressions = newChildren)
}


/**
* Return a Geometry from a WKT string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ object st_constructors extends DataFrameAPI {

def ST_GeomFromWKT(wkt: String, srid: Int): Column = wrapExpression[ST_GeomFromWKT](wkt, srid)

// ST_GeomFromEWKT takes only the EWKT text; the SRID comes from the "SRID=...;" prefix,
// so no extra SRID argument is passed to the expression.
def ST_GeomFromEWKT(wkt: Column): Column = wrapExpression[ST_GeomFromEWKT](wkt)
def ST_GeomFromEWKT(wkt: String): Column = wrapExpression[ST_GeomFromEWKT](wkt)

def ST_LineFromText(wkt: Column): Column = wrapExpression[ST_LineFromText](wkt)
def ST_LineFromText(wkt: String): Column = wrapExpression[ST_LineFromText](wkt)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,29 @@ class constructorTestScala extends TestBaseScala {
assert(thrown.getMessage == "Unknown geometry type: NOT (line 1)")
}

it("Passed ST_GeomFromEWKT") {
  // Plain WKT rows (no SRID prefix) must still parse.
  val polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation)
  polygonWktDf.createOrReplaceTempView("polygontable")
  val polygonDf = sparkSession.sql("select ST_GeomFromEWKT(polygontable._c0) as countyshape from polygontable")
  assert(polygonDf.count() == 100)
  val nullGeom = sparkSession.sql("select ST_GeomFromEWKT(null)")
  assert(nullGeom.first().isNullAt(0))
  val pointDf = sparkSession.sql("select ST_GeomFromEWKT('SRID=4269;POINT(-71.064544 42.28787)')")
  assert(pointDf.count() == 1)
  // The SRID prefix must end up on the geometry, not merely be skipped.
  val sridDf = sparkSession.sql("select ST_SRID(ST_GeomFromEWKT('SRID=4269;POINT(-71.064544 42.28787)'))")
  assert(sridDf.first().getInt(0) == 4269)
  // Fail on wrong input type
  intercept[Exception] {
    sparkSession.sql("SELECT ST_GeomFromEWKT(0)").collect()
  }
}

it("Passed ST_GeomFromEWKT invalid input") {
  // A string that is not WKT must surface the parser's error message.
  val invalidQuery = "SELECT ST_GeomFromEWKT('not wkt')"
  val error = intercept[Exception](sparkSession.sql(invalidQuery).collect())
  assert(error.getMessage == "Unknown geometry type: NOT (line 1)")
}

it("Passed ST_LineFromText") {
val geometryDf = Seq("Linestring(1 2, 3 4)").map(wkt => Tuple1(wkt)).toDF("geom")
geometryDf.createOrReplaceTempView("linetable")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ class dataFrameAPITestScala extends TestBaseScala {
assert(actualResult.getSRID == 4326)
}

it("passed st_geomfromewkt") {
  // Go through the DataFrame API wrapper (not a SQL call) so this suite covers it.
  val df = sparkSession.sql("SELECT 'SRID=4269;POINT(0.0 1.0)' AS wkt").select(ST_GeomFromEWKT("wkt"))
  val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry]
  assert(actualResult.toText == "POINT (0 1)")
  assert(actualResult.getSRID == 4269)
}

it("passed st_geomfromtext") {
val df = sparkSession.sql("SELECT 'POINT(0.0 1.0)' AS wkt").select(ST_GeomFromText("wkt"))
val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry].toText()
Expand Down