Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SEDONA-230] rdd.saveAsGeoJSON should generate feature properties with field names #746

Merged
merged 7 commits into from
Jan 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions R/tests/testthat/helper-initialize.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,21 @@ expect_geom_equal <- function(sc, lhs, rhs) {
}
}

expect_geom_equal_geojson <- function(sc, lhs, rhs) {
testthat::expect_equal(length(lhs), length(rhs))
for (i in seq_along(lhs)) {
testthat::expect_true(
invoke_static(
sc,
"org.apache.sedona.common.utils.GeomUtils",
"equalsExactGeomUnsortedUserData",
lhs[[i]],
rhs[[i]]
)
)
}
}

as.coordinate_list <- function(geometry) {
geometry %>%
invoke("getCoordinates") %>%
Expand Down
19 changes: 17 additions & 2 deletions R/tests/testthat/test-data-interface.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ expect_result_matches_original <- function(pt_rdd) {
)
}

expect_result_matches_original_geojson <- function(pt_rdd) {
expect_equal(
pt_rdd %>% invoke("%>%", list("rawSpatialRDD"), list("count")),
test_rdd_with_non_spatial_attrs$.jobj %>%
invoke("%>%", list("rawSpatialRDD"), list("count"))
)
expect_geom_equal_geojson(
sc,
test_rdd_with_non_spatial_attrs$.jobj %>%
invoke("%>%", list("rawSpatialRDD"), list("takeOrdered", 5L), list("toArray")),
pt_rdd %>%
invoke("%>%", list("rawSpatialRDD"), list("takeOrdered", 5L), list("toArray"))
)
}

test_that("sedona_read_dsv_to_typed_rdd() creates PointRDD correctly", {
pt_rdd <- sedona_read_dsv_to_typed_rdd(
sc,
Expand Down Expand Up @@ -319,11 +334,11 @@ test_that("sedona_write_geojson() works as expected", {
sc$state$object_cache$storage_levels$memory_only
)

expect_result_matches_original(pt_rdd)
expect_result_matches_original_geojson(pt_rdd)
})

test_that("sedona_save_spatial_rdd() works as expected", {
for (fmt in c("wkb", "wkt", "geojson")) {
for (fmt in c("wkb", "wkt")) {
location <- tempfile(pattern = "pt_", fileext = paste0(".", fmt))
copy_to(
sc, tibble::tibble(id = 1, name = "a point", type = "point")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,34 @@ public static boolean equalsExactGeom(Geometry geom1, Object geom2) {
else return false;
}

/**
* This is for verifying the correctness of two geometries loaded from geojson
* @param geom1
* @param geom2
* @return
*/
public static boolean equalsExactGeomUnsortedUserData(Geometry geom1, Object geom2) {
if (! (geom2 instanceof Geometry)) return false;
Geometry g = (Geometry) geom2;
if (equalsUserData(geom1.getUserData(), g.getUserData())) return geom1.equalsExact(g);
else return false;
}

/**
* Use for check if two user data attributes are equal
* This is mainly used for GeoJSON parser as the column order is uncertain each time
* @param userData1
* @param userData2
* @return
*/
public static boolean equalsUserData(Object userData1, Object userData2) {
String[] split1 = ((String) userData1).split("\t");
String[] split2 = ((String) userData2).split("\t");
Arrays.sort(split1);
Arrays.sort(split2);
return Arrays.equals(split1, split2);
}

/**
* Swaps the XY coordinates of a geometry.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,23 +119,22 @@ public FormatUtils(FileDataSplitter splitter, boolean carryInputData, GeometryTy

public static List<String> readGeoJsonPropertyNames(String geoJson)
{
if (geoJson == null) {
logger.warn("The given GeoJson record is null and cannot be used to find property names");
return null;
}
List<String> propertyList = new ArrayList<>();
if (geoJson.contains("Feature") || geoJson.contains("feature") || geoJson.contains("FEATURE")) {
if (geoJson.contains("properties")) {
Feature feature = (Feature) GeoJSONFactory.create(geoJson);
if (Objects.isNull(feature.getId())) {
return new ArrayList(feature.getProperties().keySet());
}
else {
List<String> propertyList = new ArrayList<>(Arrays.asList("id"));
for (String geoJsonProperty : feature.getProperties().keySet()) {
propertyList.add(geoJsonProperty);
}
return propertyList;
}
Feature feature = (Feature) GeoJSONFactory.create(geoJson);
if (!Objects.isNull(feature.getId())) {
propertyList.add("id");
}
Map<String, Object> properties = feature.getProperties();
if (properties != null) {
propertyList.addAll(properties.keySet());
}
}
logger.warn("[Sedona] The GeoJSON file doesn't have feature properties");
return null;
return propertyList.size() > 0 ? propertyList:null;
}

private void readObject(ObjectInputStream inputStream)
Expand Down Expand Up @@ -173,7 +172,7 @@ public Geometry readGeoJSON(String geoJson)
for (Object property : featurePropertiesproperties.values()
) {
if (property == null) {
nonSpatialData.add("null");
nonSpatialData.add("");
}
else {
nonSpatialData.add(property.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
Expand Down Expand Up @@ -583,30 +584,34 @@ public Iterator<String> call(Iterator<T> iterator)
*/
public void saveAsGeoJSON(String outputLocation)
{
this.rawSpatialRDD.mapPartitions(new FlatMapFunction<Iterator<T>, String>()
{
@Override
public Iterator<String> call(Iterator<T> iterator)
throws Exception
{
ArrayList<String> result = new ArrayList();
GeoJSONWriter writer = new GeoJSONWriter();
while (iterator.hasNext()) {
Geometry spatialObject = iterator.next();
Feature jsonFeature;
if (spatialObject.getUserData() != null) {
Map<String, Object> userData = new HashMap<String, Object>();
userData.put("UserData", spatialObject.getUserData());
jsonFeature = new Feature(writer.write(spatialObject), userData);
this.rawSpatialRDD.mapPartitions((FlatMapFunction<Iterator<T>, String>) iterator -> {
ArrayList<String> result = new ArrayList();
GeoJSONWriter writer = new GeoJSONWriter();
while (iterator.hasNext()) {
Geometry spatialObject = iterator.next();
Feature jsonFeature;
if (spatialObject.getUserData() != null) {
Map<String, Object> fields = new HashMap<String, Object>();
String[] fieldValues = spatialObject.getUserData().toString().split("\t");
if (fieldNames != null && fieldValues.length == fieldNames.size()) {
for (int i = 0 ; i < fieldValues.length ; i++) {
fields.put(fieldNames.get(i), fieldValues[i]);
}
}
else {
jsonFeature = new Feature(writer.write(spatialObject), null);
for (int i = 0 ; i < fieldValues.length ; i++) {
fields.put("_c" + i, fieldValues[i]);
}
}
String jsonstring = jsonFeature.toString();
result.add(jsonstring);
jsonFeature = new Feature(writer.write(spatialObject), fields);
}
else {
jsonFeature = new Feature(writer.write(spatialObject), null);
}
return result.iterator();
String jsonstring = jsonFeature.toString();
result.add(jsonstring);
}
return result.iterator();
}).saveAsTextFile(outputLocation);
}

Expand Down
13 changes: 13 additions & 0 deletions core/src/test/java/org/apache/sedona/core/TestBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,18 @@

package org.apache.sedona.core;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.sedona.core.serde.SedonaKryoRegistrator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.serializer.KryoSerializer;

import java.io.IOException;

public class TestBase
{
protected static SparkConf conf;
Expand All @@ -41,5 +46,13 @@ protected static void initialize(final String testSuiteName)
Logger.getLogger("org").setLevel(Level.WARN);
Logger.getLogger("akka").setLevel(Level.WARN);
}

protected static void deleteFile(String path)
throws IOException
{
Configuration hadoopConfig = new Configuration();
FileSystem fileSystem = FileSystem.get(hadoopConfig);
fileSystem.delete(new Path(path), true);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,17 @@
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.locationtech.jts.geom.Geometry;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import static org.junit.Assert.assertEquals;

public class GeoJsonReaderTest
public class GeoJsonIOTest
extends TestBase
{

Expand All @@ -43,12 +48,12 @@ public class GeoJsonReaderTest
public static void onceExecutedBeforeAll()
throws IOException
{
initialize(GeoJsonReaderTest.class.getName());
geoJsonGeomWithFeatureProperty = GeoJsonReaderTest.class.getClassLoader().getResource("testPolygon.json").getPath();
geoJsonGeomWithoutFeatureProperty = GeoJsonReaderTest.class.getClassLoader().getResource("testpolygon-no-property.json").getPath();
geoJsonWithInvalidGeometries = GeoJsonReaderTest.class.getClassLoader().getResource("testInvalidPolygon.json").getPath();
geoJsonWithNullProperty = GeoJsonReaderTest.class.getClassLoader().getResource("testpolygon-with-null-property-value.json").getPath();
geoJsonContainsId = GeoJsonReaderTest.class.getClassLoader().getResource("testContainsId.json").getPath();
initialize(GeoJsonIOTest.class.getName());
geoJsonGeomWithFeatureProperty = GeoJsonIOTest.class.getClassLoader().getResource("testPolygon.json").getPath();
geoJsonGeomWithoutFeatureProperty = GeoJsonIOTest.class.getClassLoader().getResource("testpolygon-no-property.json").getPath();
geoJsonWithInvalidGeometries = GeoJsonIOTest.class.getClassLoader().getResource("testInvalidPolygon.json").getPath();
geoJsonWithNullProperty = GeoJsonIOTest.class.getClassLoader().getResource("testpolygon-with-null-property-value.json").getPath();
geoJsonContainsId = GeoJsonIOTest.class.getClassLoader().getResource("testContainsId.json").getPath();
}

@AfterClass
Expand All @@ -74,6 +79,65 @@ public void testReadToGeometryRDD()
assertEquals(geojsonRDD.rawSpatialRDD.count(), 10);
}

@Test
public void testReadWriteGeoJson()
throws IOException
{
String tmpFilePath = "target/geojson.tmp";
SpatialRDD initRdd = GeoJsonReader.readToGeometryRDD(sc, geoJsonGeomWithFeatureProperty);
deleteFile(tmpFilePath);
initRdd.saveAsGeoJSON(tmpFilePath);
SpatialRDD newRdd = GeoJsonReader.readToGeometryRDD(sc, tmpFilePath);

// Check the basic correctness
assertEquals(initRdd.fieldNames.size(), newRdd.fieldNames.size());
assertEquals(initRdd.rawSpatialRDD.count(), newRdd.rawSpatialRDD.count());

// Note that two RDDs may put <field name, value> in different order

// Put field names and values to a hash map for comparison
Geometry initGeom = (Geometry) initRdd.rawSpatialRDD.takeOrdered(1).get(0);
String[] initGeomFields = initGeom.getUserData().toString().split("\t");
Map<String, Object> initKvs = new HashMap<String, Object>();
for (int i = 0; i < initGeomFields.length; i++) {
initKvs.put(initRdd.fieldNames.get(i).toString(), initGeomFields[i]);
}

// Put field names and values to a hash map for comparison
Geometry newGeom = (Geometry) newRdd.rawSpatialRDD.takeOrdered(1).get(0);
String[] newGeomFields = newGeom.getUserData().toString().split("\t");
Map<String, Object> newKvs = new HashMap<String, Object>();
for (int i = 0; i < initGeomFields.length; i++) {
newKvs.put(newRdd.fieldNames.get(i).toString(), newGeomFields[i]);
}

for (int i = 0; i < initRdd.fieldNames.size(); i++) {
// The same field name should fetch the same value in both maps
assertEquals(initKvs.get(initRdd.fieldNames.get(i).toString()),
newKvs.get(initRdd.fieldNames.get(i).toString()));
}
}

@Test
public void testReadWriteSpecialGeoJsons()
throws IOException
{
String tmpFilePath = "target/geojson.tmp";
SpatialRDD initRdd = GeoJsonReader.readToGeometryRDD(sc, geoJsonGeomWithoutFeatureProperty);
deleteFile(tmpFilePath);
initRdd.saveAsGeoJSON(tmpFilePath);
SpatialRDD newRdd = GeoJsonReader.readToGeometryRDD(sc, tmpFilePath);
assertEquals(initRdd.rawSpatialRDD.count(), newRdd.rawSpatialRDD.count());

initRdd = GeoJsonReader.readToGeometryRDD(sc, geoJsonWithNullProperty);
deleteFile(tmpFilePath);
initRdd.saveAsGeoJSON(tmpFilePath);
newRdd = GeoJsonReader.readToGeometryRDD(sc, tmpFilePath);
assertEquals(initRdd.rawSpatialRDD.count(), newRdd.rawSpatialRDD.count());

// deleteFile(tmpFilePath);
}

/**
* Test geojson with null values in the properties
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class TestReadInvalidSyntaxGeometriesTest
public static void onceExecutedBeforeAll()
throws IOException
{
initialize(GeoJsonReaderTest.class.getName());
initialize(GeoJsonIOTest.class.getName());
invalidSyntaxGeoJsonGeomWithFeatureProperty = TestReadInvalidSyntaxGeometriesTest.class.getClassLoader().getResource("invalidSyntaxGeometriesJson.json").getPath();
}

Expand Down