[DOCS] Update Sedona website, examples, jupyter notebooks to 1.4.0 #802

Merged
merged 10 commits into from
Mar 20, 2023
13 changes: 10 additions & 3 deletions .github/workflows/example.yml
Original file line number Diff line number Diff line change
@@ -32,6 +32,13 @@ jobs:
~/.ivy2/cache
~/.sbt
key: ${{ runner.os }}-sbt-${{ hashFiles('**/build.sbt') }}
- run: (cd examples/rdd-colocation-mining;sbt clean assembly;java -jar target/scala-2.12/*.jar)
- run: (cd examples/sql;sbt clean assembly;java -jar target/scala-2.12/*.jar)
- run: (cd examples/viz;sbt clean assembly;java -jar target/scala-2.12/*.jar)
- name: Cache Maven packages
uses: actions/cache@v2
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2
- run: (cd examples/spark-rdd-colocation-mining;sbt clean assembly;java -jar target/scala-2.12/*.jar)
- run: (cd examples/spark-sql;sbt clean assembly;java -jar target/scala-2.12/*.jar)
- run: (cd examples/spark-viz;sbt clean assembly;java -jar target/scala-2.12/*.jar)
- run: (cd examples/flink-sql;mvn clean install;java -jar target/sedona-flink-example-1.0.0.jar)
10 changes: 3 additions & 7 deletions binder/ApacheSedonaCore.ipynb
@@ -75,7 +75,7 @@
" appName(\"Sedona App\").\\\n",
" config(\"spark.serializer\", KryoSerializer.getName).\\\n",
" config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName) .\\\n",
" config(\"spark.jars.packages\", \"org.apache.sedona:sedona-python-adapter-3.0_2.12:1.1.0-incubating,org.datasyslab:geotools-wrapper:1.1.0-25.2\") .\\\n",
" config(\"spark.jars.packages\", \"org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.0,org.datasyslab:geotools-wrapper:1.4.0-28.2\") .\\\n",
" getOrCreate()"
]
},
@@ -183,9 +183,7 @@
"outputs": [
{
"data": {
"image/svg+xml": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-176.64696132 26.718666680000002 95.20719264000002 48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\" opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L -84.965961,71.355134 L -84.965961,30.244859 L -173.120769,30.244859 z\" /></g></svg>"
],
"image/svg+xml": "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-176.64696132 26.718666680000002 95.20719264000002 48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\" opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L -84.965961,71.355134 L -84.965961,30.244859 L -173.120769,30.244859 z\" /></g></svg>",
"text/plain": [
"Envelope(-173.120769, -84.965961, 30.244859, 71.355134)"
]
@@ -229,9 +227,7 @@
"outputs": [
{
"data": {
"image/svg+xml": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-176.64696132 26.718666680000002 95.20719264000002 48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\" opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L -84.965961,71.355134 L -84.965961,30.244859 L -173.120769,30.244859 z\" /></g></svg>"
],
"image/svg+xml": "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-176.64696132 26.718666680000002 95.20719264000002 48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\" opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L -84.965961,71.355134 L -84.965961,30.244859 L -173.120769,30.244859 z\" /></g></svg>",
"text/plain": [
"Envelope(-173.120769, -84.965961, 30.244859, 71.355134)"
]
2 changes: 1 addition & 1 deletion binder/ApacheSedonaRaster.ipynb
@@ -63,7 +63,7 @@
" appName(\"Demo-app\").\\\n",
" config(\"spark.serializer\", KryoSerializer.getName).\\\n",
" config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName) .\\\n",
" config(\"spark.jars.packages\", \"org.apache.sedona:sedona-python-adapter-3.0_2.12:1.2.1-incubating,org.datasyslab:geotools-wrapper:1.1.0-25.2\") .\\\n",
" config(\"spark.jars.packages\", \"org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.0,org.datasyslab:geotools-wrapper:1.4.0-28.2\") .\\\n",
" getOrCreate()\n",
"\n",
"SedonaRegistrator.registerAll(spark)\n",
3 changes: 1 addition & 2 deletions binder/ApacheSedonaSQL.ipynb
@@ -121,8 +121,7 @@
" appName('appName'). \\\n",
" config(\"spark.serializer\", KryoSerializer.getName). \\\n",
" config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName). \\\n",
" config('spark.jars.packages',\n",
" 'org.apache.sedona:sedona-python-adapter-3.0_2.12:1.1.0-incubating,org.datasyslab:geotools-wrapper:1.1.0-25.2'). \\\n",
" config(\"spark.jars.packages\", \"org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.0,org.datasyslab:geotools-wrapper:1.4.0-28.2\") .\\\n",
" getOrCreate()"
]
},
6 changes: 2 additions & 4 deletions binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions binder/Pipfile
@@ -12,11 +12,11 @@ pytest-cov = "*"

[packages]
pandas="*"
geopandas="==0.6.0"
geopandas="==0.10.2"
pyspark="==3.1.2"
attrs="*"
ipykernel = "*"
apache-sedona="==1.1.0"
apache-sedona="==1.4.0"
matplotlib = "*"
descartes = "*"

6 changes: 3 additions & 3 deletions binder/postBuild
@@ -1,6 +1,6 @@
#Download Apache Spark
wget https://archive.apache.org/dist/spark/spark-3.1.2/spark-3.1.2-bin-hadoop3.2.tgz
tar -xzf spark-3.1.2-bin-hadoop3.2.tgz
wget https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz
tar -xzf spark-3.3.2-bin-hadoop3.tgz

#Tidy up
rm spark-3.1.2-bin-hadoop3.2.tgz
rm spark-3.3.2-bin-hadoop3.tgz
2 changes: 1 addition & 1 deletion binder/start
@@ -1,6 +1,6 @@
#!/bin/bash

SPARK_HOME=$HOME/spark-3.1.2-bin-hadoop3.2
SPARK_HOME=$HOME/spark-3.3.2-bin-hadoop3
export PATH=$SPARK_HOME/bin:$PATH
export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
export PYSPARK_SUBMIT_ARGS="--master local[*] pyspark-shell"
29 changes: 22 additions & 7 deletions docs/api/sql/Optimizer.md
@@ -43,6 +43,7 @@ RangeJoin polygonshape#20: geometry, pointshape#43: geometry, false
All join queries in SedonaSQL are inner joins

## Distance join

Introduction: Find geometries from A and geometries from B such that the internal Euclidean distance of each geometry pair is less than or equal to a certain distance

Spark SQL Example:
@@ -72,7 +73,7 @@ DistanceJoin pointshape1#12: geometry, pointshape2#33: geometry, 2.0, true
```

!!!warning
Sedona doesn't control the distance's unit (degree or meter). It is same with the geometry. To change the geometry's unit, please transform the coordinate reference system. See [ST_Transform](Function.md#st_transform).
    Sedona doesn't control the distance's unit (degree or meter); it uses the same unit as the geometry. If your coordinates are in the longitude and latitude system, the unit of `distance` is degrees instead of meters or miles. To change the unit, transform the coordinate reference system to a meter-based one; see [ST_Transform](Function.md#st_transform). If you don't want to transform your data and can accept lower query accuracy, you can use an approximate degree value for the distance, estimated with [this calculator](https://lucidar.me/en/online-unit-converter-length-to-angle/convert-degrees-to-meters/#online-converter).
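As a hedged sketch of the transform-first approach (the table name `pointdf`, the column `geom`, and the target CRS `epsg:3857` are illustrative assumptions, not part of the Sedona docs):

```sql
-- Reproject geometries from WGS84 degrees to a meter-based CRS (EPSG:3857)
-- so that the distance argument of a distance join can be given in meters.
SELECT id, ST_Transform(geom, 'epsg:4326', 'epsg:3857') AS geom_m
FROM pointdf
```

After reprojecting both sides this way, a distance value such as `1000` in the join condition is interpreted as 1000 meters.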

## Broadcast index join

@@ -127,9 +128,9 @@ Note: If the distance is an expression, it is only evaluated on the first argument

When one table involved in a spatial join query is smaller than a threshold, Sedona will automatically choose broadcast index join instead of Sedona optimized join. The current threshold is controlled by [sedona.join.autoBroadcastJoinThreshold](../Parameter) and defaults to the same value as `spark.sql.autoBroadcastJoinThreshold`.
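A minimal sketch of adjusting this threshold in Spark SQL (the 10 MB value is purely illustrative, not a recommendation; you can equally set the key through your Spark session config):

```sql
-- Broadcast any side of a spatial join smaller than ~10 MB (illustrative value, in bytes)
SET sedona.join.autoBroadcastJoinThreshold = 10485760
```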

## Google S2 based equi-join
## Google S2 based approximate equi-join

If the performance of Sedona optimized join is not ideal, which is possibly caused by complicated and overlapping geometries, you can resort to Sedona built-in Google S2-based equi-join. This equi-join leverages Spark's internal equi-join algorithm and might be performant in some cases given that the refinement step is optional.
If the performance of Sedona optimized join is not ideal, which is possibly caused by complicated and overlapping geometries, you can resort to Sedona's built-in Google S2-based approximate equi-join. This equi-join leverages Spark's internal equi-join algorithm and can be performant because you can opt to skip the refinement step at the cost of some query accuracy.

Please use the following steps:

Expand Down Expand Up @@ -161,14 +162,16 @@ FROM lcs JOIN rcs ON lcs.cellId = rcs.cellId

Due to the nature of S2 cell IDs, the equi-join results might have a few false positives depending on the S2 level you choose. A smaller level indicates bigger cells and fewer exploded rows, but more false positives.

To ensure the correctness, you can use [Spatial Predicate](../Predicate/) to filter out them.
To ensure correctness, you can use one of the [Spatial Predicates](../Predicate/) to filter them out. Use this query instead of the query in Step 2.

```sql
SELECT *
FROM joinresult
WHERE ST_Contains(lcs.geom, rcs.geom)
SELECT lcs.id as lcs_id, lcs.geom as lcs_geom, lcs.name as lcs_name, rcs.id as rcs_id, rcs.geom as rcs_geom, rcs.name as rcs_name
FROM lcs, rcs
WHERE lcs.cellId = rcs.cellId AND ST_Contains(lcs.geom, rcs.geom)
```

As you can see, compared to the query in Step 2, we added one more filter, `ST_Contains`, to remove false positives. You can also use `ST_Intersects` or other spatial predicates.

!!!tip
You can skip this step if you don't need 100% accuracy and want faster query speed.

@@ -195,6 +198,18 @@ GROUP BY (lcs_geom, rcs_geom)
!!!note
If you are doing point-in-polygon join, this is not a problem and you can safely discard this issue. This issue only happens when you do polygon-polygon, polygon-linestring, linestring-linestring join.

### S2 for distance join

This also works for distance joins. First use `ST_Buffer(geometry, distance)` to wrap one of your original geometry columns. If that column contains points, `ST_Buffer` turns them into circles with a radius of `distance`.

For example, run this query on the left table before Step 1.

```sql
SELECT id, ST_Buffer(geom, DISTANCE), name
FROM lefts
```

Since the coordinates are in the longitude and latitude system, the unit of `distance` should be degrees instead of meters or miles. You will have to estimate the corresponding degree value based on your meter values. Please use [this calculator](https://lucidar.me/en/online-unit-converter-length-to-angle/convert-degrees-to-meters/#online-converter).
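If you prefer to estimate the degree value in code rather than with the linked calculator, here is a rough stdlib-only sketch (the ~111,320 m per degree figure and the cosine scaling are standard WGS84 approximations, not Sedona APIs):

```python
import math

def meters_to_degrees(meters: float, latitude_deg: float = 0.0) -> float:
    """Roughly convert a ground distance in meters to degrees of longitude
    at the given latitude (WGS84; about 111,320 m per degree at the equator)."""
    meters_per_degree = 111_320.0 * math.cos(math.radians(latitude_deg))
    return meters / meters_per_degree

# A 1 km buffer near the equator is roughly 0.009 degrees
print(round(meters_to_degrees(1000.0), 4))
```

The result grows with latitude, so for a conservative buffer use the highest latitude appearing in your data.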

## Regular spatial predicate pushdown
Introduction: Given a join query and a predicate in the same WHERE clause, Sedona first executes the predicate as a filter, then executes the join query.
3 changes: 2 additions & 1 deletion docs/community/publish.md
@@ -413,6 +413,8 @@ rm *.asc

You must have the maintainer privilege of `https://pypi.org/project/apache-sedona/` and `https://www.npmjs.com/package/apache-sedona`

To publish Sedona Python packages, you have to use GitHub Actions since we release wheels for different platforms. Please use this repo: https://github.com/jiayuasu/sedona-publish-python

```bash
#!/bin/bash

@@ -423,7 +425,6 @@ cp -r sedona-{{ sedona_create_release.current_git_tag}}/* apache-sedona-{{ sedona_create_release.current_version }}-src

rm -rf sedona-{{ sedona_create_release.current_git_tag}}

cd apache-sedona-{{ sedona_create_release.current_version }}-src/python && python3 setup.py sdist bdist_wheel && twine upload dist/* && cd ..
cd zeppelin && npm publish && cd ..
rm -rf apache-sedona-{{ sedona_create_release.current_version }}-src
```
14 changes: 7 additions & 7 deletions docs/download.md
@@ -6,27 +6,27 @@ Old GeoSpark releases: [GitHub releases](https://github.com/apache/sedona/releases)

Automatically generated binary JARs (per each Master branch commit): [GitHub Action](https://github.com/apache/sedona/actions/workflows/java.yml)

## Verify the integrity
## Verify the integrity

[Public keys](https://downloads.apache.org/sedona/KEYS)

[Instructions](https://www.apache.org/info/verification.html)

## Versions

### 1.3.1-incubating
### 1.4.0

| | Download from ASF | Checksum | Signature |
|:-----------------:|:--------:|:--------:|:---------:|
| Source code | [src](https://www.apache.org/dyn/closer.lua/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-src.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-src.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-src.tar.gz.asc) |
| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-bin.tar.gz.asc)
| Source code | [src](https://www.apache.org/dyn/closer.lua/sedona/1.4.0/apache-sedona-1.4.0-src.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.4.0/apache-sedona-1.4.0-src.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.4.0/apache-sedona-1.4.0-src.tar.gz.asc) |
| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.4.0/apache-sedona-1.4.0-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.4.0/apache-sedona-1.4.0-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.4.0/apache-sedona-1.4.0-bin.tar.gz.asc)

### 1.2.1-incubating
### 1.3.1-incubating

| | Download from ASF | Checksum | Signature |
|:-----------------:|:--------:|:--------:|:---------:|
| Source code | [src](https://www.apache.org/dyn/closer.lua/sedona/1.2.1-incubating/apache-sedona-1.2.1-incubating-src.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.2.1-incubating/apache-sedona-1.2.1-incubating-src.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.2.1-incubating/apache-sedona-1.2.1-incubating-src.tar.gz.asc) |
| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.2.1-incubating/apache-sedona-1.2.1-incubating-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.2.1-incubating/apache-sedona-1.2.1-incubating-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.2.1-incubating/apache-sedona-1.2.1-incubating-bin.tar.gz.asc)
| Source code | [src](https://www.apache.org/dyn/closer.lua/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-src.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-src.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-src.tar.gz.asc) |
| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.3.1-incubating/apache-sedona-1.3.1-incubating-bin.tar.gz.asc)

### Past releases

5 changes: 2 additions & 3 deletions docs/index.md
@@ -1,6 +1,5 @@
### 03/19/2023: Sedona 1.4.0 released. It provides GeoParquet filter pushdown (10X less memory footprint), faster serialization (3X speed), S2-based fast approximate join, and enhanced R language support.
### 01/2023: Apache Sedona graduated to an Apache Top Level Project!
### 12/23/2022: Sedona 1.3.1-incubating is released. It adds native support of GeoParquet, DataFrame style API, Scala 2.13, Python 3.10, spatial aggregation on Flink. Please check Sedona release notes.
### 08/30/2022: Sedona 1.2.1-incubating is released. It supports Spark 2.4 - 3.3. and Flink 1.12+.
### 04/16/2022: Sedona 1.2.0-incubating is released. Sedona now supports geospatial stream processing in Apache Flink.
### 11/23/2021: Sedona 1.1.1-incubating is released. It now supports Spark 3.2.
### 10/06/2021: Sedona 1.1.0-incubating is released. R lang API is available on CRAN. Raster data and map algebra SQL functions are now supported.
### 04/16/2022: Sedona 1.2.0-incubating is released. Sedona now supports geospatial stream processing in Apache Flink.
6 changes: 5 additions & 1 deletion docs/setup/release-notes.md
@@ -1,14 +1,18 @@
!!!warning
    Support of Spark 2.X and Scala 2.11 was removed in Sedona 1.3.0+, although some parts of the source code might still be compatible. Sedona 1.3.0+ releases binaries for both Scala 2.12 and 2.13.

!!!danger
    Sedona Python currently only works with Shapely 1.x. If you use GeoPandas, please use GeoPandas <= `0.11.1`; GeoPandas > 0.11.1 will automatically install Shapely 2.0. If you use Shapely directly, please use version <= `1.8.4`.

## Sedona 1.4.0

Sedona 1.4.0 is compiled against Spark 3.3 / Flink 1.12 with Java 8.

### Highlights

* [X] **Sedona Spark** Pushdown spatial predicate on GeoParquet to reduce memory consumption by 10X: see [explanation](../../api/sql/Optimizer/#geoparquet)
* [X] **Sedona Spark & Flink** Serialize and deserialize geometries 3 - 7X faster
* [X] **Sedona Spark & Flink** Google S2 based spatial join for fast approximate point-in-polygon join. See [Join query in Spark](../../api/sql/Optimizer/#google-s2-based-approximate-equi-join) and [Join query in Flink](../../tutorial/flink/sql/#join-query)
* [X] **Sedona Spark** Pushdown spatial predicate on GeoParquet to reduce memory consumption by 10X: see [explanation](../../api/sql/Optimizer/#geoparquet)
* [X] **Sedona Spark** Automatically use broadcast index spatial join for small datasets
* [X] **Sedona Spark** New RasterUDT added to Sedona GeoTiff reader.
* [X] **Sedona Spark** A number of bug fixes and improvements to the Sedona R module.