From 0abda2d99621e68a4828391fa9d1ca36f1285af7 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Fri, 11 Aug 2023 01:33:18 -0700 Subject: [PATCH] Add the official Docker image --- binder/Sedona_OvertureMaps_GeoParquet.ipynb | 369 ++++++++++++++++++++ docker/sedona-spark-jupyterlab/build.sh | 5 +- docs/setup/docker.md | 21 +- 3 files changed, 384 insertions(+), 11 deletions(-) create mode 100644 binder/Sedona_OvertureMaps_GeoParquet.ipynb diff --git a/binder/Sedona_OvertureMaps_GeoParquet.ipynb b/binder/Sedona_OvertureMaps_GeoParquet.ipynb new file mode 100644 index 0000000000..b47d716a7d --- /dev/null +++ b/binder/Sedona_OvertureMaps_GeoParquet.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "92984a1c", + "metadata": {}, + "outputs": [], + "source": [ + "from sedona.spark import *\n", + "import os\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4392353", + "metadata": {}, + "outputs": [], + "source": [ + "DATA_LINK = \"s3a://wherobots-public-data/overturemaps-us-west-2/release/2023-07-26-alpha.0/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08c71acb", + "metadata": {}, + "outputs": [], + "source": [ + "config = SedonaContext.builder() .\\\n", + " config(\"spark.hadoop.fs.s3a.aws.credentials.provider\", \"org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider\"). \\\n", + " config(\"fs.s3a.aws.credentials.provider\", \"org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider\"). \\\n", + " config('spark.jars.packages',\n", + " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'\n", + " 'org.datasyslab:geotools-wrapper:1.4.0-28.2'). 
\\\n", + " getOrCreate()\n", + "\n", + "sedona = SedonaContext.create(config)" + ] + }, + { + "cell_type": "markdown", + "id": "8f3340ee", + "metadata": {}, + "source": [ + "# State Boundary" + ] + }, + { + "cell_type": "markdown", + "id": "8b1b506c", + "metadata": {}, + "source": [ + "### Pick a state.\n", + "#### Washington is selected\n", + "\n", + "[Click here for boundaries of other states](https://gist.github.com/JoshuaCarroll/49630cbeeb254a49986e939a26672e9c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "045f24b3", + "metadata": {}, + "outputs": [], + "source": [ + "# Washington state boundary\n", + "#spatial_filter = \"POLYGON((-123.3208 49.0023,-123.0338 49.0027,-122.0650 49.0018,-121.7491 48.9973,-121.5912 48.9991,-119.6082 49.0009,-118.0378 49.0005,-117.0319 48.9996,-117.0415 47.9614,-117.0394 46.5060,-117.0394 46.4274,-117.0621 46.3498,-117.0277 46.3384,-116.9879 46.2848,-116.9577 46.2388,-116.9659 46.2022,-116.9254 46.1722,-116.9357 46.1432,-116.9584 46.1009,-116.9762 46.0785,-116.9433 46.0537,-116.9165 45.9960,-118.0330 46.0008,-118.9867 45.9998,-119.1302 45.9320,-119.1708 45.9278,-119.2559 45.9402,-119.3047 45.9354,-119.3644 45.9220,-119.4386 45.9172,-119.4894 45.9067,-119.5724 45.9249,-119.6013 45.9196,-119.6700 45.8565,-119.8052 45.8479,-119.9096 45.8278,-119.9652 45.8245,-120.0710 45.7852,-120.1705 45.7623,-120.2110 45.7258,-120.3628 45.7057,-120.4829 45.6951,-120.5942 45.7469,-120.6340 45.7460,-120.6924 45.7143,-120.8558 45.6721,-120.9142 45.6409,-120.9471 45.6572,-120.9787 45.6419,-121.0645 45.6529,-121.1469 45.6078,-121.1847 45.6083,-121.2177 45.6721,-121.3392 45.7057,-121.4010 45.6932,-121.5328 45.7263,-121.6145 45.7091,-121.7361 45.6947,-121.8095 45.7067,-121.9338 45.6452,-122.0451 45.6088,-122.1089 45.5833,-122.1426 45.5838,-122.2009 45.5660,-122.2641 45.5439,-122.3321 45.5482,-122.3795 45.5756,-122.4392 45.5636,-122.5676 45.6006,-122.6891 45.6236,-122.7647 45.6582,-122.7750 45.6817,-122.7619 45.7613,-122.7962 
45.8106,-122.7839 45.8642,-122.8114 45.9120,-122.8148 45.9612,-122.8587 46.0160,-122.8848 46.0604,-122.9034 46.0832,-122.9597 46.1028,-123.0579 46.1556,-123.1210 46.1865,-123.1664 46.1893,-123.2810 46.1446,-123.3703 46.1470,-123.4314 46.1822,-123.4287 46.2293,-123.4946 46.2691,-123.5557 46.2582,-123.6209 46.2573,-123.6875 46.2497,-123.7404 46.2691,-123.8729 46.2350,-123.9292 46.2383,-123.9711 46.2677,-124.0212 46.2924,-124.0329 46.2653,-124.2444 46.2596,-124.2691 46.4312,-124.3529 46.8386,-124.4380 47.1832,-124.5616 47.4689,-124.7566 47.8012,-124.8679 48.0423,-124.8679 48.2457,-124.8486 48.3727,-124.7539 48.4984,-124.4174 48.4096,-124.2389 48.3599,-124.0116 48.2964,-123.9141 48.2795,-123.5413 48.2247,-123.3998 48.2539,-123.2501 48.2841,-123.1169 48.4233,-123.1609 48.4533,-123.2220 48.5548,-123.2336 48.5902,-123.2721 48.6901,-123.0084 48.7675,-123.0084 48.8313,-123.3215 49.0023,-123.3208 49.0023))\"\n", + "\n", + "# Bellevue city boundary\n", + "spatial_filter = \"POLYGON ((-122.235128 47.650163, -122.233796 47.65162, -122.231581 47.653287, -122.228514 47.65482, -122.227526 47.655204, -122.226175 47.655729, -122.222039 47.656743999999996, -122.218428 47.657464, -122.217026 47.657506, -122.21437399999999 47.657588, -122.212091 47.657464, -122.212135 47.657320999999996, -122.21092999999999 47.653552, -122.209834 47.650121, -122.209559 47.648976, -122.209642 47.648886, -122.21042 47.648658999999995, -122.210897 47.64864, -122.211005 47.648373, -122.21103099999999 47.648320999999996, -122.211992 47.64644, -122.212457 47.646426, -122.212469 47.646392, -122.212469 47.646088999999996, -122.212471 47.645213, -122.213115 47.645212, -122.213123 47.644576, -122.21352999999999 47.644576, -122.213768 47.644560999999996, -122.21382 47.644560999999996, -122.21382 47.644456999999996, -122.21373299999999 47.644455, -122.213748 47.643102999999996, -122.213751 47.642790999999995, -122.213753 47.642716, -122.213702 47.642697999999996, -122.213679 47.642689999999995, -122.21364 
47.642678, -122.213198 47.642541, -122.213065 47.642500000000005, -122.212918 47.642466, -122.21275 47.642441, -122.212656 47.642433, -122.21253899999999 47.642429, -122.212394 47.64243, -122.212182 47.642444999999995, -122.211957 47.642488, -122.211724 47.642551999999995, -122.21143599999999 47.642647, -122.210906 47.642834, -122.210216 47.643099, -122.209858 47.643215, -122.20973000000001 47.643248, -122.20973599999999 47.643105, -122.209267 47.643217, -122.208832 47.643302, -122.208391 47.643347999999996, -122.207797 47.643414, -122.207476 47.643418, -122.20701199999999 47.643397, -122.206795 47.643387999999995, -122.205742 47.643246, -122.20549 47.643201999999995, -122.20500200000001 47.643119, -122.204802 47.643085, -122.204641 47.643066, -122.204145 47.643012, -122.203547 47.643012, -122.203097 47.643107, -122.20275699999999 47.643283, -122.202507 47.643496999999996, -122.202399 47.643653, -122.202111 47.643771, -122.201668 47.643767, -122.201363 47.643665, -122.20133 47.643648999999996, -122.201096 47.643536, -122.200744 47.64328, -122.200568 47.64309, -122.200391 47.642849, -122.200162 47.642539, -122.199896 47.642500000000005, -122.19980799999999 47.642424, -122.199755 47.642376999999996, -122.199558 47.642227999999996, -122.199439 47.642157, -122.199293 47.642078999999995, -122.199131 47.642004, -122.198928 47.641925, -122.19883 47.641892, -122.19856300000001 47.641811999999994, -122.198203 47.641731, -122.197662 47.641619999999996, -122.196819 47.641436, -122.196294 47.641309, -122.196294 47.642314, -122.19628 47.642855, -122.196282 47.642897999999995, -122.196281 47.643111, -122.196283 47.643415, -122.196283 47.643508999999995, -122.19628399999999 47.643739, -122.196287 47.644203999999995, -122.196287 47.644262999999995, -122.19629 47.644937999999996, -122.19629 47.644954999999996, -122.196292 47.645271, -122.196291 47.645426, -122.19629499999999 47.646315, -122.19629499999999 47.646432, -122.195925 47.646432, -122.195251 47.646432, -122.190853 
47.646429999999995, -122.187649 47.646428, -122.187164 47.646426, -122.18683 47.646426, -122.185547 47.646409, -122.185546 47.646316, -122.185537 47.645599, -122.185544 47.644197, -122.185537 47.643294999999995, -122.185544 47.642733, -122.185541 47.641757, -122.185555 47.640681, -122.185561 47.63972, -122.185557 47.638228999999995, -122.185591 47.635419, -122.185611 47.634750999999994, -122.18562299999999 47.634484, -122.18561700000001 47.634375999999996, -122.185592 47.634311, -122.185549 47.634232999999995, -122.185504 47.634181999999996, -122.185426 47.634119, -122.184371 47.633424999999995, -122.18400000000001 47.633198, -122.183896 47.633134, -122.1838 47.633067, -122.18375499999999 47.633019999999995, -122.183724 47.632959, -122.183695 47.632858, -122.183702 47.632675, -122.182757 47.632622999999995, -122.182365 47.63259, -122.18220600000001 47.632562, -122.181984 47.632504999999995, -122.18163799999999 47.632363, -122.18142 47.632262999999995, -122.181229 47.632165, -122.181612 47.632172999999995, -122.18271899999999 47.632151, -122.183138 47.632135, -122.18440000000001 47.632081, -122.184743 47.632065999999995, -122.185312 47.63205, -122.185624 47.632047, -122.185625 47.631873999999996, -122.184618 47.63187, -122.184291 47.631878, -122.184278 47.631817999999996, -122.183882 47.629942, -122.182689 47.623548, -122.182594 47.622789999999995, -122.182654 47.622155, -122.183135 47.622372999999996, -122.183471 47.622506, -122.18360200000001 47.622552, -122.183893 47.622637999999995, -122.184244 47.62272, -122.184618 47.622777, -122.184741 47.622727999999995, -122.184605 47.622679, -122.18424 47.622622, -122.183985 47.622569, -122.183717 47.622501, -122.183506 47.622439, -122.18327 47.622357, -122.18305699999999 47.622271999999995, -122.182669 47.622088999999995, -122.182796 47.621545, -122.18347 47.619628999999996, -122.18365 47.619098, -122.183859 47.6184, -122.183922 47.617793999999996, -122.183956 47.617292, -122.183792 47.616388, -122.183261 
47.614391999999995, -122.183202 47.613802, -122.183209 47.613155, -122.183436 47.612384999999996, -122.18395100000001 47.610445999999996, -122.184338 47.60924, -122.184657 47.609116, -122.18481 47.609051, -122.18491900000001 47.608987, -122.184974 47.608942, -122.185047 47.608846, -122.185082 47.608743999999994, -122.185109 47.608526999999995, -122.185116 47.608359, -122.18513 47.608315999999995, -122.185157 47.608273999999994, -122.185183 47.608247, -122.185246 47.608214, -122.185354 47.608196, -122.185475 47.608191999999995, -122.185472 47.606697, -122.185472 47.606373999999995, -122.185521 47.606272, -122.185528 47.606210999999995, -122.185506 47.606037, -122.185451 47.605872999999995, -122.185411 47.605781, -122.185358 47.605681999999995, -122.185248 47.605509999999995, -122.185127 47.605365, -122.185058 47.605292, -122.184772 47.605038, -122.184428 47.604834, -122.184122 47.604693999999995, -122.183775 47.604574, -122.183644 47.604546, -122.183708 47.604400999999996, -122.183749 47.604223999999995, -122.18376 47.604037, -122.183707 47.603778, -122.183619 47.603556999999995, -122.183559 47.603406, -122.183488 47.603303, -122.183824 47.603167, -122.184108 47.603052, -122.184478 47.602902, -122.18543 47.602495, -122.186669 47.601957, -122.186433 47.601220999999995, -122.186341 47.601127999999996, -122.18874199999999 47.593742999999996, -122.188434 47.592338999999996, -122.188479 47.591786, -122.188217 47.591269999999994, -122.18795399999999 47.590871, -122.186822 47.589228, -122.187421 47.589228999999996, -122.18848299999999 47.589228999999996, -122.188433 47.587922999999996, -122.18990000000001 47.588547, -122.191368 47.589169999999996, -122.19158 47.589222, -122.191779 47.589254999999994, -122.192117 47.589289, -122.191569 47.587478999999995, -122.191323 47.586628999999995, -122.191295 47.586554, -122.191268 47.586479, -122.191192 47.586318, -122.191163 47.586268999999994, -122.1911 47.586164, -122.19099 47.586011, -122.19067 47.585668999999996, -122.1905 
47.585515, -122.190301 47.58531, -122.190143 47.585152, -122.189573 47.584576999999996, -122.188702 47.583735999999995, -122.188646 47.583679, -122.188239 47.583258, -122.188037 47.583005, -122.187832 47.582657, -122.187726 47.582164999999996, -122.18769499999999 47.581964, -122.18768299999999 47.581781, -122.187678 47.581592, -122.18766099999999 47.581455, -122.187674 47.581311, -122.18768 47.581146, -122.187722 47.580877, -122.187817 47.580569999999994, -122.187932 47.580301999999996, -122.188047 47.580087, -122.188161 47.579933999999994, -122.188399 47.579660999999994, -122.18851699999999 47.579547, -122.188621 47.579454, -122.188042 47.579493, -122.18762 47.579527, -122.187806 47.579358, -122.188009 47.579175, -122.18814499999999 47.579051, -122.188177 47.579021, -122.18842000000001 47.5788, -122.188638 47.578461, -122.188895 47.57806, -122.189791 47.577281, -122.190008 47.577103, -122.190372 47.576805, -122.19119 47.576358, -122.191877 47.576087, -122.193025 47.57566, -122.194317 47.575185999999995, -122.196061 47.574664, -122.197239 47.574386999999994, -122.197873 47.574267, -122.198286 47.574189999999994, -122.199091 47.574044, -122.199067 47.574574999999996, -122.199007 47.575921, -122.200335 47.578222, -122.20057299999999 47.578345999999996, -122.2009 47.578517999999995, -122.201095 47.578621999999996, -122.20138399999999 47.578776999999995, -122.201465 47.57882, -122.201516 47.578846999999996, -122.205753 47.581112, -122.209515 47.583124, -122.210634 47.583721, -122.21473399999999 47.587021, -122.21538699999999 47.588254, -122.21580399999999 47.589042, -122.216534 47.590421, -122.220092 47.596261, -122.220434 47.596821, -122.22041899999999 47.597837999999996, -122.220289 47.606455, -122.220234 47.610121, -122.22048 47.615221999999996, -122.220359 47.615379, -122.220283 47.615477999999996, -122.21999 47.615854999999996, -122.219993 47.61597, -122.22023300000001 47.616634, -122.220356 47.616687999999996, -122.220409 47.616712, -122.221401 47.618538, 
-122.22142 47.618573, -122.221456 47.618635, -122.221791 47.619222, -122.222492 47.619682999999995, -122.222799 47.619886, -122.222083 47.620368, -122.222046 47.620407, -122.222028 47.620449, -122.222025 47.620483, -122.22203999999999 47.620523999999996, -122.222079 47.620557999999996, -122.222156 47.620594999999994, -122.222458 47.620629, -122.222454 47.620673, -122.222454 47.620711, -122.22244599999999 47.621041999999996, -122.223056 47.621041, -122.223129 47.62104, -122.223153 47.62104, -122.223574 47.621041, -122.22377900000001 47.621041, -122.223857 47.621041, -122.22467499999999 47.621041, -122.224712 47.62104, -122.224958 47.62104, -122.225167 47.621049, -122.226882 47.621037, -122.227565 47.621032, -122.228002 47.621029, -122.22797800000001 47.621300999999995, -122.227919 47.626574999999995, -122.227914 47.627085, -122.227901 47.6283, -122.227881 47.630069, -122.227869 47.631177, -122.227879 47.631952999999996, -122.22789 47.633879, -122.227886 47.63409, -122.227871 47.635534, -122.227918 47.635565, -122.228953 47.635624, -122.22895199999999 47.635571999999996, -122.231018 47.635574999999996, -122.233276 47.635588999999996, -122.233287 47.63617, -122.233273 47.63639, -122.233272 47.636469999999996, -122.23327 47.636578, -122.233266 47.636827, -122.233263 47.636851, -122.233262 47.637014, -122.23322999999999 47.638110999999995, -122.233239 47.638219, -122.233262 47.638279, -122.233313 47.638324999999995, -122.233255 47.638359, -122.233218 47.638380999999995, -122.233153 47.638450999999996, -122.233136 47.638552999999995, -122.233137 47.638692, -122.232715 47.639348999999996, -122.232659 47.640093, -122.232704 47.641375, -122.233821 47.645111, -122.234906 47.648874, -122.234924 47.648938, -122.235128 47.650163))\"" + ] + }, + { + "cell_type": "markdown", + "id": "63033ca3", + "metadata": {}, + "source": [ + "### Visualizing Overture Maps\n", + "\n", + "Explanation to the each step is similar accross the different datasets. 
[Click here to learn more about Overture Maps.](https://docs.overturemaps.org)\n", + "\n", + "1. `df = sedona.read.format(\"geoparquet\").load(DATA_LINK+\"theme=XX/type=YY\")`\n", + "\n", + " It reads the dataset identified by theme and type, which is stored in GeoParquet format.\n", + "\n", + "\n", + "2. `df = df.filter(\"ST_Contains(ST_GeomFromWKT('\"+spatial_filter+\"'), geometry) = true\")`\n", + " \n", + " This filters out all the data that is not inside the boundary given by the `spatial_filter` WKT string. Please select a state or city boundary as you wish.\n", + " \n", + " ST_GeomFromWKT() - constructs a geometry from WKT (Well Known Text)\n", + " \n", + " ST_Contains(A, B) - checks if A fully contains B and returns True\n", + "\n", + "3. `XX_geom = df.selectExpr(\"geometry\")`\n", + "\n", + " Storing geometry column for SedonaKepler.\n", + " \n", + "\n", + "4. `map = SedonaKepler.create_map(XX_geom, 'XX')`\n", + "\n", + " Creating a map object using SedonaKepler from the geometry column and the name of the dataset.\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "id": "31e81f8b", + "metadata": {}, + "source": [ + "# Building Dataset " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0c55157", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "df_building = sedona.read.format(\"geoparquet\").load(DATA_LINK+\"theme=buildings/type=building\")\n", + "\n", + "df_building = df_building.filter(\"ST_Contains(ST_GeomFromWKT('\"+spatial_filter+\"'), geometry) = true\")\n", + "\n", + "df_building = df_building.limit(200_000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb62f16e", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "map_building = SedonaKepler.create_map(df_building, 'Building')\n", + "\n", + "map_building" + ] + }, + { + "cell_type": "markdown", + "id": "9d2cd7e5", + "metadata": {}, + "source": [ + "# Place Dataset " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"a6443d1d", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "df_place = sedona.read.format(\"geoparquet\").load(DATA_LINK+\"theme=places/type=place\")\n", + "\n", + "df_place = df_place.filter(\"ST_Contains(ST_GeomFromWKT('\"+spatial_filter+\"'), geometry) = true\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f826cb8", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "map_place = SedonaKepler.create_map(df_place, \"Place\")\n", + "\n", + "map_place" + ] + }, + { + "cell_type": "markdown", + "id": "c0809afb", + "metadata": {}, + "source": [ + "# Admins Theme Datasets" + ] + }, + { + "cell_type": "markdown", + "id": "930b6c81", + "metadata": {}, + "source": [ + "## Administrative Boundary Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b9d6296", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "df_admin = sedona.read.format(\"geoparquet\").load(DATA_LINK+\"theme=admins/type=administrativeBoundary\")\n", + "\n", + "df_admin = df_admin.filter(\"ST_Contains(ST_GeomFromWKT('\"+spatial_filter+\"'), geometry) = true\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ce9597a", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "map_admin = SedonaKepler.create_map(df_admin, \"Admin\")\n", + "\n", + "map_admin" + ] + }, + { + "cell_type": "markdown", + "id": "0acad0d7", + "metadata": {}, + "source": [ + "## Locality Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8541ada7", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "df_locality = sedona.read.format(\"geoparquet\").load(DATA_LINK+\"theme=admins/type=locality\")\n", + "\n", + "df_locality = df_locality.filter(\"ST_Contains(ST_GeomFromWKT('\"+spatial_filter+\"'), geometry) = true\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c5c3bf3", + "metadata": {}, + 
"outputs": [], + "source": [ + "%%time\n", + "\n", + "map_locality = SedonaKepler.create_map(df_locality, 'Locality')\n", + "\n", + "map_locality" + ] + }, + { + "cell_type": "markdown", + "id": "328ffd56", + "metadata": {}, + "source": [ + "# Transportation Theme Datasets" + ] + }, + { + "cell_type": "markdown", + "id": "0b914f57", + "metadata": {}, + "source": [ + "## Connector Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "224167dc", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "df_connector = sedona.read.format(\"geoparquet\").load(DATA_LINK+\"theme=transportation/type=connector\")\n", + "\n", + "df_connector = df_connector.filter(\"ST_Contains(ST_GeomFromWKT('\"+spatial_filter+\"'), geometry) = true\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5f8de53", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "map_connector = SedonaKepler.create_map(df_connector, \"Connector\")\n", + "\n", + "map_connector" + ] + }, + { + "cell_type": "markdown", + "id": "b1e7ab2d", + "metadata": {}, + "source": [ + "## Segment Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba8eff7c", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "df_segment = sedona.read.format(\"geoparquet\").load(DATA_LINK+\"theme=transportation/type=segment\")\n", + "\n", + "df_segment = df_segment.filter(\"ST_Contains(ST_GeomFromWKT('\"+spatial_filter+\"'), geometry) = true\")\n", + "\n", + "df_segment = df_segment.limit(200000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9bf0862", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "map_segment = SedonaKepler.create_map(df_segment, \"Segment\")\n", + "\n", + "map_segment" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docker/sedona-spark-jupyterlab/build.sh b/docker/sedona-spark-jupyterlab/build.sh index 4933aa07bd..aa45516f18 100755 --- a/docker/sedona-spark-jupyterlab/build.sh +++ b/docker/sedona-spark-jupyterlab/build.sh @@ -41,14 +41,15 @@ if [ -z "$BUILD_MODE" ] || [ "$BUILD_MODE" = "local" ]; then --build-arg spark_version="${SPARK_VERSION}" \ --build-arg sedona_version="${SEDONA_VERSION}" \ -f docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile \ - -t sedona/sedona-jupyterlab:${SEDONA_VERSION} . + -t apache/sedona:${SEDONA_VERSION} . else # If release, build the image for cross-platform docker buildx build --platform linux/amd64,linux/arm64 \ --progress=plain \ + --no-cache \ --output type=registry \ --build-arg spark_version="${SPARK_VERSION}" \ --build-arg sedona_version="${SEDONA_VERSION}" \ -f docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile \ - -t drjiayu/sedona-jupyterlab:${SEDONA_VERSION} . + -t apache/sedona:${SEDONA_VERSION} . fi \ No newline at end of file diff --git a/docs/setup/docker.md b/docs/setup/docker.md index 21fb87aa6e..59bf9198ea 100644 --- a/docs/setup/docker.md +++ b/docs/setup/docker.md @@ -1,6 +1,8 @@ # Sedona JupyterLab Docker Image -Dockerfiles for Apache Sedona with JupyterLab and 1 master node and 1 worker node +Sedona Docker images are available on [Sedona official DockerHub repo](https://hub.docker.com/r/apache/sedona). + +We provide a Docker image for Apache Sedona with Python JupyterLab and 1 master node and 1 worker node. 
## How to use @@ -9,19 +11,19 @@ Dockerfiles for Apache Sedona with JupyterLab and 1 master node and 1 worker nod Format: ```bash -docker pull drjiayu/sedona-jupyterlab: +docker pull apache/sedona: ``` Example 1: Pull the latest image of Sedona master branch ```bash -docker pull drjiayu/sedona-jupyterlab:latest +docker pull apache/sedona:latest ``` Example 2: Pull the image of a specific Sedona release ```bash -docker pull drjiayu/sedona-jupyterlab:1.4.1 +docker pull apache/sedona:1.4.1 ``` ### Start the container @@ -29,19 +31,19 @@ docker pull drjiayu/sedona-jupyterlab:1.4.1 Format: ```bash -docker run -p 8888:8888 -p 8080:8080 -p 8081:8081 -p 4040:4040 drjiayu/sedona-jupyterlab: +docker run -p 8888:8888 -p 8080:8080 -p 8081:8081 -p 4040:4040 apache/sedona: ``` Example 1: ```bash -docker run -p 8888:8888 -p 8080:8080 -p 8081:8081 -p 4040:4040 drjiayu/sedona-jupyterlab:latest +docker run -p 8888:8888 -p 8080:8080 -p 8081:8081 -p 4040:4040 apache/sedona:latest ``` Example 2: ```bash -docker run -p 8888:8888 -p 8080:8080 -p 8081:8081 -p 4040:4040 drjiayu/sedona-jupyterlab:1.4.1 +docker run -p 8888:8888 -p 8080:8080 -p 8081:8081 -p 4040:4040 apache/sedona:1.4.1 ``` This command will bind the container's ports 8888, 8080, 8081, 4040 to the host's ports 8888, 8080, 8081, 4040 respectively. @@ -52,7 +54,7 @@ Open your browser and go to [http://localhost:8888/](http://localhost:8888/) to ### Notes -* This container assumes you have at least 8GB RAM and takes all your CPU cores and 8GM RAM. +* This container assumes you have at least 8GB RAM and takes all your CPU cores and 8GB RAM. The 1 worker will take 4GB and the Jupyter program will take the remaining 4GB. * Sedona in this container runs in the cluster mode. Only 1 notebook can be run at a time. If you want to run another notebook, please shut down the kernel of the current notebook first ([How?](https://jupyterlab.readthedocs.io/en/stable/user/running.html)). 
## How to build @@ -105,13 +107,14 @@ This docker image can only be built against Sedona 1.4.1+ and Spark 3.0+ * OS: Ubuntu 22.02 * JDK: openjdk-19 * Python: 3.10 +* Spark: 3.4.1 ### Web UI * JupyterLab: http://localhost:8888/ * Spark master URL: spark://localhost:7077 * Spark job UI: http://localhost:4040 * Spark master web UI: http://localhost:8080/ -* Spark web UI: http://localhost:8081/ +* Spark worker web UI: http://localhost:8081/ ## How to push to DockerHub