From 3f5fdf022fe62d8e6f4aab4eee81a23d4988cbfa Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Wed, 25 Oct 2023 12:55:23 +0100 Subject: [PATCH 1/4] Add clean_notebook to pre-commit --- .pre-commit-config.yaml | 4 ++ examples/tiling.ipynb | 51 ++++--------------- examples/user_guide/11_Geography.ipynb | 16 +----- .../user_guide/12_Inspection_Reductions.ipynb | 17 +------ examples/user_guide/2_Points.ipynb | 15 +----- examples/user_guide/7_Networks.ipynb | 7 --- 6 files changed, 17 insertions(+), 93 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3baa20da3..2213b6766 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,10 @@ repos: - id: codespell additional_dependencies: - tomli + - repo: https://github.com/hoxbro/clean_notebook + rev: v0.1.13 + hooks: + - id: clean-notebook ci: autofix_prs: false diff --git a/examples/tiling.ipynb b/examples/tiling.ipynb index 5f7bd282a..8959e8a3b 100644 --- a/examples/tiling.ipynb +++ b/examples/tiling.ipynb @@ -17,9 +17,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from datashader.tiles import render_tiles" @@ -73,9 +71,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", @@ -106,9 +102,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import datashader as ds\n", @@ -133,9 +127,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import datashader.transfer_functions as tf\n", @@ -160,9 +152,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from PIL import ImageDraw\n", @@ -184,9 +174,7 @@ { "cell_type": 
"code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "full_extent_of_data = (-500000, -500000, 500000, 500000)\n", @@ -224,9 +212,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from bokeh.plotting import figure\n", @@ -302,9 +288,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "full_extent_of_data = (int(-20e6), int(-20e6), int(20e6), int(20e6))\n", @@ -331,9 +315,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "xmin, ymin, xmax, ymax = full_extent_of_data\n", @@ -352,22 +334,9 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.4.5" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/examples/user_guide/11_Geography.ipynb b/examples/user_guide/11_Geography.ipynb index a1c7fe8bb..46a0a4590 100644 --- a/examples/user_guide/11_Geography.ipynb +++ b/examples/user_guide/11_Geography.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -22,22 +21,9 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff 
--git a/examples/user_guide/12_Inspection_Reductions.ipynb b/examples/user_guide/12_Inspection_Reductions.ipynb index 486866194..f8dbd5d8c 100644 --- a/examples/user_guide/12_Inspection_Reductions.ipynb +++ b/examples/user_guide/12_Inspection_Reductions.ipynb @@ -129,7 +129,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -228,7 +227,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -246,22 +244,9 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/examples/user_guide/2_Points.ipynb b/examples/user_guide/2_Points.ipynb index 2e7c832cd..f036a262f 100644 --- a/examples/user_guide/2_Points.ipynb +++ b/examples/user_guide/2_Points.ipynb @@ -19,22 +19,9 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/examples/user_guide/7_Networks.ipynb b/examples/user_guide/7_Networks.ipynb index d7a5dba98..bba50d865 100644 --- a/examples/user_guide/7_Networks.ipynb +++ b/examples/user_guide/7_Networks.ipynb @@ -180,13 +180,6 @@ "" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, From 5191b73e5dc23a6449cc9909dda1e456017bb9e3 Mon Sep 17 
00:00:00 2001 From: Ian Thomas Date: Wed, 25 Oct 2023 14:09:43 +0100 Subject: [PATCH 2/4] Add new geopandas page to user guide --- doc/user_guide/index.rst | 4 + examples/user_guide/13_Geopandas.ipynb | 189 +++++++++++++++++++++++++ examples/user_guide/8_Polygons.ipynb | 25 ++-- 3 files changed, 209 insertions(+), 9 deletions(-) create mode 100644 examples/user_guide/13_Geopandas.ipynb diff --git a/doc/user_guide/index.rst b/doc/user_guide/index.rst index 9172920a0..0ed09ea69 100644 --- a/doc/user_guide/index.rst +++ b/doc/user_guide/index.rst @@ -47,6 +47,9 @@ Contents: `12. Inspection Reductions `_ Using reduction to inspect rather than aggregate data. +`13. GeoPandas `_ +GeoPandas support in Datashader. + .. toctree:: :hidden: :maxdepth: 3 @@ -63,3 +66,4 @@ Contents: Performance Geography Inspection Reductions + GeoPandas diff --git a/examples/user_guide/13_Geopandas.ipynb b/examples/user_guide/13_Geopandas.ipynb new file mode 100644 index 000000000..59fd8f878 --- /dev/null +++ b/examples/user_guide/13_Geopandas.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From version 0.16 onwards Datashader supports rendering GeoPandas `GeoDataFrame`s directly rather than having to convert them to SpatialPandas first.\n", + "\n", + "Here is a demonstration using the \"geoda.natregimes\" dataset from `geodatasets`, which includes data on US counties." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import colorcet as cc\n", + "import datashader as ds\n", + "import datashader.transfer_functions as tf\n", + "import geopandas\n", + "from geodatasets import get_path" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First load the GeoPandas `GeoDataFrame`. The first time this is called it will download and cache the dataset, and subsequent calls will be faster as they will use the cached dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = geopandas.read_file(get_path(\"geoda.natregimes\"))\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The geometry type of this `GeoDataFrame` is POLYGON, and there are many columns. Columns that we will use are \"DNL90\" (log of population density in 1990) and \"UE90\" (unemployment rate in 1990)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Population\n", + "\n", + "To view 1990 population data using Datashader, first create a canvas of an appropriate size to render to." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "canvas = ds.Canvas(plot_width=800, plot_height=400)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The polygons are rasterized to the canvas using the `Canvas.polygons` method. This takes the source dataframe and name of the geometry column, plus an aggregator. Here we aggregate using the maximum of the population density column so that if there are multiple polygons touching a particular pixel it selects the maximum population density of those polygons." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agg = canvas.polygons(df, geometry=\"geometry\", agg=ds.max(\"DNL90\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we shade the aggregation with Colorcet's `fire` colormap, using histogram equalization so that the colors are applied nonlinearly for the most even use of the colormap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "im = tf.shade(agg, cmap=cc.fire, how=\"eq_hist\")\n", + "tf.set_background(im, \"black\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Counties with the lowest population density are rendered in black and dark red, and those with the highest population density are rendered in yellow and white." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unemployment\n", + "\n", + "The \"UE90\" column contains unemployment percentage per county in 1990." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"UE90\"], df[\"UE90\"].min(), df[\"UE90\"].max()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see from the `min` and `max` values that unemployment goes from zero to just over 30%.\n", + "\n", + "To rasterize this using Datashader it is recommended to use a monochromatic colormap such as `colorcet.blues` and apply this linearly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agg = canvas.polygons(df, geometry=\"geometry\", agg=ds.max(\"UE90\"))\n", + "im = tf.shade(agg, cmap=cc.blues, how=\"linear\")\n", + "tf.set_background(im, \"white\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lines\n", + "\n", + "Datashader can also render GeoPandas `GeoDataFrame`s as lines rather than polygons. Use the same code as in the population example above but replace `Canvas.polygons()` with `Canvas.line()` instead." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agg = canvas.line(df, geometry=\"geometry\", agg=ds.max(\"DNL90\"))\n", + "im = tf.shade(agg, cmap=cc.fire, how=\"eq_hist\")\n", + "tf.set_background(im, \"black\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Geometry type support\n", + "\n", + "The following table shows which geometry types are supported by which Datashader `Canvas` functions.\n", + "\n", + "|Canvas function |Supported geometry types |\n", + "|-----------------|--------------------------------------------------|\n", + "|`Canvas.line` |LineString, MultiLineString, MultiPolygon, Polygon|\n", + "|`Canvas.point` |MultiPoint, Point |\n", + "|`Canvas.polygons`|MultiPolygon, Polygon |" + ] + } + ], + "metadata": { + "language_info": { + "name": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/user_guide/8_Polygons.ipynb b/examples/user_guide/8_Polygons.ipynb index a431bc5d1..7cebad7dd 100644 --- a/examples/user_guide/8_Polygons.ipynb +++ b/examples/user_guide/8_Polygons.ipynb @@ -32,7 +32,7 @@ "import datashader.transfer_functions as tf\n", "import spatialpandas as sp\n", "import spatialpandas.geometry\n", - "import spatialpandas.dask " + "import spatialpandas.dask" ] }, { @@ -67,9 +67,9 @@ " [[[0, 0, 1, 0, 2, 2, -1, 4, 0, 0], # Filled quadrilateral (CCW order)\n", " [0.5, 1, 1, 2, 1.5, 1.5, 0.5, 1], # Triangular hole (CW order)\n", " [0, 2, 0, 2.5, 0.5, 2.5, 0.5, 2, 0, 2]], # Rectangular hole (CW order)\n", - " \n", + "\n", " [[-0.5, 3, 1.5, 3, 1.5, 4, -0.5, 3]],], # Filled triangle\n", - " \n", + "\n", " # Second Element\n", " [[[1.25, 0, 1.25, 2, 4, 2, 4, 0, 1.25, 0], # Filled rectangle (CCW order)\n", " [1.5, 0.25, 3.75, 0.25, 3.75, 1.75, 1.5, 1.75, 1.5, 0.25]],]]) # Rectangular hole (CW order)" @@ -176,31 +176,31 @@ "outputs": [], "source": [ "import bokeh.sampledata\n", - "try: \n", + 
"try:\n", " from bokeh.sampledata.us_counties import data # noqa\n", - "except: \n", + "except:\n", " bokeh.sampledata.download()\n", "from bokeh.sampledata.us_counties import data as counties\n", "from bokeh.sampledata.unemployment import data as unemployment\n", "\n", - "counties = { code: county for code, county in counties.items() \n", + "counties = { code: county for code, county in counties.items()\n", " if county[\"state\"] in [\"tx\"] }\n", "\n", - "county_boundaries = [[[*zip(county[\"lons\"] + county[\"lons\"][:1], \n", + "county_boundaries = [[[*zip(county[\"lons\"] + county[\"lons\"][:1],\n", " county[\"lats\"] + county[\"lats\"][:1])]\n", " for county in counties.values()]]\n", "\n", "county_rates = [unemployment[county_id] for county_id in counties]\n", "\n", "boundary_coords = [[np.concatenate(list(\n", - " zip(county[\"lons\"][::-1] + county[\"lons\"][-1:], \n", + " zip(county[\"lons\"][::-1] + county[\"lons\"][-1:],\n", " county[\"lats\"][::-1] + county[\"lats\"][-1:])\n", "))] for county in counties.values()]\n", "\n", "boundaries = sp.geometry.PolygonArray(boundary_coords)\n", "\n", "county_info = sp.GeoDataFrame({'boundary': boundaries,\n", - " 'unemployment': county_rates}) " + " 'unemployment': county_rates})" ] }, { @@ -299,6 +299,13 @@ "df_world.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since version 0.16, Datashader supports direct use of `geopandas` `GeoDataFrame`s without having to convert them to `spatialpandas`. See [GeoPandas](13_Geopandas.ipynb)." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, From 49dabd6cc24c84d9436aadb97072c4e690768e2d Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Wed, 25 Oct 2023 16:55:28 +0100 Subject: [PATCH 3/4] Section on GeoPandas vs SpatialPandas [skip ci] --- examples/user_guide/13_Geopandas.ipynb | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/examples/user_guide/13_Geopandas.ipynb b/examples/user_guide/13_Geopandas.ipynb index 59fd8f878..3124e6d20 100644 --- a/examples/user_guide/13_Geopandas.ipynb +++ b/examples/user_guide/13_Geopandas.ipynb @@ -166,7 +166,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Geometry type support\n", + "## " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Geometry type support\n", "\n", "The following table shows which geometry types are supported by which Datashader `Canvas` functions.\n", "\n", @@ -176,6 +183,15 @@ "|`Canvas.point` |MultiPoint, Point |\n", "|`Canvas.polygons`|MultiPolygon, Polygon |" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## GeoPandas or SpatialPandas?\n", + "\n", + "Datashader supports the same line, point and polygon rendering using GeoPandas and SpatialPandas, and produces the same output using either. They work in different ways such that SpatialPandas is usually faster for viewing large datasets and GeoPandas faster when zooming into a small region of a large dataset. The GeoPandas approach is more convenient if you already have your data in GeoPandas format and do not want the overhead of converting to SpatialPandas." 
+ ] } ], "metadata": { From 52729dcc428d0319c463427519841b6dddaf9774 Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Thu, 26 Oct 2023 08:56:49 +0100 Subject: [PATCH 4/4] Add antialiased line example --- examples/user_guide/13_Geopandas.ipynb | 31 +++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/examples/user_guide/13_Geopandas.ipynb b/examples/user_guide/13_Geopandas.ipynb index 3124e6d20..fd4425374 100644 --- a/examples/user_guide/13_Geopandas.ipynb +++ b/examples/user_guide/13_Geopandas.ipynb @@ -108,8 +108,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Unemployment\n", - "\n", + "## Unemployment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "The \"UE90\" column contains unemployment percentage per county in 1990." ] }, @@ -166,15 +171,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## " + "Lines can be rendered with antialiasing. Here is the previous example with an antialiased line width of 2 pixels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agg = canvas.line(df, geometry=\"geometry\", agg=ds.max(\"DNL90\"), line_width=2)\n", + "im = tf.shade(agg, cmap=cc.fire, how=\"eq_hist\")\n", + "tf.set_background(im, \"black\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Geometry type support" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Geometry type support\n", - "\n", "The following table shows which geometry types are supported by which Datashader `Canvas` functions.\n", "\n", "|Canvas function |Supported geometry types |\n",