diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 322dc9272..e863dd5a4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,7 +39,7 @@ jobs: if doit changes_in_dir --name $DIR; then doit small_data_setup --name $DIR doit test_project --name $DIR - doit small_data_cleanup --name $DIR + # doit small_data_cleanup --name $DIR fi; fi; done diff --git a/ship_traffic/AIS_categories.csv b/ship_traffic/AIS_categories.csv new file mode 100644 index 000000000..7f43e2e7d --- /dev/null +++ b/ship_traffic/AIS_categories.csv @@ -0,0 +1,282 @@ +num,desc,category,category_desc +0,Not available,0,Unknown +1,Reserved,0,Unknown +2,Reserved,0,Unknown +3,Reserved,0,Unknown +4,Reserved,0,Unknown +5,Reserved,0,Unknown +6,Reserved,0,Unknown +7,Reserved,0,Unknown +8,Reserved,0,Unknown +9,Reserved,0,Unknown +10,Reserved,0,Unknown +11,Reserved,0,Unknown +12,Reserved,0,Unknown +13,Reserved,0,Unknown +14,Reserved,0,Unknown +15,Reserved,0,Unknown +16,Reserved,0,Unknown +17,Reserved,0,Unknown +18,Reserved,0,Unknown +19,Reserved,0,Unknown +20,"Wing in ground (WIG)",1,WIG +21,"Wing in ground (WIG), Hazardous category A",1,WIG +22,"Wing in ground (WIG), Hazardous category B",1,WIG +23,"Wing in ground (WIG), Hazardous category C",1,WIG +24,"Wing in ground (WIG), Hazardous category D",1,WIG +25,"Wing in ground (WIG), Reserved",1,WIG +26,"Wing in ground (WIG), Reserved",1,WIG +27,"Wing in ground (WIG), Reserved",1,WIG +28,"Wing in ground (WIG), Reserved",1,WIG +29,"Wing in ground (WIG), Reserved",1,WIG +30,Fishing,2,Fishing +31,Towing,3,Towing +32,Towing: length exceeds 200m or breadth exceeds 25m,3,Towing +33,Dredging or underwater ops,4,Dredging +34,Diving ops,5,Diving +35,Military ops,6,Military +36,Sailing,7,Sailing +37,Pleasure Craft,8,Pleasure +38,Reserved,0,Unknown +39,Reserved,0,Unknown +40,"High speed craft (HSC)",9,High Speed +41,"High speed craft (HSC), Hazardous category A",9,High Speed +42,"High speed craft (HSC), Hazardous category B",9,High Speed +43,"High speed craft (HSC), Hazardous category C",9,High Speed +44,"High speed craft (HSC), Hazardous category D",9,High Speed +45,"High speed craft (HSC), Reserved",9,High Speed +46,"High speed craft (HSC), Reserved",9,High Speed +47,"High speed craft (HSC), Reserved",9,High Speed +48,"High speed craft (HSC), Reserved",9,High Speed +49,"High speed craft (HSC), No additional information",9,High Speed +50,Pilot Vessel,10,Pilot Vessel +51,Search and Rescue vessel,11,Search and Rescue vessel +52,Tug,12,Tug +53,Port Tender,18,Passenger +54,Anti-pollution equipment,13,Industrial +55,Law Enforcement,14,Law Enforcement +56,Spare - Local Vessel,15,Spare +57,Spare - Local Vessel,15,Spare +58,Medical Transport,16,Medical Transport +59,Noncombatant ship according to RR Resolution No. 18,17,Noncombatant +60,"Passenger",18,Passenger +61,"Passenger, Hazardous category A",18,Passenger +62,"Passenger, Hazardous category B",18,Passenger +63,"Passenger, Hazardous category C",18,Passenger +64,"Passenger, Hazardous category D",18,Passenger +65,"Passenger, Reserved",18,Passenger +66,"Passenger, Reserved",18,Passenger +67,"Passenger, Reserved",18,Passenger +68,"Passenger, Reserved",18,Passenger +69,"Passenger, No additional information",18,Passenger +70,"Cargo",19,Cargo +71,"Cargo, Hazardous category A",19,Cargo +72,"Cargo, Hazardous category B",19,Cargo +73,"Cargo, Hazardous category C",19,Cargo +74,"Cargo, Hazardous category D",19,Cargo +75,"Cargo, Reserved",19,Cargo +76,"Cargo, Reserved",19,Cargo +77,"Cargo, Reserved",19,Cargo +78,"Cargo, Reserved",19,Cargo +79,"Cargo, No additional information",19,Cargo +80,"Tanker",20,Tanker +81,"Tanker, Hazardous category A",20,Tanker +82,"Tanker, Hazardous category B",20,Tanker +83,"Tanker, Hazardous category C",20,Tanker +84,"Tanker, Hazardous category D",20,Tanker +85,"Tanker, Reserved",20,Tanker +86,"Tanker, Reserved",20,Tanker +87,"Tanker, Reserved",20,Tanker +88,"Tanker, Reserved",20,Tanker +89,"Tanker, No additional information",20,Tanker +90,"Other Type",21,Other +91,"Other Type, Hazardous category A",21,Other +92,"Other Type, Hazardous category B",21,Other +93,"Other Type, Hazardous category C",21,Other +94,"Other Type, Hazardous category D",21,Other +95,"Other Type, Reserved",21,Other +96,"Other Type, Reserved",21,Other +97,"Other Type, Reserved",21,Other +98,"Other Type, Reserved",21,Other +99,"Other Type, no additional information",21,Other +100,Reserved,0,Unknown +101,Reserved,0,Unknown +102,Reserved,0,Unknown +103,Reserved,0,Unknown +104,Reserved,0,Unknown +105,Reserved,0,Unknown +106,Reserved,0,Unknown +107,Reserved,0,Unknown +108,Reserved,0,Unknown +109,Reserved,0,Unknown +110,Reserved,0,Unknown +111,Reserved,0,Unknown +112,Reserved,0,Unknown +113,Reserved,0,Unknown +114,Reserved,0,Unknown +115,Reserved,0,Unknown +116,Reserved,0,Unknown +117,Reserved,0,Unknown +118,Reserved,0,Unknown +119,Reserved,0,Unknown +120,Reserved,0,Unknown +121,Reserved,0,Unknown +122,Reserved,0,Unknown +123,Reserved,0,Unknown +124,Reserved,0,Unknown +125,Reserved,0,Unknown +126,Reserved,0,Unknown +127,Reserved,0,Unknown +128,Reserved,0,Unknown +129,Reserved,0,Unknown +130,Reserved,0,Unknown +131,Reserved,0,Unknown +132,Reserved,0,Unknown +133,Reserved,0,Unknown +134,Reserved,0,Unknown +135,Reserved,0,Unknown +136,Reserved,0,Unknown +137,Reserved,0,Unknown +138,Reserved,0,Unknown +139,Reserved,0,Unknown +140,Reserved,0,Unknown +141,Reserved,0,Unknown +142,Reserved,0,Unknown +143,Reserved,0,Unknown +144,Reserved,0,Unknown +145,Reserved,0,Unknown +146,Reserved,0,Unknown +147,Reserved,0,Unknown +148,Reserved,0,Unknown +149,Reserved,0,Unknown +150,Reserved,0,Unknown +151,Reserved,0,Unknown +152,Reserved,0,Unknown +153,Reserved,0,Unknown +154,Reserved,0,Unknown +155,Reserved,0,Unknown +156,Reserved,0,Unknown +157,Reserved,0,Unknown +158,Reserved,0,Unknown +159,Reserved,0,Unknown +160,Reserved,0,Unknown +161,Reserved,0,Unknown +162,Reserved,0,Unknown +163,Reserved,0,Unknown +164,Reserved,0,Unknown +165,Reserved,0,Unknown +166,Reserved,0,Unknown +167,Reserved,0,Unknown +168,Reserved,0,Unknown +169,Reserved,0,Unknown +170,Reserved,0,Unknown +171,Reserved,0,Unknown +172,Reserved,0,Unknown +173,Reserved,0,Unknown +174,Reserved,0,Unknown +175,Reserved,0,Unknown +176,Reserved,0,Unknown +177,Reserved,0,Unknown +178,Reserved,0,Unknown +179,Reserved,0,Unknown +180,Reserved,0,Unknown +181,Reserved,0,Unknown +182,Reserved,0,Unknown +183,Reserved,0,Unknown +184,Reserved,0,Unknown +185,Reserved,0,Unknown +186,Reserved,0,Unknown +187,Reserved,0,Unknown +188,Reserved,0,Unknown +189,Reserved,0,Unknown +190,Reserved,0,Unknown +191,Reserved,0,Unknown +192,Reserved,0,Unknown +193,Reserved,0,Unknown +194,Reserved,0,Unknown +195,Reserved,0,Unknown +196,Reserved,0,Unknown +197,Reserved,0,Unknown +198,Reserved,0,Unknown +199,Reserved,0,Unknown +200,Reserved,0,Unknown +201,Reserved,0,Unknown +202,Reserved,0,Unknown +203,Reserved,0,Unknown +204,Reserved,0,Unknown +205,Reserved,0,Unknown +206,Reserved,0,Unknown +207,Reserved,0,Unknown +208,Reserved,0,Unknown +209,Reserved,0,Unknown +210,Reserved,0,Unknown +211,Reserved,0,Unknown +212,Reserved,0,Unknown +213,Reserved,0,Unknown +214,Reserved,0,Unknown +215,Reserved,0,Unknown +216,Reserved,0,Unknown +217,Reserved,0,Unknown +218,Reserved,0,Unknown +219,Reserved,0,Unknown +220,Reserved,0,Unknown +221,Reserved,0,Unknown +222,Reserved,0,Unknown +223,Reserved,0,Unknown +224,Reserved,0,Unknown +225,Reserved,0,Unknown +226,Reserved,0,Unknown +227,Reserved,0,Unknown +228,Reserved,0,Unknown +229,Reserved,0,Unknown +230,Reserved,0,Unknown +231,Reserved,0,Unknown +232,Reserved,0,Unknown +233,Reserved,0,Unknown +234,Reserved,0,Unknown +235,Reserved,0,Unknown +236,Reserved,0,Unknown +237,Reserved,0,Unknown +238,Reserved,0,Unknown +239,Reserved,0,Unknown +240,Reserved,0,Unknown +241,Reserved,0,Unknown +242,Reserved,0,Unknown +243,Reserved,0,Unknown +244,Reserved,0,Unknown +245,Reserved,0,Unknown +246,Reserved,0,Unknown +247,Reserved,0,Unknown +248,Reserved,0,Unknown +249,Reserved,0,Unknown +250,Reserved,0,Unknown +251,Reserved,0,Unknown +252,Reserved,0,Unknown +253,Reserved,0,Unknown +254,Reserved,0,Unknown +255,Reserved,0,Unknown +1001,Fishing vessels,2,Fishing +1002,Fishing vessels,2,Fishing +1003,Freight Vessels,19,Cargo +1004,Freight Vessels,19,Cargo +1005,Industrial vessels,13,Industrial +1006,Miscellaneous vessels,21,Other +1007,Offshore drilling vessels,13,Industrial +1008,non-vessel,21,Other +1009,non-vessel,21,Other +1010,Offshore supply vessel,13,Industrial +1011,Oil Recovery vessel,13,Industrial +1012,Passenger ships,18,Passenger +1013,Passenger ships,18,Passenger +1014,Passenger ships,18,Passenger +1015,Passenger ships,18,Passenger +1016,Public freight,19,Cargo +1017,Public tankship/barge,20,Tanker +1018,Unclassified public vessel,0,Unknown +1019,Recreational Vessel,8,Pleasure +1020,Research Vessel,21,Other +1021,SAR Aircraft,21,Other +1022,School ship,21,Other +1023,Tank Barge,20,Tanker +1024,Tank Ship,20,Tanker +1025,Towing Vessel,3,Towing diff --git a/ship_traffic/anaconda-project.yml b/ship_traffic/anaconda-project.yml new file mode 100644 index 000000000..13169b554 --- /dev/null +++ b/ship_traffic/anaconda-project.yml @@ -0,0 +1,67 @@ +# To reproduce: install 'anaconda-project', then 'anaconda-project run' +name: ship_traffic +description: Visualizing AIS location tracking data for marine vessels near the USA +maintainers: +- jbednar +labels: +- datashader +- holoviews + +user_fields: [labels, skip, maintainers, user_fields] + +channels: +- pyviz/label/dev + +packages: &pkgs +- bokeh ==2.2.3 +- colorcet ==2 +- dask ==2020.12.0 +- datashader ==0.12.0 +- holoviews ==1.14.2a1 +- notebook ==6.1.5 +- numba ==0.51.2 +- numexpr ==2.7.1 +- pandas ==1.1.5 +- panel ==0.10.3 +- python ==3.7.9 +- spatialpandas ==0.4.0a1 +- xarray ==0.16.2 +- pip ==20.3.3 +- conda-forge::pyarrow ==2 + +dependencies: *pkgs + +commands: + dashboard: + unix: panel serve ship_traffic.ipynb + supports_http_options: true + notebook: + notebook: ship_traffic.ipynb + test: + unix: pytest --nbsmoke-run -k *.ipynb --ignore envs + windows: pytest --nbsmoke-run -k *.ipynb --ignore envs + env_spec: test + lint: + unix: pytest --nbsmoke-lint -k *.ipynb --ignore envs + windows: pytest --nbsmoke-lint -k *.ipynb --ignore envs + env_spec: test + +variables: {} +downloads: + DATA: + url: http://s3.amazonaws.com/datashader-data/ship_traffic.zip + description: | + US AIS records from 1/2020 + filename: data/AIS_2020_01_broadcast.parq + unzip: true + +env_specs: + default: {} + test: + packages: + - nbsmoke=0.2.8 + - pytest=4.4.1 +platforms: +- linux-64 +- osx-64 +- win-64 diff --git a/ship_traffic/ship_traffic.ipynb b/ship_traffic/ship_traffic.ipynb new file mode 100644 index 000000000..7a1db4d7e --- /dev/null +++ b/ship_traffic/ship_traffic.ipynb @@ -0,0 +1,507 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exploring AIS vessel-traffic data\n", + "\n", + "This [Jupyter](https://jupyter.org) notebook demonstrates how to use the [Datashader](https://datashader.org)-based rendering in [HoloViews](https://holoviews.org) to explore and analyze US Coast Guard [Automatic Identification System (AIS)](https://en.wikipedia.org/wiki/Automatic_identification_system) vessel-location data. AIS data includes vessels identified by their [Maritime Mobile Service Identity](https://en.wikipedia.org/wiki/Maritime_Mobile_Service_Identity) numbers along with other data such as vessel type. Data is provided here for January 2020 (200 million datapoints), but additional months and years of data can be downloaded for US coastal areas from [marinecadastre.gov](https://marinecadastre.gov/ais), and with slight modifications the same code here should work for AIS data available for other regions. This notebook also illustrates a workflow for visualizing large categorical datasets in general, letting users interact with individual datapoints even though the data itself is never sent to the browser for plotting." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, numpy as np, pandas as pd, holoviews as hv, holoviews.operation.datashader as hd\n", + "import dask.dataframe as dd, panel as pn, colorcet as cc, datashader as ds\n", + "import spatialpandas as sp, spatialpandas.io, spatialpandas.geometry, spatialpandas.dask\n", + "\n", + "from PIL import Image\n", + "from holoviews.util.transform import lon_lat_to_easting_northing as ll2en\n", + "from dask.diagnostics import ProgressBar\n", + "\n", + "hv.extension('bokeh', 'matplotlib', width=100)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vessel categories \n", + "\n", + "AIS pings come with an associated integer `VesselType`, which broadly labels what sort of vessel it is. Different types of vessels are used for different purposes and behave differently, as we will see below when we color-code the location of each ping by the `VesselType` using Datshader. \n", + "\n", + "Here, we'll use type names defined in a separate file constructed using lists of 100+ [AIS Vessel Types](https://api.vtexplorer.com/docs/ref-aistypes.html). We've further collapsed those types into a smaller number of vessel categories:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vessel_types=pd.read_csv(\"AIS_categories.csv\")\n", + "vessel_types.iloc[34:37]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll further reduce the `category` to the 6 most common, with the rest as `Other`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "categories = {r.num: r.category if r.category in [0,2,3,19,12,18] else 21 for i, r in vessel_types.iterrows()}\n", + "categories[np.NaN] = 0\n", + "\n", + "def category_desc(val):\n", + " \"\"\"Return description for the category with the indicated integer value\"\"\"\n", + " return vessel_types[vessel_types.category==val].iloc[0].category_desc\n", + "\n", + "vessel_mapping = dict(zip(vessel_types.num.to_list(), vessel_types.category.to_list()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can print the resulting categories by number, with their description:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "groups = {categories[i]: category_desc(categories[i]) for i in vessel_types.num.unique()}\n", + "print(\" \".join([f\"{k}:{v}\" for k,v in sorted(groups.items())]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll map these categories to colors defined by [colorcet](https://colorcet.holoviz.org) and construct a color key and legend to use for plotting:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "colors = cc.glasbey_bw_minc_20_minl_30\n", + "color_key = {list(groups.keys())[i]:tuple(int(e*255.) for e in v) for i,v in \n", + " enumerate(colors[:(len(groups))][::-1])}\n", + "legend = hv.NdOverlay({groups[k]: hv.Points([0,0], label=str(groups[k])).opts(\n", + " color=cc.rgb_to_hex(*v), size=0) \n", + " for k, v in color_key.items()})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load traffic data\n", + "\n", + "Next we will load the data from disk, either directly from a spatially indexed Parquet file (if previously cached) or from the raw CSV files. We'll also project the location data to the coordinate system we will use later for plotting. There's a lot of data to process, so we'll use [Dask](https://dask.org) to ensure that we use all the cores available on this machine. Dask breaks a dataset into partitions that can be processed in parallel, so here we define a function for dealing with one partition's worth of data, along with a schema showing what the final dataframe's columnar structure will be." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def convert_partition(df):\n", + " east, north = ll2en(df.LON.astype('float32'), df.LAT.astype('float32'))\n", + " return sp.GeoDataFrame({\n", + " 'geometry': sp.geometry.PointArray((east, north)),\n", + " 'MMSI': df.MMSI.fillna(0).astype('int32'),\n", + " 'category': df.VesselType.replace(categories).astype('int32')})\n", + "\n", + "example = sp.GeoDataFrame({\n", + " 'geometry': sp.geometry.PointArray([], dtype='float32'),\n", + " 'MMSI': np.array([], dtype='int32'),\n", + " 'category': np.array([], dtype='int32')})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we will define the function that will load our data, reading a much-smaller (and much faster to load) previously cached Parquet-format file from disk if available. To use files covering other date ranges, just download them and change `basedir` and/or `basename` to match them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basedir = './data/'\n", + "basename = 'AIS_2020_01'\n", + "index = 'MMSI'\n", + "dfcols = ['MMSI', 'LON', 'LAT', 'VesselType']\n", + "vesselcols = ['MMSI', 'IMO', 'CallSign', 'VesselName', 'VesselType', 'Length', 'Width']\n", + "\n", + "def load_data(spatial_index=False):\n", + " cache_file = basedir+basename+'_broadcast.parq'\n", + " vessels_file = basedir+basename+'_vessels.parq'\n", + " \n", + " if (os.path.exists(cache_file) and os.path.exists(vessels_file)):\n", + " print('Reading vessel info file')\n", + " vessels = dd.read_parquet(vessels_file).compute()\n", + "\n", + " print('Reading parquet file')\n", + " gdf = sp.io.read_parquet_dask(cache_file).persist()\n", + " \n", + " else:\n", + " csvs = basedir+basename+'*.csv'\n", + " with ProgressBar():\n", + " print('Writing vessel info file')\n", + " df = dd.read_csv(csvs, usecols=vesselcols, assume_missing=True)\n", + " vessels = df.groupby(index).last().reset_index().compute()\n", + " vessels[index] = vessels[index].astype('int32')\n", + " vessels.to_parquet(vessels_file)\n", + "\n", + " print('Reading CSV files')\n", + " gdf = dd.read_csv(csvs, usecols=dfcols, assume_missing=True)\n", + " gdf = gdf.map_partitions(convert_partition, meta=example).persist()\n", + "\n", + " print('Writing parquet file')\n", + " gdf = gdf.pack_partitions_to_parquet(cache_file, npartitions=64).persist()\n", + " \n", + " with ProgressBar(): \n", + " if spatial_index: \n", + " print('Building spatial index') # Takes a couple of minutes for 1 month's data\n", + " gdf = gdf.build_sindex().persist()\n", + " gdf['category'] = gdf['category'].astype('category').cat.as_known()\n", + "\n", + " return gdf, vessels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's actually load the data, using the disk cache and memory cache if available. If you set `spatial_index` to `True` above it should speed up selection of individual points in the final plot, though `load_data` will then take several minutes rather than a few seconds." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "df, vessels = pn.state.as_cached('df', load_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we can see that this is a collection of points (latitude and longitude projected to Web Mercator) with an associated integer MMSI and category value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predefined locations\n", + "\n", + "We'll provide interactive plots later that let you zoom in anywhere you like, but first let's highlight a few specific areas of interest for those without a live Python process to interact with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def ranges(lon_range, lat_range):\n", + " x_range, y_range = ll2en(lon_range, lat_range)\n", + " return dict(x=tuple(x_range), y=tuple(y_range))\n", + "\n", + "x_range, y_range = ll2en([-54,-132], [15,51])\n", + "bounds = dict(x=tuple(x_range), y=tuple(y_range))\n", + "\n", + "loc = {\n", + " 'Continental US': ranges((-132.0, -54.0), (15.0, 51.0)),\n", + " 'Vancouver Area': ranges((-126.0, -120.7), (47.5, 49.5)),\n", + " 'NY and NJ': ranges(( -75.6, -71.3), (39.4, 41.1)),\n", + " 'Gulf of Mexico': ranges(( -98.0, -81.0), (23.8, 32.0)),\n", + " 'Gulf Coast': ranges(( -98.0, -87.0), (25.2, 31.0)),\n", + " 'Louisiana Coast': ranges(( -91.5, -87.8), (28.4, 30.1)),\n", + " 'Mississipi Delta': ranges(( -90.1, -89.2), (28.65,29.15)),\n", + " 'Louisiana East Bay': ranges(( -89.37, -89.28),(28.86,28.9)),\n", + " 'Bering Sea': ranges((-171.0, -159.0), (52.0, 56.0)),\n", + " 'Hawaii': ranges((-160.0, -154.5), (19.5, 22.1)),\n", + " 'LA to San Diego': ranges((-120.5, -117.0), (32.6, 34.1)),\n", + " 'Great Lakes': ranges(( -89.0, -77.0), (41.2, 46.1)),\n", + " 'Chesapeake Bay': ranges(( -78.0, -71.0), (36.4, 39.6)),\n", + " 'Pamlico Sound, NC': ranges(( -80.0, -72.5), (33.1, 36.8)),\n", + " 'Savannah, GA': ranges(( -81.2, -80.3), (31.85,32.25)),\n", + " 'Florida': ranges(( -90.0, -74.5), (23.3, 31.0)),\n", + " 'Puerto Rico': ranges(( -68.1, -64.2), (17.4, 19.5))}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can easily render these to PNGs using HoloViews to call Datashader and render the results using Matplotlib:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hv.output(backend='matplotlib')\n", + "hv.opts.defaults(\n", + " hv.opts.RGB(xaxis=None, yaxis=None, axiswise=True, bgcolor='black'),\n", + " hv.opts.Layout(hspace=0.0, vspace=0.1, sublabel_format=None, framewise=True, fig_size=400))\n", + "\n", + "plots = [hd.datashade(hv.Points(df), color_key=color_key, cmap=cc.fire, width=1000, height=600,\n", + " dynamic=False, x_range=ranges['x'], y_range=ranges['y']).relabel(region)\n", + " for region, ranges in loc.items()]\n", + "hv.Layout(plots).cols(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even more structure is visible if we color by the vessel category using the color key we defined:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "legend" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plots = [hd.datashade(hv.Points(df, vdims='category'), color_key=color_key,\n", + " aggregator=ds.count_cat('category'), width=1000, height=600,\n", + " dynamic=False, x_range=ranges['x'], y_range=ranges['y']).relabel(region)\n", + " for region, ranges in loc.items()]\n", + "hv.Layout(plots).cols(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hv.output(backend='bokeh')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interactive plots\n", + "\n", + "To let you explore the data yourself, we can plot it using the [Bokeh](https://bokeh.org) backend, which provides JavaScript-based interactive plotting in a web browser. \n", + "\n", + "To zoom in & interact with the plot, click the “Wheel zoom” tool in the [Bokeh toolbar](https://docs.bokeh.org/en/latest/docs/user_guide/tools.html) on the side of the plot and click and drag the plot in order to look around, or use the \"Box Zoom\" tool to select an area of interest. As you zoom in, finer-grained detail will emerge and fill in, as long as you have a live Python process running to render the data dynamically using Datashader. Depending on the size of the dataset and your machine, updating the plot might take a few seconds." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pts = hv.Points(df, vdims=['category']).redim.range(**loc['Continental US'])\n", + "points = hd.dynspread(hd.datashade(pts, aggregator=ds.count_cat('category'), color_key=color_key))\n", + "\n", + "tiles = hv.element.tiles.ESRI().opts(alpha=0.4, bgcolor=\"black\").opts(responsive=True, min_height=600)\n", + "labels = hv.element.tiles.StamenLabels().opts(alpha=0.7, level='glyph')\n", + "\n", + "tiles * points * labels * legend" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the color key, you can see that vessel behavior is highly dependent on category, with very different patterns of motion between these categories (and presumably the other categories not shown). E.g. fishing vessels tend to hug the coasts in meandering patterns, while cargo vessels travel along straight lines further from the coast. If you zoom in to a river, you can see that passenger vessels tend to travel _across_ narrow waterways, while towing and cargo vessels travel _along_ them. Zooming and panning (using the tools at the right) reveal other patterns at different locations and scales.\n", + "\n", + "# Selecting specific datapoints\n", + "\n", + "Datashader renders data into a screen-sized array of values or pixels, which allows it to handle much larger volumes of data than can be sent to a web browser. What if you what to interact with the underlying data, e.g. to get information about clusters of datapoints or even individual datapoints? For instance, here's a challenge: look up at the \"NY and NJ\" plot above, and you'll see some pink circles and lines that turn out to be in the middle of New York State and Pennsylvania, far from the ocean. What could those be?\n", + "\n", + "To find out more about particular datapoints that we see, we can use HoloViews and Bokeh tools to watch for the x,y location of a tap, then query the underlying dataset for a ping in that region, and then then highlight it on top of the main plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "columns = ['VesselName', 'MMSI', 'IMO', 'VesselType']\n", + "\n", + "def format_vessel_type(num):\n", + " if np.isnan(num): num = 0\n", + " return f'{num:.0f} ({vessel_types.loc[num].desc})'\n", + "\n", + "def brief_vessel_record(df):\n", + " return df.iloc[:1].merge(vessels, on='MMSI').merge(vessel_types, on='category')[['geometry']+columns]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pts2 = hv.Points(df, vdims=['category']).redim.range(**loc['Vancouver Area'])\n", + "pointsp = hd.dynspread(hd.datashade(pts2, color_key=color_key, aggregator=ds.count_cat('category'), min_alpha=90))\n", + "\n", + "max_hits = pn.widgets.IntSlider(value=2, start=1, end=10, name=\"Max hits\", sizing_mode='stretch_width')\n", + "highlighter = hd.inspect_points.instance(streams=[hv.streams.Tap], transform=brief_vessel_record,\n", + " x=-13922122, y=6184391) # optional initial values for static web page\n", + "\n", + "highlight = highlighter(pointsp).opts(color='white', tools=[\"hover\"], marker='square', \n", + " size=10, fill_alpha=0)\n", + "\n", + "#tiles * pointsp * highlight * legend" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We could view the result above by uncommenting the last line, but let's just go ahead and make a little [Panel](https://panel.holoviz.org) app so we can add a few extra interactive features. First, some code to fetch a photo of the selected vessels, if available, plus additional info about each vessel:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_photo_url(mmsi):\n", + " headers = {'User-Agent': 'Mozilla/5.0'}\n", + " r=requests.get(f'{ship_url}{mmsi}', allow_redirects=True, headers=headers)\n", + " ship_id = [el for el in r.url.split('/') if el.startswith('shipid')]\n", + " if ship_id == []: return ''\n", + " ship_id =ship_id[0].replace('shipid:','')\n", + " return f\"https://photos.marinetraffic.com/ais/showphoto.aspx?shipid={ship_id}&size=thumb300&stamp=false\"\n", + "\n", + "def get_photos(df=None, n_records=2):\n", + " photos = []\n", + " if df is not None and 'MMSI' in df.columns:\n", + " for mmsi in df.iloc[:n_records].MMSI.drop_duplicates().to_list():\n", + " try:\n", + " url = get_photo_url(mmsi)\n", + " response = requests.get(url, stream=True)\n", + " im = Image.open(response.raw)\n", + " photos += [pn.Column('MMSI: %s' % mmsi,im)] \n", + " except Exception:\n", + " pass\n", + " return pn.Row(*([pn.Spacer(sizing_mode='stretch_width')]+photos+[pn.Spacer(sizing_mode='stretch_width')]))\n", + "\n", + "ship_url = 'https://tinyurl.com/aispage/mmsi:'\n", + "def full_vessel_record(df, n_records=2):\n", + " \"Given a dataframe that includes MMSI values, return with URL, vessel info added\"\n", + " if not len(df.columns):\n", + " return None\n", + " df_with_info = df.iloc[:n_records].merge(vessels, on='MMSI')\n", + " df_with_types = df_with_info.merge(vessel_types, how='left', left_on='VesselType', right_on='num')[columns]\n", + " df_with_types['URL'] = df_with_types.MMSI.apply(lambda x: f'{ship_url}{x}')\n", + " df_with_types.VesselType = df_with_types.VesselType.apply(format_vessel_type)\n", + " result = pd.DataFrame(df_with_types).drop_duplicates()\n", + " return pn.pane.DataFrame(result, index=False, render_links=True, na_rep='', sizing_mode='stretch_width')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we can build an app with some widgets for controlling the visibility of the background map, the data, and the text labels, plus a table showing information about the selected vessel and a photo if available:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "photos = pn.bind(get_photos, df=highlighter.param.hits, n_records=max_hits)\n", + "table = pn.bind(full_vessel_record, df=highlighter.param.hits, n_records=max_hits)\n", + "sopts = dict(start=0, end=1, sizing_mode='stretch_width')\n", + "map_opacity = pn.widgets.FloatSlider(value=0.7, name=\"Map opacity\", **sopts)\n", + "data_opacity = pn.widgets.FloatSlider(value=1.0, name=\"Data opacity\", **sopts)\n", + "label_opacity = pn.widgets.FloatSlider(value=0.9, name=\"Label opacity\", **sopts)\n", + "overlay = (tiles.apply.opts(alpha=map_opacity) *\n", + " pointsp.apply.opts(alpha=data_opacity) *\n", + " labels.apply.opts(alpha=label_opacity) * highlight * legend)\n", + "\n", + "description = \"\"\"\n", + "## US AIS vessel traffic data, Jan 2020\n", + "\n", + "Zoom or pan to explore the data, then click to select\n", + "a particular data point to see more information about\n", + "it (after a delay). You may need to zoom in before a\n", + "point is selectable.\n", + "\"\"\"\n", + "\n", + "pn.Column(description,\n", + " pn.Row(map_opacity, data_opacity, label_opacity, max_hits),\n", + " overlay, table, photos, sizing_mode='stretch_width').servable()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here you should be able to explore this dataset fully, clicking on any interesting datapoints or clusters, including the \"Unknown\" circles in New York State and Pennsylvania mentioned above (which turn out to be false readings for a [US Coast Guard ship based in Guam](https://en.wikipedia.org/wiki/USCGC_Myrtle_Hazard), clearly not able to be on land in those patterns; perhaps another example of a [previously reported GPS disruption](https://skytruth.org/2020/05/ais-ship-tracking-data-shows-false-vessel-tracks-circling-above-point-reyes-near-san-francisco/)). There are no doubt many other interesting patterns to discover here!\n", + "\n", + "The above app should run fine in a Jupyter notebook, but it can also be launched as a separate web server using `panel serve --port 5006 ship_traffic.ipynb`, allowing you to let other people explore this dataset as well. And you can adapt the code in this notebook to work with just about any other data that you can map onto the x and y axes of a plot, including categorical data if available." + ] + } + ], + "metadata": { + "language_info": { + "name": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test_data/ship_traffic/AIS_2020_01_01.csv b/test_data/ship_traffic/AIS_2020_01_01.csv new file mode 100644 index 000000000..3a65ed2be --- /dev/null +++ b/test_data/ship_traffic/AIS_2020_01_01.csv @@ -0,0 +1,10 @@ +MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TranscieverClass +367149340,2020-01-01T00:00:00,29.96476,-90.02724,1.3,10.0,16.0,SYDNEE TAYLOR,,WDD4807,31,0,26,9,,31,B +367687520,2020-01-01T00:00:00,30.20558,-91.03578,10.7,124.9,130.0,CHIPPEWA,,WDI3361,31,0,22,,,,B +367368170,2020-01-01T00:00:03,47.53785,-122.32833,0.6,46.5,141.0,SONJA H,,WDE5536,31,0,18,6,,32,B +367007980,2020-01-01T00:00:05,37.95154,-121.32682,0.0,-49.6,511.0,ANGIE M BRUSCO,IMO5111359,WDC3446,31,0,28,7,3.4,,B +367538940,2020-01-01T00:00:05,30.00258,-93.22608,3.1,168.0,511.0,RITA ANN,,WDG4670,31,0,21,,,,B +367054790,2020-01-01T00:00:07,29.76044,-95.10476,0.1,-180.8,511.0,COLT,,WDC6330,31,15,20,7,,31,B +12345678,2020-01-01T00:00:06,30.05201,-90.54060,0.0,162.2,511.0,INGRAM TEST UNIT,,WWW0000,32,0,46,14,3.5,32,B +367567020,2020-01-01T00:00:08,34.14870,-119.20176,0.0,0.0,511.0,LULAPIN,IMO8997869,WDG7412,31,0,23,8,,,B +367389480,2020-01-01T00:00:11,30.69094,-81.45967,0.0,-157.8,511.0,MAVERICK,,WDE7139,31,0,18,6,,52,B diff --git a/test_data/ship_traffic/AIS_2020_01_02.csv b/test_data/ship_traffic/AIS_2020_01_02.csv new file mode 100644 index 000000000..db3c31c94 --- /dev/null +++ b/test_data/ship_traffic/AIS_2020_01_02.csv @@ -0,0 +1,10 @@ +MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TranscieverClass +368119660,2020-01-02T00:00:00,40.66729,-74.00783,0.0,-49.6,17.0,,,,,0,,,,,B +368009250,2020-01-02T00:00:00,47.66543,-122.39625,0.1,-156.9,109.0,,,,,0,,,,,A +368114180,2020-01-02T00:00:00,29.83139,-90.06786,0.0,-49.6,511.0,,,,,0,,,,,B +368010960,2020-01-02T00:00:00,37.04289,-89.17536,5.8,123.4,123.0,,,,,0,,,,,A +368090830,2020-01-02T00:00:00,27.84044,-97.08588,0.0,0.0,511.0,,,,,0,,,,,B +636018561,2020-01-02T00:00:01,34.13566,-76.47055,6.0,-178.9,236.0,,,,,0,,,,,B +563056600,2020-01-02T00:00:01,48.47790,-124.90163,9.9,86.6,89.0,,,,,0,,,,,A +367790680,2020-01-02T00:00:02,55.05991,-162.32734,0.0,197.0,511.0,,,,,15,,,,,B +368029640,2020-01-02T00:00:02,40.71516,-74.01818,0.0,-49.6,511.0,,,,,0,,,,,B