From 43ea8ec19452cac2bd4a65524c67a4b41eaa87a9 Mon Sep 17 00:00:00 2001 From: Kamil Raczycki Date: Tue, 29 Nov 2022 22:32:19 +0100 Subject: [PATCH] feat: added additional pre-commit hooks - swapped `black` to `black-jupyter` - added flake8-bugbear - added refurb - added pdm-lock-check - changed autoupdate schedule to monthly --- .pre-commit-config.yaml | 16 +++++++++-- docs/copy_examples.py | 2 +- examples/joiners/intersection_joiner.ipynb | 2 +- examples/loaders/geoparquet_loader.ipynb | 2 +- .../administrative_boundary_regionizer.ipynb | 22 ++++++++------- examples/regionizers/h3_regionizer.ipynb | 6 ++-- examples/regionizers/voronoi_regionizer.ipynb | 21 +++++++------- srai/joiners/intersection_joiner.py | 28 ++++++++++--------- .../administrative_boundary_regionizer.py | 2 +- 9 files changed, 59 insertions(+), 42 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9202e857..10ad3f64 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,7 @@ repos: - repo: https://github.com/psf/black rev: 22.10.0 hooks: - - id: black + - id: black-jupyter - repo: https://github.com/pycqa/isort rev: 5.10.1 hooks: @@ -30,10 +30,22 @@ repos: rev: 5.0.4 hooks: - id: flake8 + additional_dependencies: + - flake8-bugbear - repo: https://github.com/pre-commit/mirrors-mypy rev: v0.991 hooks: - id: mypy + - repo: https://github.com/dosisod/refurb + rev: v1.8.0 + hooks: + - id: refurb + language: python + language_version: python3.10 + - repo: https://github.com/pdm-project/pdm + rev: 2.2.1 + hooks: + - id: pdm-lock-check - repo: https://github.com/kynan/nbstripout rev: 0.6.1 hooks: @@ -57,6 +69,6 @@ ci: autofix_prs: true autoupdate_branch: "" autoupdate_commit_msg: "build(pre-commit.ci): pre-commit autoupdate" - autoupdate_schedule: weekly + autoupdate_schedule: monthly skip: [] submodules: false diff --git a/docs/copy_examples.py b/docs/copy_examples.py index ef2b043e..7a1b92c2 100644 --- a/docs/copy_examples.py +++ b/docs/copy_examples.py @@ -16,7 +16,7 @@ def write_file(file_path: Path) -> None: """ root_path = file_path.relative_to(".") print(f"Copying {root_path} file to {root_path}") - with open(root_path, "rb") as src, mkdocs_gen_files.open(root_path, "wb") as dst: + with root_path.open("rb") as src, mkdocs_gen_files.open(root_path, "wb") as dst: dst.write(src.read()) diff --git a/examples/joiners/intersection_joiner.ipynb b/examples/joiners/intersection_joiner.ipynb index 0ee11ffb..50a5102a 100644 --- a/examples/joiners/intersection_joiner.ipynb +++ b/examples/joiners/intersection_joiner.ipynb @@ -40,7 +40,7 @@ " geometry.Polygon([(-1.5, 0.5), (-1.5, 0), (-0.5, 0), (-0.5, 0.5)]),\n", " geometry.Polygon([(-1.5, -1.5), (-1.5, -2.5), (-0.5, -2.5), (-0.5, -1.5)]),\n", " geometry.Point((0, 0)),\n", - " geometry.Point((-0.5, -0.5))\n", + " geometry.Point((-0.5, -0.5)),\n", " ],\n", " crs=\"epsg:4326\",\n", ")\n", diff --git a/examples/loaders/geoparquet_loader.ipynb b/examples/loaders/geoparquet_loader.ipynb index be730f33..3d0ef568 100644 --- a/examples/loaders/geoparquet_loader.ipynb +++ b/examples/loaders/geoparquet_loader.ipynb @@ -65,7 +65,7 @@ "metadata": {}, "outputs": [], "source": [ - "limited_gdf = gpql.load(file_path=\"files/example.parquet\", columns=['continent', 'name', 'pop_est'])\n", + "limited_gdf = gpql.load(file_path=\"files/example.parquet\", columns=[\"continent\", \"name\", \"pop_est\"])\n", "limited_gdf" ] }, diff --git a/examples/regionizers/administrative_boundary_regionizer.ipynb b/examples/regionizers/administrative_boundary_regionizer.ipynb index d792dea7..dc920304 100644 --- a/examples/regionizers/administrative_boundary_regionizer.ipynb +++ b/examples/regionizers/administrative_boundary_regionizer.ipynb @@ -83,7 +83,7 @@ " resolution=50,\n", ")\n", "fig.update_layout(height=600, width=800, margin={\"r\": 0, \"t\": 0, \"l\": 0, \"b\": 0})\n", - "fig.show(renderer=\"png\") # replace with fig.show() to allow interactivity\n" + "fig.show(renderer=\"png\") # replace with fig.show() to allow interactivity" ] }, { @@ -103,7 +103,9 @@ "metadata": {}, "outputs": [], "source": [ - "madagascar_bbox = box(minx=43.2541870461, miny=-25.6014344215, maxx=50.4765368996, maxy=-12.0405567359)\n", + "madagascar_bbox = box(\n", + " minx=43.2541870461, miny=-25.6014344215, maxx=50.4765368996, maxy=-12.0405567359\n", + ")\n", "madagascar_bbox_gdf = gpd.GeoDataFrame({\"geometry\": [madagascar_bbox]}, crs=\"EPSG:4326\")" ] }, @@ -228,7 +230,7 @@ " projection_type=\"equirectangular\",\n", " lataxis_range=[miny - 1, maxy + 1],\n", " lonaxis_range=[minx - 1, maxx + 1],\n", - " showlakes=False, \n", + " showlakes=False,\n", " showcountries=False,\n", " showframe=False,\n", " resolution=50,\n", @@ -303,8 +305,8 @@ " showframe=False,\n", " resolution=50,\n", " )\n", - " \n", - " size = len(result.to_json().encode('utf-8'))\n", + "\n", + " size = len(result.to_json().encode(\"utf-8\"))\n", " fig.update_layout(\n", " height=450,\n", " width=700,\n", @@ -312,7 +314,7 @@ " title_text=f\"Toposimplify value: {epsilon} ({size/1000} KB)\",\n", " )\n", "\n", - " fig.show(renderer=\"png\") # replace with fig.show() to allow interactivity\n" + " fig.show(renderer=\"png\") # replace with fig.show() to allow interactivity" ] }, { @@ -335,7 +337,9 @@ "import requests\n", "\n", "r = requests.get(\"https://raw.githubusercontent.com/w8r/paris-metro-graph/master/metro.json\").json()\n", - "stations_gdf = gpd.GeoDataFrame({\"geometry\": [Point(s['longitude'], s['latitude']) for s in r['nodes']]}, crs=\"EPSG:4326\")\n", + "stations_gdf = gpd.GeoDataFrame(\n", + " {\"geometry\": [Point(s[\"longitude\"], s[\"latitude\"]) for s in r[\"nodes\"]]}, crs=\"EPSG:4326\"\n", + ")\n", "stations_gdf" ] }, @@ -374,9 +378,7 @@ " mapbox_style=\"carto-positron\",\n", " zoom=10.8,\n", ")\n", - "fig2 = px.scatter_mapbox(\n", - " stations_gdf, lat=stations_gdf.geometry.y, lon=stations_gdf.geometry.x\n", - ")\n", + "fig2 = px.scatter_mapbox(stations_gdf, lat=stations_gdf.geometry.y, lon=stations_gdf.geometry.x)\n", "fig.add_trace(fig2.data[0])\n", "fig.update_layout(margin={\"r\": 0, \"t\": 0, \"l\": 0, \"b\": 0})\n", "fig.update_traces(showlegend=False)\n", diff --git a/examples/regionizers/h3_regionizer.ipynb b/examples/regionizers/h3_regionizer.ipynb index 4674633d..e384c4a9 100644 --- a/examples/regionizers/h3_regionizer.ipynb +++ b/examples/regionizers/h3_regionizer.ipynb @@ -57,7 +57,7 @@ " crs=\"epsg:4326\",\n", ")\n", "resolution = 4\n", - "gdf.plot()\n" + "gdf.plot()" ] }, { @@ -85,7 +85,7 @@ "\n", "ax = gdf.plot()\n", "gdf_h3.plot(ax=ax, color=\"red\", alpha=0.5)\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -113,7 +113,7 @@ "\n", "ax = gdf.plot()\n", "gdf_h3_buffered.plot(ax=ax, color=\"red\", alpha=0.5)\n", - "plt.show()\n" + "plt.show()" ] } ], diff --git a/examples/regionizers/voronoi_regionizer.ipynb b/examples/regionizers/voronoi_regionizer.ipynb index 44cac755..3cdff0dc 100644 --- a/examples/regionizers/voronoi_regionizer.ipynb +++ b/examples/regionizers/voronoi_regionizer.ipynb @@ -246,7 +246,12 @@ "metadata": {}, "outputs": [], "source": [ - "stations_csv = gpd.pd.read_csv('https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv', sep=';', index_col='id', usecols=['id', 'latitude', 'longitude', 'country'])\n", + "stations_csv = gpd.pd.read_csv(\n", + " \"https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv\",\n", + " sep=\";\",\n", + " index_col=\"id\",\n", + " usecols=[\"id\", \"latitude\", \"longitude\", \"country\"],\n", + ")\n", "stations_csv" ] }, @@ -259,14 +264,14 @@ "stations = []\n", "positions = set()\n", "for idx, r in stations_csv.iterrows():\n", - " if r.country != 'DE' or gpd.pd.isna(r.latitude) or gpd.pd.isna(r.longitude):\n", + " if r.country != \"DE\" or gpd.pd.isna(r.latitude) or gpd.pd.isna(r.longitude):\n", " continue\n", - " pos = round(r.longitude,5), round(r.latitude,5)\n", + " pos = round(r.longitude, 5), round(r.latitude, 5)\n", " if not pos in positions:\n", " stations.append({\"id\": idx, \"geometry\": Point(*pos)})\n", " positions.add(pos)\n", "\n", - "stations_gdf = gpd.GeoDataFrame(data=stations, crs=\"EPSG:4326\").set_index('id')\n", + "stations_gdf = gpd.GeoDataFrame(data=stations, crs=\"EPSG:4326\").set_index(\"id\")\n", "\n", "del stations_csv\n", "del stations\n", @@ -307,9 +312,7 @@ " color=rail_result_gdf.index,\n", " color_continuous_scale=px.colors.sequential.Viridis,\n", ")\n", - "fig2 = px.scatter_geo(\n", - " stations_gdf, lat=stations_gdf.geometry.y, lon=stations_gdf.geometry.x\n", - ")\n", + "fig2 = px.scatter_geo(stations_gdf, lat=stations_gdf.geometry.y, lon=stations_gdf.geometry.x)\n", "fig.update_traces(marker={\"opacity\": 0.6}, selector=dict(type=\"choropleth\"))\n", "fig.add_trace(fig2.data[0])\n", "fig.update_traces(marker_color=\"white\", marker_size=2, selector=dict(type=\"scattergeo\"))\n", @@ -344,9 +347,7 @@ " mapbox_style=\"open-street-map\",\n", " zoom=11,\n", ")\n", - "fig2 = px.scatter_mapbox(\n", - " stations_gdf, lat=stations_gdf.geometry.y, lon=stations_gdf.geometry.x\n", - ")\n", + "fig2 = px.scatter_mapbox(stations_gdf, lat=stations_gdf.geometry.y, lon=stations_gdf.geometry.x)\n", "fig.add_trace(fig2.data[0])\n", "fig.update_layout(margin={\"r\": 0, \"t\": 0, \"l\": 0, \"b\": 0})\n", "fig.update_layout(coloraxis_showscale=False)\n", diff --git a/srai/joiners/intersection_joiner.py b/srai/joiners/intersection_joiner.py index d64db6a3..26e22e03 100644 --- a/srai/joiners/intersection_joiner.py +++ b/srai/joiners/intersection_joiner.py @@ -44,10 +44,14 @@ def join( if len(features) == 0: raise ValueError("Features must not be empty.") + result_gdf: gpd.GeoDataFrame + if return_geom: - return self._join_with_geom(regions, features) + result_gdf = self._join_with_geom(regions, features) else: - return self._join_without_geom(regions, features) + result_gdf = self._join_without_geom(regions, features) + + return result_gdf def _join_with_geom( self, regions: gpd.GeoDataFrame, features: gpd.GeoDataFrame @@ -64,17 +68,15 @@ def _join_with_geom( a MultiIndex and a geometry with the intersection """ - joined_parts = [] - - for _, single in features.groupby(features["geometry"].geom_type): - joined_parts.append( - gpd.overlay( - single[["geometry"]].reset_index(names="feature_id"), - regions[["geometry"]].reset_index(names="region_id"), - how="intersection", - keep_geom_type=False, - ).set_index(["region_id", "feature_id"]) - ) + joined_parts = [ + gpd.overlay( + single[["geometry"]].reset_index(names="feature_id"), + regions[["geometry"]].reset_index(names="region_id"), + how="intersection", + keep_geom_type=False, + ).set_index(["region_id", "feature_id"]) + for _, single in features.groupby(features["geometry"].geom_type) + ] joint = gpd.GeoDataFrame(pd.concat(joined_parts, ignore_index=False)) return joint diff --git a/srai/regionizers/administrative_boundary_regionizer.py b/srai/regionizers/administrative_boundary_regionizer.py index bb37fb03..7a0ad634 100644 --- a/srai/regionizers/administrative_boundary_regionizer.py +++ b/srai/regionizers/administrative_boundary_regionizer.py @@ -146,7 +146,7 @@ def _generate_regions_from_all_geometries( ) -> List[Dict[str, Any]]: """Query and optimize downloading data from Overpass.""" elements_ids = set() - generated_regions: List[Dict[str, Any]] = list() + generated_regions: List[Dict[str, Any]] = [] all_geometries = ( seq([self._flatten_geometries(g) for g in gdf_wgs84.geometry]).flatten().list()