From d37dd08c48a806f73c2e75ece10caa4e8f9978b7 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Tue, 21 Jul 2020 21:32:36 +0100 Subject: [PATCH 1/5] use bulk --- momepy/distribution.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/momepy/distribution.py b/momepy/distribution.py index 0891d0ac..b1f72546 100644 --- a/momepy/distribution.py +++ b/momepy/distribution.py @@ -168,22 +168,36 @@ def __init__(self, gdf, unique_id, perimeters=None): unique_id = "mm_uid" self.id = gdf[unique_id] - gdf["_bounds"] = gdf.geometry.bounds.apply(list, axis=1) + # gdf["_bounds"] = gdf.geometry.bounds.apply(list, axis=1) + # for i, row in tqdm( + # enumerate( + # gdf[[perimeters, "_bounds", gdf._geometry_column_name]].itertuples() + # ), + # total=gdf.shape[0], + # ): + # neighbors = list(self.sindex.intersection(row[2])) + # neighbors.remove(i) + + # # if no neighbour exists + # length = 0 + # if not neighbors: + # results_list.append(0) + # else: + # length = gdf.iloc[neighbors].intersection(row[3]).length.sum() + # results_list.append(length / row[1]) + + inp, res = self.sindex.query_bulk(gdf.geometry) for i, row in tqdm( - enumerate( - gdf[[perimeters, "_bounds", gdf._geometry_column_name]].itertuples() - ), + enumerate(gdf[[perimeters, gdf._geometry_column_name]].itertuples()), total=gdf.shape[0], ): - neighbors = list(self.sindex.intersection(row[2])) + neighbors = list(res[inp == i]) neighbors.remove(i) - # if no neighbour exists - length = 0 if not neighbors: results_list.append(0) else: - length = gdf.iloc[neighbors].intersection(row[3]).length.sum() + length = gdf.iloc[neighbors].intersection(row[2]).length.sum() results_list.append(length / row[1]) self.series = pd.Series(results_list, index=gdf.index) From e7974e6b120bf73af46c43dea816fee710420c21 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Tue, 21 Jul 2020 21:52:53 +0100 Subject: [PATCH 2/5] vectorize --- momepy/distribution.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/momepy/distribution.py b/momepy/distribution.py index b1f72546..e51f7832 100644 --- a/momepy/distribution.py +++ b/momepy/distribution.py @@ -186,21 +186,17 @@ def __init__(self, gdf, unique_id, perimeters=None): # length = gdf.iloc[neighbors].intersection(row[3]).length.sum() # results_list.append(length / row[1]) - inp, res = self.sindex.query_bulk(gdf.geometry) - for i, row in tqdm( - enumerate(gdf[[perimeters, gdf._geometry_column_name]].itertuples()), - total=gdf.shape[0], - ): - neighbors = list(res[inp == i]) - neighbors.remove(i) - - if not neighbors: - results_list.append(0) - else: - length = gdf.iloc[neighbors].intersection(row[2]).length.sum() - results_list.append(length / row[1]) - - self.series = pd.Series(results_list, index=gdf.index) + inp, res = self.sindex.query_bulk(gdf.geometry, predicate="intersects") + left = gdf.geometry.take(inp).reset_index(drop=True) + right = gdf.geometry.take(res).reset_index(drop=True) + intersections = left.intersection(right).length + results = ( + intersections.groupby(inp).sum().reset_index(drop=True) + - self.perimeters.reset_index(drop=True) + ) / self.perimeters.reset_index(drop=True) + results.index = gdf.index + + self.series = results class StreetAlignment: From 7ecec70eed620a051681bdf0b26d211b1f14d5b2 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Tue, 21 Jul 2020 22:09:23 +0100 Subject: [PATCH 3/5] dep unique_id --- benchmarks/bench_distribution.py | 2 +- momepy/distribution.py | 56 ++++++++------------------------ tests/test_distribution.py | 12 +++---- 3 files changed, 19 insertions(+), 51 deletions(-) diff --git a/benchmarks/bench_distribution.py b/benchmarks/bench_distribution.py index cb733179..2f759514 100644 --- a/benchmarks/bench_distribution.py +++ b/benchmarks/bench_distribution.py @@ -27,7 +27,7 @@ def time_Orientation(self): mm.Orientation(self.df_buildings) def time_SharedWallsRatio(self): - mm.SharedWallsRatio(self.df_buildings, "uID") + mm.SharedWallsRatio(self.df_buildings) def time_StreetAlignment(self): mm.StreetAlignment( diff --git a/momepy/distribution.py b/momepy/distribution.py index e51f7832..071aaa5e 100644 --- a/momepy/distribution.py +++ b/momepy/distribution.py @@ -6,6 +6,7 @@ import math import statistics +import warnings import numpy as np import pandas as pd @@ -119,8 +120,7 @@ class SharedWallsRatio: ---------- gdf : GeoDataFrame GeoDataFrame containing gdf to analyse - unique_id : str, list, np.array, pd.Series - the name of the dataframe column, ``np.array``, or ``pd.Series`` with unique id + unique_id : (deprecated) perimeters : str, list, np.array, pd.Series (default None) the name of the dataframe column, ``np.array``, or ``pd.Series`` where is stored perimeter value @@ -130,61 +130,31 @@ class SharedWallsRatio: Series containing resulting values gdf : GeoDataFrame original GeoDataFrame - id : Series - Series containing used unique ID perimeters : GeoDataFrame Series containing used perimeters values - sindex : rtree spatial index - spatial index of gdf Examples -------- - >>> buildings_df['swr'] = momepy.SharedWallsRatio(buildings_df, 'uID').series + >>> buildings_df['swr'] = momepy.SharedWallsRatio(buildings_df).series 100%|██████████| 144/144 [00:00<00:00, 648.72it/s] >>> buildings_df['swr'][10] 0.3424804411228673 """ - def __init__(self, gdf, unique_id, perimeters=None): - self.gdf = gdf + def __init__(self, gdf, unique_id=None, perimeters=None): + if unique_id: + warnings.warn( + "unique_id is deprecated and will be removed in v0.4.", FutureWarning, + ) - gdf = gdf.copy() - self.sindex = gdf.sindex # define rtree index - # define empty list for results - results_list = [] + self.gdf = gdf if perimeters is None: - gdf["mm_p"] = gdf.geometry.length - perimeters = "mm_p" + self.perimeters = gdf.geometry.length + elif isinstance(perimeters, str): + self.perimeters = gdf[perimeters] else: - if not isinstance(perimeters, str): - gdf["mm_p"] = perimeters - perimeters = "mm_p" - - self.perimeters = gdf[perimeters] - - if not isinstance(unique_id, str): - gdf["mm_uid"] = unique_id - unique_id = "mm_uid" - self.id = gdf[unique_id] - - # gdf["_bounds"] = gdf.geometry.bounds.apply(list, axis=1) - # for i, row in tqdm( - # enumerate( - # gdf[[perimeters, "_bounds", gdf._geometry_column_name]].itertuples() - # ), - # total=gdf.shape[0], - # ): - # neighbors = list(self.sindex.intersection(row[2])) - # neighbors.remove(i) - - # # if no neighbour exists - # length = 0 - # if not neighbors: - # results_list.append(0) - # else: - # length = gdf.iloc[neighbors].intersection(row[3]).length.sum() - # results_list.append(length / row[1]) + self.perimeters = perimeters inp, res = self.sindex.query_bulk(gdf.geometry, predicate="intersects") left = gdf.geometry.take(inp).reset_index(drop=True) diff --git a/tests/test_distribution.py b/tests/test_distribution.py index 5ab075ed..eab7a8f4 100644 --- a/tests/test_distribution.py +++ b/tests/test_distribution.py @@ -29,20 +29,18 @@ def test_Orientation(self): assert self.df_streets["orient"][0] == pytest.approx(check) def test_SharedWallsRatio(self): - self.df_buildings["swr"] = mm.SharedWallsRatio(self.df_buildings, "uID").series - self.df_buildings["swr_uid"] = mm.SharedWallsRatio( - self.df_buildings, range(len(self.df_buildings)) - ).series + self.df_buildings["swr"] = mm.SharedWallsRatio(self.df_buildings).series self.df_buildings["swr_array"] = mm.SharedWallsRatio( - self.df_buildings, "uID", self.df_buildings.geometry.length + self.df_buildings, perimeters=self.df_buildings.geometry.length ).series nonconsecutive = self.df_buildings.drop(2) - result = mm.SharedWallsRatio(nonconsecutive, "uID").series + result = mm.SharedWallsRatio(nonconsecutive).series check = 0.3424804411228673 assert self.df_buildings["swr"][10] == check - assert self.df_buildings["swr_uid"][10] == check assert self.df_buildings["swr_array"][10] == check assert result[10] == check + with pytest.warns(FutureWarning): + mm.SharedWallsRatio(self.df_buildings, "uID") def test_StreetAlignment(self): self.df_buildings["orient"] = orient = mm.Orientation(self.df_buildings).series From ae856eacdbae3d073f5e7dc1a6c2ee10b712ee4d Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Tue, 21 Jul 2020 22:11:02 +0100 Subject: [PATCH 4/5] fix --- momepy/distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/momepy/distribution.py b/momepy/distribution.py index 071aaa5e..19a43c61 100644 --- a/momepy/distribution.py +++ b/momepy/distribution.py @@ -156,7 +156,7 @@ def __init__(self, gdf, unique_id=None, perimeters=None): else: self.perimeters = perimeters - inp, res = self.sindex.query_bulk(gdf.geometry, predicate="intersects") + inp, res = gdf.sindex.query_bulk(gdf.geometry, predicate="intersects") left = gdf.geometry.take(inp).reset_index(drop=True) right = gdf.geometry.take(res).reset_index(drop=True) intersections = left.intersection(right).length From 23e298a6547c33e0c6f17578d959648baeda16bd Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Tue, 21 Jul 2020 22:20:31 +0100 Subject: [PATCH 5/5] require gpd 0.8 --- momepy/distribution.py | 8 ++++++++ tests/test_distribution.py | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/momepy/distribution.py b/momepy/distribution.py index 19a43c61..dcf41502 100644 --- a/momepy/distribution.py +++ b/momepy/distribution.py @@ -7,13 +7,17 @@ import math import statistics import warnings +from distutils.version import LooseVersion import numpy as np import pandas as pd +import geopandas as gpd from tqdm import tqdm # progress bar from .utils import _azimuth +GPD_08 = str(gpd.__version__) >= LooseVersion("0.8.0") + __all__ = [ "Orientation", "SharedWallsRatio", @@ -142,6 +146,10 @@ class SharedWallsRatio: """ def __init__(self, gdf, unique_id=None, perimeters=None): + if not GPD_08: + raise ImportError( + "The 'geopandas' >= 0.8.0 package is required to use SharedWallsRatio." + ) if unique_id: warnings.warn( "unique_id is deprecated and will be removed in v0.4.", FutureWarning, diff --git a/tests/test_distribution.py b/tests/test_distribution.py index eab7a8f4..8495363a 100644 --- a/tests/test_distribution.py +++ b/tests/test_distribution.py @@ -1,9 +1,13 @@ +from distutils.version import LooseVersion + import geopandas as gpd import momepy as mm import numpy as np import pytest from libpysal.weights import Queen +GPD_08 = str(gpd.__version__) >= LooseVersion("0.8.0") + class TestDistribution: def setup_method(self): @@ -28,6 +32,7 @@ def test_Orientation(self): check = 40.7607 assert self.df_streets["orient"][0] == pytest.approx(check) + @pytest.mark.skipif(not GPD_08, reason="requires geopandas > 0.7") def test_SharedWallsRatio(self): self.df_buildings["swr"] = mm.SharedWallsRatio(self.df_buildings).series self.df_buildings["swr_array"] = mm.SharedWallsRatio(