Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: refactor SharedWallsRatio #191

Merged
merged 5 commits into from
Jul 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/bench_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def time_Orientation(self):
mm.Orientation(self.df_buildings)

def time_SharedWallsRatio(self):
mm.SharedWallsRatio(self.df_buildings, "uID")
mm.SharedWallsRatio(self.df_buildings)

def time_StreetAlignment(self):
mm.StreetAlignment(
Expand Down
78 changes: 33 additions & 45 deletions momepy/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@

import math
import statistics
import warnings
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm # progress bar

from .utils import _azimuth

GPD_08 = str(gpd.__version__) >= LooseVersion("0.8.0")

__all__ = [
"Orientation",
"SharedWallsRatio",
Expand Down Expand Up @@ -119,8 +124,7 @@ class SharedWallsRatio:
----------
gdf : GeoDataFrame
GeoDataFrame containing objects to analyse
unique_id : str, list, np.array, pd.Series
the name of the dataframe column, ``np.array``, or ``pd.Series`` with unique id
unique_id : (deprecated)
perimeters : str, list, np.array, pd.Series (default None)
the name of the dataframe column, ``np.array``, or ``pd.Series`` where perimeter values are stored

Expand All @@ -130,63 +134,47 @@ class SharedWallsRatio:
Series containing resulting values
gdf : GeoDataFrame
original GeoDataFrame
id : Series
Series containing used unique ID
perimeters : Series
Series containing the perimeter values used
sindex : rtree spatial index
spatial index of gdf

Examples
--------
>>> buildings_df['swr'] = momepy.SharedWallsRatio(buildings_df, 'uID').series
>>> buildings_df['swr'] = momepy.SharedWallsRatio(buildings_df).series
100%|██████████| 144/144 [00:00<00:00, 648.72it/s]
>>> buildings_df['swr'][10]
0.3424804411228673
"""

def __init__(self, gdf, unique_id, perimeters=None):
self.gdf = gdf
def __init__(self, gdf, unique_id=None, perimeters=None):
if not GPD_08:
raise ImportError(
"The 'geopandas' >= 0.8.0 package is required to use SharedWallsRatio."
)
if unique_id:
warnings.warn(
"unique_id is deprecated and will be removed in v0.4.", FutureWarning,
)

gdf = gdf.copy()
self.sindex = gdf.sindex # define rtree index
# define empty list for results
results_list = []
self.gdf = gdf

if perimeters is None:
gdf["mm_p"] = gdf.geometry.length
perimeters = "mm_p"
self.perimeters = gdf.geometry.length
elif isinstance(perimeters, str):
self.perimeters = gdf[perimeters]
else:
if not isinstance(perimeters, str):
gdf["mm_p"] = perimeters
perimeters = "mm_p"

self.perimeters = gdf[perimeters]

if not isinstance(unique_id, str):
gdf["mm_uid"] = unique_id
unique_id = "mm_uid"
self.id = gdf[unique_id]

gdf["_bounds"] = gdf.geometry.bounds.apply(list, axis=1)
for i, row in tqdm(
enumerate(
gdf[[perimeters, "_bounds", gdf._geometry_column_name]].itertuples()
),
total=gdf.shape[0],
):
neighbors = list(self.sindex.intersection(row[2]))
neighbors.remove(i)

# if no neighbour exists
length = 0
if not neighbors:
results_list.append(0)
else:
length = gdf.iloc[neighbors].intersection(row[3]).length.sum()
results_list.append(length / row[1])

self.series = pd.Series(results_list, index=gdf.index)
self.perimeters = perimeters

inp, res = gdf.sindex.query_bulk(gdf.geometry, predicate="intersects")
left = gdf.geometry.take(inp).reset_index(drop=True)
right = gdf.geometry.take(res).reset_index(drop=True)
intersections = left.intersection(right).length
results = (
intersections.groupby(inp).sum().reset_index(drop=True)
- self.perimeters.reset_index(drop=True)
) / self.perimeters.reset_index(drop=True)
results.index = gdf.index

self.series = results


class StreetAlignment:
Expand Down
17 changes: 10 additions & 7 deletions tests/test_distribution.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from distutils.version import LooseVersion

import geopandas as gpd
import momepy as mm
import numpy as np
import pytest
from libpysal.weights import Queen

GPD_08 = str(gpd.__version__) >= LooseVersion("0.8.0")


class TestDistribution:
def setup_method(self):
Expand All @@ -28,21 +32,20 @@ def test_Orientation(self):
check = 40.7607
assert self.df_streets["orient"][0] == pytest.approx(check)

@pytest.mark.skipif(not GPD_08, reason="requires geopandas > 0.7")
def test_SharedWallsRatio(self):
self.df_buildings["swr"] = mm.SharedWallsRatio(self.df_buildings, "uID").series
self.df_buildings["swr_uid"] = mm.SharedWallsRatio(
self.df_buildings, range(len(self.df_buildings))
).series
self.df_buildings["swr"] = mm.SharedWallsRatio(self.df_buildings).series
self.df_buildings["swr_array"] = mm.SharedWallsRatio(
self.df_buildings, "uID", self.df_buildings.geometry.length
self.df_buildings, perimeters=self.df_buildings.geometry.length
).series
nonconsecutive = self.df_buildings.drop(2)
result = mm.SharedWallsRatio(nonconsecutive, "uID").series
result = mm.SharedWallsRatio(nonconsecutive).series
check = 0.3424804411228673
assert self.df_buildings["swr"][10] == check
assert self.df_buildings["swr_uid"][10] == check
assert self.df_buildings["swr_array"][10] == check
assert result[10] == check
with pytest.warns(FutureWarning):
mm.SharedWallsRatio(self.df_buildings, "uID")

def test_StreetAlignment(self):
self.df_buildings["orient"] = orient = mm.Orientation(self.df_buildings).series
Expand Down