Skip to content

Commit

Permalink
fix: repair geofabrik pbf parsing (#391)
Browse files Browse the repository at this point in the history
* ci: test windows workflow

* fix: change location of temp dir in PbfFileClipper

* feat: add osm index reloading from cache

* chore: change open encoding to utf-8

* fix: add missing column for geofabrik index

* chore: add changelog entry

* chore: bump version 0.5.1 -> 0.5.2
  • Loading branch information
RaczeQ authored Oct 29, 2023
1 parent db994ae commit a9a1633
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 37 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:
include:
- os: macos-latest
python-version: "3.11"
- os: windows-latest
python-version: "3.11"
env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python-version }}
Expand Down
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Security

## [0.5.2] - 2023-10-29

### Added

- Geofabrik and OSM Fr index caching and reading from cache to avoid reloading.
- Tests for Windows OS.

### Changed

- `PbfFileClipper` temporary file operations moved to the working directory.

## [0.5.1] - 2023-10-27

### Added
Expand Down Expand Up @@ -185,7 +196,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Intersection Joiner
- Geoparquet Loader

[unreleased]: https://github.com/srai-lab/srai/compare/0.5.1...HEAD
[unreleased]: https://github.com/srai-lab/srai/compare/0.5.2...HEAD
[0.5.2]: https://github.com/srai-lab/srai/compare/0.5.1...0.5.2
[0.5.1]: https://github.com/srai-lab/srai/compare/0.5.0...0.5.1
[0.5.0]: https://github.com/srai-lab/srai/compare/0.4.1...0.5.0
[0.4.1]: https://github.com/srai-lab/srai/compare/0.4.0...0.4.1
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ authors:
given-names: "Szymon"
orcid: "https://orcid.org/0000-0002-2047-1649"
title: "SRAI: Spatial Representations for Artificial Intelligence"
version: 0.5.1
version: 0.5.2
date-released: 2022-11-23
url: "https://kraina-ai.github.io/srai"
repository-code: "https://github.com/kraina-ai/srai"
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "srai"
version = "0.5.1"
version = "0.5.2"
description = "A set of python modules for geospatial machine learning and data mining"
authors = [
{ name = "Piotr Gramacki", email = "pgramacki@kraina.ai" },
Expand Down Expand Up @@ -184,7 +184,7 @@ close-quotes-on-newline = true
wrap-one-line = true

[tool.bumpver]
current_version = "0.5.1"
current_version = "0.5.2"
version_pattern = "MAJOR.MINOR.PATCH[PYTAGNUM]"
commit_message = "chore: bump version {old_version} -> {new_version}"
commit = true
Expand Down
2 changes: 1 addition & 1 deletion srai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@
for complete documentation.
"""

__version__ = "0.5.1"
__version__ = "0.5.2"
72 changes: 42 additions & 30 deletions srai/loaders/osm_loaders/openstreetmap_extracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@
from srai.constants import WGS84_CRS
from srai.geometry import flatten_geometry

OPENSTREETMAP_FR_POLYGONS_INDEX = "https://download.openstreetmap.fr/polygons"
OPENSTREETMAP_FR_EXTRACTS_INDEX = "https://download.openstreetmap.fr/extracts"
OPENSTREETMAP_FR_POLYGONS_INDEX_URL = "https://download.openstreetmap.fr/polygons"
OPENSTREETMAP_FR_EXTRACTS_INDEX_URL = "https://download.openstreetmap.fr/extracts"
OPENSTREETMAP_FR_INDEX_GDF: Optional[gpd.GeoDataFrame] = None

GEOFABRIK_INDEX = "https://download.geofabrik.de/index-v1.json"
GEOFABRIK_INDEX_URL = "https://download.geofabrik.de/index-v1.json"
GEOFABRIK_INDEX_GDF: Optional[gpd.GeoDataFrame] = None


Expand Down Expand Up @@ -282,7 +282,7 @@ def _filter_extracts(
].iterrows():
extract = OpenStreetMapExtract(
id=extract_row.id,
url=extract_row["urls"]["pbf"],
url=extract_row["url"],
geometry=extract_row.geometry,
)
filtered_extracts.append(extract)
Expand Down Expand Up @@ -359,18 +359,25 @@ def _load_geofabrik_index() -> gpd.GeoDataFrame:
Returns:
gpd.GeoDataFrame: Extracts index with metadata.
"""
result = requests.get(
GEOFABRIK_INDEX,
headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"},
)
parsed_data = json.loads(result.text)
gdf = gpd.GeoDataFrame.from_features(parsed_data["features"])
gdf["area"] = gdf.geometry.area
gdf.sort_values(by="area", ignore_index=True, inplace=True)

save_path = "cache/geofabrik_index.csv"
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
gdf[[col for col in gdf.columns if col != "geometry" and col != "urls"]].to_csv(save_path)
save_path = Path("cache/geofabrik_index.geojson")

if save_path.exists():
gdf = gpd.read_file(save_path)
else:
result = requests.get(
GEOFABRIK_INDEX_URL,
headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"},
)
parsed_data = json.loads(result.text)
gdf = gpd.GeoDataFrame.from_features(parsed_data["features"])
gdf["area"] = gdf.geometry.area
gdf.sort_values(by="area", ignore_index=True, inplace=True)
gdf["url"] = gdf["urls"].apply(lambda d: d["pbf"])
gdf = gdf[["id", "name", "geometry", "area", "url"]]

save_path.parent.mkdir(parents=True, exist_ok=True)
gdf.to_file(save_path, driver="GeoJSON")

return gdf


Expand All @@ -381,17 +388,22 @@ def _load_openstreetmap_fr_index() -> gpd.GeoDataFrame:
Returns:
gpd.GeoDataFrame: Extracts index with metadata.
"""
with tqdm() as pbar:
extracts = _iterate_openstreetmap_fr_index("osm_fr", "/", True, pbar)
gdf = gpd.GeoDataFrame(
data=[asdict(extract) for extract in extracts], geometry="geometry"
).set_crs(WGS84_CRS)
gdf["area"] = gdf.geometry.area
gdf.sort_values(by="area", ignore_index=True, inplace=True)

save_path = "cache/osm_fr_index.csv"
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
gdf[[col for col in gdf.columns if col != "geometry" and col != "urls"]].to_csv(save_path)
save_path = Path("cache/osm_fr_index.geojson")

if save_path.exists():
gdf = gpd.read_file(save_path)
else:
with tqdm() as pbar:
extracts = _iterate_openstreetmap_fr_index("osm_fr", "/", True, pbar)
gdf = gpd.GeoDataFrame(
data=[asdict(extract) for extract in extracts], geometry="geometry"
).set_crs(WGS84_CRS)
gdf["area"] = gdf.geometry.area
gdf.sort_values(by="area", ignore_index=True, inplace=True)

save_path.parent.mkdir(parents=True, exist_ok=True)
gdf.to_file(save_path, driver="GeoJSON")

return gdf


Expand All @@ -417,7 +429,7 @@ def _iterate_openstreetmap_fr_index(
pbar.set_description_str(id_prefix)
extracts = []
result = requests.get(
f"{OPENSTREETMAP_FR_EXTRACTS_INDEX}{directory_url}",
f"{OPENSTREETMAP_FR_EXTRACTS_INDEX_URL}{directory_url}",
headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"},
)
soup = BeautifulSoup(result.text, "html.parser")
Expand All @@ -432,7 +444,7 @@ def _iterate_openstreetmap_fr_index(
extracts.append(
OpenStreetMapExtract(
id=f"{id_prefix}_{name}",
url=f"{OPENSTREETMAP_FR_EXTRACTS_INDEX}{directory_url}{link['href']}",
url=f"{OPENSTREETMAP_FR_EXTRACTS_INDEX_URL}{directory_url}{link['href']}",
geometry=polygon,
)
)
Expand Down Expand Up @@ -465,7 +477,7 @@ def _parse_polygon_file(polygon_url: str) -> Optional[MultiPolygon]:
Empty if request returns 404 not found.
"""
result = requests.get(
f"{OPENSTREETMAP_FR_POLYGONS_INDEX}/{polygon_url}",
f"{OPENSTREETMAP_FR_POLYGONS_INDEX_URL}/{polygon_url}",
headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"},
)
if result.status_code == 404:
Expand Down
2 changes: 1 addition & 1 deletion srai/loaders/osm_loaders/pbf_file_clipper.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def clip_pbf_file(
if Path(final_osm_path).exists():
return Path(final_osm_path)

with tempfile.TemporaryDirectory() as tmp_dir_name:
with tempfile.TemporaryDirectory(dir=self.working_directory) as tmp_dir_name:
tmp_dir_path = Path(tmp_dir_name)

final_osm_path_alphanumeric_safe = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
@pytest.fixture # type: ignore
def popular_filter_api_data() -> Dict[str, Any]:
"""Load example taginfo API response data from file."""
with (Path(__file__).parent / "popular_filter_example.json").open("rt") as f:
with (Path(__file__).parent / "popular_filter_example.json").open("rt", encoding="utf-8") as f:
res: Dict[str, Any] = json.load(f)
return res

Expand Down

0 comments on commit a9a1633

Please sign in to comment.