diff --git a/.github/workflows/_tests.yml b/.github/workflows/_tests.yml index 346b333be..f7a9546da 100644 --- a/.github/workflows/_tests.yml +++ b/.github/workflows/_tests.yml @@ -13,6 +13,8 @@ jobs: include: - os: macos-latest python-version: "3.11" + - os: windows-latest + python-version: "3.11" env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python-version }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e1afeb7b..f2ab4233c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Security +## [0.5.2] - 2023-10-29 + +### Added + +- Geofabrik and OSM Fr index caching and reading from cache to avoid reloading. +- Tests for Windows OS. + +### Changed + +- `PbfFileClipper` temporary files operations moved to working directory. + ## [0.5.1] - 2023-10-27 ### Added @@ -185,7 +196,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Intersection Joiner - Geoparquet Loader -[unreleased]: https://github.com/srai-lab/srai/compare/0.5.1...HEAD +[unreleased]: https://github.com/srai-lab/srai/compare/0.5.2...HEAD +[0.5.2]: https://github.com/srai-lab/srai/compare/0.5.1...0.5.2 [0.5.1]: https://github.com/srai-lab/srai/compare/0.5.0...0.5.1 [0.5.0]: https://github.com/srai-lab/srai/compare/0.4.1...0.5.0 [0.4.1]: https://github.com/srai-lab/srai/compare/0.4.0...0.4.1 diff --git a/CITATION.cff b/CITATION.cff index 453d399bb..e81606d76 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -14,7 +14,7 @@ authors: given-names: "Szymon" orcid: "https://orcid.org/0000-0002-2047-1649" title: "SRAI: Spatial Representations for Artificial Intelligence" -version: 0.5.1 +version: 0.5.2 date-released: 2022-11-23 url: "https://kraina-ai.github.io/srai" repository-code: "https://github.com/kraina-ai/srai" diff --git a/pyproject.toml b/pyproject.toml index 1cae5ed1d..420f01498 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "srai" -version = "0.5.1" +version = "0.5.2" description = "A set of python modules for geospatial machine learning and data mining" authors = [ { name = "Piotr Gramacki", email = "pgramacki@kraina.ai" }, @@ -184,7 +184,7 @@ close-quotes-on-newline = true wrap-one-line = true [tool.bumpver] -current_version = "0.5.1" +current_version = "0.5.2" version_pattern = "MAJOR.MINOR.PATCH[PYTAGNUM]" commit_message = "chore: bump version {old_version} -> {new_version}" commit = true diff --git a/srai/__init__.py b/srai/__init__.py index f9a5e5530..f8b5f1523 100644 --- a/srai/__init__.py +++ b/srai/__init__.py @@ -10,4 +10,4 @@ for complete documentation. """ -__version__ = "0.5.1" +__version__ = "0.5.2" diff --git a/srai/loaders/osm_loaders/openstreetmap_extracts.py b/srai/loaders/osm_loaders/openstreetmap_extracts.py index c8a67544e..a78b584ed 100644 --- a/srai/loaders/osm_loaders/openstreetmap_extracts.py +++ b/srai/loaders/osm_loaders/openstreetmap_extracts.py @@ -24,11 +24,11 @@ from srai.constants import WGS84_CRS from srai.geometry import flatten_geometry -OPENSTREETMAP_FR_POLYGONS_INDEX = "https://download.openstreetmap.fr/polygons" -OPENSTREETMAP_FR_EXTRACTS_INDEX = "https://download.openstreetmap.fr/extracts" +OPENSTREETMAP_FR_POLYGONS_INDEX_URL = "https://download.openstreetmap.fr/polygons" +OPENSTREETMAP_FR_EXTRACTS_INDEX_URL = "https://download.openstreetmap.fr/extracts" OPENSTREETMAP_FR_INDEX_GDF: Optional[gpd.GeoDataFrame] = None -GEOFABRIK_INDEX = "https://download.geofabrik.de/index-v1.json" +GEOFABRIK_INDEX_URL = "https://download.geofabrik.de/index-v1.json" GEOFABRIK_INDEX_GDF: Optional[gpd.GeoDataFrame] = None @@ -282,7 +282,7 @@ def _filter_extracts( ].iterrows(): extract = OpenStreetMapExtract( id=extract_row.id, - url=extract_row["urls"]["pbf"], + url=extract_row["url"], geometry=extract_row.geometry, ) filtered_extracts.append(extract) @@ -359,18 +359,25 @@ def _load_geofabrik_index() -> gpd.GeoDataFrame: Returns: gpd.GeoDataFrame: Extracts index with metadata. """ - result = requests.get( - GEOFABRIK_INDEX, - headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"}, - ) - parsed_data = json.loads(result.text) - gdf = gpd.GeoDataFrame.from_features(parsed_data["features"]) - gdf["area"] = gdf.geometry.area - gdf.sort_values(by="area", ignore_index=True, inplace=True) - - save_path = "cache/geofabrik_index.csv" - Path(save_path).parent.mkdir(parents=True, exist_ok=True) - gdf[[col for col in gdf.columns if col != "geometry" and col != "urls"]].to_csv(save_path) + save_path = Path("cache/geofabrik_index.geojson") + + if save_path.exists(): + gdf = gpd.read_file(save_path) + else: + result = requests.get( + GEOFABRIK_INDEX_URL, + headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"}, + ) + parsed_data = json.loads(result.text) + gdf = gpd.GeoDataFrame.from_features(parsed_data["features"]) + gdf["area"] = gdf.geometry.area + gdf.sort_values(by="area", ignore_index=True, inplace=True) + gdf["url"] = gdf["urls"].apply(lambda d: d["pbf"]) + gdf = gdf[["id", "name", "geometry", "area", "url"]] + + save_path.parent.mkdir(parents=True, exist_ok=True) + gdf.to_file(save_path, driver="GeoJSON") + return gdf @@ -381,17 +388,22 @@ def _load_openstreetmap_fr_index() -> gpd.GeoDataFrame: Returns: gpd.GeoDataFrame: Extracts index with metadata. """ - with tqdm() as pbar: - extracts = _iterate_openstreetmap_fr_index("osm_fr", "/", True, pbar) - gdf = gpd.GeoDataFrame( - data=[asdict(extract) for extract in extracts], geometry="geometry" - ).set_crs(WGS84_CRS) - gdf["area"] = gdf.geometry.area - gdf.sort_values(by="area", ignore_index=True, inplace=True) - - save_path = "cache/osm_fr_index.csv" - Path(save_path).parent.mkdir(parents=True, exist_ok=True) - gdf[[col for col in gdf.columns if col != "geometry" and col != "urls"]].to_csv(save_path) + save_path = Path("cache/osm_fr_index.geojson") + + if save_path.exists(): + gdf = gpd.read_file(save_path) + else: + with tqdm() as pbar: + extracts = _iterate_openstreetmap_fr_index("osm_fr", "/", True, pbar) + gdf = gpd.GeoDataFrame( + data=[asdict(extract) for extract in extracts], geometry="geometry" + ).set_crs(WGS84_CRS) + gdf["area"] = gdf.geometry.area + gdf.sort_values(by="area", ignore_index=True, inplace=True) + + save_path.parent.mkdir(parents=True, exist_ok=True) + gdf.to_file(save_path, driver="GeoJSON") + return gdf @@ -417,7 +429,7 @@ def _iterate_openstreetmap_fr_index( pbar.set_description_str(id_prefix) extracts = [] result = requests.get( - f"{OPENSTREETMAP_FR_EXTRACTS_INDEX}{directory_url}", + f"{OPENSTREETMAP_FR_EXTRACTS_INDEX_URL}{directory_url}", headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"}, ) soup = BeautifulSoup(result.text, "html.parser") @@ -432,7 +444,7 @@ def _iterate_openstreetmap_fr_index( extracts.append( OpenStreetMapExtract( id=f"{id_prefix}_{name}", - url=f"{OPENSTREETMAP_FR_EXTRACTS_INDEX}{directory_url}{link['href']}", + url=f"{OPENSTREETMAP_FR_EXTRACTS_INDEX_URL}{directory_url}{link['href']}", geometry=polygon, ) ) @@ -465,7 +477,7 @@ def _parse_polygon_file(polygon_url: str) -> Optional[MultiPolygon]: Empty if request returns 404 not found. """ result = requests.get( - f"{OPENSTREETMAP_FR_POLYGONS_INDEX}/{polygon_url}", + f"{OPENSTREETMAP_FR_POLYGONS_INDEX_URL}/{polygon_url}", headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"}, ) if result.status_code == 404: diff --git a/srai/loaders/osm_loaders/pbf_file_clipper.py b/srai/loaders/osm_loaders/pbf_file_clipper.py index 5e48a4eee..6a84b4519 100644 --- a/srai/loaders/osm_loaders/pbf_file_clipper.py +++ b/srai/loaders/osm_loaders/pbf_file_clipper.py @@ -83,7 +83,7 @@ def clip_pbf_file( if Path(final_osm_path).exists(): return Path(final_osm_path) - with tempfile.TemporaryDirectory() as tmp_dir_name: + with tempfile.TemporaryDirectory(dir=self.working_directory) as tmp_dir_name: tmp_dir_path = Path(tmp_dir_name) final_osm_path_alphanumeric_safe = ( diff --git a/tests/loaders/osm_loaders/filters/test_popular_tag_downloader.py b/tests/loaders/osm_loaders/filters/test_popular_tag_downloader.py index d082be963..d31e83a25 100644 --- a/tests/loaders/osm_loaders/filters/test_popular_tag_downloader.py +++ b/tests/loaders/osm_loaders/filters/test_popular_tag_downloader.py @@ -14,7 +14,7 @@ @pytest.fixture # type: ignore def popular_filter_api_data() -> Dict[str, Any]: """Load example taginfo API response data from file.""" - with (Path(__file__).parent / "popular_filter_example.json").open("rt") as f: + with (Path(__file__).parent / "popular_filter_example.json").open("rt", encoding="utf-8") as f: res: Dict[str, Any] = json.load(f) return res