Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable pytest failures on FutureWarnings (Replace deprecated geopandas.datasets module) #1360

Merged
merged 10 commits into from
Apr 16, 2024
18 changes: 12 additions & 6 deletions python/cuspatial/benchmarks/api/bench_api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
import pathlib

import cupy
import geopandas
import pytest
Expand Down Expand Up @@ -223,12 +225,16 @@ def bench_quadtree_point_to_nearest_linestring(benchmark):
SCALE = 3
MAX_DEPTH = 7
MIN_SIZE = 125
host_countries = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres")
)
host_cities = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_cities")
data_dir = (
pathlib.Path(__file__).parent.parent.parent
/ "cuspatial"
/ "tests"
/ "data"
)
naturalearth_lowres = data_dir / "naturalearth_lowres.shp"
naturalearth_cities = data_dir / "naturalearth_cities.shp"
host_countries = geopandas.read_file(naturalearth_lowres)
host_cities = geopandas.read_file(naturalearth_cities)
gpu_countries = cuspatial.from_geopandas(
host_countries[host_countries["geometry"].type == "Polygon"]
)
Expand Down
16 changes: 11 additions & 5 deletions python/cuspatial/cuspatial/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
import pathlib

import geopandas as gpd
import numpy as np
Expand Down Expand Up @@ -308,13 +309,18 @@ def factory(length):


@pytest.fixture
def naturalearth_cities():
return gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
def data_dir():
return pathlib.Path(__file__).parent / "data"


@pytest.fixture
def naturalearth_lowres():
return gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
def naturalearth_cities(data_dir):
return gpd.read_file(data_dir / "naturalearth_cities.shp")


@pytest.fixture
def naturalearth_lowres(data_dir):
return gpd.read_file(data_dir / "naturalearth_lowres.shp")


@pytest.fixture(scope="session")
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 5 additions & 5 deletions python/cuspatial/cuspatial/tests/test_geocolumn_accessor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 NVIDIA Corporation
# Copyright (c) 2022-2024, NVIDIA CORPORATION

import cupy as cp
import geopandas as gpd
Expand All @@ -18,10 +18,10 @@
"range, expected",
[[slice(0, 3), [0, 3, 4, 5]], [slice(3, 6), [0, 30, 40, 41]]],
)
def test_GeoColumnAccessor_polygon_offset(range, expected):
gpdf = cuspatial.from_geopandas(
gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
)
def test_GeoColumnAccessor_polygon_offset(
range, expected, naturalearth_lowres
):
gpdf = cuspatial.from_geopandas(naturalearth_lowres)
shorter = gpdf[range]["geometry"]
expected = cp.array(expected)
got = shorter.polygons.geometry_offset
Expand Down
15 changes: 6 additions & 9 deletions python/cuspatial/cuspatial/tests/test_geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,9 @@ def test_interleaved_polygons(gpdf, polys):
)


def test_to_geopandas_with_geopandas_dataset():
df = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
gdf = cuspatial.from_geopandas(df)
assert_eq_geo_df(df, gdf.to_geopandas())
def test_to_geopandas_with_geopandas_dataset(naturalearth_lowres):
gdf = cuspatial.from_geopandas(naturalearth_lowres)
assert_eq_geo_df(naturalearth_lowres, gdf.to_geopandas())


def test_to_shapely_random():
Expand Down Expand Up @@ -327,14 +326,12 @@ def test_boolmask(gpdf, df_boolmask):
reason="Size discrepancies between Python versions. See "
"https://github.com/rapidsai/cuspatial/issues/1352",
)
def test_memory_usage(gs):
def test_memory_usage(gs, data_dir):
assert gs.memory_usage() == 224
host_dataframe = gpd.read_file(
gpd.datasets.get_path("naturalearth_lowres")
)
host_dataframe = gpd.read_file(data_dir / "naturalearth_lowres.shp")
gpu_dataframe = cuspatial.from_geopandas(host_dataframe)
# The df size is 8kb of cudf rows and 217kb of the geometry column
assert gpu_dataframe.memory_usage().sum() == 224945
assert gpu_dataframe.memory_usage().sum() == 216793
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is now essentially the same as test_memory_usage_large below. Is it OK to deduplicate?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the same size because there are no geoseries in the geodataframe other than "geometry", right? The logic is different.

Copy link
Contributor Author

@mroeschke mroeschke Mar 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, you're correct. The frame returned by gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) also has pop_est, continent, name, iso_a3, and gdp_md_est columns. Is it OK if this test no longer covers those columns?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know. @isVoid @thomcom do you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does change the logic: previously, the test was also validating that the cudf series are read by from_geopandas. I don't think another test verifies this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, coming back to this now.

> cudf series

Did you mean geopandas series? It looks like that may be tested in python/cuspatial/cuspatial/tests/test_from_geopandas.py, and I don't see cuspatial.from_geopandas supporting a cudf.Series.



def test_from_dict():
Expand Down
7 changes: 2 additions & 5 deletions python/cuspatial/cuspatial/tests/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,11 +551,8 @@ def test_memory_usage_simple(gs):
assert cugs.memory_usage() == 1616


def test_memory_usage_large():
host_dataframe = gpd.read_file(
gpd.datasets.get_path("naturalearth_lowres")
)
geometry = cuspatial.from_geopandas(host_dataframe)["geometry"]
def test_memory_usage_large(naturalearth_lowres):
geometry = cuspatial.from_geopandas(naturalearth_lowres)["geometry"]
# the geometry column from naturalearth_lowres is 217kb of coordinates
assert geometry.memory_usage() == 216793

Expand Down
6 changes: 5 additions & 1 deletion python/cuspatial/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,5 +124,9 @@ regex = "(?P<value>.*)"
[tool.pytest.ini_options]
xfail_strict = true
filterwarnings = [
"error:::cudf"
"error:::cudf",
"error::FutureWarning",
"error::DeprecationWarning",
# https://github.com/pytest-dev/pytest-cov/issues/557
"ignore:The --rsyncdir command line argument:DeprecationWarning",
]
Loading