From 1bbf3f58144762475b9335e6f1d882b5c9482fa2 Mon Sep 17 00:00:00 2001 From: MoooCat <141886018+MooooCat@users.noreply.github.com> Date: Sat, 20 Jan 2024 19:01:26 +0800 Subject: [PATCH] [Bugfix] Temporarily modify single table demo data link (#121) * change demo data link * Fix col names --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- sdgx/utils.py | 10 ++++++---- tests/data_models/inspector/test_discrete.py | 4 ++-- tests/data_models/inspector/test_numeric.py | 2 +- tests/dataloader/test_dataloader.py | 12 ++++++------ 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/sdgx/utils.py b/sdgx/utils.py index e1394df3..4b546a25 100644 --- a/sdgx/utils.py +++ b/sdgx/utils.py @@ -65,8 +65,10 @@ def download_demo_data(data_dir: str | Path = "./dataset") -> Path: # Download from datahub demo_data_path.parent.mkdir(parents=True, exist_ok=True) - logger.info("Downloading demo data from datahub.io to {}".format(demo_data_path)) - url = "https://datahub.io/machine-learning/adult/r/adult.csv" + logger.info("Downloading demo data from github data source to {}".format(demo_data_path)) + url = ( + "https://raw.githubusercontent.com/saravrajavelu/Adult-Income-Analysis/master/adult.csv" + ) urllib.request.urlretrieve(url, demo_data_path) return demo_data_path @@ -93,9 +95,9 @@ def get_demo_single_table(data_dir: str | Path = "./dataset"): "occupation", "relationship", "race", - "sex", + "gender", "native-country", - "class", + "income", ] return pd_obj, discrete_cols diff --git a/tests/data_models/inspector/test_discrete.py b/tests/data_models/inspector/test_discrete.py index 32d25a93..8e5ab5cb 100644 --- a/tests/data_models/inspector/test_discrete.py +++ b/tests/data_models/inspector/test_discrete.py @@ -26,9 +26,9 @@ def test_inspector(inspector: DiscreteInspector, raw_data): "occupation", "relationship", "race", - "sex", + "gender", "native-country", - "class", + "income", ] ) diff --git a/tests/data_models/inspector/test_numeric.py b/tests/data_models/inspector/test_numeric.py index e6b4cfec..fdf8f725 100644 --- a/tests/data_models/inspector/test_numeric.py +++ b/tests/data_models/inspector/test_numeric.py @@ -19,7 +19,7 @@ def test_inspector(inspector: NumericInspector, raw_data): assert inspector.ready assert inspector.numeric_columns assert sorted(inspector.inspect()["numeric_columns"]) == sorted( - ["education-num", "fnlwgt", "hoursperweek", "age", "capitalgain", "capitalloss"] + ["educational-num", "fnlwgt", "hours-per-week", "age", "capital-gain", "capital-loss"] ) diff --git a/tests/dataloader/test_dataloader.py b/tests/dataloader/test_dataloader.py index d61ac29b..c239cc94 100644 --- a/tests/dataloader/test_dataloader.py +++ b/tests/dataloader/test_dataloader.py @@ -24,17 +24,17 @@ def test_demo_dataloader(dataloader_builder: DataLoader, cacher, demo_single_tab "workclass", "fnlwgt", "education", - "education-num", + "educational-num", "marital-status", "occupation", "relationship", "race", - "sex", - "capitalgain", - "capitalloss", - "hoursperweek", + "gender", + "capital-gain", + "capital-loss", + "hours-per-week", "native-country", - "class", + "income", ] ) )