Skip to content

Commit

Permalink
[Bugfix] Temporarily modify single table demo data link (#121)
Browse files: browse the repository at this point in the history.
* Change demo data link

* Fix column names

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
MooooCat and pre-commit-ci[bot] authored Jan 20, 2024
1 parent 9ac9988 commit 1bbf3f5
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 13 deletions.
10 changes: 6 additions & 4 deletions sdgx/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -65,8 +65,10 @@ def download_demo_data(data_dir: str | Path = "./dataset") -> Path:
 # Download from datahub
 demo_data_path.parent.mkdir(parents=True, exist_ok=True)

-logger.info("Downloading demo data from datahub.io to {}".format(demo_data_path))
-url = "https://datahub.io/machine-learning/adult/r/adult.csv"
+logger.info("Downloading demo data from github data source to {}".format(demo_data_path))
+url = (
+"https://raw.githubusercontent.com/saravrajavelu/Adult-Income-Analysis/master/adult.csv"
+)
 urllib.request.urlretrieve(url, demo_data_path)

 return demo_data_path
Expand All @@ -93,9 +95,9 @@ def get_demo_single_table(data_dir: str | Path = "./dataset"):
 "occupation",
 "relationship",
 "race",
-"sex",
+"gender",
 "native-country",
-"class",
+"income",
 ]
 return pd_obj, discrete_cols

Expand Down
4 changes: 2 additions & 2 deletions tests/data_models/inspector/test_discrete.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,9 +26,9 @@ def test_inspector(inspector: DiscreteInspector, raw_data):
 "occupation",
 "relationship",
 "race",
-"sex",
+"gender",
 "native-country",
-"class",
+"income",
 ]
 )

Expand Down
2 changes: 1 addition & 1 deletion tests/data_models/inspector/test_numeric.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_inspector(inspector: NumericInspector, raw_data):
 assert inspector.ready
 assert inspector.numeric_columns
 assert sorted(inspector.inspect()["numeric_columns"]) == sorted(
-["education-num", "fnlwgt", "hoursperweek", "age", "capitalgain", "capitalloss"]
+["educational-num", "fnlwgt", "hours-per-week", "age", "capital-gain", "capital-loss"]
 )


Expand Down
12 changes: 6 additions & 6 deletions tests/dataloader/test_dataloader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,17 +24,17 @@ def test_demo_dataloader(dataloader_builder: DataLoader, cacher, demo_single_tab
 "workclass",
 "fnlwgt",
 "education",
-"education-num",
+"educational-num",
 "marital-status",
 "occupation",
 "relationship",
 "race",
-"sex",
-"capitalgain",
-"capitalloss",
-"hoursperweek",
+"gender",
+"capital-gain",
+"capital-loss",
+"hours-per-week",
 "native-country",
-"class",
+"income",
 ]
 )
 )
Expand Down

0 comments on commit 1bbf3f5

Please sign in to comment.