fix: 🐛 don't check if dataset is supported when we know it is (#720)
* fix: 🐛 don't check if dataset is supported when we know it is

* feat: 🎸 ensure the supported datasets are not private

* ci: 🎡 ignore codecov upload errors (#721)

* Update libs/libcommon/src/libcommon/dataset.py

Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>

* refactor: 💡 don't use double negations :P

* style: 💄 please black

---------

Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
severo and albertvillanova authored Jan 30, 2023
1 parent 4414d57 commit 2f38593
Showing 4 changed files with 7 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/_unit-tests-python.yml
@@ -59,5 +59,5 @@ jobs:
         with:
           working-directory: ${{ inputs.working-directory }}
           files: ./coverage.xml
-          fail_ci_if_error: true
+          fail_ci_if_error: false
           flags: ${{ env.codecov_flag }}
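With fail_ci_if_error set to false, the codecov-action step logs a failed coverage upload instead of failing the whole CI job, which is the "ignore codecov upload errors" change tracked in #721.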
2 changes: 1 addition & 1 deletion libs/libcommon/src/libcommon/dataset.py
@@ -216,4 +216,4 @@ def check_support(
 
 
 def get_supported_datasets(hf_endpoint: str, hf_token: Optional[str] = None) -> list[str]:
-    return [d.id for d in HfApi(endpoint=hf_endpoint, token=hf_token).list_datasets() if d.id is not None]
+    return [d.id for d in HfApi(endpoint=hf_endpoint, token=hf_token).list_datasets() if d.id and not d.private]
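For illustration, a minimal runnable sketch of the new predicate. FakeDatasetInfo is a hypothetical stand-in for huggingface_hub's DatasetInfo, which exposes the id and private attributes used above:

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeDatasetInfo:
    # Hypothetical stand-in for huggingface_hub.hf_api.DatasetInfo.
    id: Optional[str]
    private: bool

infos = [
    FakeDatasetInfo(id="user/public-dataset", private=False),   # kept
    FakeDatasetInfo(id="user/private-dataset", private=True),   # dropped: private
    FakeDatasetInfo(id=None, private=False),                    # dropped: no id
]

# Same predicate as the new list comprehension: keep truthy ids that are not private.
assert [d.id for d in infos if d.id and not d.private] == ["user/public-dataset"]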
5 changes: 4 additions & 1 deletion libs/libcommon/src/libcommon/operations.py
@@ -26,6 +26,7 @@ def update_dataset(
     hf_token: Optional[str] = None,
     force: bool = False,
     priority: Priority = Priority.NORMAL,
+    do_check_support: bool = True,
 ) -> None:
     """
     Update a dataset
@@ -37,13 +38,15 @@
         hf_token (Optional[str], optional): The HF token. Defaults to None.
         force (bool, optional): Force the update. Defaults to False.
         priority (Priority, optional): The priority of the job. Defaults to Priority.NORMAL.
+        do_check_support (bool, optional): Check if the dataset is supported. Defaults to True.
 
     Returns: None.
 
     Raises:
         - [`~libcommon.dataset.DatasetError`]: if the dataset could not be accessed or is not supported
     """
-    check_support(dataset=dataset, hf_endpoint=hf_endpoint, hf_token=hf_token)
+    if do_check_support:
+        check_support(dataset=dataset, hf_endpoint=hf_endpoint, hf_token=hf_token)
     logging.debug(f"refresh dataset='{dataset}'")
     for init_processing_step in init_processing_steps:
         if init_processing_step.input_type == "dataset":
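A trimmed, runnable sketch of the new gating. check_support is stubbed here, the dataset name is made up, and the real update_dataset also takes init_processing_steps, force and priority:

import logging
from typing import Optional

def check_support(dataset: str, hf_endpoint: str, hf_token: Optional[str] = None) -> None:
    """Stub: the real function raises DatasetError if the dataset is inaccessible or unsupported."""

def update_dataset(
    dataset: str,
    hf_endpoint: str,
    hf_token: Optional[str] = None,
    do_check_support: bool = True,
) -> None:
    if do_check_support:
        check_support(dataset=dataset, hf_endpoint=hf_endpoint, hf_token=hf_token)
    logging.debug(f"refresh dataset='{dataset}'")

# Callers that already trust their dataset list can skip the redundant Hub round-trip:
update_dataset("user/some-dataset", hf_endpoint="https://huggingface.co", do_check_support=False)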
1 change: 1 addition & 0 deletions services/admin/src/admin/routes/backfill.py
@@ -42,6 +42,7 @@ async def backfill_endpoint(request: Request) -> Response:
                 hf_token=hf_token,
                 force=False,
                 priority=Priority.LOW,
+                do_check_support=False,
             )
         # ^ we simply ask an update for all the datasets on the Hub, supported by the datasets-server
         # we could be more precise and only ask for updates for the datasets that have some missing
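Putting the pieces together, a sketch of the backfill flow after this commit. The hf_endpoint, hf_token and init_processing_steps values are placeholders (the real endpoint derives them from the app configuration and processing graph), and the import paths are assumptions based on this repository's layout:

from libcommon.dataset import get_supported_datasets
from libcommon.operations import update_dataset
from libcommon.queue import Priority  # assumed location of the Priority enum

hf_endpoint = "https://huggingface.co"  # placeholder; read from configuration in the service
hf_token = None
init_processing_steps = []  # placeholder; the service derives these from the processing graph

# get_supported_datasets() now returns only public datasets with a non-empty id,
# so re-running check_support() once per dataset would repeat the same validation
# (and cost one Hub call per dataset). Hence do_check_support=False.
for dataset in get_supported_datasets(hf_endpoint=hf_endpoint, hf_token=hf_token):
    update_dataset(
        dataset=dataset,
        init_processing_steps=init_processing_steps,
        hf_endpoint=hf_endpoint,
        hf_token=hf_token,
        force=False,
        priority=Priority.LOW,
        do_check_support=False,
    )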
