Skip to content

Commit

Permalink
raise not found error when downloading results with a model that does…
Browse files Browse the repository at this point in the history
… not exist on a dataset (#734)
  • Loading branch information
nankolena authored Dec 20, 2024
1 parent 5e3863c commit f9bd576
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 11 deletions.
29 changes: 22 additions & 7 deletions kolena/dataset/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from kolena.dataset.dataset import _to_deserialized_dataframe
from kolena.dataset.dataset import _to_serialized_dataframe
from kolena.errors import IncorrectUsageError
from kolena.errors import NotFoundError

EvalConfig = Optional[Dict[str, Any]]
"""
Expand Down Expand Up @@ -232,6 +233,14 @@ def download_results(
existing_dataset = _load_dataset_metadata(dataset)
assert existing_dataset

# validate that the model exists in the workspace
_get_model_id(model)

# validate that the model has results on the dataset
models = _get_models(dataset)
if model not in [model_entity.name for model_entity in models]:
raise NotFoundError(f"model '{model}' does not exist on dataset '{dataset}'")

id_fields = existing_dataset.id_fields

df = _fetch_results(dataset, model, commit, include_extracted_properties)
Expand Down Expand Up @@ -373,6 +382,18 @@ def upload_results(
_upload_results(dataset, model, results, thresholded_fields=thresholded_fields, tags=tags)


def _get_models(
    dataset: str,
) -> List[ModelEntity]:
    """
    Load the models associated with the named dataset.

    Internal counterpart of `get_models` that does not carry the
    `GET_MODELS_BY_DATASET` event decorator, so it can be reused by other
    functions in this module (e.g. the model check in `download_results`).

    :param dataset: The name of the dataset.
    :return: The `models` list of the parsed `LoadByDatasetResponse`.
    """
    dataset_metadata = _load_dataset_metadata(dataset)
    assert dataset_metadata, f"dataset {dataset} not found"

    # Look up models by the dataset's ID rather than by its name.
    payload = asdict(LoadByDatasetRequest(dataset_id=dataset_metadata.id))
    res = krequests.put(Path.LOAD_BY_DATASET, json=payload)
    krequests.raise_for_status(res)

    parsed = from_dict(LoadByDatasetResponse, res.json())
    return parsed.models


@with_event(EventAPI.Event.GET_MODELS_BY_DATASET)
def get_models(
dataset: str,
Expand All @@ -384,10 +405,4 @@ def get_models(
:return: A list of models tested on the given dataset.
"""
existing_dataset = _load_dataset_metadata(dataset)
assert existing_dataset, f"dataset {dataset} not found"

request = LoadByDatasetRequest(dataset_id=existing_dataset.id)
response = krequests.put(Path.LOAD_BY_DATASET, json=asdict(request))
krequests.raise_for_status(response)
return from_dict(LoadByDatasetResponse, response.json()).models
return _get_models(dataset)
24 changes: 20 additions & 4 deletions tests/integration/dataset/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,16 +544,32 @@ def test__download_results__dataset_does_not_exist() -> None:
assert "does not exist" in exc_info_value


def test__download_results__model_does_not_exist() -> None:
dataset_name = with_test_prefix(f"{__file__}::test__download_results__model_does_not_exist")
def test__download_results__model_does_not_exist_in_dataset() -> None:
dataset_1_name = with_test_prefix(f"{__file__}::test__download_results__model_does_not_exist_1")
dataset_2_name = with_test_prefix(f"{__file__}::test__download_results__model_does_not_exist_2")
model_name = with_test_prefix(f"{__file__}::test__download_results__model_does_not_exist")
df_dp = get_df_dp()
upload_dataset(dataset_name, df_dp, id_fields=ID_FIELDS)
upload_dataset(dataset_1_name, df_dp, id_fields=ID_FIELDS)
upload_dataset(dataset_2_name, df_dp, id_fields=ID_FIELDS)

# if the model does not exist in the workspace
with pytest.raises(NotFoundError) as exc_info:
download_results(dataset_name, model_name)
download_results(dataset_1_name, model_name)
exc_info_value = str(exc_info.value)
assert "no such model" in exc_info_value

# the model only has results on dataset_1, so downloading its results from dataset_2 raises a not found error
df_result = get_df_result()
_upload_results(
dataset_1_name,
model_name,
df_result,
)
with pytest.raises(NotFoundError) as exc_info:
download_results(dataset_2_name, model_name)
exc_info_value = str(exc_info.value)
assert "does not exist on dataset" in exc_info_value


def test__download_results__reset_dataset() -> None:
dataset_name = with_test_prefix(f"{__file__}::test__download_results__reset_dataset")
Expand Down

0 comments on commit f9bd576

Please sign in to comment.