From 053b49aa98467f12c1fcd9300e817c26b7ccfd5a Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 6 Jan 2022 17:01:42 +0200 Subject: [PATCH] Signal to encode predictions as proba now works In a previous iteration it was encoded as a numpy file, but now it's serialized to JSON which means that results.probabilities is simply a string if imputation is required. --- frameworks/TPOT/__init__.py | 15 +++++++-------- frameworks/hyperoptsklearn/__init__.py | 15 +++++++-------- frameworks/oboe/__init__.py | 15 +++++++-------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/frameworks/TPOT/__init__.py b/frameworks/TPOT/__init__.py index 959c7072e..9828c6473 100644 --- a/frameworks/TPOT/__init__.py +++ b/frameworks/TPOT/__init__.py @@ -25,14 +25,13 @@ def run(dataset: Dataset, config: TaskConfig): ) def process_results(results): - if results.probabilities is not None and not results.probabilities.shape: # numpy load always return an array - prob_format = results.probabilities.item() - if prob_format == "predictions": - target_values_enc = dataset.target.label_encoder.transform(dataset.target.values) - results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions) - else: - raise ValueError(f"Unknown probabilities format: {prob_format}") - return results + if isinstance(results.probabilities, str) and results.probabilities == "predictions": + target_values_enc = dataset.target.label_encoder.transform(dataset.target.values) + results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions) + is_numpy_like = hasattr(results.probabilities, "shape") and results.probabilities.shape + if results.probabilities is None or is_numpy_like: + return results + raise ValueError(f"Unknown probabilities format: {results.probabilities}") return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config, diff --git a/frameworks/hyperoptsklearn/__init__.py b/frameworks/hyperoptsklearn/__init__.py index 97ecd49b2..7edcb225b 100644 --- a/frameworks/hyperoptsklearn/__init__.py +++ b/frameworks/hyperoptsklearn/__init__.py @@ -25,14 +25,13 @@ def run(dataset: Dataset, config: TaskConfig): ) def process_results(results): - if results.probabilities is not None and not results.probabilities.shape: # numpy load always return an array - prob_format = results.probabilities.item() - if prob_format == "predictions": - target_values_enc = dataset.target.label_encoder.transform(dataset.target.values) - results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions) - else: - raise ValueError(f"Unknown probabilities format: {prob_format}") - return results + if isinstance(results.probabilities, str) and results.probabilities == "predictions": + target_values_enc = dataset.target.label_encoder.transform(dataset.target.values) + results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions) + is_numpy_like = hasattr(results.probabilities, "shape") and results.probabilities.shape + if results.probabilities is None or is_numpy_like: + return results + raise ValueError(f"Unknown probabilities format: {results.probabilities}") return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config, diff --git a/frameworks/oboe/__init__.py b/frameworks/oboe/__init__.py index d21694d78..3e6f9bfa8 100644 --- a/frameworks/oboe/__init__.py +++ b/frameworks/oboe/__init__.py @@ -25,14 +25,13 @@ def run(dataset: Dataset, config: TaskConfig): ) def process_results(results): - if results.probabilities is not None and not results.probabilities.shape: # numpy load always return an array - prob_format = results.probabilities.item() - if prob_format == "predictions": - target_values_enc = dataset.target.label_encoder.transform(dataset.target.values) - results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions) - else: - raise ValueError(f"Unknown probabilities format: {prob_format}") - return results + if isinstance(results.probabilities, str) and results.probabilities == "predictions": + target_values_enc = dataset.target.label_encoder.transform(dataset.target.values) + results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions) + is_numpy_like = hasattr(results.probabilities, "shape") and results.probabilities.shape + if results.probabilities is None or is_numpy_like: + return results + raise ValueError(f"Unknown probabilities format: {results.probabilities}") return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config,