From 053b49aa98467f12c1fcd9300e817c26b7ccfd5a Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 6 Jan 2022 17:01:42 +0200
Subject: [PATCH] Signal to encode predictions as probabilities now works
In a previous iteration the signal was stored in a numpy file, but now it is
serialized to JSON, which means that results.probabilities is simply a
string ("predictions") if imputation is required.
---
frameworks/TPOT/__init__.py | 15 +++++++--------
frameworks/hyperoptsklearn/__init__.py | 15 +++++++--------
frameworks/oboe/__init__.py | 15 +++++++--------
3 files changed, 21 insertions(+), 24 deletions(-)
diff --git a/frameworks/TPOT/__init__.py b/frameworks/TPOT/__init__.py
index 959c7072e..9828c6473 100644
--- a/frameworks/TPOT/__init__.py
+++ b/frameworks/TPOT/__init__.py
@@ -25,14 +25,13 @@ def run(dataset: Dataset, config: TaskConfig):
)
def process_results(results):
- if results.probabilities is not None and not results.probabilities.shape: # numpy load always return an array
- prob_format = results.probabilities.item()
- if prob_format == "predictions":
- target_values_enc = dataset.target.label_encoder.transform(dataset.target.values)
- results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions)
- else:
- raise ValueError(f"Unknown probabilities format: {prob_format}")
- return results
+ if isinstance(results.probabilities, str) and results.probabilities == "predictions":
+ target_values_enc = dataset.target.label_encoder.transform(dataset.target.values)
+ results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions)
+ is_numpy_like = hasattr(results.probabilities, "shape") and results.probabilities.shape
+ if results.probabilities is None or is_numpy_like:
+ return results
+ raise ValueError(f"Unknown probabilities format: {results.probabilities}")
return run_in_venv(__file__, "exec.py",
input_data=data, dataset=dataset, config=config,
diff --git a/frameworks/hyperoptsklearn/__init__.py b/frameworks/hyperoptsklearn/__init__.py
index 97ecd49b2..7edcb225b 100644
--- a/frameworks/hyperoptsklearn/__init__.py
+++ b/frameworks/hyperoptsklearn/__init__.py
@@ -25,14 +25,13 @@ def run(dataset: Dataset, config: TaskConfig):
)
def process_results(results):
- if results.probabilities is not None and not results.probabilities.shape: # numpy load always return an array
- prob_format = results.probabilities.item()
- if prob_format == "predictions":
- target_values_enc = dataset.target.label_encoder.transform(dataset.target.values)
- results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions)
- else:
- raise ValueError(f"Unknown probabilities format: {prob_format}")
- return results
+ if isinstance(results.probabilities, str) and results.probabilities == "predictions":
+ target_values_enc = dataset.target.label_encoder.transform(dataset.target.values)
+ results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions)
+ is_numpy_like = hasattr(results.probabilities, "shape") and results.probabilities.shape
+ if results.probabilities is None or is_numpy_like:
+ return results
+ raise ValueError(f"Unknown probabilities format: {results.probabilities}")
return run_in_venv(__file__, "exec.py",
input_data=data, dataset=dataset, config=config,
diff --git a/frameworks/oboe/__init__.py b/frameworks/oboe/__init__.py
index d21694d78..3e6f9bfa8 100644
--- a/frameworks/oboe/__init__.py
+++ b/frameworks/oboe/__init__.py
@@ -25,14 +25,13 @@ def run(dataset: Dataset, config: TaskConfig):
)
def process_results(results):
- if results.probabilities is not None and not results.probabilities.shape: # numpy load always return an array
- prob_format = results.probabilities.item()
- if prob_format == "predictions":
- target_values_enc = dataset.target.label_encoder.transform(dataset.target.values)
- results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions)
- else:
- raise ValueError(f"Unknown probabilities format: {prob_format}")
- return results
+ if isinstance(results.probabilities, str) and results.probabilities == "predictions":
+ target_values_enc = dataset.target.label_encoder.transform(dataset.target.values)
+ results.probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(results.predictions)
+ is_numpy_like = hasattr(results.probabilities, "shape") and results.probabilities.shape
+ if results.probabilities is None or is_numpy_like:
+ return results
+ raise ValueError(f"Unknown probabilities format: {results.probabilities}")
return run_in_venv(__file__, "exec.py",
input_data=data, dataset=dataset, config=config,