Skip to content

Commit

Permalink
Fix orchestration tests. Update datetime conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasMeissnerDS committed Oct 28, 2024
1 parent 15e7863 commit 808084d
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 98 deletions.
2 changes: 1 addition & 1 deletion bluecast/blueprints/orchestration.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def predict(self, y_preds: List[np.ndarray]) -> np.ndarray:
:param y_preds : List of arrays containing model predictions.
"""
if self.weights is None:
if len(self.weights) == 0:
raise ValueError("Model weights have not been optimized. Call `fit` first.")

weighted_pred = np.average(np.array(y_preds), axis=0, weights=self.weights)
Expand Down
15 changes: 9 additions & 6 deletions bluecast/preprocessing/datetime_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(
if date_parts is None:
self.date_parts = [
"year",
"dayofyear",
"week_of_year",
"month",
"day",
Expand All @@ -37,18 +38,16 @@ def __init__(
else:
self.date_parts = date_parts
self.date_part_periods = {
"year": 1,
"dayofyear": 365,
"month": 12,
"week_of_year": 52,
"day": 31,
"dayofweek": 7,
"hour": 24,
}
self.included_date_parts: Dict[Union[str, int, float], List[str]] = (
{}
) # To store date parts included for each date column
self.included_cyclic_features: Dict[Union[str, int, float], List[str]] = (
{}
) # To store cyclic features included for each date column
self.included_date_parts: Dict[Union[str, int, float], List[str]] = {}
self.included_cyclic_features: Dict[Union[str, int, float], List[str]] = {}

def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -80,6 +79,8 @@ def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
date_part_values = df[c].dt.day.astype(float)
elif date_part == "dayofweek":
date_part_values = df[c].dt.dayofweek.astype(float)
elif date_part == "dayofyear":
date_part_values = df[c].dt.dayofyear.astype(float)
elif date_part == "hour":
date_part_values = df[c].dt.hour.astype(float)
else:
Expand Down Expand Up @@ -138,6 +139,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
date_part_values = df[c].dt.day.astype(float)
elif date_part == "dayofweek":
date_part_values = df[c].dt.dayofweek.astype(float)
elif date_part == "dayofyear":
date_part_values = df[c].dt.dayofyear.astype(float)
elif date_part == "hour":
date_part_values = df[c].dt.hour.astype(float)
else:
Expand Down
157 changes: 66 additions & 91 deletions bluecast/tests/test_orchestration.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock

import numpy as np
import optuna
import pandas as pd
import pytest
from optuna import Trial
from optuna.pruners import HyperbandPruner
from optuna.samplers import CmaEsSampler
from optuna.trial import create_trial
from sklearn.metrics import roc_auc_score

from bluecast.blueprints.cast import BlueCast
from bluecast.blueprints.cast_cv import BlueCastCV
Expand Down Expand Up @@ -107,110 +106,86 @@ def test_find_best_match_with_all_models(setup_model_match_maker, mocker):
assert dataset.equals(mm.training_datasets[1])


# Mock Trial for testing _objective method
# Mock data for testing
@pytest.fixture
def mock_trial():
trial = MagicMock(spec=Trial)
trial.suggest_float.side_effect = lambda name, low, high: 0.5
return trial


@pytest.fixture
def sample_data():
# Sample true labels and predictions for testing
y_true = np.array([1, 0, 1, 0, 1])
y_preds = [
np.array([0.6, 0.4, 0.7, 0.2, 0.9]),
np.array([0.5, 0.3, 0.8, 0.1, 0.85]),
]
def setup_data():
np.random.seed(0)
y_true = np.random.randint(0, 2, 100) # Binary target
y_preds = [np.random.rand(100) for _ in range(3)] # Predictions from 3 models
return y_true, y_preds


def test_optuna_weights_initialization():
model = OptunaWeights(random_state=42, n_trials=100)
assert model.random_state == 42
assert model.n_trials == 100
assert model.optimize_direction == "maximize"
assert model.weights == []


def test_objective_method(mock_trial, sample_data):
model = OptunaWeights(random_state=42)
y_true, y_preds = sample_data

auc_score = model._objective(mock_trial, y_true, y_preds)

# Check if suggest_float was called correctly
assert mock_trial.suggest_float.call_count == len(y_preds) - 1
assert 0 <= auc_score <= 1, "AUC score should be in the range [0, 1]"

@pytest.fixture
def optuna_weights_instance():
    """Provide an ``OptunaWeights`` built with ``random_state=42`` and ``n_trials=10``."""
    ensemble = OptunaWeights(random_state=42, n_trials=10)
    return ensemble

def test_fit_method(sample_data):
y_true, y_preds = sample_data
model = OptunaWeights(random_state=42, n_trials=10)

with patch.object(CmaEsSampler, "__init__", lambda self, seed: None):
with patch.object(HyperbandPruner, "__init__", lambda self: None):
model.fit(y_true, y_preds)
# Test initialization
def test_init(optuna_weights_instance):
    """Check the attribute values of the fixture-provided instance."""
    expected_attributes = {
        "random_state": 42,
        "n_trials": 10,
        "objective": roc_auc_score,
        "optimize_direction": "maximize",
        "weights": [],
    }
    for attribute, expected in expected_attributes.items():
        assert getattr(optuna_weights_instance, attribute) == expected

assert model.study is not None, "Study should be created during fit"
assert len(model.weights) == len(
y_preds
), "Weights should be calculated for each model"
assert abs(sum(model.weights) - 1.0) < 1e-6, "Weights should sum to 1"

# Test fit method with valid data
def test_fit(setup_data, optuna_weights_instance):
    """Fit on valid data and check the resulting ensemble weights.

    After ``fit`` there must be one weight per model prediction array, and the
    weights must sum to 1.
    """
    y_true, y_preds = setup_data
    optuna_weights_instance.fit(y_true, y_preds)
    # `weights` starts out as an empty list, so an `is not None` check can
    # never fail; compare its length against the number of models instead.
    assert len(optuna_weights_instance.weights) == len(
        y_preds
    ), "Weights should be calculated for each model"
    assert np.isclose(
        sum(optuna_weights_instance.weights), 1.0
    ), "Weights should sum to 1."

def test_fit_with_single_prediction(sample_data):
y_true, y_preds = sample_data
model = OptunaWeights(random_state=42, n_trials=10)

# Test fit method raises error with single prediction
def test_fit_single_prediction(setup_data, optuna_weights_instance):
y_true, y_preds = setup_data
with pytest.raises(
ValueError, match="`y_preds` must contain predictions from at least two models."
ValueError, match="`y_preds` must contain predictions from at least two models"
):
model.fit(y_true, [y_preds[0]]) # Only one prediction
optuna_weights_instance.fit(y_true, [y_preds[0]])


def test_predict_without_fitting(sample_data):
y_true, y_preds = sample_data
model = OptunaWeights(random_state=42, n_trials=10)

# Test predict method without calling fit first
def test_predict_without_fit(setup_data, optuna_weights_instance):
_, y_preds = setup_data
with pytest.raises(
ValueError, match="Model weights have not been optimized. Call `fit` first."
):
model.predict(y_preds)

optuna_weights_instance.predict(y_preds)

def test_predict_after_fitting(sample_data):
y_true, y_preds = sample_data
model = OptunaWeights(random_state=42, n_trials=10)

with patch.object(CmaEsSampler, "__init__", lambda self, seed: None):
with patch.object(HyperbandPruner, "__init__", lambda self: None):
model.fit(y_true, y_preds)

prediction = model.predict(y_preds)
assert isinstance(prediction, np.ndarray), "Prediction should be a numpy array"
# Test predict method with optimized weights
def test_predict(setup_data, optuna_weights_instance):
y_true, y_preds = setup_data
optuna_weights_instance.fit(y_true, y_preds)
weighted_pred = optuna_weights_instance.predict(y_preds)
assert (
prediction.shape == y_true.shape
), "Prediction shape should match y_true shape"
assert (0 <= prediction).all() and (
prediction <= 1
).all(), "Predictions should be probabilities"


def test_study_direction_maximize(sample_data):
y_true, y_preds = sample_data
model = OptunaWeights(random_state=42, n_trials=10, optimize_direction="maximize")
with patch.object(CmaEsSampler, "__init__", lambda self, seed: None):
with patch.object(HyperbandPruner, "__init__", lambda self: None):
model.fit(y_true, y_preds)
assert model.study.direction == optuna.study.StudyDirection.MAXIMIZE


def test_study_direction_minimize(sample_data):
y_true, y_preds = sample_data
model = OptunaWeights(random_state=42, n_trials=10, optimize_direction="minimize")
with patch.object(CmaEsSampler, "__init__", lambda self, seed: None):
with patch.object(HyperbandPruner, "__init__", lambda self: None):
model.fit(y_true, y_preds)
assert model.study.direction == optuna.study.StudyDirection.MINIMIZE
weighted_pred.shape == y_true.shape
), "Prediction output should match shape of y_true"
assert (0 <= weighted_pred).all() and (
weighted_pred <= 1
).all(), "Predictions should be probabilities between 0 and 1"


# Test objective function
def test_objective_function(setup_data, optuna_weights_instance):
    """Check that ``_objective`` returns a float for a trial with preset weights."""
    y_true, y_preds = setup_data

    # Trial with predefined parameter values, one weight per prediction array.
    preset_weights = {"weight0": 0.5, "weight1": 0.3, "weight2": 0.2}
    weight_distributions = {
        name: optuna.distributions.FloatDistribution(0, 1) for name in preset_weights
    }
    trial = create_trial(
        params=preset_weights,
        distributions=weight_distributions,
        value=0.0,
    )

    score = optuna_weights_instance._objective(trial, y_true, y_preds)
    assert isinstance(
        score, float
    ), "Objective function should return a score as a float."
Binary file modified dist/bluecast-1.6.3-py3-none-any.whl
Binary file not shown.
Binary file modified dist/bluecast-1.6.3.tar.gz
Binary file not shown.

0 comments on commit 808084d

Please sign in to comment.