diff --git a/dowhy/gcm/anomaly_scorers.py b/dowhy/gcm/anomaly_scorers.py
index c5c41f35d8..9a4735fa02 100644
--- a/dowhy/gcm/anomaly_scorers.py
+++ b/dowhy/gcm/anomaly_scorers.py
@@ -43,13 +43,13 @@ def fit(self, X: np.ndarray) -> None:
         if (X.ndim == 2 and X.shape[1] > 1) or X.ndim > 2:
             raise ValueError("The MedianCDFQuantileScorer currently only supports one-dimensional data!")
 
-        self._distribution_samples = X.reshape(-1)
+        self._distribution_samples = X.reshape(-1).astype(float)
 
     def score(self, X: np.ndarray) -> np.ndarray:
         if self._distribution_samples is None:
             raise ValueError("Scorer has not been fitted!")
 
-        X = shape_into_2d(X)
+        X = shape_into_2d(X.astype(float))
 
         equal_samples = np.sum(np.isclose(X, self._distribution_samples, rtol=0, atol=0, equal_nan=True), axis=1)
         greater_samples = np.sum(X > self._distribution_samples, axis=1) + equal_samples / 2
diff --git a/tests/gcm/test_anomaly_scorers.py b/tests/gcm/test_anomaly_scorers.py
index c7f45e01fc..e28fc42c90 100644
--- a/tests/gcm/test_anomaly_scorers.py
+++ b/tests/gcm/test_anomaly_scorers.py
@@ -29,3 +29,14 @@ def test_given_data_with_nans_when_using_median_quantile_scorer_with_nan_support
     assert scorer.score(np.array([1, 4, 8, np.nan])) == approx(
         [-np.log(2 * 0.5 / 10), -np.log(2 * 3.5 / 10), -np.log(2 * 0.5 / 10), -np.log(2 * 1 / 10)]
     )
+
+
+def test_given_numpy_arrays_with_object_type_when_using_median_quantile_scorer_then_does_not_raise_error():
+    training_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, np.nan, np.nan], dtype=object)
+
+    scorer = RescaledMedianCDFQuantileScorer()
+    scorer.fit(training_data)
+
+    assert scorer.score(np.array([1, 4, 8, np.nan], dtype=object)) == approx(
+        [-np.log(2 * 0.5 / 10), -np.log(2 * 3.5 / 10), -np.log(2 * 0.5 / 10), -np.log(2 * 1 / 10)]
+    )