From de95c0f0e04e98e96783dd0bd38b5d9f2d80c56a Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Mon, 17 Jun 2024 13:50:05 +0200
Subject: [PATCH 1/4] np.float_ to np.float64

---
 .../mol2any/mol2floatvector.py                | 30 ++++++-------
 molpipeline/estimators/chemprop/abstract.py   |  4 +-
 molpipeline/estimators/chemprop/models.py     | 16 +++----
 .../estimators/chemprop/neural_fingerprint.py | 16 +++----
 molpipeline/estimators/nearest_neighbor.py    |  6 +--
 .../estimators/similarity_transformation.py   | 44 +++++++++----------
 molpipeline/metrics/ignore_error_scorer.py    |  8 ++--
 .../mol2any/mol2concatinated_vector.py        | 18 ++++----
 molpipeline/mol2any/mol2net_charge.py         | 10 ++---
 molpipeline/mol2any/mol2rdkit_phys_chem.py    |  4 +-
 molpipeline/utils/kernel.py                   | 16 +++----
 .../test_estimators/test_nearest_neighbors.py |  4 +-
 .../test_similarity_transformation.py         |  4 +-
 13 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/molpipeline/abstract_pipeline_elements/mol2any/mol2floatvector.py b/molpipeline/abstract_pipeline_elements/mol2any/mol2floatvector.py
index 15984727..0c3db20c 100644
--- a/molpipeline/abstract_pipeline_elements/mol2any/mol2floatvector.py
+++ b/molpipeline/abstract_pipeline_elements/mol2any/mol2floatvector.py
@@ -68,18 +68,18 @@ def n_features(self) -> int:
 
     def assemble_output(
         self,
-        value_list: Iterable[npt.NDArray[np.float_]],
-    ) -> npt.NDArray[np.float_]:
+        value_list: Iterable[npt.NDArray[np.float64]],
+    ) -> npt.NDArray[np.float64]:
         """Transform output of all transform_single operations to matrix.
 
         Parameters
         ----------
-        value_list: Iterable[npt.NDArray[np.float_]]
+        value_list: Iterable[npt.NDArray[np.float64]]
             List of numpy arrays with calculated descriptor values of each molecule.
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Matrix with descriptor values of each molecule.
         """
         return np.vstack(list(value_list))
@@ -127,7 +127,7 @@ def set_params(self, **parameters: dict[str, Any]) -> Self:
         super().set_params(**parameter_copy)
         return self
 
-    def fit_to_result(self, values: list[npt.NDArray[np.float_]]) -> Self:
+    def fit_to_result(self, values: list[npt.NDArray[np.float64]]) -> Self:
         """Fit object to data.
 
         Parameters
@@ -146,25 +146,25 @@ def fit_to_result(self, values: list[npt.NDArray[np.float_]]) -> Self:
         return self
 
     def _normalize_matrix(
-        self, value_matrix: npt.NDArray[np.float_]
-    ) -> npt.NDArray[np.float_]:
+        self, value_matrix: npt.NDArray[np.float64]
+    ) -> npt.NDArray[np.float64]:
         """Normalize matrix with descriptor values.
 
         Parameters
         ----------
-        value_matrix: npt.NDArray[np.float_]
+        value_matrix: npt.NDArray[np.float64]
             Matrix with descriptor values of molecules.
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Normalized matrix with descriptor values of molecules.
         """
         if self._standardizer is not None:
             return self._standardizer.transform(value_matrix)
         return value_matrix
 
-    def transform(self, values: list[RDKitMol]) -> npt.NDArray[np.float_]:
+    def transform(self, values: list[RDKitMol]) -> npt.NDArray[np.float64]:
         """Transform the list of molecules to sparse matrix.
 
         Parameters
@@ -174,13 +174,13 @@ def transform(self, values: list[RDKitMol]) -> npt.NDArray[np.float_]:
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Matrix with descriptor values of molecules.
         """
-        descriptor_matrix: npt.NDArray[np.float_] = super().transform(values)
+        descriptor_matrix: npt.NDArray[np.float64] = super().transform(values)
         return descriptor_matrix
 
-    def finalize_single(self, value: npt.NDArray[np.float_]) -> npt.NDArray[np.float_]:
+    def finalize_single(self, value: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
         """Finalize single value. Here: standardize vector.
 
         Parameters
@@ -201,7 +201,7 @@ def finalize_single(self, value: npt.NDArray[np.float_]) -> npt.NDArray[np.float
     @abc.abstractmethod
     def pretransform_single(
         self, value: RDKitMol
-    ) -> Union[npt.NDArray[np.float_], InvalidInstance]:
+    ) -> Union[npt.NDArray[np.float64], InvalidInstance]:
         """Transform mol to dict, where items encode columns indices and values, respectively.
 
         Parameters
@@ -211,6 +211,6 @@ def pretransform_single(
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Vector with descriptor values of molecule.
         """
diff --git a/molpipeline/estimators/chemprop/abstract.py b/molpipeline/estimators/chemprop/abstract.py
index 32e74d18..4f6174a5 100644
--- a/molpipeline/estimators/chemprop/abstract.py
+++ b/molpipeline/estimators/chemprop/abstract.py
@@ -114,7 +114,7 @@ def _update_trainer(
     def fit(
         self,
         X: MoleculeDataset,  # pylint: disable=invalid-name
-        y: Sequence[int | float] | npt.NDArray[np.int_ | np.float_],
+        y: Sequence[int | float] | npt.NDArray[np.int_ | np.float64],
     ) -> Self:
         """Fit the model to the data.
 
@@ -122,7 +122,7 @@ def fit(
         ----------
         X : MoleculeDataset
             The input data.
-        y : Sequence[int | float] | npt.NDArray[np.int_ | np.float_]
+        y : Sequence[int | float] | npt.NDArray[np.int_ | np.float64]
             The target data.
 
         Returns
diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py
index 96e93cc0..e3257ae5 100644
--- a/molpipeline/estimators/chemprop/models.py
+++ b/molpipeline/estimators/chemprop/models.py
@@ -128,7 +128,7 @@ def _is_classifier(self) -> bool:
 
     def _predict(
         self, X: MoleculeDataset  # pylint: disable=invalid-name
-    ) -> npt.NDArray[np.float_]:
+    ) -> npt.NDArray[np.float64]:
         """Predict the labels.
 
         Parameters
@@ -138,7 +138,7 @@ def _predict(
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             The predictions for the input data.
         """
         self.model.eval()
@@ -164,7 +164,7 @@ def _predict(
     def fit(
         self,
         X: MoleculeDataset,
-        y: Sequence[int | float] | npt.NDArray[np.int_ | np.float_],
+        y: Sequence[int | float] | npt.NDArray[np.int_ | np.float64],
     ) -> Self:
         """Fit the model to the data.
 
@@ -172,7 +172,7 @@ def fit(
         ----------
         X : MoleculeDataset
             The input data.
-        y : Sequence[int | float] | npt.NDArray[np.int_ | np.float_]
+        y : Sequence[int | float] | npt.NDArray[np.int_ | np.float64]
             The target data.
 
         Returns
@@ -186,7 +186,7 @@ def fit(
 
     def predict(
         self, X: MoleculeDataset  # pylint: disable=invalid-name
-    ) -> npt.NDArray[np.float_]:
+    ) -> npt.NDArray[np.float64]:
         """Predict the output.
 
         Parameters
@@ -196,7 +196,7 @@ def predict(
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             The predictions for the input data.
         """
         predictions = self._predict(X)
@@ -213,7 +213,7 @@ def predict(
     @available_if(_is_classifier)
     def predict_proba(
         self, X: MoleculeDataset  # pylint: disable=invalid-name
-    ) -> npt.NDArray[np.float_]:
+    ) -> npt.NDArray[np.float64]:
         """Predict the probabilities.
 
         Parameters
@@ -223,7 +223,7 @@ def predict_proba(
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             The probabilities of the input data.
         """
         if self._is_binary_classifier():
diff --git a/molpipeline/estimators/chemprop/neural_fingerprint.py b/molpipeline/estimators/chemprop/neural_fingerprint.py
index 5f04826f..d6b49121 100644
--- a/molpipeline/estimators/chemprop/neural_fingerprint.py
+++ b/molpipeline/estimators/chemprop/neural_fingerprint.py
@@ -49,7 +49,7 @@ def __init__(
     def fit(
         self,
         X: MoleculeDataset,  # pylint: disable=invalid-name
-        y: Sequence[int | float] | npt.NDArray[np.int_ | np.float_],
+        y: Sequence[int | float] | npt.NDArray[np.int_ | np.float64],
     ) -> Self:
         """Fit the model.
 
@@ -57,7 +57,7 @@ def fit(
         ----------
         X : MoleculeDataset
             The input data.
-        y : Sequence[int | float] | npt.NDArray[np.int_ | np.float_]
+        y : Sequence[int | float] | npt.NDArray[np.int_ | np.float64]
             The target data.
 
         Returns
@@ -71,7 +71,7 @@ def fit(
 
     def transform(
         self, X: MoleculeDataset  # pylint: disable=invalid-name
-    ) -> npt.NDArray[np.float_]:
+    ) -> npt.NDArray[np.float64]:
         """Transform the input.
 
         Parameters
@@ -81,7 +81,7 @@ def transform(
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             The neural fingerprint of the input data.
         """
         self.model.eval()
@@ -91,20 +91,20 @@ def transform(
     def fit_transform(
         self,
         X: MoleculeDataset,  # pylint: disable=invalid-name
-        y: Sequence[int | float] | npt.NDArray[np.int_ | np.float_],
-    ) -> npt.NDArray[np.float_]:
+        y: Sequence[int | float] | npt.NDArray[np.int_ | np.float64],
+    ) -> npt.NDArray[np.float64]:
         """Fit the model and transform the input.
 
         Parameters
         ----------
         X : MoleculeDataset
             The input data.
-        y : Sequence[int | float] | npt.NDArray[np.int_ | np.float_]
+        y : Sequence[int | float] | npt.NDArray[np.int_ | np.float64]
             The target data.
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             The neural fingerprint of the input data.
         """
         self.fit(X, y)
diff --git a/molpipeline/estimators/nearest_neighbor.py b/molpipeline/estimators/nearest_neighbor.py
index f406be84..456d90ef 100644
--- a/molpipeline/estimators/nearest_neighbor.py
+++ b/molpipeline/estimators/nearest_neighbor.py
@@ -37,7 +37,7 @@
 
 AllMetrics = Union[
     SklearnNativeMetrics,
-    Callable[[Any, Any], float | npt.NDArray[np.float_] | Sequence[float]],
+    Callable[[Any, Any], float | npt.NDArray[np.float64] | Sequence[float]],
 ]
 
 
@@ -153,7 +153,7 @@ def predict(
 
         Returns
         -------
-        tuple[npt.NDArray[Any], npt.NDArray[np.float_]] | npt.NDArray[Any]
+        tuple[npt.NDArray[Any], npt.NDArray[np.float64]] | npt.NDArray[Any]
             The indices of the nearest points in the population matrix and the distances to the points.
         """
         if self.learned_names_ is None:
@@ -179,7 +179,7 @@ def fit_predict(
         y: Sequence[Any],
         return_distance: bool = False,
         n_neighbors: int | None = None,
-    ) -> tuple[npt.NDArray[Any], npt.NDArray[np.float_]] | npt.NDArray[Any]:
+    ) -> tuple[npt.NDArray[Any], npt.NDArray[np.float64]] | npt.NDArray[Any]:
         """Find the k-neighbors of a point.
 
         Parameters
diff --git a/molpipeline/estimators/similarity_transformation.py b/molpipeline/estimators/similarity_transformation.py
index 13c5744e..dac64d93 100644
--- a/molpipeline/estimators/similarity_transformation.py
+++ b/molpipeline/estimators/similarity_transformation.py
@@ -24,11 +24,11 @@ class TanimotoToTraining(BaseEstimator, TransformerMixin):
 
     Attributes
     ----------
-    training_matrix: npt.NDArray[np.float_] | csr_matrix | None
+    training_matrix: npt.NDArray[np.float64] | csr_matrix | None
         Features seen during fit.
     """
 
-    training_matrix: npt.NDArray[np.float_] | csr_matrix | None
+    training_matrix: npt.NDArray[np.float64] | csr_matrix | None
 
     def __init__(self, distance: bool = False) -> None:
         """Initialize TanimotoSimilarityToTraining.
@@ -44,21 +44,21 @@ def __init__(self, distance: bool = False) -> None:
 
     def _sim(
         self,
-        matrix_a: npt.NDArray[np.float_] | csr_matrix,
-        matrix_b: npt.NDArray[np.float_] | csr_matrix,
-    ) -> npt.NDArray[np.float_]:
+        matrix_a: npt.NDArray[np.float64] | csr_matrix,
+        matrix_b: npt.NDArray[np.float64] | csr_matrix,
+    ) -> npt.NDArray[np.float64]:
         """Compute the similarity matrix.
 
         Parameters
         ----------
-        matrix_a : npt.NDArray[np.float_] | csr_matrix
+        matrix_a : npt.NDArray[np.float64] | csr_matrix
             First matrix.
-        matrix_b : npt.NDArray[np.float_] | csr_matrix
+        matrix_b : npt.NDArray[np.float64] | csr_matrix
             Second matrix.
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Similarity matrix. If distance is True, the distance matrix is computed instead.
         """
         if not isinstance(matrix_a, csr_matrix):
@@ -71,16 +71,16 @@ def _sim(
 
     def fit(
         self,
-        X: npt.NDArray[np.float_] | csr_matrix,  # pylint: disable=invalid-name
-        y: npt.NDArray[np.float_] | None = None,  # pylint: disable=unused-argument
+        X: npt.NDArray[np.float64] | csr_matrix,  # pylint: disable=invalid-name
+        y: npt.NDArray[np.float64] | None = None,  # pylint: disable=unused-argument
     ) -> Self:
         """Fit the model.
 
         Parameters
         ----------
-        X : npt.NDArray[np.float_] | csr_matrix
+        X : npt.NDArray[np.float64] | csr_matrix
             Feature matrix to which the similarity matrix is computed.
-        y : npt.NDArray[np.float_] | None, optional
+        y : npt.NDArray[np.float64] | None, optional
             Labels, by default None and never used
 
         Returns
@@ -92,18 +92,18 @@ def fit(
         return self
 
     def transform(
-        self, X: npt.NDArray[np.float_] | csr_matrix  # pylint: disable=invalid-name
-    ) -> npt.NDArray[np.float_]:
+        self, X: npt.NDArray[np.float64] | csr_matrix  # pylint: disable=invalid-name
+    ) -> npt.NDArray[np.float64]:
         """Transform the data.
 
         Parameters
         ----------
-        X : npt.NDArray[np.float_] | csr_matrix
+        X : npt.NDArray[np.float64] | csr_matrix
             Feature matrix to which the similarity matrix is computed.
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Similarity matrix of X to the training matrix.
         """
         if self.training_matrix is None:
@@ -112,24 +112,24 @@ def transform(
 
     def fit_transform(
         self,
-        X: npt.NDArray[np.float_] | csr_matrix,  # pylint: disable=invalid-name
-        y: npt.NDArray[np.float_] | None = None,
+        X: npt.NDArray[np.float64] | csr_matrix,  # pylint: disable=invalid-name
+        y: npt.NDArray[np.float64] | None = None,
         **fit_params: Any,
-    ) -> npt.NDArray[np.float_]:
+    ) -> npt.NDArray[np.float64]:
         """Fit the model and transform the data.
 
         Parameters
         ----------
-        X: npt.NDArray[np.float_] | csr_matrix
+        X: npt.NDArray[np.float64] | csr_matrix
             Feature matrix to fit the model. Is returned as similarity matrix to itself.
-        y: npt.NDArray[np.float_] | None, optional
+        y: npt.NDArray[np.float64] | None, optional
             Labels, by default None and never used
         **fit_params: Any
             Additional fit parameters. Ignored.
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Similarity matrix of X to itself.
         """
         self.fit(X, y)
diff --git a/molpipeline/metrics/ignore_error_scorer.py b/molpipeline/metrics/ignore_error_scorer.py
index faf642d1..45d018a8 100644
--- a/molpipeline/metrics/ignore_error_scorer.py
+++ b/molpipeline/metrics/ignore_error_scorer.py
@@ -43,17 +43,17 @@ def ignored_value_scorer(
         scorer_kwargs["greater_is_better"] = False
 
     def newscore(
-        y_true: npt.NDArray[np.float_ | np.int_],
-        y_pred: npt.NDArray[np.float_ | np.int_],
+        y_true: npt.NDArray[np.float64 | np.int_],
+        y_pred: npt.NDArray[np.float64 | np.int_],
         **kwargs: Any,
     ) -> float:
         """Compute the score for the given prediction arrays.
 
         Parameters
         ----------
-        y_true : npt.NDArray[np.float_ | np.int_]
+        y_true : npt.NDArray[np.float64 | np.int_]
             The true values.
-        y_pred : npt.NDArray[np.float_ | np.int_]
+        y_pred : npt.NDArray[np.float64 | np.int_]
             The predicted values.
         **kwargs
             Additional keyword arguments.
diff --git a/molpipeline/mol2any/mol2concatinated_vector.py b/molpipeline/mol2any/mol2concatinated_vector.py
index bf85256e..1437d630 100644
--- a/molpipeline/mol2any/mol2concatinated_vector.py
+++ b/molpipeline/mol2any/mol2concatinated_vector.py
@@ -140,23 +140,23 @@ def set_params(self, **parameters: dict[str, Any]) -> Self:
 
     def assemble_output(
         self,
-        value_list: Iterable[npt.NDArray[np.float_]],
-    ) -> npt.NDArray[np.float_]:
+        value_list: Iterable[npt.NDArray[np.float64]],
+    ) -> npt.NDArray[np.float64]:
         """Transform output of all transform_single operations to matrix.
 
         Parameters
         ----------
-        value_list: Iterable[npt.NDArray[np.float_]]
+        value_list: Iterable[npt.NDArray[np.float64]]
             List of molecular descriptors or fingerprints which are concatenated to a single matrix.
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Matrix of shape (n_molecules, n_features) with concatenated features specified during init.
         """
         return np.vstack(list(value_list))
 
-    def transform(self, values: list[RDKitMol]) -> npt.NDArray[np.float_]:
+    def transform(self, values: list[RDKitMol]) -> npt.NDArray[np.float64]:
         """Transform the list of molecules to sparse matrix.
 
         Parameters
@@ -166,10 +166,10 @@ def transform(self, values: list[RDKitMol]) -> npt.NDArray[np.float_]:
 
         Returns
         -------
-        npt.NDArray[np.float_]
+        npt.NDArray[np.float64]
             Matrix of shape (n_molecules, n_features) with concatenated features specified during init.
         """
-        output: npt.NDArray[np.float_] = super().transform(values)
+        output: npt.NDArray[np.float64] = super().transform(values)
         return output
 
     def fit(
@@ -197,7 +197,7 @@ def fit(
 
     def pretransform_single(
         self, value: RDKitMol
-    ) -> Union[list[Union[npt.NDArray[np.float_], dict[int, int]]], InvalidInstance]:
+    ) -> Union[list[Union[npt.NDArray[np.float64], dict[int, int]]], InvalidInstance]:
         """Get pretransform of each element and concatenate for output.
 
         Parameters
@@ -207,7 +207,7 @@ def pretransform_single(
 
         Returns
         -------
-        Union[list[Union[npt.NDArray[np.float_], dict[int, int]]], InvalidInstance]
+        Union[list[Union[npt.NDArray[np.float64], dict[int, int]]], InvalidInstance]
             List of pretransformed values of each pipeline element.
             If any element returns None, InvalidInstance is returned.
         """
diff --git a/molpipeline/mol2any/mol2net_charge.py b/molpipeline/mol2any/mol2net_charge.py
index a897dbf8..6636f48c 100644
--- a/molpipeline/mol2any/mol2net_charge.py
+++ b/molpipeline/mol2any/mol2net_charge.py
@@ -77,7 +77,7 @@ def descriptor_list(self) -> list[str]:
 
     def _get_net_charge_gasteiger(
         self, value: RDKitMol
-    ) -> npt.NDArray[np.float_] | InvalidInstance:
+    ) -> npt.NDArray[np.float64] | InvalidInstance:
         """Transform a single molecule to it's net charge using Gasteiger charges.
 
         Based on https://github.com/rdkit/rdkit/discussions/4331
@@ -89,7 +89,7 @@ def _get_net_charge_gasteiger(
 
         Returns
         -------
-        Optional[npt.NDArray[np.float_]]
+        npt.NDArray[np.float64] | InvalidInstance
             Net charge of the given molecule.
         """
         # copy molecule since ComputeGasteigerCharges modifies the molecule inplace
@@ -106,7 +106,7 @@ def _get_net_charge_gasteiger(
 
     def pretransform_single(
         self, value: RDKitMol
-    ) -> npt.NDArray[np.float_] | InvalidInstance:
+    ) -> npt.NDArray[np.float64] | InvalidInstance:
         """Transform a single molecule to it's net charge.
 
         Parameters
@@ -116,11 +116,11 @@ def pretransform_single(
 
         Returns
         -------
-        Optional[npt.NDArray[np.float_]]
+        npt.NDArray[np.float64] | InvalidInstance
             Net charge of the given molecule.
         """
         if self._charge_method == "formal_charge":
-            return np.array([Chem.GetFormalCharge(value)], dtype=np.float_)
+            return np.array([Chem.GetFormalCharge(value)], dtype=np.float64)
         if self._charge_method == "gasteiger":
             return self._get_net_charge_gasteiger(value)
         raise ValueError(f"Unknown charge policy: {self._charge_method}")
diff --git a/molpipeline/mol2any/mol2rdkit_phys_chem.py b/molpipeline/mol2any/mol2rdkit_phys_chem.py
index 5fb60198..aeb87a4b 100644
--- a/molpipeline/mol2any/mol2rdkit_phys_chem.py
+++ b/molpipeline/mol2any/mol2rdkit_phys_chem.py
@@ -119,7 +119,7 @@ def descriptor_list(self, descriptor_list: list[str] | None) -> None:
 
     def pretransform_single(
         self, value: RDKitMol
-    ) -> Union[npt.NDArray[np.float_], InvalidInstance]:
+    ) -> Union[npt.NDArray[np.float64], InvalidInstance]:
         """Transform a single molecule to a descriptor vector.
 
         Parameters
@@ -129,7 +129,7 @@ def pretransform_single(
 
         Returns
         -------
-        Optional[npt.NDArray[np.float_]]
+        Optional[npt.NDArray[np.float64]]
             Descriptor vector for given molecule. None if calculation failed.
         """
         vec = np.full((len(self._descriptor_list),), np.nan)
diff --git a/molpipeline/utils/kernel.py b/molpipeline/utils/kernel.py
index e94a2b65..c315e6dd 100644
--- a/molpipeline/utils/kernel.py
+++ b/molpipeline/utils/kernel.py
@@ -9,7 +9,7 @@
 
 def tanimoto_similarity_sparse(
     matrix_a: sparse.csr_matrix, matrix_b: sparse.csr_matrix
-) -> npt.NDArray[np.float_]:
+) -> npt.NDArray[np.float64]:
     """Calculate a matrix of tanimoto similarities between feature matrix a and b.
 
     Parameters
@@ -21,7 +21,7 @@ def tanimoto_similarity_sparse(
 
     Returns
     -------
-    npt.NDArray[np.float_]
+    npt.NDArray[np.float64]
         Matrix of similarity values between instances of A (rows/first dim) , and instances of B (columns/second dim).
     """
     intersection = matrix_a.dot(matrix_b.transpose()).toarray()
@@ -39,7 +39,7 @@ def tanimoto_similarity_sparse(
 
 def tanimoto_distance_sparse(
     matrix_a: sparse.csr_matrix, matrix_b: sparse.csr_matrix
-) -> npt.NDArray[np.float_]:
+) -> npt.NDArray[np.float64]:
     """Calculate a matrix of tanimoto distance between feature matrix a and b.
 
     Tanimoto distance is defined as 1-similarity.
@@ -53,7 +53,7 @@ def tanimoto_distance_sparse(
 
     Returns
     -------
-    npt.NDArray[np.float_]
+    npt.NDArray[np.float64]
         Matrix of similarity values between instances of A (rows/first dim) , and instances of B (columns/second dim).
     """
     return 1 - tanimoto_similarity_sparse(matrix_a, matrix_b)
@@ -61,7 +61,7 @@ def tanimoto_distance_sparse(
 
 def self_tanimoto_similarity(
     matrix_a: Union[sparse.csr_matrix, npt.NDArray[np.int_]]
-) -> npt.NDArray[np.float_]:
+) -> npt.NDArray[np.float64]:
     """Calculate a matrix of tanimoto similarity between feature matrix a and itself.
 
     Parameters
@@ -71,7 +71,7 @@ def self_tanimoto_similarity(
 
     Returns
     -------
-    npt.NDArray[np.float_]
+    npt.NDArray[np.float64]
         Square matrix of similarity values between all instances in the matrix.
     """
     if isinstance(matrix_a, np.ndarray):
@@ -85,7 +85,7 @@ def self_tanimoto_similarity(
 
 def self_tanimoto_distance(
     matrix_a: Union[sparse.csr_matrix, npt.NDArray[np.int_]]
-) -> npt.NDArray[np.float_]:
+) -> npt.NDArray[np.float64]:
     """Calculate a matrix of tanimoto distance between feature matrix a and itself.
 
     Parameters
@@ -95,7 +95,7 @@ def self_tanimoto_distance(
 
     Returns
     -------
-    npt.NDArray[np.float_]
+    npt.NDArray[np.float64]
         Square matrix of similarity values between all instances in the matrix.
     """
     return 1 - self_tanimoto_similarity(matrix_a)
diff --git a/tests/test_estimators/test_nearest_neighbors.py b/tests/test_estimators/test_nearest_neighbors.py
index 1b5dfbdf..88bc14fa 100644
--- a/tests/test_estimators/test_nearest_neighbors.py
+++ b/tests/test_estimators/test_nearest_neighbors.py
@@ -74,7 +74,7 @@ def test_fit_and_predict_with_distance(self) -> None:
         model.fit(TEST_SMILES, TEST_SMILES)
         result = model.predict(TEST_SMILES, **{"return_distance": True})
         neighbors = result[:, :, 0]
-        distances = result[:, :, 1].astype(np.float_)
+        distances = result[:, :, 1].astype(np.float64)
         self.assertListEqual(neighbors.tolist(), TWO_NN)
         self.assertTrue(np.allclose(1 - distances, TWO_NN_SIMILARITIES))
 
@@ -207,5 +207,5 @@ def test_fit_and_predict_invalid_with_distance(self) -> None:
         distances = result[:, :, 1]
         self.assertListEqual(neighbors.tolist(), [["invalid", "invalid"]] + TWO_NN)
         self.assertTrue(
-            1 - np.allclose(distances[1:, :].astype(np.float_), TWO_NN_SIMILARITIES)
+            np.allclose(1 - distances[1:, :].astype(np.float64), TWO_NN_SIMILARITIES)
         )
diff --git a/tests/test_estimators/test_similarity_transformation.py b/tests/test_estimators/test_similarity_transformation.py
index bc1704b0..bcdf6aaa 100644
--- a/tests/test_estimators/test_similarity_transformation.py
+++ b/tests/test_estimators/test_similarity_transformation.py
@@ -64,7 +64,7 @@ def _generate_morgan_fingerprints(compound_list: list[str]) -> sparse.csr_matrix
 
 def _calculate_rdkit_self_similarity(
     compound_list: list[str],
-) -> npt.NDArray[np.float_]:
+) -> npt.NDArray[np.float64]:
     """Calculate the self similarity using RDKit.
 
     Parameters
@@ -74,7 +74,7 @@ def _calculate_rdkit_self_similarity(
 
     Returns
     -------
-    npt.NDArray[np.float_]
+    npt.NDArray[np.float64]
         Self similarity.
     """
     fp_list = []

From e67ed1e6ebc07dc582ba0b97b85340a6e1c40f36 Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Mon, 17 Jun 2024 13:53:16 +0200
Subject: [PATCH 2/4] apply black formatting

---
 .../abstract_pipeline_elements/mol2any/mol2floatvector.py     | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/molpipeline/abstract_pipeline_elements/mol2any/mol2floatvector.py b/molpipeline/abstract_pipeline_elements/mol2any/mol2floatvector.py
index 0c3db20c..3f25e2a4 100644
--- a/molpipeline/abstract_pipeline_elements/mol2any/mol2floatvector.py
+++ b/molpipeline/abstract_pipeline_elements/mol2any/mol2floatvector.py
@@ -180,7 +180,9 @@ def transform(self, values: list[RDKitMol]) -> npt.NDArray[np.float64]:
         descriptor_matrix: npt.NDArray[np.float64] = super().transform(values)
         return descriptor_matrix
 
-    def finalize_single(self, value: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    def finalize_single(
+        self, value: npt.NDArray[np.float64]
+    ) -> npt.NDArray[np.float64]:
         """Finalize single value. Here: standardize vector.
 
         Parameters

From a0a99bef7b2945ce0b9a466d12be52652eea06d5 Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Tue, 6 Aug 2024 15:22:23 +0200
Subject: [PATCH 3/4] remove numpy restriction from requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 9b597696..c6fab9f9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 joblib >= 1.3.0
 loguru
-numpy < 2.0.0
+numpy
 pandas
 rdkit >= 2023.9.1
 scipy

From eec7dae61c2c88c513c898534f14ec337216095a Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Tue, 6 Aug 2024 15:24:31 +0200
Subject: [PATCH 4/4] remove numpy restriction from linting workflow

---
 .github/workflows/linting.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index d2e0617c..6102b678 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -34,7 +34,6 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install "numpy<2.0.0"
         pip install mypy
         mypy . || exit_code=$?
         mypy --install-types --non-interactive