From 26f458d3f8d181b7fd9814f99dd61834ffbbfd9f Mon Sep 17 00:00:00 2001
From: virchan
Date: Wed, 19 Jun 2024 13:03:44 -0700
Subject: [PATCH] Added doc-strings to temperature-scaling-related functions.

---
 sklearn/_temperature_scaling_test.py |   2 +
 sklearn/calibration_temperature.py   | 145 +++++++++++++++++++++++----
 2 files changed, 130 insertions(+), 17 deletions(-)

diff --git a/sklearn/_temperature_scaling_test.py b/sklearn/_temperature_scaling_test.py
index 991ade2b066db..66c3ffdc56f91 100644
--- a/sklearn/_temperature_scaling_test.py
+++ b/sklearn/_temperature_scaling_test.py
@@ -33,6 +33,8 @@
 Logistic_scaled.fit(X_train,y_train)
 Tree_scaled.fit(X_train,y_train)
 
+print(f"Initial temperature SVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0]._initial_temperature}")
+
 print("Optimal Temperatures For Each Classifiers")
 print(f"- SVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0].T_}")
 print(f"- Logistic: {Logistic_scaled.calibrated_classifiers_[0].calibrators[0].T_}")
diff --git a/sklearn/calibration_temperature.py b/sklearn/calibration_temperature.py
index 48b9f2b7a1dae..20b211a411b18 100644
--- a/sklearn/calibration_temperature.py
+++ b/sklearn/calibration_temperature.py
@@ -64,7 +64,7 @@
 class CalibratedClassifierCV_test(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
-    """Probability calibration with isotonic regression or logistic regression.
+    """Probability calibration with isotonic regression, logistic regression, or temperature scaling (in progress).
@@ -98,11 +98,12 @@ class CalibratedClassifierCV_test(ClassifierMixin, MetaEstimatorMixin, BaseEstim
 
         .. versionadded:: 1.2
 
-    method : {'sigmoid', 'isotonic'}, default='sigmoid'
+    method : {'sigmoid', 'isotonic', 'temperature'}, default='sigmoid'
         The method to use for calibration. Can be 'sigmoid' which
-        corresponds to Platt's method (i.e. a logistic regression model) or
-        'isotonic' which is a non-parametric approach. It is not advised to
-        use isotonic calibration with too few calibration samples
+        corresponds to Platt's method (i.e. a logistic regression model),
+        'isotonic' which is a non-parametric approach, or 'temperature'
+        which corresponds to the temperature scaling method. It is not
+        advised to use isotonic calibration with too few calibration samples
         ``(<<1000)`` since it tends to overfit.
@@ -211,6 +212,9 @@ class CalibratedClassifierCV_test(ClassifierMixin, MetaEstimatorMixin, BaseEstim
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
 
+    .. [5] On Calibration of Modern Neural Networks,
+           C. Guo, G. Pleiss, Y. Sun & K. Q. Weinberger, ICML 2017
+
     Examples
     --------
     >>> from sklearn.datasets import make_classification
@@ -933,9 +937,20 @@ def predict(self, T):
 
 
 def _row_max_normalization(data: np.ndarray) -> np.ndarray:
-    """Normalise the output by subtracting
-    the per-row maximum element.
+    """Normalize the input data by subtracting the maximum value of each row.
+
+    Parameters
+    ----------
+    data : np.ndarray
+        The input data array of shape (n_samples, n_classes).
+
+    Returns
+    -------
+    np.ndarray
+        A 2D array of the same shape as `data` where each row has been normalized
+        by subtracting the maximum value of that row.
+    """
 
     row_max: np.ndarray = np.max(data, axis=1, keepdims=True)
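A quick standalone illustration of the row-max trick documented above may help reviewers. This is a NumPy-only sketch (the name `stable_softmax` is hypothetical, not part of the patch) showing that subtracting each row's maximum prevents overflow without changing the softmax output:

import numpy as np

def stable_softmax(logits: np.ndarray) -> np.ndarray:
    # Subtracting each row's maximum shifts the largest logit to 0, so
    # np.exp never overflows; softmax itself is invariant under adding
    # a constant to every entry of a row.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=1, keepdims=True)

logits = np.array([[1000.0, 1001.0],   # naive np.exp(1001.0) overflows to inf
                   [0.0, 1.0]])        # same row up to a constant shift
print(stable_softmax(logits))          # both rows: [0.2689..., 0.7310...]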
""" + row_max: np.ndarray = np.max(data, axis=1, keepdims=True @@ -947,7 +962,27 @@ def _row_max_normalization(data: np.ndarray) -> np.ndarray: def _softmax_t(predictions: np.ndarray, temperature: float, ) -> np.ndarray: - """Softmax function scaled by the inverse temperature + """Compute the temperature-scaled softmax of the input predictions. + + Parameters + ---------- + predictions : np.ndarray + The input predictions array of shape (n_sample, n_classes). + + temperature : float + The temperature parameter for scaling. + + Returns + ------- + np.ndarray + A 2D array of the same shape as `predictions` containing the temperature-scaled + softmax probabilities. + + Notes + ----- + - This function internally normalizes the predictions by subtracting the row-wise + maximum to improve numerical stability before scaling by the temperature. + - The softmax computation is done along the last axis of the input predictions. """ softmax_t_output: np.ndarray = predictions @@ -962,7 +997,27 @@ def _softmax_t(predictions: np.ndarray, def _exp_t(predictions: np.ndarray, temperature: float ) -> np.ndarray: - """Scale by inverse temperature, and then apply the nature exponential function + """Scale predictions by the inverse temperature and apply the exponential function. + + Parameters + ---------- + predictions : np.ndarray + The input predictions array of shape (n_samples, n_classes). + + temperature : float + The temperature parameter for scaling. + + Returns + ------- + np.ndarray + A 2D array of the same shape as `predictions` containing the scaled and + exponentiated values. + + Notes + ----- + - This function internally normalizes the predictions by subtracting the row-wise + maximum to improve numerical stability before scaling by the temperature and + applying the exponential function. """ exp_t_output: np.ndarray = predictions @@ -977,12 +1032,46 @@ def _temperature_scaling(predictions: np.ndarray, labels: np.ndarray, initial_temperature: float ) -> float: - """ Minimize the Negative Log Likelihood Loss with respect to Temperature + """Probability Calibration with temperature scaling (Guo-Pleiss-Sun-Weinberger 2017). + + Parameters + ---------- + predictions : ndarray of shape (n_samples,) + The decision function or predict proba for the samples. + + labels : ndarray of shape (n_samples, n_classes) + One-hot encoded true labels for the samples. + + initial_temperature : float + Initial temperature value to start the optimisation + + Returns + ------- + float + The optimised temperature parameter for probability calibration, with a + value in the range [1, infinity). + + References + ---------- + Guo, Pleiss, Sun & Weinberger, "On Calibration of Modern Neural Networks" """ def negative_log_likelihood(temperature: float): - """Negative Log Likelihood Loss and its Derivative - with respect to Temperature + """ Compute the negative log likelihood loss and its derivative + with respect to temperature. + + Parameters + ---------- + temperature : float + The current temperature value during optimisation. + + Returns + ------- + float + The negative log likelihood loss. + float + The derivative of the negative log likelihood loss with respect to + temperature. 
""" # Initiate the Losses @@ -1009,11 +1098,11 @@ def negative_log_likelihood(temperature: float): term_2 *= exp_t term_2 = term_2.sum(axis=1) - dL_dts: np.ndarray = (term_1 + term_2) / exp_t_sum + dlosses_dts: np.ndarray = (term_1 + term_2) / exp_t_sum # print(f"{-losses.sum() = }, {-dL_dts.sum() = }") - return -losses.sum(), -dL_dts.sum() + return -losses.sum(), -dlosses_dts.sum() temperature_minimizer: minimize = minimize(negative_log_likelihood, np.array([initial_temperature]), @@ -1033,8 +1122,14 @@ class _TemperatureScaling(): Attributes ---------- + + _initial_temperature: float or None + Initial temperature value to start the optimisation. + If None, the it is set to 1.5. + + T_ : float - The optimal temperature. + The optimised temperature for probability calibration. """ def __init__(self, @@ -1050,6 +1145,21 @@ def fit(self, X, y ): + """Fit the model using X, y as training data. + + Parameters + ---------- + X : array-like of shape (n_samples, n_classes) + Training data. + + y : array-like of shape (n_samples, n_classes) + Training target. + + Returns + ------- + self : object + Returns an instance of self. + """ self.T_: float = _temperature_scaling(np.log(X), y, self._initial_temperature) @@ -1061,11 +1171,12 @@ def predict(self, X): Parameters ---------- X : array-like of shape (n_samples, n_classes) - Data to predict from. + The decision function or predict proba for the samples + Returns ------- - X_ : ndarray of shape (n_samples,) + ndarray of shape (n_samples, n_classes) The predicted data. """