Merge pull request #2 from jspaezp/chore/fix_confidence_api

Chore/fix confidence api
wfondrie · Dec 19, 2024 · c246df7 · c246df7
2 parents 6749f84 + 100ec58
commit c246df7
Show file tree

Hide file tree

Showing 27 changed files with 1,654 additions and 805 deletions.
diff --git a/docs/source/vignettes/.gitignore b/docs/source/vignettes/.gitignore
@@ -0,0 +1,3 @@
+
+joint_models/
+basic_python_api_output/
diff --git a/docs/source/vignettes/basic_python_api.ipynb b/docs/source/vignettes/basic_python_api.ipynb
diff --git a/docs/source/vignettes/joint_models.ipynb b/docs/source/vignettes/joint_models.ipynb
diff --git a/mokapot/brew.py b/mokapot/brew.py
@@ -25,6 +25,7 @@
 )
 from mokapot.model import PercolatorModel, Model
 from mokapot.parsers.pin import parse_in_chunks
+from mokapot.utils import strictzip
 
 LOGGER = logging.getLogger(__name__)
 
@@ -103,12 +104,15 @@ def brew(
         model = PercolatorModel()
 
     try:
+        # Q: what is this doing? Why does the random number
+        # generator get set only if the model has an estimator?
+        # Shouldn't it be assigned to all the models that are passed?
         model.estimator
         model.rng = rng
     except AttributeError:
         pass
 
-        # Check that all of the datasets have the same features:
+    # Check that all of the datasets have the same features:
     feat_set = set(datasets[0].feature_columns)
     if not all([
         set(dataset.feature_columns) == feat_set for dataset in datasets
@@ -292,13 +296,20 @@ def brew(
 
     # Reverse all scores for which desc is False (this way, we don't have to
     # return `descs` from this function
+    # Q: why don't we just return a class that denotes if it's descending?
+    #    JSPP 2024-12-15
     for idx, desc in enumerate(descs):
         if not desc:
             scores[idx] = -scores[idx]
             descs[idx] = not descs[idx]
 
     # Coerces the tuple to a list
     models = list(models)
+
+    LOGGER.info("Assigning scores to PSMs...")
+    for score, dataset in strictzip(scores, datasets):
+        dataset.scores = score
+
     return list(models), scores
 
 
@@ -498,7 +509,9 @@ def _predict(
 
 @typechecked
 def _predict_with_ensemble(
-    dataset: PsmDataset, models: Iterable[Model], max_workers
+    dataset: PsmDataset,
+    models: Iterable[Model],
+    max_workers: int,
 ):
     """
     Return the new scores for the dataset using ensemble of all trained models

diff --git a/mokapot/column_defs.py b/mokapot/column_defs.py
@@ -1,3 +1,4 @@
+Q_VALUE_COL_NAME = "mokapot_qvalue"
 STANDARD_COLUMN_NAME_MAP = {
     "SpecId": "psm_id",
     "PSMId": "psm_id",
@@ -10,7 +11,7 @@
     "ModifiedPeptide": "modified_peptide",
     "modifiedpeptide": "modified_peptide",
     # "q-value": "q_value",
-    "q-value": "q-value",
+    "q-value": Q_VALUE_COL_NAME,
 }