Merge pull request #227 from SAP/fix/#214

password_model: in batch mode, classify only new discoveries
SAP · Mar 30, 2022 · 4e28506 · 4e28506
2 parents c9f6672 + f2a8685
commit 4e28506
Showing 1 changed file with 12 additions and 11 deletions.
diff --git a/credentialdigger/models/password_model.py b/credentialdigger/models/password_model.py
@@ -46,17 +46,18 @@ def analyze_batch(self, discoveries):
         # We have to classify only the "new" discoveries
         new_discoveries = [d for d in discoveries if d['state'] == 'new']
         no_new_discoveries = [d for d in discoveries if d['state'] != 'new']
-        # Create a dataset with all the preprocessed (new) snippets
-        data = self._pre_process([d['snippet'] for d in new_discoveries])
-        # data = self._preprocess_batch_data(snippets)
-        # Compute a prediction for each snippet
-        outputs = self.model.predict(data)
-        logits = outputs['logits']
-        predictions = tf.argmax(logits, 1)
-        # Check predictions and set FP discoveries accordingly
-        for d, p in zip(new_discoveries, predictions):
-            if p == 0:
-                d['state'] = 'false_positive'
+        # Process new_discoveries if not empty
+        if new_discoveries:
+            # Create a dataset with all the preprocessed (new) snippets
+            data = self._pre_process([d['snippet'] for d in new_discoveries])
+            # Compute a prediction for each snippet
+            outputs = self.model.predict(data)
+            logits = outputs['logits']
+            predictions = tf.argmax(logits, 1)
+            # Check predictions and set FP discoveries accordingly
+            for d, p in zip(new_discoveries, predictions):
+                if p == 0:
+                    d['state'] = 'false_positive'
         return new_discoveries + no_new_discoveries
 
     def analyze(self, discovery):