Skip to content

Commit

Permalink
Merge pull request #227 from SAP/fix/#214
Browse files: browse the repository at this point in the history
password_model batch mode: classify only new discoveries
  • Loading branch information
marcorosa authored Mar 30, 2022
2 parents c9f6672 + f2a8685 commit 4e28506
Showing 1 changed file with 12 additions and 11 deletions.
23 changes: 12 additions & 11 deletions credentialdigger/models/password_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,18 @@ def analyze_batch(self, discoveries):
# We have to classify only the "new" discoveries
new_discoveries = [d for d in discoveries if d['state'] == 'new']
no_new_discoveries = [d for d in discoveries if d['state'] != 'new']
# Create a dataset with all the preprocessed (new) snippets
data = self._pre_process([d['snippet'] for d in new_discoveries])
# data = self._preprocess_batch_data(snippets)
# Compute a prediction for each snippet
outputs = self.model.predict(data)
logits = outputs['logits']
predictions = tf.argmax(logits, 1)
# Check predictions and set FP discoveries accordingly
for d, p in zip(new_discoveries, predictions):
if p == 0:
d['state'] = 'false_positive'
# Process new_discoveries if not empty
if new_discoveries:
# Create a dataset with all the preprocessed (new) snippets
data = self._pre_process([d['snippet'] for d in new_discoveries])
# Compute a prediction for each snippet
outputs = self.model.predict(data)
logits = outputs['logits']
predictions = tf.argmax(logits, 1)
# Check predictions and set FP discoveries accordingly
for d, p in zip(new_discoveries, predictions):
if p == 0:
d['state'] = 'false_positive'
return new_discoveries + no_new_discoveries

def analyze(self, discovery):
Expand Down

0 comments on commit 4e28506

Please sign in to comment.