Skip to content

Commit

Permalink
Fix the problem that the first commit was missed
Browse files Browse the repository at this point in the history
  • Loading branch information
kings-way committed Jan 24, 2018
1 parent 461953f commit e3e2dfa
Showing 1 changed file with 31 additions and 24 deletions.
55 changes: 31 additions & 24 deletions truffleHog/truffleHog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import stat
from defaultRegexes.regexChecks import regexes
from git import Repo
from git import NULL_TREE

def main():
parser = argparse.ArgumentParser(description='Find secrets hidden in the depths of git.')
Expand Down Expand Up @@ -223,33 +224,39 @@ def find_strings(git_url, since_commit=None, max_depth=None, printJson=False, do
if since_commit and since_commit_reached:
prev_commit = curr_commit
continue

# If prev_commit is not set, curr_commit is the newest commit and there is nothing to diff it against yet.
# Use this iteration to diff the oldest commit in range against NULL_TREE instead, so its initial contents are scanned.
# This way, no commit is missed.
if not prev_commit:
pass
prev_commit = list(repo.iter_commits(max_count=max_depth))[-1]
diff = prev_commit.diff(NULL_TREE, create_patch=True)
else:
#avoid searching the same diffs
hashes = str(prev_commit) + str(curr_commit)
if hashes in already_searched:
prev_commit = curr_commit
continue
already_searched.add(hashes)

diff = prev_commit.diff(curr_commit, create_patch=True)
for blob in diff:
printableDiff = blob.diff.decode('utf-8', errors='replace')
if printableDiff.startswith("Binary files"):
continue
commit_time = datetime.datetime.fromtimestamp(prev_commit.committed_date).strftime('%Y-%m-%d %H:%M:%S')
foundIssues = []
if do_entropy:
entropicDiff = find_entropy(printableDiff, commit_time, branch_name, prev_commit, blob, commitHash)
if entropicDiff:
foundIssues.append(entropicDiff)
if do_regex:
found_regexes = regex_check(printableDiff, commit_time, branch_name, prev_commit, blob, commitHash)
foundIssues += found_regexes
for foundIssue in foundIssues:
print_results(printJson, foundIssue)
output["foundIssues"] += foundIssues

# avoid searching the same diffs
hashes = str(prev_commit) + str(curr_commit)
if hashes in already_searched:
prev_commit = curr_commit
continue
already_searched.add(hashes)

for blob in diff:
printableDiff = blob.diff.decode('utf-8', errors='replace')
if printableDiff.startswith("Binary files"):
continue
commit_time = datetime.datetime.fromtimestamp(prev_commit.committed_date).strftime('%Y-%m-%d %H:%M:%S')
foundIssues = []
if do_entropy:
entropicDiff = find_entropy(printableDiff, commit_time, branch_name, prev_commit, blob, commitHash)
if entropicDiff:
foundIssues.append(entropicDiff)
if do_regex:
found_regexes = regex_check(printableDiff, commit_time, branch_name, prev_commit, blob, commitHash)
foundIssues += found_regexes
for foundIssue in foundIssues:
print_results(printJson, foundIssue)
output["foundIssues"] += foundIssues

prev_commit = curr_commit
output["project_path"] = project_path
Expand Down

0 comments on commit e3e2dfa

Please sign in to comment.