Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add per line exclude regex via --exclude-line #127

Merged
6 commits merged on Feb 9, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion detect_secrets/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ def write_baseline_to_file(filename, data):
:type data: dict
:rtype: None
"""
with open(filename, 'w') as f:
with open(filename, 'w') as f: # pragma: no cover
f.write(format_baseline_for_output(data) + '\n')
14 changes: 13 additions & 1 deletion detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from abc import ABCMeta
from abc import abstractmethod
from abc import abstractproperty
Expand All @@ -12,10 +13,16 @@ class BasePlugin(object):
__metaclass__ = ABCMeta
secret_type = None

def __init__(self, **kwargs):
def __init__(self, exclude_lines_re=None, **kwargs):
if not self.secret_type:
raise ValueError('Plugins need to declare a secret_type.')

self.exclude_lines_re = None
if exclude_lines_re:
self.exclude_lines_re = re.compile(
exclude_lines_re,
)

def analyze(self, file, filename):
"""
:param file: The File object itself.
Expand All @@ -29,6 +36,11 @@ def analyze(self, file, filename):
for line_num, line in enumerate(file.readlines(), start=1):
if any(regex.search(line) for regex in WHITELIST_REGEXES):
continue
if (
self.exclude_lines_re
and self.exclude_lines_re.search(line)
):
continue
secrets = self.analyze_string(line, line_num, filename)
potential_secrets.update(secrets)

Expand Down
10 changes: 9 additions & 1 deletion detect_secrets/plugins/common/ini_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ class IniFileParser(object):

_comment_regex = re.compile(r'\s*[;#]')

def __init__(self, file, add_header=False):
def __init__(self, file, add_header=False, exclude_lines_re=None):
self.parser = configparser.ConfigParser()
self.parser.optionxform = str

self.exclude_lines_re = exclude_lines_re

if not add_header:
self.parser.read_file(file)
else:
Expand Down Expand Up @@ -77,6 +79,12 @@ def _get_value_and_line_offset(self, key, values):
if not line.strip() or self._comment_regex.match(line):
continue

if (
self.exclude_lines_re
and self.exclude_lines_re.search(line)
):
continue

if current_value_list_index == 0:
first_line_regex = re.compile(r'^\s*{}[ :=]+{}'.format(
re.escape(key),
Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/plugins/common/initialize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Intelligent initialization of plugins."""
try:
from functools import lru_cache
except ImportError:
except ImportError: # pragma: no cover
from functools32 import lru_cache

from ..aws import AWSKeyDetector # noqa: F401
Expand Down
15 changes: 11 additions & 4 deletions detect_secrets/plugins/common/yaml_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ class YamlFileParser(object):
This parsing method is inspired by https://stackoverflow.com/a/13319530.
"""

def __init__(self, file):
def __init__(self, file, exclude_lines_re=None):
KevinHock marked this conversation as resolved.
Show resolved Hide resolved
self.content = file.read()
self.loader = yaml.SafeLoader(self.content)
self.exclude_lines_re = exclude_lines_re

self.loader = yaml.SafeLoader(self.content)
self.loader.compose_node = self._compose_node_shim

def json(self):
Expand Down Expand Up @@ -123,11 +124,17 @@ def get_ignored_lines(self):

:return: set
"""

ignored_lines = set()

for line_number, line in enumerate(self.content.split('\n'), 1):
if WHITELIST_REGEX['yaml'].search(line):
if (
WHITELIST_REGEX['yaml'].search(line)

or (
self.exclude_lines_re and
self.exclude_lines_re.search(line)
)
):
ignored_lines.add(line_number)

return ignored_lines
38 changes: 28 additions & 10 deletions detect_secrets/plugins/high_entropy_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class HighEntropyStringsPlugin(BasePlugin):

__metaclass__ = ABCMeta

def __init__(self, charset, limit, *args):
def __init__(self, charset, limit, exclude_lines_re, *args):
if limit < 0 or limit > 8:
raise ValueError(
'The limit set for HighEntropyStrings must be between 0.0 and 8.0',
Expand All @@ -48,6 +48,11 @@ def __init__(self, charset, limit, *args):
self.charset = charset
self.entropy_limit = limit
self.regex = re.compile(r'([\'"])([%s]+)(\1)' % charset)
self.exclude_lines_re = None
if exclude_lines_re:
self.exclude_lines_re = re.compile(
exclude_lines_re,
)
KevinHock marked this conversation as resolved.
Show resolved Hide resolved

def analyze(self, file, filename):
file_type_analyzers = (
Expand Down Expand Up @@ -125,7 +130,11 @@ def adhoc_scan(self, string):
# Since it's an individual string, it's just bad UX to require quotes
# around the expected secret.
with self.non_quoted_string_regex():
results = self.analyze_string(string, 0, 'does_not_matter')
results = self.analyze_string(
string,
line_num=0,
filename='does_not_matter',
)

# NOTE: Trailing space allows for nicer formatting
output = 'False' if not results else 'True '
Expand Down Expand Up @@ -168,7 +177,11 @@ def wrapped(file, filename):
potential_secrets = {}

with self.non_quoted_string_regex():
for value, lineno in IniFileParser(file, add_header).iterator():
for value, lineno in IniFileParser(
file,
add_header,
self.exclude_lines_re,
).iterator():
potential_secrets.update(self.analyze_string(
value,
lineno,
Expand All @@ -189,7 +202,10 @@ def _analyze_yaml_file(self, file, filename):
# we use this heuristic to quit early if appropriate.
raise yaml.YAMLError

parser = YamlFileParser(file)
parser = YamlFileParser(
file,
self.exclude_lines_re,
)
data = parser.json()
ignored_lines = parser.get_ignored_lines()
potential_secrets = {}
Expand Down Expand Up @@ -227,10 +243,11 @@ class HexHighEntropyString(HighEntropyStringsPlugin):

secret_type = 'Hex High Entropy String'

def __init__(self, hex_limit, **kwargs):
def __init__(self, hex_limit, exclude_lines_re=None, **kwargs):
super(HexHighEntropyString, self).__init__(
string.hexdigits,
hex_limit,
charset=string.hexdigits,
limit=hex_limit,
exclude_lines_re=exclude_lines_re,
)

@property
Expand Down Expand Up @@ -278,10 +295,11 @@ class Base64HighEntropyString(HighEntropyStringsPlugin):

secret_type = 'Base64 High Entropy String'

def __init__(self, base64_limit, **kwargs):
def __init__(self, base64_limit, exclude_lines_re=None, **kwargs):
super(Base64HighEntropyString, self).__init__(
string.ascii_letters + string.digits + '+/=',
base64_limit,
charset=string.ascii_letters + string.digits + '+/=',
limit=base64_limit,
exclude_lines_re=exclude_lines_re,
)

@property
Expand Down
1 change: 1 addition & 0 deletions test_data/config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
credentials:
some_value_here: not_a_secret
other_value_here: 1234567890a
CanonicalUserGetSkippedByExcludeLines: 1234567890ab
nested:
value: AKIAabcdefghijklmnop
other_value: abcdefghijklmnop
Expand Down
4 changes: 3 additions & 1 deletion tests/core/secrets_collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ def test_exclude_regex_skips_files_appropriately(self):
def load_from_diff(self, existing_secrets=None, baseline_filename='', exclude_files_re=''):
collection = secrets_collection_factory(
secrets=existing_secrets,
plugins=(HexHighEntropyString(3),),
plugins=(
HexHighEntropyString(hex_limit=3),
),
exclude_files_re=exclude_files_re,
)

Expand Down
34 changes: 29 additions & 5 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,22 +59,46 @@ def test_scan_with_exclude_args(self, mock_baseline_initialize):
scan_all_files=False,
)

def test_scan_string_basic(self, mock_baseline_initialize):
@pytest.mark.parametrize(
'string, expected_base64_result, expected_hex_result',
[
(
'012345678ab',
'False (3.459)',
'True (3.459)',
),
(
'Benign',
'False (2.252)',
'False',
),
],
)
def test_scan_string_basic(
self,
mock_baseline_initialize,
string,
expected_base64_result,
expected_hex_result,
):
with mock_stdin(
'012345678ab',
string,
), mock_printer(
main_module,
) as printer_shim:
assert main('scan --string'.split()) == 0
assert uncolor(printer_shim.message) == textwrap.dedent("""
AWSKeyDetector : False
Base64HighEntropyString: False (3.459)
Base64HighEntropyString: {}
BasicAuthDetector : False
HexHighEntropyString : True (3.459)
HexHighEntropyString : {}
KeywordDetector : False
PrivateKeyDetector : False
SlackDetector : False
""")[1:]
""".format(
expected_base64_result,
expected_hex_result,
))[1:]

mock_baseline_initialize.assert_not_called()

Expand Down
35 changes: 25 additions & 10 deletions tests/plugins/high_entropy_strings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ def setup(self, logic, non_secret_string, secret_string):
"'{secret}'",
True,
),
# Matches exclude_lines_re
(
"CanonicalUser: {secret}",
False,
),
],
)
def test_pattern(self, content_to_format, should_be_caught):
Expand Down Expand Up @@ -125,9 +130,12 @@ class TestBase64HighEntropyStrings(HighEntropyStringsTest):
def setup(self):
super(TestBase64HighEntropyStrings, self).setup(
# Testing default limit, as suggested by truffleHog.
Base64HighEntropyString(4.5),
'c3VwZXIgc2VjcmV0IHZhbHVl', # too short for high entropy
'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5',
logic=Base64HighEntropyString(
base64_limit=4.5,
exclude_lines_re='(CanonicalUser)',
),
non_secret_string='c3VwZXIgc2VjcmV0IHZhbHVl', # too short for high entropy
secret_string='c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5',
)

def test_ini_file(self):
Expand Down Expand Up @@ -165,7 +173,10 @@ def test_ini_file(self):
assert count == 7

def test_yaml_file(self):
plugin = Base64HighEntropyString(3)
plugin = Base64HighEntropyString(
base64_limit=3,
exclude_lines_re='(CanonicalUser)',
KevinHock marked this conversation as resolved.
Show resolved Hide resolved
)

with open('test_data/config.yaml') as f:
secrets = plugin.analyze(f, 'test_data/config.yaml')
Expand All @@ -175,7 +186,7 @@ def test_yaml_file(self):
location = str(secret).splitlines()[1]
assert location in (
'Location: test_data/config.yaml:3',
'Location: test_data/config.yaml:5',
'Location: test_data/config.yaml:6',
)

def test_env_file(self):
Expand All @@ -196,15 +207,19 @@ class TestHexHighEntropyStrings(HighEntropyStringsTest):
def setup(self):
super(TestHexHighEntropyStrings, self).setup(
# Testing default limit, as suggested by truffleHog.
HexHighEntropyString(3),
'aaaaaa',
'2b00042f7481c7b056c4b410d28f33cf',
logic=HexHighEntropyString(
hex_limit=3,
exclude_lines_re='(CanonicalUser)',
),
non_secret_string='aaaaaa',
secret_string='2b00042f7481c7b056c4b410d28f33cf',
)

def test_discounts_when_all_numbers(self):
original_scanner = HighEntropyStringsPlugin(
string.hexdigits,
3,
charset=string.hexdigits,
limit=3,
exclude_lines_re=None,
)

# This makes sure discounting works.
Expand Down