Skip to content

Commit

Permalink
↪️ Merge pull request #127 from Yelp/add_per_line_exclude
Browse files Browse the repository at this point in the history
Add per-line exclude regex via --exclude-lines
  • Loading branch information
KevinHock authored Feb 9, 2019
2 parents 8e4190f + 9fad46a commit 9f3d9ee
Show file tree
Hide file tree
Showing 26 changed files with 500 additions and 216 deletions.
8 changes: 7 additions & 1 deletion detect_secrets/core/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,13 @@ def _get_secret_with_context(
index_of_secret_in_output = lines_of_context

with codecs.open(filename, encoding='utf-8') as file:
output = list(itertools.islice(file.read().splitlines(), start_line, end_line))
output = list(
itertools.islice(
file.read().splitlines(),
start_line,
end_line,
),
)

try:
output[index_of_secret_in_output] = _highlight_secret(
Expand Down
26 changes: 16 additions & 10 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

def initialize(
plugins,
exclude_regex=None,
exclude_files_regex=None,
exclude_lines_regex=None,
path='.',
scan_all_files=False,
):
Expand All @@ -20,13 +21,18 @@ def initialize(
:type plugins: tuple of detect_secrets.plugins.base.BasePlugin
:param plugins: rules to initialize the SecretsCollection with.
:type exclude_regex: str|None
:type exclude_files_regex: str|None
:type exclude_lines_regex: str|None
:type path: str
:type scan_all_files: bool
:rtype: SecretsCollection
"""
output = SecretsCollection(plugins, exclude_regex)
output = SecretsCollection(
plugins,
exclude_files=exclude_files_regex,
exclude_lines=exclude_lines_regex,
)

if os.path.isfile(path):
# This option allows for much easier adhoc usage.
Expand All @@ -39,11 +45,11 @@ def initialize(
if not files_to_scan:
return output

if exclude_regex:
regex = re.compile(exclude_regex, re.IGNORECASE)
if exclude_files_regex:
exclude_files_regex = re.compile(exclude_files_regex, re.IGNORECASE)
files_to_scan = filter(
lambda file: (
not regex.search(file)
not exclude_files_regex.search(file)
),
files_to_scan,
)
Expand All @@ -66,13 +72,13 @@ def get_secrets_not_in_baseline(results, baseline):
:rtype: SecretsCollection
:returns: SecretsCollection of new results (filtering out baseline)
"""
regex = None
if baseline.exclude_regex:
regex = re.compile(baseline.exclude_regex, re.IGNORECASE)
exclude_files_regex = None
if baseline.exclude_files:
exclude_files_regex = re.compile(baseline.exclude_files, re.IGNORECASE)

new_secrets = SecretsCollection()
for filename in results.data:
if regex and regex.search(filename):
if exclude_files_regex and exclude_files_regex.search(filename):
continue

if filename not in baseline.data:
Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ def write_baseline_to_file(filename, data):
:type data: dict
:rtype: None
"""
with open(filename, 'w') as f:
with open(filename, 'w') as f: # pragma: no cover
f.write(format_baseline_for_output(data) + '\n')
46 changes: 35 additions & 11 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,30 @@

class SecretsCollection(object):

def __init__(self, plugins=(), exclude_regex=''):
def __init__(
self,
plugins=(),
exclude_files=None,
exclude_lines=None,
):
"""
:type plugins: tuple of detect_secrets.plugins.base.BasePlugin
:param plugins: rules to determine whether a string is a secret
:type exclude_regex: str
:param exclude_regex: for optional regex for ignored paths.
:type exclude_files: str|None
:param exclude_files: optional regex for ignored paths.
:type exclude_lines: str|None
:param exclude_lines: optional regex for ignored lines.
:type version: str
:param version: version of detect-secrets that SecretsCollection
is valid at.
"""
self.data = {}
self.plugins = plugins
self.exclude_regex = exclude_regex
self.exclude_files = exclude_files
self.exclude_lines = exclude_lines
self.version = VERSION

@classmethod
Expand Down Expand Up @@ -59,20 +68,32 @@ def load_baseline_from_dict(cls, data):
:raises: IOError
"""
result = SecretsCollection()

if not all(key in data for key in (
'exclude_regex',
'plugins_used',
'results',
)):
raise IOError

result.exclude_regex = data['exclude_regex']
# In v0.12.0 `exclude_regex` got replaced by `exclude`
if not any(key in data for key in (
'exclude',
'exclude_regex',
)):
raise IOError

if 'exclude_regex' in data:
result.exclude_files = data['exclude_regex']
else:
result.exclude_files = data['exclude']['files']
result.exclude_lines = data['exclude']['lines']

plugins = []
for plugin in data['plugins_used']:
plugin_classname = plugin.pop('name')
plugins.append(initialize.from_plugin_classname(
plugin_classname,
exclude_lines_regex=result.exclude_lines,
**plugin
))
result.plugins = tuple(plugins)
Expand Down Expand Up @@ -141,13 +162,13 @@ def scan_diff(
log.error(alert)
raise

if self.exclude_regex:
regex = re.compile(self.exclude_regex, re.IGNORECASE)
if self.exclude_files:
regex = re.compile(self.exclude_files, re.IGNORECASE)

for patch_file in patch_set:
filename = patch_file.path
# If the file matches the exclude_regex, we skip it
if self.exclude_regex and regex.search(filename):
# If the file matches the exclude_files, we skip it
if self.exclude_files and regex.search(filename):
continue

if filename == baseline_filename:
Expand Down Expand Up @@ -241,7 +262,10 @@ def format_for_baseline_output(self):

return {
'generated_at': strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()),
'exclude_regex': self.exclude_regex,
'exclude': {
'files': self.exclude_files,
'lines': self.exclude_lines,
},
'plugins_used': plugins_used,
'results': results,
'version': self.version,
Expand Down
41 changes: 20 additions & 21 deletions detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@
from detect_secrets import VERSION


def add_exclude_lines_argument(parser):
    """
    Register the `--exclude-lines` option on the given parser.

    :type parser: argparse.ArgumentParser
    :param parser: parser (or any object exposing `add_argument`) that
        should accept a regex of lines to ignore during a scan.
    :rtype: None
    """
    help_text = 'Pass in regex to specify lines to ignore during scan.'
    parser.add_argument('--exclude-lines', type=str, help=help_text)


def add_use_all_plugins_argument(parser):
parser.add_argument(
'--use-all-plugins',
Expand All @@ -25,11 +33,10 @@ def add_default_arguments(self):
self._add_verbosity_argument()\
._add_version_argument()

return self

def add_pre_commit_arguments(self):
self._add_filenames_argument()\
._add_set_baseline_argument()\
._add_exclude_lines_argument()\
._add_use_all_plugins_argument()

PluginOptions(self.parser).add_arguments()
Expand Down Expand Up @@ -87,6 +94,10 @@ def _add_set_baseline_argument(self):
)
return self

def _add_exclude_lines_argument(self):
    """
    Register `--exclude-lines` on this builder's parser.

    :returns: self, so that builder calls can be chained.
    """
    parser = self.parser
    add_exclude_lines_argument(parser)
    return self

def _add_use_all_plugins_argument(self):
add_use_all_plugins_argument(self.parser)

Expand Down Expand Up @@ -117,11 +128,15 @@ def _add_initialize_baseline_argument(self):
),
)

# Pairing `--exclude` with `--scan` because it's only used for the initialization.
# Pairing `--exclude-lines` with both pre-commit and `--scan`
# because it can be used for both.
add_exclude_lines_argument(self.parser)

# Pairing `--exclude-files` with `--scan` because it's only used for the initialization.
# The pre-commit hook framework already has an `exclude` option that can be used instead.
self.parser.add_argument(
'--exclude',
nargs=1,
'--exclude-files',
type=str,
help='Pass in regex to specify ignored paths during initialization scan.',
)

Expand Down Expand Up @@ -155,7 +170,6 @@ def _add_adhoc_scanning_argument(self):
'plugins\' verdict.'
),
)
return self


class AuditOptions(object):
Expand Down Expand Up @@ -234,7 +248,6 @@ class PluginOptions(object):
disable_help_text='Disables scanning for hex high entropy strings',
related_args=[
('--hex-limit', 3,),
('--hex-high-entropy-exclude', None,),
],
),
PluginDescriptor(
Expand All @@ -243,7 +256,6 @@ class PluginOptions(object):
disable_help_text='Disables scanning for base64 high entropy strings',
related_args=[
('--base64-limit', 4.5,),
('--base64-high-entropy-exclude', None,),
],
),
PluginDescriptor(
Expand Down Expand Up @@ -286,7 +298,6 @@ def __init__(self, parser):
def add_arguments(self):
self._add_custom_limits()
self._add_opt_out_options()
self._add_high_entropy_excludes()

return self

Expand Down Expand Up @@ -375,18 +386,6 @@ def _add_custom_limits(self):
help=high_entropy_help_text + 'defaults to 3.0.',
)

def _add_high_entropy_excludes(self):
self.parser.add_argument(
'--base64-high-entropy-exclude',
type=str,
help='Pass in regex to exclude false positives found by base 64 high-entropy detector.',
)
self.parser.add_argument(
'--hex-high-entropy-exclude',
type=str,
help='Pass in regex to exclude false positives found by hex high-entropy detector.',
)

def _add_opt_out_options(self):
for plugin in self.all_plugins:
self.parser.add_argument(
Expand Down
66 changes: 49 additions & 17 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@


def parse_args(argv):
return ParserBuilder().add_console_use_arguments() \
return ParserBuilder()\
.add_console_use_arguments()\
.parse_args(argv)


Expand All @@ -30,7 +31,10 @@ def main(argv=None):
if args.action == 'scan':
# Plugins are *always* rescanned with fresh settings, because
# we want to get the latest updates.
plugins = initialize.from_parser_builder(args.plugins)
plugins = initialize.from_parser_builder(
args.plugins,
exclude_lines_regex=args.exclude_lines,
)
if args.string:
line = args.string

Expand Down Expand Up @@ -117,26 +121,29 @@ def _perform_scan(args, plugins):
_get_plugin_from_baseline(old_baseline), args,
)

# Favors --exclude argument over existing baseline's regex (if exists)
if args.exclude:
args.exclude = args.exclude[0]
elif old_baseline and old_baseline.get('exclude_regex'):
args.exclude = old_baseline['exclude_regex']
# Favors `--exclude-files` and `--exclude-lines` CLI arguments
# over existing baseline's regexes (if given)
if old_baseline:
if not args.exclude_files:
args.exclude_files = _get_exclude_files(old_baseline)

if (
not args.exclude_lines
and old_baseline.get('exclude')
):
args.exclude_lines = old_baseline['exclude']['lines']

# If we have knowledge of an existing baseline file, we should use
# that knowledge and *not* scan that file.
# that knowledge and add it to our exclude_files regex.
if args.import_filename:
payload = '^{}$'.format(args.import_filename[0])
if args.exclude and payload not in args.exclude:
args.exclude += r'|{}'.format(payload)
elif not args.exclude:
args.exclude = payload
_add_baseline_to_exclude_files(args)

new_baseline = baseline.initialize(
plugins,
args.exclude,
args.path,
args.all_files,
plugins=plugins,
exclude_files_regex=args.exclude_files,
exclude_lines_regex=args.exclude_lines,
path=args.path,
scan_all_files=args.all_files,
).format_for_baseline_output()

if old_baseline:
Expand Down Expand Up @@ -164,5 +171,30 @@ def _read_from_file(filename): # pragma: no cover
return json.loads(f.read())


def _get_exclude_files(old_baseline):
    """
    Return the file-exclusion regex stored in an existing baseline.

    Older versions of detect-secrets always had an `exclude_regex` key;
    this was replaced by the `files` key under an `exclude` key in v0.12.0.
    The new layout takes precedence when both are present.

    :type old_baseline: dict
    :param old_baseline: previously written baseline, already parsed.

    :rtype: str|None
    :returns: the stored regex, or None when the baseline has none.
    """
    exclude = old_baseline.get('exclude')
    if exclude:
        # A new-style baseline without a `files` entry simply excludes
        # nothing; don't blow up with a KeyError on it.
        return exclude.get('files')

    # Legacy layout. An empty or missing value means "no exclusion".
    return old_baseline.get('exclude_regex') or None


def _add_baseline_to_exclude_files(args):
    """
    Make sure the imported baseline file is part of the exclude-files regex.

    Modifies args.exclude_files in-place.

    :type args: object with `import_filename` and `exclude_files` attributes
    :param args: `import_filename[0]` is the baseline's path;
        `exclude_files` is the current exclusion regex (str|None).
    :rtype: None
    """
    # Local import so this function does not depend on the module's
    # top-of-file import list.
    import re

    filename = args.import_filename[0]

    # Escape the filename: it is a literal path, not a pattern. Left
    # unescaped, `.` matches any character and characters such as `(`,
    # `+`, `[` or `\` either stop the baseline from matching itself or
    # produce an invalid regex.
    baseline_name_regex = r'^{}$'.format(re.escape(filename))

    # Baselines written before escaping was introduced contain the raw
    # form; recognise it so those baselines are not needlessly rewritten.
    legacy_name_regex = r'^{}$'.format(filename)

    if not args.exclude_files:
        args.exclude_files = baseline_name_regex
    elif (
        baseline_name_regex not in args.exclude_files
        and legacy_name_regex not in args.exclude_files
    ):
        args.exclude_files += r'|{}'.format(baseline_name_regex)


if __name__ == '__main__':
sys.exit(main())
Loading

0 comments on commit 9f3d9ee

Please sign in to comment.