-
Notifications
You must be signed in to change notification settings - Fork 482
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support inline whitelisting for YAML files
- Loading branch information
1 parent
42373eb
commit f31080b
Showing
6 changed files
with
313 additions
and
272 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import re | ||
|
||
# TODO: Update for not just python comments? | ||
WHITELIST_REGEX = re.compile(r'# ?pragma: ?whitelist[ -]secret') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import configparser | ||
import re | ||
|
||
|
||
class IniFileParser(object): | ||
|
||
def __init__(self, file): | ||
self.parser = configparser.ConfigParser() | ||
self.parser.optionxform = str | ||
self.parser.read_file(file) | ||
|
||
# Hacky way to keep track of line location | ||
file.seek(0) | ||
self.lines = list(map(lambda x: x.strip(), file.readlines())) | ||
self.line_offset = 0 | ||
|
||
def iterator(self): | ||
if not self.parser.sections(): | ||
# To prevent cases where it's not an ini file, but the parser | ||
# helpfully attempts to parse everything to a DEFAULT section, | ||
# when not explicitly provided. | ||
raise configparser.Error | ||
|
||
for section_name, _ in self.parser.items(): | ||
for key, values in self.parser.items(section_name): | ||
for value, offset in self._get_value_and_line_offset( | ||
key, | ||
values, | ||
): | ||
yield value, offset | ||
|
||
def _get_value_and_line_offset(self, key, values): | ||
"""Returns the index of the location of key, value pair in lines. | ||
:type key: str | ||
:param key: key, in config file. | ||
:type values: str | ||
:param values: values for key, in config file. This is plural, | ||
because you can have multiple values per key. Eg. | ||
>>> key = | ||
... value1 | ||
... value2 | ||
:type lines: list | ||
:param lines: a collection of lines-so-far in file | ||
:rtype: list(tuple) | ||
""" | ||
values_list = self._construct_values_list(values) | ||
if not values_list: | ||
return [] | ||
|
||
current_value_list_index = 0 | ||
output = [] | ||
lines_modified = False | ||
|
||
first_line_regex = re.compile(r'^\s*{}[ :=]+{}'.format( | ||
re.escape(key), | ||
re.escape(values_list[current_value_list_index]), | ||
)) | ||
comment_regex = re.compile(r'\s*[;#]') | ||
for index, line in enumerate(self.lines): | ||
if current_value_list_index == 0: | ||
if first_line_regex.match(line): | ||
output.append(( | ||
values_list[current_value_list_index], | ||
self.line_offset + index + 1, | ||
)) | ||
|
||
current_value_list_index += 1 | ||
|
||
continue | ||
|
||
# Check ignored lines before checking values, because | ||
# you can write comments *after* the value. | ||
|
||
# Ignore blank lines | ||
if not line.strip(): | ||
continue | ||
|
||
# Ignore comments | ||
if comment_regex.match(line): | ||
continue | ||
|
||
if current_value_list_index == len(values_list): | ||
if index == 0: | ||
index = 1 # don't want to count the same line again | ||
|
||
self.line_offset += index | ||
self.lines = self.lines[index:] | ||
lines_modified = True | ||
|
||
break | ||
else: | ||
output.append(( | ||
values_list[current_value_list_index], | ||
self.line_offset + index + 1, | ||
)) | ||
|
||
current_value_list_index += 1 | ||
|
||
if not lines_modified: | ||
# No more lines left, if loop was not explicitly left. | ||
self.lines = [] | ||
|
||
return output | ||
|
||
@staticmethod | ||
def _construct_values_list(values): | ||
""" | ||
This values_list is a strange construction, because of ini format. | ||
We need to extract the values with the following supported format: | ||
>>> key = value0 | ||
... value1 | ||
... | ||
... # comment line here | ||
... value2 | ||
given that normally, either value0 is supplied, or (value1, value2), | ||
but still allowing for all three at once. | ||
Furthermore, with the configparser, we will get a list of values, | ||
and intermediate blank lines, but no comments. This means that we can't | ||
merely use the count of values' items to heuristically "skip ahead" lines, | ||
because we still have to manually parse through this. | ||
Therefore, we construct the values_list in the following fashion: | ||
1. Keep the first value (in the example, this is `value0`) | ||
2. For all other values, ignore blank lines. | ||
Then, we can parse through, and look for values only. | ||
""" | ||
values_list = values.splitlines() | ||
return values_list[:1] + list( | ||
filter( | ||
lambda x: x, | ||
values_list[1:], | ||
), | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import yaml | ||
|
||
from detect_secrets.plugins.core.constants import WHITELIST_REGEX | ||
|
||
|
||
class YamlFileParser(object): | ||
""" | ||
Yaml config files are interesting, because they don't necessarily conform | ||
to our basic regex for detecting HighEntropyStrings as strings don't | ||
need to be quoted. | ||
This causes interesting issues, because our regex won't catch non-quoted | ||
strings, and if we ignore the quoting requirement, then we increase our | ||
false positive rate, because any long string would have high entropy. | ||
Therefore, we take a different approach: intercept the parsing of the yaml | ||
file to identify string values. This assumes: | ||
1. Secrets are strings | ||
2. Secrets are not keys | ||
Then, we calculate the entropy of those string values. | ||
The difficulty comes from determining the line number which these values | ||
come from. To do this, we transform the string into a dictionary of | ||
meta-tags, in the following format: | ||
>>> { | ||
'key': { | ||
'__value__': value, | ||
'__line__': <line_number>, | ||
} | ||
} | ||
This way, we can quickly identify the line number for auditing at a later | ||
stage. | ||
This parsing method is inspired by https://stackoverflow.com/a/13319530. | ||
""" | ||
|
||
def __init__(self, file): | ||
self.content = file.read() | ||
self.loader = yaml.SafeLoader(self.content) | ||
|
||
self.loader.compose_node = self._compose_node_shim | ||
|
||
def json(self): | ||
return self.loader.get_single_data() | ||
|
||
def _compose_node_shim(self, parent, index): | ||
line = self.loader.line | ||
|
||
node = yaml.composer.Composer.compose_node(self.loader, parent, index) | ||
node.__line__ = line + 1 | ||
|
||
if node.tag.endswith(':map'): | ||
return self._tag_dict_values(node) | ||
|
||
# TODO: Not sure if need to do :seq | ||
|
||
return node | ||
|
||
def _tag_dict_values(self, map_node): | ||
""" | ||
:type map_node: yaml.nodes.MappingNode | ||
:param map_node: It looks like map_node.value contains a list of | ||
pair tuples, corresponding to key,value pairs. | ||
""" | ||
new_values = [] | ||
for key, value in map_node.value: | ||
if not value.tag.endswith(':str'): | ||
new_values.append((key, value,)) | ||
continue | ||
|
||
augmented_string = yaml.nodes.MappingNode( | ||
tag=map_node.tag, | ||
value=[ | ||
self._create_key_value_pair_for_mapping_node_value( | ||
'__value__', | ||
value.value, | ||
'tag:yaml.org,2002:str', | ||
), | ||
self._create_key_value_pair_for_mapping_node_value( | ||
'__line__', | ||
str(value.__line__), | ||
'tag:yaml.org,2002:int', | ||
), | ||
], | ||
) | ||
|
||
new_values.append((key, augmented_string,)) | ||
|
||
output = yaml.nodes.MappingNode( | ||
tag=map_node.tag, | ||
value=new_values, | ||
start_mark=map_node.start_mark, | ||
end_mark=map_node.end_mark, | ||
flow_style=map_node.flow_style, | ||
) | ||
return output | ||
|
||
@staticmethod | ||
def _create_key_value_pair_for_mapping_node_value(key, value, tag): | ||
return ( | ||
yaml.nodes.ScalarNode( | ||
tag='tag:yaml.org,2002:str', | ||
value=key, | ||
), | ||
yaml.nodes.ScalarNode( | ||
tag=tag, | ||
value=value, | ||
), | ||
) | ||
|
||
def get_ignored_lines(self): | ||
ignored_lines = set() | ||
|
||
for line_number, line in enumerate(self.content.split('\n'), 1): | ||
if WHITELIST_REGEX.search(line): | ||
ignored_lines.add(line_number) | ||
|
||
return ignored_lines |
Oops, something went wrong.