Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

small improvements to existing FileParsers #107

Merged
merged 4 commits into from
Dec 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion detect_secrets/plugins/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,18 @@
('\'', ''), # e.g. visual basic .net
('--', ''), # e.g. sql
# many other inline comment syntaxes are not included,
# because we want to be performant for the common case
# because we want to be performant for
# any(regex.search(line) for regex in WHITELIST_REGEXES)
# calls. of course, this won't be a concern if detect-secrets
# switches over to implementing file plugins for each supported
# filetype.
)
]
]

# add to this mapping (and WHITELIST_REGEXES if applicable) lazily,
# as more language specific file parsers are implemented.
# discussion: https://github.com/Yelp/detect-secrets/pull/105
WHITELIST_REGEX = {
'yaml': WHITELIST_REGEXES[0],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Super nit: maybe update the regex comment from `# e.g. python` to `# e.g. python or yaml`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that'll be necessary if a python file parser is implemented. Then we can add 'py': WHITELIST_REGEXES[0]. More language/specification-specific file parsers are what I'm pushing for, or at least suggesting, architecturally. See #105.

}
46 changes: 17 additions & 29 deletions detect_secrets/plugins/core/ini_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@

class IniFileParser(object):

_comment_regex = re.compile(r'\s*[;#]')

def __init__(self, file):
self.parser = configparser.ConfigParser()
self.parser.optionxform = str
self.parser.read_file(file)

# Hacky way to keep track of line location
file.seek(0)
self.lines = list(map(lambda x: x.strip(), file.readlines()))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

++

self.lines = [line.strip() for line in file.readlines()]
self.line_offset = 0

def iterator(self):
Expand Down Expand Up @@ -56,42 +58,31 @@ def _get_value_and_line_offset(self, key, values):
output = []
lines_modified = False

first_line_regex = re.compile(r'^\s*{}[ :=]+{}'.format(
re.escape(key),
re.escape(values_list[current_value_list_index]),
))
comment_regex = re.compile(r'\s*[;#]')
for index, line in enumerate(self.lines):
# Check ignored lines before checking values, because
# you can write comments *after* the value.
if not line.strip() or self._comment_regex.match(line):
continue

if current_value_list_index == 0:
first_line_regex = re.compile(r'^\s*{}[ :=]+{}'.format(
re.escape(key),
re.escape(values_list[current_value_list_index]),
))
if first_line_regex.match(line):
output.append((
values_list[current_value_list_index],
self.line_offset + index + 1,
))

current_value_list_index += 1

continue

# Check ignored lines before checking values, because
# you can write comments *after* the value.

# Ignore blank lines
if not line.strip():
continue

# Ignore comments
if comment_regex.match(line):
continue

if current_value_list_index == len(values_list):
if index == 0:
index = 1 # don't want to count the same line again

index = 1 # don't want to count the same line again
self.line_offset += index
self.lines = self.lines[index:]
lines_modified = True

break
else:
output.append((
Expand Down Expand Up @@ -132,10 +123,7 @@ def _construct_values_list(values):
2. For all other values, ignore blank lines.
Then, we can parse through, and look for values only.
"""
values_list = values.splitlines()
return values_list[:1] + list(
filter(
lambda x: x,
values_list[1:],
),
)
lines = values.splitlines()
values_list = lines[:1]
values_list.extend(filter(None, lines[1:]))
return values_list
4 changes: 2 additions & 2 deletions detect_secrets/plugins/core/yaml_file_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import yaml

from detect_secrets.plugins.core.constants import WHITELIST_REGEXES
from detect_secrets.plugins.core.constants import WHITELIST_REGEX


class YamlFileParser(object):
Expand Down Expand Up @@ -127,7 +127,7 @@ def get_ignored_lines(self):
ignored_lines = set()

for line_number, line in enumerate(self.content.split('\n'), 1):
if any(regex.search(line) for regex in WHITELIST_REGEXES):
if WHITELIST_REGEX['yaml'].search(line):
ignored_lines.add(line_number)

return ignored_lines