Skip to content

Commit f0aaff4

Browse files
add exclude-regex-patterns (#458)
* add exclude-regex-patterns - mimics the functionality of the exclude-entropy-patterns option, but for regex scans. this will help reduce false positives such as env variables for user info auth in URLs - note that the scope is forced to "line" and not configurable due to the way the regex scan is done compared to the entropy scan - will follow up with changelog update after creating PR - a few unrelated changes were necessary pre-commit linters to pass * update changelog with `exclude-regex-patterns` * Apply suggestions from code review Co-authored-by: Scott Bailey <[email protected]> * update documentation to note lack of scope option, attempting to revert type hint change * Fix unit test Force cache flush before calling function with @lru_cache decorator; this ensures the return actually reflects the environment constructed by the unit test and not something left over from an unrelated test. --------- Co-authored-by: Scott Bailey <[email protected]>
1 parent 814d1b8 commit f0aaff4

File tree

10 files changed

+293
-16
lines changed

10 files changed

+293
-16
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ vX.X.X - Mar 3 2023
33

44
Features:
55
* [#455](https://github.com/godaddy/tartufo/pull/455) - Update documentation to fix incorrect wording
6+
* [#458](https://github.com/godaddy/tartufo/pull/458) - Adds `--exclude-regex-patterns` to allow for regex-based exclusions
67
* [#479](https://github.com/godaddy/tartufo/pull/479) - Remove upward traversal logic for config discovery
78

89
Bug fixes:

docs/source/configuration.rst

+50
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,56 @@ match-type No String ("search" or "match") Whether to perform a `search
265265
scope No String ("word" or "line") Whether to match against the current word or full line of text
266266
============ ======== ============================ ==============================================================
267267

268+
.. _regex-exclusion-patterns:
269+
270+
Regex Exclusion Patterns
271+
++++++++++++++++++++++++
272+
273+
Regex scans can produce false positive matches such as environment variables in
274+
URLs. To avoid these false positives, you can use the
275+
``exclude-regex-patterns`` configuration option. These patterns will be
276+
applied to and matched against any strings flagged by regex pattern checks. As
277+
above, this directive utilizes an `array of tables`_, enabling two forms:
278+
279+
Option 1:
280+
281+
.. code-block:: toml
282+
283+
[tool.tartufo]
284+
exclude-regex-patterns = [
285+
{path-pattern = 'products_.*\.txt', pattern = '^SK[\d]{16,32}$', reason = 'SKU pattern that resembles Twilio API Key'},
286+
{path-pattern = '\.github/workflows/.*\.yaml', pattern = 'https://\${\S+}:\${\S+}@\S+', reason = 'URL with env variables for auth'},
287+
]
288+
289+
Option 2:
290+
291+
.. code-block:: toml
292+
293+
[[tool.tartufo.exclude-regex-patterns]]
294+
path-pattern = 'products_.*\.txt'
295+
pattern = '^SK[\d]{16,32}$'
296+
reason = 'SKU pattern that resembles Twilio API Key'
297+
298+
[[tool.tartufo.exclude-regex-patterns]]
299+
path-pattern = '\.github/workflows/.*\.yaml'
300+
pattern = 'https://\${\S+}:\${\S+}@\S+'
301+
reason = 'URL with env variables for auth'
302+
303+
304+
There are 4 relevant keys for this directive, as described below. Note that
305+
regex scans differ from entropy scans, so the exclusion pattern is always
306+
tested against the offending regex match(es). As a result, there is no
307+
``scope`` key for this directive.
308+
309+
============ ======== ============================ ==============================================================
310+
Key Required Value Description
311+
============ ======== ============================ ==============================================================
312+
pattern Yes Regular expression The pattern used to check against the match
313+
path-pattern No Regular expression A pattern to specify to what files the exclusion will apply
314+
reason No String A plaintext reason the exclusion has been added
315+
match-type No String ("search" or "match") Whether to perform a `search or match`_ regex operation
316+
============ ======== ============================ ==============================================================
317+
268318
.. _TOML: https://toml.io/
269319
.. _array of tables: https://toml.io/en/v1.0.0#array-of-tables
270320
.. _search or match: https://docs.python.org/3/library/re.html#search-vs-match

tartufo/cli.py

+10
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,16 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Comma
135135
excluded. ({"path-pattern": {path regex}, "pattern": {pattern regex}, "match-type": "match"|"search",
136136
"scope": "word"|"line"}).""",
137137
)
138+
@click.option(
139+
"-xr",
140+
"--exclude-regex-patterns",
141+
multiple=True,
142+
hidden=True,
143+
type=click.UNPROCESSED,
144+
help="""Specify a regular expression which matches regex strings to exclude from the scan. This option can be
145+
specified multiple times to exclude multiple patterns. If not provided (default), no regex strings will be
146+
excluded. ({"path-pattern": {path regex}, "pattern": {pattern regex}, "match-type": "match"|"search"}).""",
147+
)
138148
@click.option(
139149
"-e",
140150
"--exclude-signatures",

tartufo/config.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ def compile_path_rules(patterns: Iterable[str]) -> List[Pattern]:
241241
]
242242

243243

244-
def compile_rules(patterns: Iterable[Dict[str, str]]) -> List[Rule]:
244+
def compile_rules(patterns: Iterable[Dict[str, str]], exclude_type: str) -> List[Rule]:
245245
"""Take a list of regex strings with paths and compile them into a List of Rule.
246246
247247
:param patterns: The list of patterns to be compiled
@@ -255,12 +255,17 @@ def compile_rules(patterns: Iterable[Dict[str, str]]) -> List[Rule]:
255255
raise ConfigException(
256256
f"Invalid value for match-type: {pattern.get('match-type')}"
257257
) from exc
258-
try:
259-
scope = Scope(pattern.get("scope", Scope.Line.value))
260-
except ValueError as exc:
261-
raise ConfigException(
262-
f"Invalid value for scope: {pattern.get('scope')}"
263-
) from exc
258+
if exclude_type == "regex":
259+
# regex exclusions always have line scope
260+
scope = Scope.Line
261+
else:
262+
# entropy exclusions can specify scope
263+
try:
264+
scope = Scope(pattern.get("scope", Scope.Line.value))
265+
except ValueError as exc:
266+
raise ConfigException(
267+
f"Invalid value for scope: {pattern.get('scope')}"
268+
) from exc
264269
try:
265270
rules.append(
266271
Rule(
@@ -273,6 +278,6 @@ def compile_rules(patterns: Iterable[Dict[str, str]]) -> List[Rule]:
273278
)
274279
except KeyError as exc:
275280
raise ConfigException(
276-
f"Invalid exclude-entropy-patterns: {patterns}"
281+
f"Invalid exclude-{exclude_type}-patterns: {patterns}"
277282
) from exc
278283
return rules

tartufo/scanner.py

+43-5
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ class ScannerBase(abc.ABC): # pylint: disable=too-many-instance-attributes
140140
_included_paths: Optional[List[Pattern]] = None
141141
_excluded_paths: Optional[List[Pattern]] = None
142142
_excluded_entropy: Optional[List[Rule]] = None
143+
_excluded_regex: Optional[List[Rule]] = None
143144
_rules_regexes: Optional[Set[Rule]] = None
144145
global_options: types.GlobalOptions
145146
logger: logging.Logger
@@ -272,12 +273,30 @@ def excluded_entropy(self) -> List[Rule]:
272273
patterns = list(self.global_options.exclude_entropy_patterns or ()) + list(
273274
self.config_data.get("exclude_entropy_patterns", ())
274275
)
275-
self._excluded_entropy = config.compile_rules(patterns) if patterns else []
276+
self._excluded_entropy = (
277+
config.compile_rules(patterns, "entropy") if patterns else []
278+
)
276279
self.logger.debug(
277280
"Excluded entropy was initialized as: %s", self._excluded_entropy
278281
)
279282
return self._excluded_entropy
280283

284+
@property
285+
def excluded_regex(self) -> List[Rule]:
286+
"""Get a list of rules used to exclude matching strings from regex scan findings."""
287+
if self._excluded_regex is None:
288+
self.logger.info("Initializing excluded regex patterns")
289+
patterns = list(self.global_options.exclude_regex_patterns or ()) + list(
290+
self.config_data.get("exclude_regex_patterns", ())
291+
)
292+
self._excluded_regex = (
293+
config.compile_rules(patterns, "regex") if patterns else []
294+
)
295+
self.logger.debug(
296+
"Excluded regex was initialized as: %s", self._excluded_regex
297+
)
298+
return self._excluded_regex
299+
281300
@property
282301
def excluded_paths(self) -> List[Pattern]:
283302
"""Get a list of regexes used to match paths to exclude from the scan"""
@@ -390,7 +409,7 @@ def signature_is_excluded(self, blob: str, file_path: str) -> bool:
390409

391410
@staticmethod
392411
@lru_cache(maxsize=None)
393-
def rule_matches(rule: Rule, string: str, line: str, path: str) -> bool:
412+
def rule_matches(rule: Rule, string: Optional[str], line: str, path: str) -> bool:
394413
"""
395414
Match string and path against rule.
396415
@@ -402,6 +421,8 @@ def rule_matches(rule: Rule, string: str, line: str, path: str) -> bool:
402421
"""
403422
match = False
404423
if rule.re_match_scope == Scope.Word:
424+
if not string:
425+
raise TartufoException(f"String required for {Scope.Word} scope")
405426
scope = string
406427
elif rule.re_match_scope == Scope.Line:
407428
scope = line
@@ -434,6 +455,18 @@ def entropy_string_is_excluded(self, string: str, line: str, path: str) -> bool:
434455
for p in self.excluded_entropy
435456
)
436457

458+
def regex_string_is_excluded(self, line: str, path: str) -> bool:
459+
"""Find whether a string flagged by a regex scan has been excluded in configuration.
460+
461+
:param line: Text flagged by the regex scan, tested against each exclusion rule
462+
:param path: Path to check against rule path pattern
463+
:return: True if excluded, False otherwise
464+
"""
465+
466+
return bool(self.excluded_regex) and any(
467+
ScannerBase.rule_matches(p, None, line, path) for p in self.excluded_regex
468+
)
469+
437470
@staticmethod
438471
@lru_cache(maxsize=None)
439472
def calculate_entropy(data: str) -> float:
@@ -608,9 +641,14 @@ def scan_regex(self, chunk: types.Chunk) -> Generator[Issue, None, None]:
608641
for match in found_strings:
609642
# Filter out any explicitly "allowed" match signatures
610643
if not self.signature_is_excluded(match, chunk.file_path):
611-
issue = Issue(types.IssueType.RegEx, match, chunk)
612-
issue.issue_detail = rule.name
613-
yield issue
644+
if self.regex_string_is_excluded(match, chunk.file_path):
645+
self.logger.debug(
646+
"line containing regex was excluded: %s", match
647+
)
648+
else:
649+
issue = Issue(types.IssueType.RegEx, match, chunk)
650+
issue.issue_detail = rule.name
651+
yield issue
614652

615653
@property
616654
@abc.abstractmethod

tartufo/types.py

+4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ class GlobalOptions:
5858
:param exclude_path_patterns: A list of paths to be excluded from the scan
5959
:param exclude_entropy_patterns: Patterns to be excluded from entropy
6060
matches
61+
:param exclude_regex_patterns: Patterns to be excluded from regex
62+
matches
6163
:param exclude_signatures: Signatures of previously found findings to be
6264
excluded from the list of current findings
6365
:param exclude_findings: Signatures of previously found findings to be
@@ -92,6 +94,7 @@ class GlobalOptions:
9294
"include_path_patterns",
9395
"exclude_path_patterns",
9496
"exclude_entropy_patterns",
97+
"exclude_regex_patterns",
9598
"exclude_signatures",
9699
"output_dir",
97100
"temp_dir",
@@ -114,6 +117,7 @@ class GlobalOptions:
114117
include_path_patterns: Tuple[Dict[str, str], ...]
115118
exclude_path_patterns: Tuple[Dict[str, str], ...]
116119
exclude_entropy_patterns: Tuple[Dict[str, str], ...]
120+
exclude_regex_patterns: Tuple[Dict[str, str], ...]
117121
exclude_signatures: Tuple[Dict[str, str], ...]
118122
output_dir: Optional[str]
119123
temp_dir: Optional[str]

tartufo/util.py

+16
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,17 @@ def echo_report_result(scanner: "ScannerBase", now: str):
123123
f" {pattern} (path={path_pattern}, scope={m_scope}, type={m_type}): {reason}"
124124
)
125125

126+
click.echo("\nExcluded regex patterns:")
127+
for e_item in scanner.excluded_regex:
128+
pattern = e_item.pattern.pattern if e_item.pattern else ""
129+
path_pattern = e_item.path_pattern.pattern if e_item.path_pattern else ""
130+
m_scope = e_item.re_match_scope.value if e_item.re_match_scope else ""
131+
m_type = e_item.re_match_type.value if e_item.re_match_type else ""
132+
reason = e_item.name
133+
click.echo(
134+
f" {pattern} (path={path_pattern}, scope={m_scope}, type={m_type}): {reason}"
135+
)
136+
126137

127138
def echo_result(
128139
options: "types.GlobalOptions",
@@ -151,6 +162,9 @@ def echo_result(
151162
"exclude_entropy_patterns": [
152163
str(pattern) for pattern in options.exclude_entropy_patterns
153164
],
165+
"exclude_regex_patterns": [
166+
str(pattern) for pattern in options.exclude_regex_patterns
167+
],
154168
# This member is for reference. Read below...
155169
# "found_issues": [
156170
# issue.as_dict(compact=options.compact) for issue in scanner.issues
@@ -191,6 +205,8 @@ def echo_result(
191205
click.echo("\n".join(scanner.excluded_signatures))
192206
click.echo("\nExcluded entropy patterns:")
193207
click.echo("\n".join(str(path) for path in scanner.excluded_entropy))
208+
click.echo("\nExcluded regex patterns:")
209+
click.echo("\n".join(str(path) for path in scanner.excluded_regex))
194210

195211

196212
def write_outputs(

0 commit comments

Comments
 (0)