Skip to content

Commit 02ff47c

Browse files
author
Jan-Philipp Litza
committed
Support multiple regexp matches per line
By using the match group's indices, this also avoids replacing unrelated chunks of the line.
1 parent abdef4c commit 02ff47c

File tree

2 files changed

+36
-17
lines changed

2 files changed

+36
-17
lines changed

anonip.py

+30-17
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,35 @@ def process_ip(self, ip):
180180
)
181181
return trunc_ip
182182

183+
def process_regex_match(self, match):
    """
    Anonymize the capture groups of a single regex match.

    The returned string is meant to replace the whole match, so this method
    can be passed to ``re.sub`` as the replacement callback.

    Text of the match that is not covered by a capture group is copied
    through unchanged. Each non-empty capture group is handed to
    ``self.extract_ip``; if that yields a truthy IP the group is replaced
    by ``str(self.process_ip(ip))``, otherwise by ``self.replace`` when it
    is set, otherwise the group text is kept as is.

    Capture groups that begin inside text already consumed by an earlier
    group (nested or overlapping groups) are skipped: the outermost/first
    group wins. Without this, the replacement would be emitted once per
    overlapping group and the output would contain duplicated text.

    :param match: re.Match
    :return: str
    """
    whole = match.group(0)
    # Group positions are absolute within the searched string; shift them
    # so they index into the matched text only.
    offset = match.start(0)
    ret = []
    last_pos = 0

    for i, g in enumerate(match.groups(), start=1):
        # Groups that did not participate (None) or matched nothing ("")
        # have nothing to anonymize.
        if not g:
            continue

        start = match.start(i) - offset
        if start < last_pos:
            # Already covered by a previously replaced group.
            continue

        _, ip = self.extract_ip(g)
        replacement = self.process_ip(ip) if ip else self.replace or g

        # Untouched text between the previous group and this one, then
        # the anonymized group itself.
        ret.append(whole[last_pos:start])
        ret.append(str(replacement))
        last_pos = match.end(i) - offset

    # Remainder of the match after the last replaced group.
    ret.append(whole[last_pos:])
    return "".join(ret)
211+
183212
def process_line_regex(self, line):
184213
"""
185214
This function processes a single line based on the provided regex.
@@ -189,23 +218,7 @@ def process_line_regex(self, line):
189218
:param line: str
190219
:return: str
191220
"""
192-
match = re.match(self.regex, line)
193-
if not match:
194-
logger.debug("Regex did not match!")
195-
return line
196-
groups = match.groups()
197-
198-
for m in set(groups):
199-
if not m:
200-
continue
201-
ip_str, ip = self.extract_ip(m)
202-
if ip:
203-
trunc_ip = self.process_ip(ip)
204-
line = line.replace(ip_str, str(trunc_ip))
205-
elif self.replace:
206-
line = line.replace(m, self.replace)
207-
208-
return line
221+
return re.sub(self.regex, self.process_regex_match, line)
209222

210223
def process_line_column(self, line):
211224
"""

tests.py

+6
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,12 @@ def test_column(line, columns, expected):
140140
'3.3.0.0 - - [20/May/2015:21:05:01 +0000] "GET /723.3.3.357 HTTP/1.1" 200 13358 "-" "useragent"',
141141
None,
142142
),
143+
(
144+
'3.3.3.3 - - [20/May/2015:21:05:01 +0000] "GET /723.3.3.357 HTTP/1.1" 200 13358 "-" "useragent [ip:1.2.3.4]"',
145+
re.compile(r"\b([0-9a-fA-F][0-9a-fA-F:\.]*|::[0-9a-fA-F:\.]+)\b"),
146+
'3.3.0.0 - - [20/May/2015:21:05:01 +0000] "GET /723.3.3.357 HTTP/1.1" 200 13358 "-" "useragent [ip:1.2.0.0]"',
147+
None,
148+
),
143149
(
144150
"blabla/ 3.3.3.3 /blublu",
145151
re.compile(r"^blabla/ ([^,]+) /blublu"),

0 commit comments

Comments
 (0)