Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit f97511a

Browse files
committed
Move event_fields filtering to serialize_event
Also make it an inclusive not exclusive filter, as the spec demands.
1 parent e90fcd9 commit f97511a

File tree

3 files changed

+119
-59
lines changed

3 files changed

+119
-59
lines changed

synapse/api/filtering.py

Lines changed: 1 addition & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from twisted.internet import defer
1919

2020
import ujson as json
21-
import re
2221

2322

2423
class Filtering(object):
@@ -81,7 +80,7 @@ def check_valid_filter(self, user_filter_json):
8180
# Don't allow '\\' in event field filters. This makes matching
8281
# events a lot easier as we can then use a negative lookbehind
8382
# assertion to split '\.' If we allowed \\ then it would
84-
# incorrectly split '\\.'
83+
# incorrectly split '\\.' See synapse.events.utils.serialize_event
8584
if r'\\' in field:
8685
raise SynapseError(
8786
400, r'The escape character \ cannot itself be escaped'
@@ -168,11 +167,6 @@ def __init__(self, filter_json):
168167
self.include_leave = filter_json.get("room", {}).get(
169168
"include_leave", False
170169
)
171-
self._event_fields = filter_json.get("event_fields", [])
172-
# Negative lookbehind assertion for '\'
173-
# (?<!stuff) matches if the current position in the string is not preceded
174-
# by a match for 'stuff'.
175-
self._split_field_regex = re.compile(r'(?<!\\)\.')
176170

177171
def __repr__(self):
178172
return "<FilterCollection %s>" % (json.dumps(self._filter_json),)
@@ -207,54 +201,6 @@ def filter_room_ephemeral(self, events):
207201
def filter_room_account_data(self, events):
208202
return self._room_account_data.filter(self._room_filter.filter(events))
209203

210-
def filter_event_fields(self, event):
211-
"""Remove fields from an event in accordance with the 'event_fields' of a filter.
212-
213-
If there are no event fields specified then all fields are included.
214-
The entries may include '.' characters to indicate sub-fields.
215-
So ['content.body'] will include the 'body' field of the 'content' object.
216-
A literal '.' character in a field name may be escaped using a '\'.
217-
218-
Args:
219-
event(dict): The raw event to filter
220-
Returns:
221-
dict: The same event with some fields missing, if required.
222-
"""
223-
for field in self._event_fields:
224-
self.filter_field(event, field)
225-
return event
226-
227-
def filter_field(self, dictionary, field):
228-
"""Filter the given field from the given dictionary.
229-
230-
Args:
231-
dictionary(dict): The dictionary to remove the field from.
232-
field(str): The key to remove.
233-
Returns:
234-
dict: The same dictionary with the field removed.
235-
"""
236-
# "content.body.thing\.with\.dots" => ["content", "body", "thing\.with\.dots"]
237-
sub_fields = self._split_field_regex.split(field)
238-
# remove escaping so we can use the right key names when deleting
239-
sub_fields = [f.replace(r'\.', r'.') for f in sub_fields]
240-
241-
# common case e.g. 'origin_server_ts'
242-
if len(sub_fields) == 1:
243-
dictionary.pop(sub_fields[0], None)
244-
# nested field e.g. 'content.body'
245-
elif len(sub_fields) > 1:
246-
# Pop the last field as that's the key to delete and we need the
247-
# parent dict in order to remove the key. Drill down to the right dict.
248-
key_to_delete = sub_fields.pop(-1)
249-
sub_dict = dictionary
250-
for sub_field in sub_fields:
251-
if sub_field in sub_dict and type(sub_dict[sub_field]) == dict:
252-
sub_dict = sub_dict[sub_field]
253-
else:
254-
return dictionary
255-
sub_dict.pop(key_to_delete, None)
256-
return dictionary
257-
258204

259205
class Filter(object):
260206
def __init__(self, filter_json):

synapse/events/utils.py

Lines changed: 97 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,15 @@
1616
from synapse.api.constants import EventTypes
1717
from . import EventBase
1818

19+
import re
20+
21+
# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
22+
# (?<!stuff) matches if the current position in the string is not preceded
23+
# by a match for 'stuff'.
24+
# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
25+
# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
26+
SPLIT_FIELD_REGEX = re.compile(r'(?<!\\)\.')
27+
1928

2029
def prune_event(event):
2130
""" Returns a pruned version of the given event, which removes all keys we
@@ -97,6 +106,87 @@ def add_fields(*fields):
97106
)
98107

99108

109+
def _copy_field(src, dst, field):
110+
"""Copy the field in 'src' to 'dst'.
111+
112+
For example, if src={"foo":{"bar":5}} and dst={}, and field=["foo","bar"]
113+
then dst={"foo":{"bar":5}}.
114+
115+
Args:
116+
src(dict): The dict to read from.
117+
dst(dict): The dict to modify.
118+
field(list<str>): List of keys to drill down to in 'src'.
119+
"""
120+
if len(field) == 0: # this should be impossible
121+
return
122+
if len(field) == 1: # common case e.g. 'origin_server_ts'
123+
if field[0] in src:
124+
dst[field[0]] = src[field[0]]
125+
return
126+
127+
# Else is a nested field e.g. 'content.body'
128+
# Pop the last field as that's the key to move across and we need the
129+
# parent dict in order to access the data. Drill down to the right dict.
130+
key_to_move = field.pop(-1)
131+
sub_dict = src
132+
for sub_field in field: # e.g. sub_field => "content"
133+
if sub_field in sub_dict and type(sub_dict[sub_field]) == dict:
134+
sub_dict = sub_dict[sub_field]
135+
else:
136+
return
137+
138+
if key_to_move not in sub_dict:
139+
return
140+
141+
# Insert the key into the output dictionary, creating nested objects
142+
# as required. We couldn't do this any earlier or else we'd need to delete
143+
# the empty objects if the key didn't exist.
144+
sub_out_dict = dst
145+
for sub_field in field:
146+
if sub_field not in sub_out_dict:
147+
sub_out_dict[sub_field] = {}
148+
sub_out_dict = sub_out_dict[sub_field]
149+
sub_out_dict[key_to_move] = sub_dict[key_to_move]
150+
151+
152+
def only_fields(dictionary, fields):
    r"""Return a new dict with only the fields in 'dictionary' which are present
    in 'fields'.

    If there are no event fields specified then all fields are included.
    The entries may include '.' characters to indicate sub-fields.
    So ['content.body'] will include the 'body' field of the 'content' object.
    A literal '.' character in a field name may be escaped using a '\'.

    Args:
        dictionary(dict): The dictionary to read from.
        fields(list<str>): A list of fields to copy over. Only shallow refs are
            taken.
    Returns:
        dict: A new dictionary with only the given fields. If fields was empty
        (or None), the same dictionary is returned.
    """
    if not fields:
        return dictionary

    output = {}
    for field in fields:
        # Convert the dotted name into a list of keys:
        # "content.body.thing\.with\.dots" => ["content", "body", "thing\.with\.dots"]
        field_array = SPLIT_FIELD_REGEX.split(field)

        # Remove the escaping so we can use the right key names. This is done
        # in place to avoid needless allocations as this may be called on a
        # lot of events.
        for i, sub_field in enumerate(field_array):
            field_array[i] = sub_field.replace(r'\.', r'.')

        _copy_field(dictionary, output, field_array)
    return output
188+
189+
100190
def format_event_raw(d):
101191
return d
102192

@@ -137,7 +227,7 @@ def format_event_for_client_v2_without_room_id(d):
137227

138228
def serialize_event(e, time_now_ms, as_client_event=True,
139229
event_format=format_event_for_client_v1,
140-
token_id=None):
230+
token_id=None, event_fields=None):
141231
# FIXME(erikj): To handle the case of presence events and the like
142232
if not isinstance(e, EventBase):
143233
return e
@@ -164,6 +254,9 @@ def serialize_event(e, time_now_ms, as_client_event=True,
164254
d["unsigned"]["transaction_id"] = txn_id
165255

166256
if as_client_event:
167-
return event_format(d)
168-
else:
169-
return d
257+
d = event_format(d)
258+
259+
if isinstance(event_fields, list):
260+
d = only_fields(d, event_fields)
261+
262+
return d

tests/events/test_utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,24 @@ def test_content(self):
114114
'unsigned': {},
115115
}
116116
)
117+
118+
119+
class SerializeEventTestCase(unittest.TestCase):
120+
121+
def test_event_fields_works_with_keys(self):
122+
pass
123+
124+
def test_event_fields_works_with_nested_keys(self):
125+
pass
126+
127+
def test_event_fields_works_with_dot_keys(self):
128+
pass
129+
130+
def test_event_fields_works_with_nested_dot_keys(self):
131+
pass
132+
133+
def test_event_fields_nops_with_unknown_keys(self):
134+
pass
135+
136+
def test_event_fields_nops_with_non_dict_keys(self):
137+
pass

0 commit comments

Comments
 (0)