Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 406f7bf

Browse files
authored
Add an approximate difference method to StateFilters (#10825)
1 parent e0f11ae commit 406f7bf

File tree

3 files changed

+683
-3
lines changed

3 files changed

+683
-3
lines changed

changelog.d/10825.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add an 'approximate difference' method to `StateFilter`.

synapse/storage/state.py

Lines changed: 171 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
from typing import (
1616
TYPE_CHECKING,
1717
Awaitable,
18+
Collection,
1819
Dict,
1920
Iterable,
2021
List,
22+
Mapping,
2123
Optional,
2224
Set,
2325
Tuple,
@@ -29,7 +31,7 @@
2931

3032
from synapse.api.constants import EventTypes
3133
from synapse.events import EventBase
32-
from synapse.types import MutableStateMap, StateMap
34+
from synapse.types import MutableStateMap, StateKey, StateMap
3335

3436
if TYPE_CHECKING:
3537
from typing import FrozenSet # noqa: used within quoted type hint; flake8 sad
@@ -134,6 +136,23 @@ def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter":
134136
include_others=True,
135137
)
136138

139+
@staticmethod
def freeze(types: Mapping[str, Optional[Collection[str]]], include_others: bool):
    """
    Build an immutable StateFilter from possibly-mutable inputs.

    Args:
        types: maps each state type either to a collection of state keys
            or to None. Every collection is copied into a frozenset;
            None entries are carried over as None.
        include_others: handed unchanged to the StateFilter constructor.

    Returns:
        A StateFilter wrapping a frozendict of the frozen entries.
    """
    frozen_types: Dict[str, Optional[FrozenSet[str]]] = {
        state_type: None if keys is None else frozenset(keys)
        for state_type, keys in types.items()
    }
    return StateFilter(frozendict(frozen_types), include_others=include_others)
155+
137156
def return_expanded(self) -> "StateFilter":
138157
"""Creates a new StateFilter where type wild cards have been removed
139158
(except for memberships). The returned filter is a superset of the
@@ -356,6 +375,157 @@ def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]:
356375

357376
return member_filter, non_member_filter
358377

378+
def _decompose_into_four_parts(
    self,
) -> Tuple[Tuple[bool, Set[str]], Tuple[Set[str], Set[StateKey]]]:
    """
    Split this state filter into an alternative four-part representation:

        all? - minus_wildcards + plus_wildcards + plus_state_keys

    The parts are returned as `(all, minus_wildcards)` followed by
    `(plus_wildcards, plus_state_keys)`, where
      * all is `self.include_others`, i.e. whether we begin from ALL state;
      * minus_wildcards are entire state types to take away from 'all'
        (every type listed in `self.types` when `all` is set, otherwise
        empty);
      * plus_wildcards are entire state types to add (types mapped to None);
      * plus_state_keys are the individual (type, state key) pairs to add,
        as reported by `self.concrete_types()`.

    `_recompose_from_four_parts` performs the reverse transformation.
    """
    starts_from_all = self.include_others

    # Exclusions are only meaningful when we begin from 'all': in that case
    # every explicitly-listed type is first removed, then partially or
    # wholly re-added through the two 'plus' parts below.
    minus_wildcards: Set[str] = set(self.types) if starts_from_all else set()

    plus_wildcards: Set[str] = set()
    for state_type, state_keys in self.types.items():
        if state_keys is None:
            plus_wildcards.add(state_type)

    plus_state_keys: Set[StateKey] = set(self.concrete_types())

    return (starts_from_all, minus_wildcards), (plus_wildcards, plus_state_keys)
401+
402+
@staticmethod
def _recompose_from_four_parts(
    all_part: bool,
    minus_wildcards: Set[str],
    plus_wildcards: Set[str],
    plus_state_keys: Set[StateKey],
) -> "StateFilter":
    """
    Rebuild a StateFilter from its four-part representation.

    This is the reverse of `_decompose_into_four_parts`.

    Args:
        all_part: whether the filter begins from ALL state; becomes the
            `include_others` flag of the result.
        minus_wildcards: entire state types to remove (only consulted when
            `all_part` is set).
        plus_wildcards: entire state types to add.
        plus_state_keys: individual (state type, state key) pairs to add.

    Returns:
        The equivalent frozen StateFilter.
    """
    # state type -> set of state keys, or None for a wildcard
    # (i.e. the same shape that a StateFilter itself stores).
    rebuilt: Dict[str, Optional[Set[str]]] = {}

    # When beginning from 'all', an excluded type is expressed as an empty
    # set of state keys so that nothing of that type is let through.
    if all_part:
        for excluded_type in minus_wildcards:
            rebuilt[excluded_type] = set()

    # Wildcards override any exclusion entered above for the same type.
    for wildcard_type in plus_wildcards:
        rebuilt[wildcard_type] = None

    for event_type, event_state_key in plus_state_keys:
        if event_type not in rebuilt:
            # If include_others already admits this whole type, adding an
            # explicit entry would narrow the filter, so only create one
            # when we are not beginning from 'all'.
            if not all_part:
                rebuilt[event_type] = {event_state_key}
            continue

        existing_keys = rebuilt[event_type]
        # A wildcard (None) already covers every state key of the type.
        if existing_keys is not None:
            existing_keys.add(event_state_key)

    return StateFilter.freeze(rebuilt, include_others=all_part)
441+
442+
def approx_difference(self, other: "StateFilter") -> "StateFilter":
    """
    Compute a state filter approximating `self - other`.

    Handy for working out which state still has to be fetched from the
    database when the state matched by `self` is wanted but the state
    matched by `other` is already in hand.

    Guarantees about the result:
      - it MUST match every state event matched by `self`, except those
        matched by `other`;
      - it MUST NOT match any state event that `self` does not match;
      - it MAY over-approximate, i.e. it MAY also match some state events
        that `other` matches.

    The implementation aims for the narrowest filter satisfying the above.
    An approximation is unavoidable when `self` has a wildcard for a state
    type for which `other` lists specific state keys: a state filter
    cannot say 'every state key apart from these', so the wildcard is
    retained. For example
        StateFilter(m.room.member -> None (wildcard))
    minus
        StateFilter(m.room.member -> {'@wombat:example.org'})
    is approximated as
        StateFilter(m.room.member -> None (wildcard))
    """
    # Work in the four-part representation of both filters:
    #  - 'all': does the filter begin by including every event?
    #  - 'excludes': if so, which whole event types are then removed?
    #  - 'wildcards': which whole event types are included?
    #  - 'concrete keys': which individual state keys are included?
    self_head, self_tail = self._decompose_into_four_parts()
    other_head, other_tail = other._decompose_into_four_parts()
    self_all, self_excludes = self_head
    self_wildcards, self_concrete_keys = self_tail
    other_all, other_excludes = other_head
    other_wildcards, other_concrete_keys = other_tail

    # Whole types that `other` already covers need not stay as wildcards.
    remaining_wildcards = self_wildcards.difference(other_wildcards)

    # Drop individual state keys that `other` covers, whether through one
    # of its wildcards or through the very same concrete key.
    remaining_keys = {
        key
        for key in self_concrete_keys
        if key[0] not in other_wildcards and key not in other_concrete_keys
    }

    if not other_all:
        # `other` lists everything it includes explicitly, so the result
        # keeps self's 'all' flag and additionally excludes the types
        # that `other` includes wholesale.
        return StateFilter._recompose_from_four_parts(
            self_all,
            self_excludes.union(other_wildcards),
            remaining_wildcards,
            remaining_keys,
        )

    # From here on `other` begins from 'all', so the difference cannot:
    # only types that `other` explicitly excludes can survive.
    if self_all:
        # `other` returns everything BUT its excluded types. Those of
        # them that self still admits through its own 'all' (i.e. that
        # self does not exclude too) must reappear as wildcards.
        remaining_wildcards = remaining_wildcards | (other_excludes - self_excludes)

    # Throw away every state type that is not on other's exclusion list.
    remaining_wildcards = remaining_wildcards & other_excludes
    remaining_keys = {key for key in remaining_keys if key[0] in other_excludes}

    # No exclusions are needed when not beginning from 'all': there is
    # nothing to exclude from.
    return StateFilter._recompose_from_four_parts(
        False, set(), remaining_wildcards, remaining_keys
    )
528+
359529

360530
class StateGroupStorage:
361531
"""High level interface to fetching state for event."""

0 commit comments

Comments
 (0)