Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit c0df6ba

Browse files
authored
Remove keylen from LruCache. (#9993)
`keylen` is frequently set incorrectly, and we don't really need it. Its only use was in `del_multi`, to figure out whether we had removed a subtree; we can do that better by changing `TreeCache.pop` to return a distinct type (`TreeCacheNode`) when a subtree is removed. Commits should be independently reviewable.
1 parent 316f89e commit c0df6ba

File tree

11 files changed

+80
-55
lines changed

11 files changed

+80
-55
lines changed

changelog.d/9993.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Remove `keylen` param on `LruCache`.

synapse/replication/slave/storage/client_ips.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def __init__(self, database: DatabasePool, db_conn, hs):
2424
super().__init__(database, db_conn, hs)
2525

2626
self.client_ip_last_seen = LruCache(
27-
cache_name="client_ip_last_seen", keylen=4, max_size=50000
27+
cache_name="client_ip_last_seen", max_size=50000
2828
) # type: LruCache[tuple, int]
2929

3030
async def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id):

synapse/storage/databases/main/client_ips.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ class ClientIpStore(ClientIpWorkerStore):
436436
def __init__(self, database: DatabasePool, db_conn, hs):
437437

438438
self.client_ip_last_seen = LruCache(
439-
cache_name="client_ip_last_seen", keylen=4, max_size=50000
439+
cache_name="client_ip_last_seen", max_size=50000
440440
)
441441

442442
super().__init__(database, db_conn, hs)

synapse/storage/databases/main/devices.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1053,7 +1053,7 @@ def __init__(self, database: DatabasePool, db_conn, hs):
10531053
# Map of (user_id, device_id) -> bool. If there is an entry that implies
10541054
# the device exists.
10551055
self.device_id_exists_cache = LruCache(
1056-
cache_name="device_id_exists", keylen=2, max_size=10000
1056+
cache_name="device_id_exists", max_size=10000
10571057
)
10581058

10591059
async def store_device(

synapse/storage/databases/main/events_worker.py

-1
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ def __init__(self, database: DatabasePool, db_conn, hs):
157157

158158
self._get_event_cache = LruCache(
159159
cache_name="*getEvent*",
160-
keylen=3,
161160
max_size=hs.config.caches.event_cache_size,
162161
)
163162

synapse/util/caches/deferred_cache.py

-2
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ def __init__(
7070
self,
7171
name: str,
7272
max_entries: int = 1000,
73-
keylen: int = 1,
7473
tree: bool = False,
7574
iterable: bool = False,
7675
apply_cache_factor_from_config: bool = True,
@@ -101,7 +100,6 @@ def metrics_cb():
101100
# a Deferred.
102101
self.cache = LruCache(
103102
max_size=max_entries,
104-
keylen=keylen,
105103
cache_name=name,
106104
cache_type=cache_type,
107105
size_callback=(lambda d: len(d) or 1) if iterable else None,

synapse/util/caches/descriptors.py

-1
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,6 @@ def __get__(self, obj, owner):
270270
cache = DeferredCache(
271271
name=self.orig.__name__,
272272
max_entries=self.max_entries,
273-
keylen=self.num_args,
274273
tree=self.tree,
275274
iterable=self.iterable,
276275
) # type: DeferredCache[CacheKey, Any]

synapse/util/caches/lrucache.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from synapse.config import cache as cache_config
3535
from synapse.util import caches
3636
from synapse.util.caches import CacheMetric, register_cache
37-
from synapse.util.caches.treecache import TreeCache
37+
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
3838

3939
try:
4040
from pympler.asizeof import Asizer
@@ -160,7 +160,6 @@ def __init__(
160160
self,
161161
max_size: int,
162162
cache_name: Optional[str] = None,
163-
keylen: int = 1,
164163
cache_type: Type[Union[dict, TreeCache]] = dict,
165164
size_callback: Optional[Callable] = None,
166165
metrics_collection_callback: Optional[Callable[[], None]] = None,
@@ -173,9 +172,6 @@ def __init__(
173172
cache_name: The name of this cache, for the prometheus metrics. If unset,
174173
no metrics will be reported on this cache.
175174
176-
keylen: The length of the tuple used as the cache key. Ignored unless
177-
cache_type is `TreeCache`.
178-
179175
cache_type (type):
180176
type of underlying cache to be used. Typically one of dict
181177
or TreeCache.
@@ -403,7 +399,9 @@ def cache_del_multi(key: KT) -> None:
403399
popped = cache.pop(key)
404400
if popped is None:
405401
return
406-
for leaf in enumerate_leaves(popped, keylen - len(cast(tuple, key))):
402+
# for each deleted node, we now need to remove it from the linked list
403+
# and run its callbacks.
404+
for leaf in iterate_tree_cache_entry(popped):
407405
delete_node(leaf)
408406

409407
@synchronized

synapse/util/caches/treecache.py

+66-38
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,43 @@
1-
from typing import Dict
1+
# Copyright 2016-2021 The Matrix.org Foundation C.I.C.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
214

315
SENTINEL = object()
416

517

18+
class TreeCacheNode(dict):
19+
"""The type of nodes in our tree.
20+
21+
Has its own type so we can distinguish it from real dicts that are stored at the
22+
leaves.
23+
"""
24+
25+
pass
26+
27+
628
class TreeCache:
729
"""
830
Tree-based backing store for LruCache. Allows subtrees of data to be deleted
931
efficiently.
1032
Keys must be tuples.
33+
34+
The data structure is a chain of TreeCacheNodes:
35+
root = {key_1: {key_2: _value}}
1136
"""
1237

1338
def __init__(self):
1439
self.size = 0
15-
self.root = {} # type: Dict
40+
self.root = TreeCacheNode()
1641

1742
def __setitem__(self, key, value):
1843
return self.set(key, value)
@@ -21,10 +46,23 @@ def __contains__(self, key):
2146
return self.get(key, SENTINEL) is not SENTINEL
2247

2348
def set(self, key, value):
49+
if isinstance(value, TreeCacheNode):
50+
# this would mean we couldn't tell where our tree ended and the value
51+
# started.
52+
raise ValueError("Cannot store TreeCacheNodes in a TreeCache")
53+
2454
node = self.root
2555
for k in key[:-1]:
26-
node = node.setdefault(k, {})
27-
node[key[-1]] = _Entry(value)
56+
next_node = node.get(k, SENTINEL)
57+
if next_node is SENTINEL:
58+
next_node = node[k] = TreeCacheNode()
59+
elif not isinstance(next_node, TreeCacheNode):
60+
# this suggests that the caller is not being consistent with its key
61+
# length.
62+
raise ValueError("value conflicts with an existing subtree")
63+
node = next_node
64+
65+
node[key[-1]] = value
2866
self.size += 1
2967

3068
def get(self, key, default=None):
@@ -33,25 +71,41 @@ def get(self, key, default=None):
3371
node = node.get(k, None)
3472
if node is None:
3573
return default
36-
return node.get(key[-1], _Entry(default)).value
74+
return node.get(key[-1], default)
3775

3876
def clear(self):
3977
self.size = 0
40-
self.root = {}
78+
self.root = TreeCacheNode()
4179

4280
def pop(self, key, default=None):
81+
"""Remove the given key, or subkey, from the cache
82+
83+
Args:
84+
key: key or subkey to remove.
85+
default: value to return if key is not found
86+
87+
Returns:
88+
If the key is not found, 'default'. If the key is complete, the removed
89+
value. If the key is partial, the TreeCacheNode corresponding to the part
90+
of the tree that was removed.
91+
"""
92+
# a list of the nodes we have touched on the way down the tree
4393
nodes = []
4494

4595
node = self.root
4696
for k in key[:-1]:
4797
node = node.get(k, None)
48-
nodes.append(node) # don't add the root node
4998
if node is None:
5099
return default
100+
if not isinstance(node, TreeCacheNode):
101+
# we've gone off the end of the tree
102+
raise ValueError("pop() key too long")
103+
nodes.append(node) # don't add the root node
51104
popped = node.pop(key[-1], SENTINEL)
52105
if popped is SENTINEL:
53106
return default
54107

108+
# working back up the tree, clear out any nodes that are now empty
55109
node_and_keys = list(zip(nodes, key))
56110
node_and_keys.reverse()
57111
node_and_keys.append((self.root, None))
@@ -61,14 +115,15 @@ def pop(self, key, default=None):
61115

62116
if n:
63117
break
118+
# found an empty node: remove it from its parent, and loop.
64119
node_and_keys[i + 1][0].pop(k)
65120

66-
popped, cnt = _strip_and_count_entires(popped)
121+
cnt = sum(1 for _ in iterate_tree_cache_entry(popped))
67122
self.size -= cnt
68123
return popped
69124

70125
def values(self):
71-
return list(iterate_tree_cache_entry(self.root))
126+
return iterate_tree_cache_entry(self.root)
72127

73128
def __len__(self):
74129
return self.size
@@ -78,36 +133,9 @@ def iterate_tree_cache_entry(d):
78133
"""Helper function to iterate over the leaves of a tree, i.e. a dict that
79134
can contain dicts.
80135
"""
81-
if isinstance(d, dict):
136+
if isinstance(d, TreeCacheNode):
82137
for value_d in d.values():
83138
for value in iterate_tree_cache_entry(value_d):
84139
yield value
85140
else:
86-
if isinstance(d, _Entry):
87-
yield d.value
88-
else:
89-
yield d
90-
91-
92-
class _Entry:
93-
__slots__ = ["value"]
94-
95-
def __init__(self, value):
96-
self.value = value
97-
98-
99-
def _strip_and_count_entires(d):
100-
"""Takes an _Entry or dict with leaves of _Entry's, and either returns the
101-
value or a dictionary with _Entry's replaced by their values.
102-
103-
Also returns the count of _Entry's
104-
"""
105-
if isinstance(d, dict):
106-
cnt = 0
107-
for key, value in d.items():
108-
v, n = _strip_and_count_entires(value)
109-
d[key] = v
110-
cnt += n
111-
return d, cnt
112-
else:
113-
return d.value, 1
141+
yield d

tests/util/test_lrucache.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def test_pop(self):
5959
self.assertEquals(cache.pop("key"), None)
6060

6161
def test_del_multi(self):
62-
cache = LruCache(4, keylen=2, cache_type=TreeCache)
62+
cache = LruCache(4, cache_type=TreeCache)
6363
cache[("animal", "cat")] = "mew"
6464
cache[("animal", "dog")] = "woof"
6565
cache[("vehicles", "car")] = "vroom"
@@ -165,7 +165,7 @@ def test_del_multi(self):
165165
m2 = Mock()
166166
m3 = Mock()
167167
m4 = Mock()
168-
cache = LruCache(4, keylen=2, cache_type=TreeCache)
168+
cache = LruCache(4, cache_type=TreeCache)
169169

170170
cache.set(("a", "1"), "value", callbacks=[m1])
171171
cache.set(("a", "2"), "value", callbacks=[m2])

tests/util/test_treecache.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414

1515

16-
from synapse.util.caches.treecache import TreeCache
16+
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
1717

1818
from .. import unittest
1919

@@ -64,12 +64,14 @@ def test_pop_mixedlevel(self):
6464
cache[("a", "b")] = "AB"
6565
cache[("b", "a")] = "BA"
6666
self.assertEquals(cache.get(("a", "a")), "AA")
67-
cache.pop(("a",))
67+
popped = cache.pop(("a",))
6868
self.assertEquals(cache.get(("a", "a")), None)
6969
self.assertEquals(cache.get(("a", "b")), None)
7070
self.assertEquals(cache.get(("b", "a")), "BA")
7171
self.assertEquals(len(cache), 1)
7272

73+
self.assertEquals({"AA", "AB"}, set(iterate_tree_cache_entry(popped)))
74+
7375
def test_clear(self):
7476
cache = TreeCache()
7577
cache[("a",)] = "A"

0 commit comments

Comments
 (0)