Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 2d33dad

Browse files
committed
Clean up the event_edges table
We make a number of changes to `event_edges`:
* We give the `room_id` and `is_state` columns defaults (null and false respectively) so that we can stop populating them.
* We drop any rows that have `is_state` set true - they should no longer exist.
* We drop any rows that do not exist in `events` - these should not exist either.
* We drop the old unique constraint on all the columns, which wasn't much use.
* We create a new unique index on `(event_id, prev_event_id)`.
* We add a foreign key constraint to `events`.
These happen rather differently depending on whether we are on Postgres or SQLite. For SQLite, we just rebuild the whole table, copying only the rows we want to keep. For Postgres, we try to do things in the background as much as possible.
1 parent dc1ecf8 commit 2d33dad

File tree

5 files changed

+209
-3
lines changed

5 files changed

+209
-3
lines changed

synapse/storage/databases/main/events_bg_updates.py

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2019-2021 The Matrix.org Foundation C.I.C.
1+
# Copyright 2019-2022 The Matrix.org Foundation C.I.C.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -64,6 +64,9 @@ class _BackgroundUpdates:
6464
INDEX_STREAM_ORDERING2_TS = "index_stream_ordering2_ts"
6565
REPLACE_STREAM_ORDERING_COLUMN = "replace_stream_ordering_column"
6666

67+
EVENT_EDGES_DROP_INVALID_ROWS = "event_edges_drop_invalid_rows"
68+
EVENT_EDGES_REPLACE_INDEX = "event_edges_replace_index"
69+
6770

6871
@attr.s(slots=True, frozen=True, auto_attribs=True)
6972
class _CalculateChainCover:
@@ -240,6 +243,21 @@ def __init__(
240243

241244
################################################################################
242245

246+
self.db_pool.updates.register_background_update_handler(
247+
_BackgroundUpdates.EVENT_EDGES_DROP_INVALID_ROWS,
248+
self._background_drop_invalid_event_edges_rows,
249+
)
250+
251+
self.db_pool.updates.register_background_index_update(
252+
_BackgroundUpdates.EVENT_EDGES_REPLACE_INDEX,
253+
index_name="event_edges_event_id_prev_event_id_idx",
254+
table="event_edges",
255+
columns=["event_id", "prev_event_id"],
256+
unique=True,
257+
# the old index which just covered event_id is now redundant.
258+
replaces_index="ev_edges_id",
259+
)
260+
243261
async def _background_reindex_fields_sender(
244262
self, progress: JsonDict, batch_size: int
245263
) -> int:
@@ -1290,3 +1308,94 @@ def process(txn: Cursor) -> None:
12901308
)
12911309

12921310
return 0
1311+
1312+
async def _background_drop_invalid_event_edges_rows(
1313+
self, progress: JsonDict, batch_size: int
1314+
) -> int:
1315+
"""Drop invalid rows from event_edges
1316+
1317+
This only runs for postgres. For SQLite, it all happens synchronously.
1318+
1319+
Firstly, drop any rows with is_state=True. These may have been added a long time
1320+
ago, but they are no longer used.
1321+
1322+
We also drop rows that do not correspond to entries in `events`, and add a
1323+
foreign key.
1324+
"""
1325+
1326+
last_event_id = progress.get("last_event_id", "")
1327+
1328+
def drop_invalid_event_edges_txn(txn: LoggingTransaction) -> bool:
1329+
"""Returns True if we're done."""
1330+
1331+
# first we need to find an endpoint.
1332+
txn.execute(
1333+
"""
1334+
SELECT event_id FROM event_edges
1335+
WHERE event_id > ?
1336+
ORDER BY event_id
1337+
LIMIT 1 OFFSET ?
1338+
""",
1339+
(last_event_id, batch_size),
1340+
)
1341+
1342+
endpoint = None
1343+
row = txn.fetchone()
1344+
1345+
if row:
1346+
endpoint = row[0]
1347+
1348+
where_clause = "ee.event_id > ?"
1349+
args = [last_event_id]
1350+
if endpoint:
1351+
where_clause += " AND ee.event_id <= ?"
1352+
args.append(endpoint)
1353+
1354+
# now delete any that:
1355+
# - have is_state=TRUE, or
1356+
# - do not correspond to a row in `events`
1357+
txn.execute(
1358+
f"""
1359+
DELETE FROM event_edges
1360+
WHERE event_id IN (
1361+
SELECT ee.event_id
1362+
FROM event_edges ee
1363+
LEFT JOIN events ev USING (event_id)
1364+
WHERE ({where_clause}) AND
1365+
(is_state OR ev.event_id IS NULL)
1366+
)""",
1367+
args,
1368+
)
1369+
1370+
logger.info(
1371+
"cleaned up event_edges up to %s: removed %i/%i rows",
1372+
endpoint,
1373+
txn.rowcount,
1374+
batch_size,
1375+
)
1376+
1377+
if endpoint is not None:
1378+
self.db_pool.updates._background_update_progress_txn(
1379+
txn,
1380+
_BackgroundUpdates.EVENT_EDGES_DROP_INVALID_ROWS,
1381+
{"last_event_id": endpoint},
1382+
)
1383+
return False
1384+
1385+
# if that was the final batch, we validate the foreign key.
1386+
logger.info("cleaned up event_edges; enabling foreign key")
1387+
txn.execute(
1388+
"ALTER TABLE event_edges VALIDATE CONSTRAINT event_edges_event_id_fkey"
1389+
)
1390+
return True
1391+
1392+
done = await self.db_pool.runInteraction(
1393+
desc="drop_invalid_event_edges", func=drop_invalid_event_edges_txn
1394+
)
1395+
1396+
if done:
1397+
await self.db_pool.updates._end_background_update(
1398+
_BackgroundUpdates.EVENT_EDGES_DROP_INVALID_ROWS
1399+
)
1400+
1401+
return batch_size

synapse/storage/databases/main/purge_events.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,10 @@ def _purge_history_txn(
214214

215215
# Delete all remote non-state events
216216
for table in (
217+
"event_edges",
217218
"events",
218219
"event_json",
219220
"event_auth",
220-
"event_edges",
221221
"event_forward_extremities",
222222
"event_relations",
223223
"event_search",

synapse/storage/schema/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
SCHEMA_VERSION = 70 # remember to update the list below when updating
15+
SCHEMA_VERSION = 71 # remember to update the list below when updating
1616
"""Represents the expectations made by the codebase about the database schema
1717
1818
This should be incremented whenever the codebase changes its requirements on the
@@ -67,6 +67,9 @@
6767
6868
Changes in SCHEMA_VERSION = 70:
6969
- event_reference_hashes is no longer written to.
70+
71+
Changes in SCHEMA_VERSION = 71:
72+
- event_edges.(room_id, is_state) are no longer written to.
7073
"""
7174

7275

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/* Copyright 2022 The Matrix.org Foundation C.I.C
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
-- We're going to stop populating event_edges.room_id and event_edges.is_state,
17+
-- which means we now need to give them defaults.
18+
19+
-- We also drop the existing unique constraint which spans all four columns. Frankly
20+
-- it's not doing much, and there are other indexes on event_id and prev_event_id.
21+
-- Later on we introduce a proper unique constraint on (event_id, prev_event_id).
22+
--
23+
-- We also add a foreign key constraint (which will be enforced for new rows), but
24+
-- don't yet validate it for existing rows (since that's slow, and we haven't yet
25+
-- checked that all the rows are valid)
26+
27+
ALTER TABLE event_edges
28+
ALTER room_id DROP NOT NULL,
29+
ALTER is_state SET DEFAULT FALSE,
30+
DROP CONSTRAINT IF EXISTS event_edges_event_id_prev_event_id_room_id_is_state_key,
31+
ADD CONSTRAINT event_edges_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id) NOT VALID;
32+
33+
-- In the background, we drop any rows with is_state=True. These may have been
34+
-- added a long time ago, but they are no longer used.
35+
--
36+
-- We also drop rows that do not correspond to entries in `events`, and finally
37+
-- validate the foreign key.
38+
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
39+
(7101, 'event_edges_drop_invalid_rows', '{}');
40+
41+
-- We'll then create a new unique index on (event_id, prev_event_id).
42+
INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
43+
(7101, 'event_edges_replace_index', '{}', 'event_edges_drop_invalid_rows');
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/* Copyright 2022 The Matrix.org Foundation C.I.C
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
-- We're going to stop populating event_edges.room_id and event_edges.is_state,
17+
-- which means we now need to give them defaults.
18+
--
19+
-- We also take the opportunity to:
20+
-- - drop any rows with is_state=True (these were populated a long time ago, but
21+
-- are no longer used.)
22+
-- - drop any rows which do not correspond to entries in `events`
23+
-- - tighten the unique index so that it applies just to (event_id, prev_event_id)
24+
-- - drop the "ev_edges_id" index, which is redundant to the above.
25+
-- - add a foreign key constraint from event_id to `events`
26+
27+
CREATE TABLE new_event_edges (
28+
event_id TEXT NOT NULL,
29+
prev_event_id TEXT NOT NULL,
30+
room_id TEXT NULL,
31+
is_state BOOL NOT NULL DEFAULT 0,
32+
FOREIGN KEY(event_id) REFERENCES events(event_id)
33+
);
34+
35+
INSERT INTO new_event_edges
36+
SELECT ee.event_id, ee.prev_event_id, ee.room_id, ee.is_state
37+
FROM event_edges ee JOIN events ev USING (event_id)
38+
WHERE NOT ee.is_state;
39+
40+
DROP TABLE event_edges;
41+
42+
ALTER TABLE new_event_edges RENAME TO event_edges;
43+
44+
CREATE UNIQUE INDEX event_edges_event_id_prev_event_id_idx
45+
ON event_edges (event_id, prev_event_id);
46+
47+
CREATE INDEX ev_edges_prev_id ON event_edges (prev_event_id);
48+
49+
50+
51+

0 commit comments

Comments
 (0)