Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit e630722

Browse files
reivilibre and clokep authored
Optimise _update_client_ips_batch_txn to batch together database operations. (#12252)
Co-authored-by: Patrick Cloke <[email protected]>
1 parent 0cd182f commit e630722

File tree

4 files changed

+190
-51
lines changed

4 files changed

+190
-51
lines changed

changelog.d/12252.feature

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Move `update_client_ip` background job from the main process to the background worker.

synapse/storage/database.py

+98-3
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,7 @@ async def simple_upsert_many(
12681268
value_names: Collection[str],
12691269
value_values: Collection[Collection[Any]],
12701270
desc: str,
1271+
lock: bool = True,
12711272
) -> None:
12721273
"""
12731274
Upsert, many times.
@@ -1279,21 +1280,24 @@ async def simple_upsert_many(
12791280
value_names: The value column names
12801281
value_values: A list of each row's value column values.
12811282
Ignored if value_names is empty.
1283+
lock: True to lock the table when doing the upsert. Unused if the database engine
1284+
supports native upserts.
12821285
"""
12831286

12841287
# We can autocommit if we are going to use native upserts
12851288
autocommit = (
12861289
self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables
12871290
)
12881291

1289-
return await self.runInteraction(
1292+
await self.runInteraction(
12901293
desc,
12911294
self.simple_upsert_many_txn,
12921295
table,
12931296
key_names,
12941297
key_values,
12951298
value_names,
12961299
value_values,
1300+
lock=lock,
12971301
db_autocommit=autocommit,
12981302
)
12991303

@@ -1305,6 +1309,7 @@ def simple_upsert_many_txn(
13051309
key_values: Collection[Iterable[Any]],
13061310
value_names: Collection[str],
13071311
value_values: Iterable[Iterable[Any]],
1312+
lock: bool = True,
13081313
) -> None:
13091314
"""
13101315
Upsert, many times.
@@ -1316,14 +1321,16 @@ def simple_upsert_many_txn(
13161321
value_names: The value column names
13171322
value_values: A list of each row's value column values.
13181323
Ignored if value_names is empty.
1324+
lock: True to lock the table when doing the upsert. Unused if the database engine
1325+
supports native upserts.
13191326
"""
13201327
if self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables:
13211328
return self.simple_upsert_many_txn_native_upsert(
13221329
txn, table, key_names, key_values, value_names, value_values
13231330
)
13241331
else:
13251332
return self.simple_upsert_many_txn_emulated(
1326-
txn, table, key_names, key_values, value_names, value_values
1333+
txn, table, key_names, key_values, value_names, value_values, lock=lock
13271334
)
13281335

13291336
def simple_upsert_many_txn_emulated(
@@ -1334,6 +1341,7 @@ def simple_upsert_many_txn_emulated(
13341341
key_values: Collection[Iterable[Any]],
13351342
value_names: Collection[str],
13361343
value_values: Iterable[Iterable[Any]],
1344+
lock: bool = True,
13371345
) -> None:
13381346
"""
13391347
Upsert, many times, but without native UPSERT support or batching.
@@ -1345,17 +1353,24 @@ def simple_upsert_many_txn_emulated(
13451353
value_names: The value column names
13461354
value_values: A list of each row's value column values.
13471355
Ignored if value_names is empty.
1356+
lock: True to lock the table when doing the upsert.
13481357
"""
13491358
# No value columns, therefore make a blank list so that the following
13501359
# zip() works correctly.
13511360
if not value_names:
13521361
value_values = [() for x in range(len(key_values))]
13531362

1363+
if lock:
1364+
# Lock the table just once, to prevent it being done once per row.
1365+
# Note that, according to Postgres' documentation, once obtained,
1366+
# the lock is held for the remainder of the current transaction.
1367+
self.engine.lock_table(txn, "user_ips")
1368+
13541369
for keyv, valv in zip(key_values, value_values):
13551370
_keys = {x: y for x, y in zip(key_names, keyv)}
13561371
_vals = {x: y for x, y in zip(value_names, valv)}
13571372

1358-
self.simple_upsert_txn_emulated(txn, table, _keys, _vals)
1373+
self.simple_upsert_txn_emulated(txn, table, _keys, _vals, lock=False)
13591374

13601375
def simple_upsert_many_txn_native_upsert(
13611376
self,
@@ -1792,6 +1807,86 @@ def simple_update_txn(
17921807

17931808
return txn.rowcount
17941809

1810+
async def simple_update_many(
    self,
    table: str,
    key_names: Collection[str],
    key_values: Collection[Iterable[Any]],
    value_names: Collection[str],
    value_values: Iterable[Iterable[Any]],
    desc: str,
) -> None:
    """
    Run a batch of row updates against one table in a single interaction.

    Rows whose keys match nothing in the table are left untouched; no row
    is ever created by this call.

    Args:
        table: The table to update
        key_names: The key column names.
        key_values: A list of each row's key column values.
        value_names: The names of value columns to update.
        value_values: A list of each row's value column values.
            NOTE: `simple_update_many_txn` calls `len()` on this, so in
            practice it has to be a sized collection, not a bare iterator.
        desc: Description passed through to `runInteraction`.
    """
    # Positional arguments forwarded, in order, to the transaction function.
    txn_args = (table, key_names, key_values, value_names, value_values)

    await self.runInteraction(desc, self.simple_update_many_txn, *txn_args)
1840+
1841+
@staticmethod
def simple_update_many_txn(
    txn: LoggingTransaction,
    table: str,
    key_names: Collection[str],
    key_values: Collection[Iterable[Any]],
    value_names: Collection[str],
    value_values: Collection[Iterable[Any]],
) -> None:
    """
    Update, many times, using batching where possible.
    If the keys don't match anything, nothing will be updated.

    Args:
        txn: The transaction to run the batched UPDATE on.
        table: The table to update
        key_names: The key column names.
        key_values: A list of each row's key column values.
        value_names: The names of value columns to update.
        value_values: A list of each row's value column values.

    Raises:
        ValueError: if `key_values` and `value_values` do not contain the
            same number of rows.
    """

    if len(value_values) != len(key_values):
        raise ValueError(
            f"{len(key_values)} key rows and {len(value_values)} value rows: should be the same number."
        )

    # List of tuples of (value values, then key values)
    # (This matches the order needed for the query)
    #
    # Built exactly once: each (key row, value row) pair must yield a single
    # parameter tuple, otherwise every UPDATE would be executed twice.
    args = [tuple(vs) + tuple(ks) for vs, ks in zip(value_values, key_values)]

    # 'col1 = ?, col2 = ?, ...'
    set_clause = ", ".join(f"{n} = ?" for n in value_names)

    if key_names:
        # 'WHERE col3 = ? AND col4 = ? AND col5 = ?'
        where_clause = "WHERE " + (" AND ".join(f"{n} = ?" for n in key_names))
    else:
        # No key columns: the statement applies to every row of the table.
        where_clause = ""

    # UPDATE mytable SET col1 = ?, col2 = ? WHERE col3 = ? AND col4 = ?
    sql = f"""
        UPDATE {table} SET {set_clause} {where_clause}
    """

    txn.execute_batch(sql, args)
1889+
17951890
async def simple_update_one(
17961891
self,
17971892
table: str,

synapse/storage/databases/main/client_ips.py

+34-32
Original file line numberDiff line numberDiff line change
@@ -616,9 +616,10 @@ async def _update_client_ips_batch(self) -> None:
616616
to_update = self._batch_row_update
617617
self._batch_row_update = {}
618618

619-
await self.db_pool.runInteraction(
620-
"_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
621-
)
619+
if to_update:
620+
await self.db_pool.runInteraction(
621+
"_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
622+
)
622623

623624
def _update_client_ips_batch_txn(
624625
self,
@@ -629,42 +630,43 @@ def _update_client_ips_batch_txn(
629630
self._update_on_this_worker
630631
), "This worker is not designated to update client IPs"
631632

632-
if "user_ips" in self.db_pool._unsafe_to_upsert_tables or (
633-
not self.database_engine.can_native_upsert
634-
):
635-
self.database_engine.lock_table(txn, "user_ips")
633+
# Keys and values for the `user_ips` upsert.
634+
user_ips_keys = []
635+
user_ips_values = []
636+
637+
# Keys and values for the `devices` update.
638+
devices_keys = []
639+
devices_values = []
636640

637641
for entry in to_update.items():
638642
(user_id, access_token, ip), (user_agent, device_id, last_seen) = entry
639-
640-
self.db_pool.simple_upsert_txn(
641-
txn,
642-
table="user_ips",
643-
keyvalues={"user_id": user_id, "access_token": access_token, "ip": ip},
644-
values={
645-
"user_agent": user_agent,
646-
"device_id": device_id,
647-
"last_seen": last_seen,
648-
},
649-
lock=False,
650-
)
643+
user_ips_keys.append((user_id, access_token, ip))
644+
user_ips_values.append((user_agent, device_id, last_seen))
651645

652646
# Technically an access token might not be associated with
653647
# a device so we need to check.
654648
if device_id:
655-
# this is always an update rather than an upsert: the row should
656-
# already exist, and if it doesn't, that may be because it has been
657-
# deleted, and we don't want to re-create it.
658-
self.db_pool.simple_update_txn(
659-
txn,
660-
table="devices",
661-
keyvalues={"user_id": user_id, "device_id": device_id},
662-
updatevalues={
663-
"user_agent": user_agent,
664-
"last_seen": last_seen,
665-
"ip": ip,
666-
},
667-
)
649+
devices_keys.append((user_id, device_id))
650+
devices_values.append((user_agent, last_seen, ip))
651+
652+
self.db_pool.simple_upsert_many_txn(
653+
txn,
654+
table="user_ips",
655+
key_names=("user_id", "access_token", "ip"),
656+
key_values=user_ips_keys,
657+
value_names=("user_agent", "device_id", "last_seen"),
658+
value_values=user_ips_values,
659+
)
660+
661+
if devices_values:
662+
self.db_pool.simple_update_many_txn(
663+
txn,
664+
table="devices",
665+
key_names=("user_id", "device_id"),
666+
key_values=devices_keys,
667+
value_names=("user_agent", "last_seen", "ip"),
668+
value_values=devices_values,
669+
)
668670

669671
async def get_last_client_ip_by_device(
670672
self, user_id: str, device_id: Optional[str]

tests/storage/test__base.py

+57-16
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# limitations under the License.
1515

1616
import secrets
17-
from typing import Any, Dict, Generator, List, Tuple
17+
from typing import Generator, Tuple
1818

1919
from twisted.test.proto_helpers import MemoryReactor
2020

@@ -24,7 +24,7 @@
2424
from tests import unittest
2525

2626

27-
class UpsertManyTests(unittest.HomeserverTestCase):
27+
class UpdateUpsertManyTests(unittest.HomeserverTestCase):
2828
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
2929
self.storage = hs.get_datastores().main
3030

@@ -46,9 +46,13 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
4646
)
4747
)
4848

49-
def _dump_to_tuple(
50-
self, res: List[Dict[str, Any]]
51-
) -> Generator[Tuple[int, str, str], None, None]:
49+
def _dump_table_to_tuple(self) -> Generator[Tuple[int, str, str], None, None]:
50+
res = self.get_success(
51+
self.storage.db_pool.simple_select_list(
52+
self.table_name, None, ["id, username, value"]
53+
)
54+
)
55+
5256
for i in res:
5357
yield (i["id"], i["username"], i["value"])
5458

@@ -75,13 +79,8 @@ def test_upsert_many(self) -> None:
7579
)
7680

7781
# Check results are what we expect
78-
res = self.get_success(
79-
self.storage.db_pool.simple_select_list(
80-
self.table_name, None, ["id, username, value"]
81-
)
82-
)
8382
self.assertEqual(
84-
set(self._dump_to_tuple(res)),
83+
set(self._dump_table_to_tuple()),
8584
{(1, "user1", "hello"), (2, "user2", "there")},
8685
)
8786

@@ -102,12 +101,54 @@ def test_upsert_many(self) -> None:
102101
)
103102

104103
# Check results are what we expect
105-
res = self.get_success(
106-
self.storage.db_pool.simple_select_list(
107-
self.table_name, None, ["id, username, value"]
104+
self.assertEqual(
105+
set(self._dump_table_to_tuple()),
106+
{(1, "user1", "hello"), (2, "user2", "bleb")},
107+
)
108+
109+
def test_simple_update_many(self) -> None:
    """
    simple_update_many performs many updates at once.
    """
    # First add some data.
    self.get_success(
        self.storage.db_pool.simple_insert_many(
            table=self.table_name,
            keys=("id", "username", "value"),
            values=[(1, "alice", "A"), (2, "bob", "B"), (3, "charlie", "C")],
            desc="insert",
        )
    )

    # Check the data made it to the table
    self.assertEqual(
        set(self._dump_table_to_tuple()),
        {(1, "alice", "A"), (2, "bob", "B"), (3, "charlie", "C")},
    )

    # Now use simple_update_many
    # ("stranger" was never inserted, so its update should match no row.)
    self.get_success(
        self.storage.db_pool.simple_update_many(
            table=self.table_name,
            key_names=("username",),
            key_values=(
                ("alice",),
                ("bob",),
                ("stranger",),
            ),
            value_names=("value",),
            value_values=(
                ("aaa!",),
                ("bbb!",),
                ("???",),
            ),
            desc="update_many1",
        )
    )

    # Check the table is how we expect:
    # charlie has been left alone
    # and no row has been created for "stranger".
    self.assertEqual(
        set(self._dump_table_to_tuple()),
        {(1, "alice", "aaa!"), (2, "bob", "bbb!"), (3, "charlie", "C")},
    )

0 commit comments

Comments
 (0)