@@ -1356,14 +1356,23 @@ async def have_seen_events(
         Returns:
             The set of events we have already seen.
         """
-        res = await self._have_seen_events_dict(
-            (room_id, event_id) for event_id in event_ids
-        )
-        return {eid for ((_rid, eid), have_event) in res.items() if have_event}
+
+        # @cachedList chomps lots of memory if you call it with a big list, so
+        # we break it down. However, each batch requires its own index scan, so we make
+        # the batches as big as possible.
+
+        results: Set[str] = set()
+        for chunk in batch_iter(event_ids, 500):
+            r = await self._have_seen_events_dict(
+                [(room_id, event_id) for event_id in chunk]
+            )
+            results.update(eid for ((_rid, eid), have_event) in r.items() if have_event)
+
+        return results
 
     @cachedList(cached_method_name="have_seen_event", list_name="keys")
     async def _have_seen_events_dict(
-        self, keys: Iterable[Tuple[str, str]]
+        self, keys: Collection[Tuple[str, str]]
     ) -> Dict[Tuple[str, str], bool]:
         """Helper for have_seen_events
 
@@ -1375,33 +1384,30 @@ async def _have_seen_events_dict(
         cache_results = {
             (rid, eid) for (rid, eid) in keys if self._get_event_cache.contains((eid,))
         }
-        results = {x: True for x in cache_results}
+        results = dict.fromkeys(cache_results, True)
+        remaining = [k for k in keys if k not in cache_results]
+        if not remaining:
+            return results
 
-        def have_seen_events_txn(
-            txn: LoggingTransaction, chunk: Tuple[Tuple[str, str], ...]
-        ) -> None:
+        def have_seen_events_txn(txn: LoggingTransaction) -> None:
             # we deliberately do *not* query the database for room_id, to make the
             # query an index-only lookup on `events_event_id_key`.
             #
             # We therefore pull the events from the database into a set...
 
             sql = "SELECT event_id FROM events AS e WHERE "
             clause, args = make_in_list_sql_clause(
-                txn.database_engine, "e.event_id", [eid for (_rid, eid) in chunk]
+                txn.database_engine, "e.event_id", [eid for (_rid, eid) in remaining]
             )
             txn.execute(sql + clause, args)
             found_events = {eid for eid, in txn}
 
-            # ... and then we can update the results for each row in the batch
-            results.update({(rid, eid): (eid in found_events) for (rid, eid) in chunk})
-
-        # each batch requires its own index scan, so we make the batches as big as
-        # possible.
-        for chunk in batch_iter((k for k in keys if k not in cache_results), 500):
-            await self.db_pool.runInteraction(
-                "have_seen_events", have_seen_events_txn, chunk
+            # ... and then we can update the results for each key
+            results.update(
+                {(rid, eid): (eid in found_events) for (rid, eid) in remaining}
             )
 
+        await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn)
         return results
 
     @cached(max_entries=100000, tree=True)
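For context, `batch_iter` (from `synapse.util.iterutils`) is what caps each call to the `@cachedList`-decorated helper at 500 keys: it splits an iterable into successive tuples of at most the given size. A minimal sketch of the semantics this diff relies on, as an illustrative reimplementation rather than the library code:

from itertools import islice
from typing import Iterable, Iterator, Tuple, TypeVar

T = TypeVar("T")


def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T, ...]]:
    # Yield successive tuples of at most `size` items drawn from `iterable`,
    # e.g. batch_iter(range(5), 2) -> (0, 1), (2, 3), (4,).
    it = iter(iterable)
    while True:
        batch = tuple(islice(it, size))
        if not batch:
            return
        yield batch

The trade-off the new comment describes falls out of this shape: smaller batches keep `@cachedList`'s per-call memory overhead down, but every batch costs one index scan, hence the fairly large chunk size of 500.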
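Inside the transaction, the query selects only `event_id` so that it can be answered by an index-only scan on `events_event_id_key` without touching the table heap; `make_in_list_sql_clause` produces the matching `IN`-list clause plus its bound parameters. A simplified, SQLite-flavoured sketch (the suffix marks it as illustrative; Synapse's real helper dispatches on the database engine, and to my understanding emits `column = ANY(?)` with a single array parameter on Postgres):

from typing import List, Sequence, Tuple


def make_in_list_sql_clause_sqlite(
    column: str, values: Sequence[str]
) -> Tuple[str, List[str]]:
    # Build "column IN (?, ?, ...)" plus its bound args. Callers must pass a
    # non-empty sequence ("IN ()" is invalid SQL) -- the diff guarantees this
    # via the early "if not remaining: return results".
    assert values, "values must be non-empty"
    placeholders = ", ".join("?" for _ in values)
    return f"{column} IN ({placeholders})", list(values)


# Usage, with hypothetical event IDs:
clause, args = make_in_list_sql_clause_sqlite("e.event_id", ["$ev1", "$ev2"])
# clause == "e.event_id IN (?, ?)", args == ["$ev1", "$ev2"]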