Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 3295970

Browse files
authored
Some minor performance fixes for task schedular (#16313)
1 parent e9e2904 commit 3295970

File tree

5 files changed

+95
-34
lines changed

5 files changed

+95
-34
lines changed

changelog.d/16313.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Delete device messages asynchronously and in staged batches using the task scheduler.

synapse/replication/tcp/handler.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -672,14 +672,12 @@ def on_LOCK_RELEASED(
672672
cmd.instance_name, cmd.lock_name, cmd.lock_key
673673
)
674674

675-
async def on_NEW_ACTIVE_TASK(
675+
def on_NEW_ACTIVE_TASK(
676676
self, conn: IReplicationConnection, cmd: NewActiveTaskCommand
677677
) -> None:
678678
"""Called when get a new NEW_ACTIVE_TASK command."""
679679
if self._task_scheduler:
680-
task = await self._task_scheduler.get_task(cmd.data)
681-
if task:
682-
await self._task_scheduler._launch_task(task)
680+
self._task_scheduler.launch_task_by_id(cmd.data)
683681

684682
def new_connection(self, connection: IReplicationConnection) -> None:
685683
"""Called when we have a new connection."""

synapse/storage/databases/main/task_scheduler.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ async def get_scheduled_tasks(
5353
resource_id: Optional[str] = None,
5454
statuses: Optional[List[TaskStatus]] = None,
5555
max_timestamp: Optional[int] = None,
56+
limit: Optional[int] = None,
5657
) -> List[ScheduledTask]:
5758
"""Get a list of scheduled tasks from the DB.
5859
@@ -62,6 +63,7 @@ async def get_scheduled_tasks(
6263
statuses: Limit the returned tasks to the specific statuses
6364
max_timestamp: Limit the returned tasks to the ones that have
6465
a timestamp inferior to the specified one
66+
limit: Only return `limit` number of rows if set.
6567
6668
Returns: a list of `ScheduledTask`, ordered by increasing timestamps
6769
"""
@@ -94,6 +96,10 @@ def get_scheduled_tasks_txn(txn: LoggingTransaction) -> List[Dict[str, Any]]:
9496

9597
sql = sql + " ORDER BY timestamp"
9698

99+
if limit is not None:
100+
sql += " LIMIT ?"
101+
args.append(limit)
102+
97103
txn.execute(sql, args)
98104
return self.db_pool.cursor_to_dict(txn)
99105

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/* Copyright 2023 The Matrix.org Foundation C.I.C
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
CREATE INDEX IF NOT EXISTS scheduled_tasks_timestamp ON scheduled_tasks(timestamp);

synapse/util/task_scheduler.py

Lines changed: 70 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@
1515
import logging
1616
from typing import TYPE_CHECKING, Awaitable, Callable, Dict, List, Optional, Set, Tuple
1717

18-
from prometheus_client import Gauge
19-
2018
from twisted.python.failure import Failure
2119

2220
from synapse.logging.context import nested_logging_context
23-
from synapse.metrics.background_process_metrics import run_as_background_process
21+
from synapse.metrics import LaterGauge
22+
from synapse.metrics.background_process_metrics import (
23+
run_as_background_process,
24+
wrap_as_background_process,
25+
)
2426
from synapse.types import JsonMapping, ScheduledTask, TaskStatus
2527
from synapse.util.stringutils import random_string
2628

@@ -30,12 +32,6 @@
3032
logger = logging.getLogger(__name__)
3133

3234

33-
running_tasks_gauge = Gauge(
34-
"synapse_scheduler_running_tasks",
35-
"The number of concurrent running tasks handled by the TaskScheduler",
36-
)
37-
38-
3935
class TaskScheduler:
4036
"""
4137
This is a simple task sheduler aimed at resumable tasks: usually we use `run_in_background`
@@ -70,6 +66,8 @@ class TaskScheduler:
7066
# Precision of the scheduler, evaluation of tasks to run will only happen
7167
# every `SCHEDULE_INTERVAL_MS` ms
7268
SCHEDULE_INTERVAL_MS = 1 * 60 * 1000 # 1mn
69+
# How often to clean up old tasks.
70+
CLEANUP_INTERVAL_MS = 30 * 60 * 1000
7371
# Time before a complete or failed task is deleted from the DB
7472
KEEP_TASKS_FOR_MS = 7 * 24 * 60 * 60 * 1000 # 1 week
7573
# Maximum number of tasks that can run at the same time
@@ -92,14 +90,26 @@ def __init__(self, hs: "HomeServer"):
9290
] = {}
9391
self._run_background_tasks = hs.config.worker.run_background_tasks
9492

93+
# Flag to make sure we only try and launch new tasks once at a time.
94+
self._launching_new_tasks = False
95+
9596
if self._run_background_tasks:
9697
self._clock.looping_call(
97-
run_as_background_process,
98+
self._launch_scheduled_tasks,
99+
TaskScheduler.SCHEDULE_INTERVAL_MS,
100+
)
101+
self._clock.looping_call(
102+
self._clean_scheduled_tasks,
98103
TaskScheduler.SCHEDULE_INTERVAL_MS,
99-
"handle_scheduled_tasks",
100-
self._handle_scheduled_tasks,
101104
)
102105

106+
LaterGauge(
107+
"synapse_scheduler_running_tasks",
108+
"The number of concurrent running tasks handled by the TaskScheduler",
109+
labels=None,
110+
caller=lambda: len(self._running_tasks),
111+
)
112+
103113
def register_action(
104114
self,
105115
function: Callable[
@@ -234,6 +244,7 @@ async def get_tasks(
234244
resource_id: Optional[str] = None,
235245
statuses: Optional[List[TaskStatus]] = None,
236246
max_timestamp: Optional[int] = None,
247+
limit: Optional[int] = None,
237248
) -> List[ScheduledTask]:
238249
"""Get a list of tasks. Returns all the tasks if no args is provided.
239250
@@ -247,6 +258,7 @@ async def get_tasks(
247258
statuses: Limit the returned tasks to the specific statuses
248259
max_timestamp: Limit the returned tasks to the ones that have
249260
a timestamp inferior to the specified one
261+
limit: Only return `limit` number of rows if set.
250262
251263
Returns
252264
A list of `ScheduledTask`, ordered by increasing timestamps
@@ -256,6 +268,7 @@ async def get_tasks(
256268
resource_id=resource_id,
257269
statuses=statuses,
258270
max_timestamp=max_timestamp,
271+
limit=limit,
259272
)
260273

261274
async def delete_task(self, id: str) -> None:
@@ -273,34 +286,58 @@ async def delete_task(self, id: str) -> None:
273286
raise Exception(f"Task {id} is currently ACTIVE and can't be deleted")
274287
await self._store.delete_scheduled_task(id)
275288

276-
async def _handle_scheduled_tasks(self) -> None:
277-
"""Main loop taking care of launching tasks and cleaning up old ones."""
278-
await self._launch_scheduled_tasks()
279-
await self._clean_scheduled_tasks()
289+
def launch_task_by_id(self, id: str) -> None:
290+
"""Try launching the task with the given ID."""
291+
# Don't bother trying to launch new tasks if we're already at capacity.
292+
if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
293+
return
294+
295+
run_as_background_process("launch_task_by_id", self._launch_task_by_id, id)
296+
297+
async def _launch_task_by_id(self, id: str) -> None:
298+
"""Helper async function for `launch_task_by_id`."""
299+
task = await self.get_task(id)
300+
if task:
301+
await self._launch_task(task)
280302

303+
@wrap_as_background_process("launch_scheduled_tasks")
281304
async def _launch_scheduled_tasks(self) -> None:
282305
"""Retrieve and launch scheduled tasks that should be running at that time."""
283-
for task in await self.get_tasks(statuses=[TaskStatus.ACTIVE]):
284-
await self._launch_task(task)
285-
for task in await self.get_tasks(
286-
statuses=[TaskStatus.SCHEDULED], max_timestamp=self._clock.time_msec()
287-
):
288-
await self._launch_task(task)
306+
# Don't bother trying to launch new tasks if we're already at capacity.
307+
if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
308+
return
309+
310+
if self._launching_new_tasks:
311+
return
289312

290-
running_tasks_gauge.set(len(self._running_tasks))
313+
self._launching_new_tasks = True
291314

315+
try:
316+
for task in await self.get_tasks(
317+
statuses=[TaskStatus.ACTIVE], limit=self.MAX_CONCURRENT_RUNNING_TASKS
318+
):
319+
await self._launch_task(task)
320+
for task in await self.get_tasks(
321+
statuses=[TaskStatus.SCHEDULED],
322+
max_timestamp=self._clock.time_msec(),
323+
limit=self.MAX_CONCURRENT_RUNNING_TASKS,
324+
):
325+
await self._launch_task(task)
326+
327+
finally:
328+
self._launching_new_tasks = False
329+
330+
@wrap_as_background_process("clean_scheduled_tasks")
292331
async def _clean_scheduled_tasks(self) -> None:
293332
"""Clean old complete or failed jobs to avoid clutter the DB."""
333+
now = self._clock.time_msec()
294334
for task in await self._store.get_scheduled_tasks(
295-
statuses=[TaskStatus.FAILED, TaskStatus.COMPLETE]
335+
statuses=[TaskStatus.FAILED, TaskStatus.COMPLETE],
336+
max_timestamp=now - TaskScheduler.KEEP_TASKS_FOR_MS,
296337
):
297338
# FAILED and COMPLETE tasks should never be running
298339
assert task.id not in self._running_tasks
299-
if (
300-
self._clock.time_msec()
301-
> task.timestamp + TaskScheduler.KEEP_TASKS_FOR_MS
302-
):
303-
await self._store.delete_scheduled_task(task.id)
340+
await self._store.delete_scheduled_task(task.id)
304341

305342
async def _launch_task(self, task: ScheduledTask) -> None:
306343
"""Launch a scheduled task now.
@@ -339,6 +376,9 @@ async def wrapper() -> None:
339376
)
340377
self._running_tasks.remove(task.id)
341378

379+
# Try launch a new task since we've finished with this one.
380+
self._clock.call_later(1, self._launch_scheduled_tasks)
381+
342382
if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
343383
return
344384

@@ -355,4 +395,4 @@ async def wrapper() -> None:
355395

356396
self._running_tasks.add(task.id)
357397
await self.update_task(task.id, status=TaskStatus.ACTIVE)
358-
run_as_background_process(task.action, wrapper)
398+
run_as_background_process(f"task-{task.action}", wrapper)

0 commit comments

Comments
 (0)