Skip to content

Commit 089995d

Browse files
committed
drgn: add runq command
Signed-off-by: Richard Li <[email protected]>
1 parent b53e65b commit 089995d

File tree

2 files changed

+401
-0
lines changed

2 files changed

+401
-0
lines changed

drgn/commands/_builtin/crash/runq.py

Lines changed: 371 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,371 @@
1+
# Copyright (c) 2025, Oracle and/or its affiliates.
2+
# SPDX-License-Identifier: LGPL-2.1-or-later
3+
4+
"""
5+
crash runq - Display the tasks on the run queues of each cpu.
6+
7+
Implements the crash "runq" command for drgn
8+
"""
9+
10+
import argparse
11+
from typing import Any, Set
12+
13+
from drgn import Object, Program, cast, container_of
14+
from drgn.commands import argument, drgn_argument
15+
from drgn.commands.crash import crash_command
16+
from drgn.helpers.common.format import CellFormat, escape_ascii_string, print_table
17+
from drgn.helpers.linux.cpumask import for_each_online_cpu
18+
from drgn.helpers.linux.list import list_for_each_entry
19+
from drgn.helpers.linux.percpu import per_cpu
20+
21+
22+
def has_member(obj: Object, name: str) -> bool:
    """
    Check whether an object exposes a member called ``name``.

    The lookup is attempted with ``obj.member_()``; a ``LookupError`` raised
    by that call is taken to mean that the member does not exist.

    :param obj: Drgn object to probe
    :param name: name of the member to look for
    :returns: ``True`` if the lookup succeeded, ``False`` otherwise
    """
    try:
        obj.member_(name)
    except LookupError:
        return False
    else:
        return True
34+
35+
36+
def task_thread_info(task: Object) -> Object:
    """
    Look up the ``thread_info`` belonging to a task.

    Mirrors the kernel's ``task_thread_info()`` function / inline / macro
    while coping with the different layouts found across kernel versions and
    configurations.

    :param task: Object of type ``struct task_struct *``
    :returns: The ``struct thread_info *`` for this task
    """
    if not has_member(task, "thread_info"):
        # No embedded thread_info member: reinterpret the task's stack
        # pointer as a pointer to its thread_info.
        return cast("struct thread_info *", task.stack)
    return task.thread_info.address_of_()
50+
51+
52+
def task_cpu(task: Object) -> int:
    """
    Return the CPU recorded for a task.

    Counterpart of the kernel's ``task_cpu()``. When ``struct task_struct``
    carries a ``cpu`` member it is read directly; otherwise the value is
    taken from the task's ``thread_info``.

    :param task: Object of type ``struct task_struct *``
    :returns: The cpu as a Python int
    """
    # Pick whichever structure holds the "cpu" field, then read it once.
    holder = task if has_member(task, "cpu") else task_thread_info(task)
    return holder.cpu.value_()
68+
69+
70+
def runq_clock(prog: Program, cpu: int) -> int:
    """
    Read the ``clock`` member of a cpu's ``struct rq``.

    :param prog: drgn program
    :param cpu: cpu index
    :returns: cpu runqueue clock in ns granularity
    """
    runqueues = prog["runqueues"]
    return per_cpu(runqueues, cpu).clock.value_()
80+
81+
82+
def get_task_arrival_time(task: Object) -> int:
    """
    Return the timestamp at which a task last arrived on a cpu.

    A task's arrival time is only updated when the task is put ON a cpu via
    context_switch.

    :param task: ``struct task_struct *``
    :returns: arrival time instance in ns granularity
    """
    # Prefer a timestamp stored directly in the task_struct, if there is one.
    for field in ("last_run", "timestamp"):
        if has_member(task, field):
            return getattr(task, field).value_()
    # Otherwise fall back to the scheduler statistics.
    return task.sched_info.last_arrival.value_()
101+
102+
103+
def task_lastrun2now(task: Object) -> int:
    """
    Return the time elapsed since the task last arrived on a cpu.

    The value is the clock of the runqueue of the task's cpu minus the
    task's arrival time. It therefore covers the task's last stint on the
    cpu plus the time spent in its current state; since time slices are
    usually short, it roughly tells how long the task has been in its
    current state. For a task in "RU" state this is the time it has been
    running if it is still on the cpu, otherwise roughly the time it has
    been waiting in the runqueue.

    :param task: ``struct task_struct *``
    :returns: duration in ns granularity
    """
    last_arrival = get_task_arrival_time(task)
    return runq_clock(task.prog_, task_cpu(task)) - last_arrival
124+
125+
126+
def _parse_cpus_arg(cpus_arg: str, max_cpu: int) -> Set[int]:
127+
"""
128+
Parse argument to -c for cpu restriction (e.g. '1,3,5-8').
129+
130+
:param cpus_arg: str
131+
:param max_cpu: int
132+
:returns: a set of specified cpus
133+
"""
134+
cpus = set()
135+
for part in cpus_arg.split(","):
136+
part = part.strip()
137+
if "-" in part:
138+
start, end = part.split("-")
139+
for cpu in range(int(start), int(end) + 1):
140+
if 0 <= cpu < max_cpu:
141+
cpus.add(cpu)
142+
elif part:
143+
cpu = int(part)
144+
if 0 <= cpu < max_cpu:
145+
cpus.add(cpu)
146+
return cpus
147+
148+
149+
def _get_runqueue_timestamps(runqueue: Object) -> int:
    """
    Return the clock timestamp of a runqueue.

    Several member names are tried in turn because not every kernel has all
    of them; the first one that exists and can be read wins.

    :param runqueue: the runqueue object to read
    :returns: rq timestamp, or 0 if no candidate member could be read
    """
    candidates = ("clock", "most_recent_timestamp", "timestamp_last_tick")
    for field in candidates:
        if not has_member(runqueue, field):
            continue
        try:
            return getattr(runqueue, field).value_()
        except Exception:
            # Best effort: a member that cannot be read is skipped.
            continue
    return 0
166+
167+
168+
def dump_rt_runq(runqueue: Object) -> None:
    """
    Print the tasks queued on the rt scheduler of a runqueue.

    Every priority list of ``runqueue.rt.active.queue`` is walked and each
    task found is printed, except the runqueue's current task. A placeholder
    line is printed when nothing was listed.

    :param runqueue: the runqueue object to dump
    """
    rt_active = runqueue.rt.active
    # NOTE(review): the reason for the 16 byte offset is not visible here;
    # confirm against the expected crash output.
    prio_array = hex(rt_active.address_ - 16) if rt_active.address_ else 0
    print(" RT PRIO_ARRAY:", prio_array)

    queued = 0
    for prio_list in rt_active.queue:
        entities = list_for_each_entry(
            "struct sched_rt_entity", prio_list.address_of_(), "run_list"
        )
        for entity in entities:
            task = container_of(entity, "struct task_struct", "rt")
            if task == runqueue.curr:
                continue
            queued += 1
            line = '[{:3d}] PID: {:<6d} TASK: {} COMMAND: "{}"'.format(
                task.prio.value_(),
                task.pid.value_(),
                hex(task),
                escape_ascii_string(task.comm.string_()),
            )
            print(" " * 4, line)
    if not queued:
        print(" [no tasks queued]")
199+
200+
201+
def dump_cfs_runq(runqueue: Object, task_group: bool = False) -> None:
    """
    Print the tasks queued on the cfs scheduler of a runqueue.

    The runqueue's ``cfs_tasks`` list is walked and each task found is
    printed, except the runqueue's current task. A placeholder line is
    printed when nothing was listed.

    :param runqueue: the runqueue object to dump
    :param task_group: when true, the ``CFS RB_ROOT`` header line is omitted
    """
    timeline_addr = runqueue.cfs.tasks_timeline.address_of_().value_()
    cfs_root = hex(timeline_addr)
    if not task_group:
        print(" CFS RB_ROOT:", cfs_root)

    queued = 0
    rq_ptr = runqueue.address_of_()
    tasks = list_for_each_entry(
        "struct task_struct", rq_ptr.cfs_tasks.address_of_(), "se.group_node"
    )
    for task in tasks:
        if task == runqueue.curr:
            continue
        queued += 1
        line = '[{:3d}] PID: {:<6d} TASK: {} COMMAND: "{}"'.format(
            task.prio.value_(),
            task.pid.value_(),
            hex(task),
            escape_ascii_string(task.comm.string_()),
        )
        print(" " * 4, line)
    if not queued:
        print(" [no tasks queued]")
229+
230+
231+
def timestamp_str(ns: int) -> str:
    """
    Format a nanosecond count as ``"D HH:MM:SS.mmm"``.

    :param ns: duration or timestamp in nanoseconds
    :returns: days, hours, minutes, seconds and milliseconds as text
    """
    total_ms = ns // 1000000
    total_secs, ms = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    total_hours, mins = divmod(total_mins, 60)
    days, hours = divmod(total_hours, 24)
    return "%d %02d:%02d:%02d.%03d" % (days, hours, mins, secs, ms)
243+
244+
245+
def _print_runq_lags(prog: Program, cpus: list) -> None:
    """Print how far each cpu's runqueue clock trails the most advanced one."""
    if not cpus:
        return
    clocks = {cpu: runq_clock(prog, cpu) for cpu in cpus}
    newest = max(clocks.values())
    # Smallest lag first; sorted() is stable, so ties keep cpu order.
    for cpu, clock in sorted(clocks.items(), key=lambda item: newest - item[1]):
        lag = newest - clock
        print(f"CPU {cpu}: {lag/1e9:.2f} secs")


def run_queue(prog: Program, args: argparse.Namespace) -> None:
    """
    Print runqueue with detailed info.

    Depending on the options in ``args`` one of three outputs is produced:

    * ``show_lag``: one line per cpu with the lag of its runqueue clock
      behind the most advanced runqueue clock, smallest lag first. This
      takes precedence over every other option.
    * ``show_timestamps`` and/or ``pretty_runtime``: a table with one row
      per cpu describing the current task, extended with a RUNTIME column
      and/or the runqueue and task timestamps.
    * otherwise: for each cpu the current task followed by the queued tasks
      (or, with ``group``, the root task group header).

    ``args.cpus`` optionally restricts the output to the given online cpus.

    :param prog: drgn program
    :param args: argparse Namespace
    """
    online_cpus = list(for_each_online_cpu(prog))
    if args.cpus:
        max_cpu = max(online_cpus) + 1 if online_cpus else 0
        selected_cpus = _parse_cpus_arg(args.cpus, max_cpu)
        cpus = [cpu for cpu in online_cpus if cpu in selected_cpus]
    else:
        cpus = online_cpus

    if args.show_lag:
        _print_runq_lags(prog, cpus)
        return

    table_format = bool(args.show_timestamps or args.pretty_runtime)
    table = []
    runqueues = prog["runqueues"]
    # Iterate over the cpu numbers themselves: an enumerate() index is not a
    # cpu id once the list has been filtered with -c.
    for cpu in cpus:
        runqueue = per_cpu(runqueues, cpu)
        curr_task_addr = runqueue.curr.value_()
        curr_task = runqueue.curr[0]
        comm = escape_ascii_string(curr_task.comm.string_())
        pid = curr_task.pid.value_()
        prio = curr_task.prio.value_()

        if table_format:
            row = [
                CellFormat(cpu, ">"),
                CellFormat(pid, ">"),
                CellFormat(curr_task_addr, "x"),
                CellFormat(prio, ">"),
                CellFormat(comm, "<"),
            ]
            if args.pretty_runtime:
                # Only computed on demand; it needs fields that are not
                # required by the other output modes.
                run_time = task_lastrun2now(curr_task)
                row.append(CellFormat(timestamp_str(run_time), ">"))
            if args.show_timestamps:
                rq_ts = _get_runqueue_timestamps(runqueue)
                task_ts = get_task_arrival_time(curr_task)
                row += [
                    CellFormat(f"{rq_ts:013d}", ">"),
                    CellFormat(f"{task_ts:013d}", "<"),
                ]
            table.append(row)
            continue

        print(f"CPU {cpu} RUNQUEUE: {hex(runqueue.address_of_().value_())}")
        print(
            f" CURRENT: PID: {pid:<6d} TASK: {hex(curr_task_addr)} PRIO: {prio}"
            f' COMMAND: "{comm}"'
        )
        if args.group:
            # Looked up only here so that the other modes do not depend on
            # the root_task_group symbol being present.
            root_task_group_addr = prog["root_task_group"].address_of_().value_()
            print(f" ROOT_TASK_GROUP: {hex(root_task_group_addr)}")
            print(
                " " * 4,
                '[{:3d}] PID: {:<6d} TASK: {} COMMAND: "{}" [CURRENT]'.format(
                    prio,
                    pid,
                    hex(curr_task_addr),
                    comm,
                ),
            )
        else:
            # RT PRIO_ARRAY
            dump_rt_runq(runqueue)
        # CFS tasks are listed in both modes; dump_cfs_runq() leaves out its
        # CFS RB_ROOT header when args.group is set.
        dump_cfs_runq(runqueue, args.group)
        print()

    if table_format:
        headers = [
            CellFormat("CPU", "<"),
            CellFormat("PID", "<"),
            CellFormat("TASK", "<"),
            CellFormat("PRIO", "<"),
            CellFormat("COMMAND", "<"),
        ]
        # Optional headers are added in the same order as the row cells.
        if args.pretty_runtime:
            headers.append(CellFormat("RUNTIME", "<"))
        if args.show_timestamps:
            headers += [
                CellFormat("RQ_TIMESTAMP", "<"),
                CellFormat("TASK_TIMESTAMP", "<"),
            ]
        print_table([headers] + table)
355+
356+
357+
# Registration of the crash "runq" command. Each option is stored under the
# attribute name given by ``dest`` and consumed by run_queue().
@crash_command(
    description="Display the tasks on the run queues of each cpu.",
    arguments=(
        # -t: add the RQ_TIMESTAMP and TASK_TIMESTAMP columns
        argument("-t", action="store_true", dest="show_timestamps"),
        # -T: print the lag of each cpu's runqueue clock instead
        argument("-T", action="store_true", dest="show_lag"),
        # -m: add the RUNTIME column
        argument("-m", action="store_true", dest="pretty_runtime"),
        # -g: print the ROOT_TASK_GROUP header for each cpu
        argument("-g", action="store_true", dest="group"),
        # -c: restrict output to the given cpus, e.g. '1,3,5-8'
        argument("-c", type=str, default="", dest="cpus"),
        drgn_argument,
    ),
)
def _crash_cmd_runq(
    prog: Program, name: str, args: argparse.Namespace, **kwargs: Any
) -> None:
    # Thin entry point: all of the work is done by run_queue().
    run_queue(prog, args)

0 commit comments

Comments
 (0)