|
| 1 | +# Copyright (c) 2025, Oracle and/or its affiliates. |
| 2 | +# SPDX-License-Identifier: LGPL-2.1-or-later |
| 3 | + |
| 4 | +""" |
| 5 | +crash runq - Display the tasks on the run queues of each cpu. |
| 6 | +
|
| 7 | +Implements the crash "runq" command for drgn |
| 8 | +""" |
| 9 | + |
| 10 | +import argparse |
| 11 | +from typing import Any, Set |
| 12 | + |
| 13 | +from drgn import Object, Program, cast, container_of |
| 14 | +from drgn.commands import argument, drgn_argument |
| 15 | +from drgn.commands.crash import crash_command |
| 16 | +from drgn.helpers.common.format import CellFormat, escape_ascii_string, print_table |
| 17 | +from drgn.helpers.linux.cpumask import for_each_online_cpu |
| 18 | +from drgn.helpers.linux.list import list_for_each_entry |
| 19 | +from drgn.helpers.linux.percpu import per_cpu |
| 20 | + |
| 21 | + |
def has_member(obj: "Object", name: str) -> bool:
    """
    Check whether *obj* exposes a member with the given name.

    :param obj: drgn object to inspect
    :param name: member name to look up
    :returns: True if the member exists, False otherwise
    """
    try:
        obj.member_(name)
    except LookupError:
        return False
    return True
| 34 | + |
| 35 | + |
def task_thread_info(task: Object) -> Object:
    """
    Return a task's ``thread_info``.

    This is an equivalent to the kernel function / inline / macro
    ``task_thread_info()``, but it must cover a wide variety of versions
    and configurations: if ``struct task_struct`` embeds a ``thread_info``
    member, use it directly; otherwise the ``stack`` field points at the
    ``thread_info``.

    :param task: Object of type ``struct task_struct *``
    :returns: The ``struct thread_info *`` for this task
    """
    try:
        return task.member_("thread_info").address_of_()
    except LookupError:
        return cast("struct thread_info *", task.stack)
| 50 | + |
| 51 | + |
def task_cpu(task: Object) -> int:
    """
    Return the CPU on which a task is running.

    This is an equivalent to the kernel function ``task_cpu()``, but it covers
    a wide variety of variations in kernel version and configuration. It would
    be a bit impractical to spell out all the variants, but essentially, if
    there's a "cpu" field in ``struct task_struct``, then we can just use that.
    Otherwise, we need to get it from the ``thread_info``.

    :param task: Object of type ``struct task_struct *``
    :returns: The cpu as a Python int
    """
    if has_member(task, "cpu"):
        return task.cpu.value_()
    return task_thread_info(task).cpu.value_()
| 68 | + |
| 69 | + |
def runq_clock(prog: Program, cpu: int) -> int:
    """
    Read the clock of the given CPU's run queue (``struct rq``).

    :param prog: drgn program
    :param cpu: cpu index
    :returns: cpu runqueue clock in ns granularity
    """
    return per_cpu(prog["runqueues"], cpu).clock.value_()
| 80 | + |
| 81 | + |
def get_task_arrival_time(task: Object) -> int:
    """
    Get a task's arrival time on cpu.

    A task's arrival time is only updated when the task is put ON a cpu
    via context_switch. The field holding it has moved across kernel
    versions, so each known location is tried in turn before falling back
    to ``sched_info.last_arrival``.

    :param task: ``struct task_struct *``
    :returns: arrival time instance in ns granularity
    """
    for field in ("last_run", "timestamp"):
        if has_member(task, field):
            return task.member_(field).value_()
    return task.sched_info.last_arrival.value_()
| 101 | + |
| 102 | + |
def task_lastrun2now(task: Object) -> int:
    """
    Get the duration from task last run timestamp to now.

    The returned duration covers the task's last run time on cpu plus the
    time spent in its current status. Since the time slice for a task on
    cpu is usually short, this roughly tells how long the task has been
    in its current status. For a task in the "RU" status, if it is still
    on a cpu then this is the duration it has been running; otherwise it
    roughly tells how long the task has been sitting in the runqueue.

    :param task: ``struct task_struct *``
    :returns: duration in ns granularity
    """
    now = runq_clock(task.prog_, task_cpu(task))
    return now - get_task_arrival_time(task)
| 124 | + |
| 125 | + |
| 126 | +def _parse_cpus_arg(cpus_arg: str, max_cpu: int) -> Set[int]: |
| 127 | + """ |
| 128 | + Parse argument to -c for cpu restriction (e.g. '1,3,5-8'). |
| 129 | +
|
| 130 | + :param cpus_arg: str |
| 131 | + :param max_cpu: int |
| 132 | + :returns: a set of specified cpus |
| 133 | + """ |
| 134 | + cpus = set() |
| 135 | + for part in cpus_arg.split(","): |
| 136 | + part = part.strip() |
| 137 | + if "-" in part: |
| 138 | + start, end = part.split("-") |
| 139 | + for cpu in range(int(start), int(end) + 1): |
| 140 | + if 0 <= cpu < max_cpu: |
| 141 | + cpus.add(cpu) |
| 142 | + elif part: |
| 143 | + cpu = int(part) |
| 144 | + if 0 <= cpu < max_cpu: |
| 145 | + cpus.add(cpu) |
| 146 | + return cpus |
| 147 | + |
| 148 | + |
def _get_runqueue_timestamps(runqueue: Object) -> int:
    """
    Get a runqueue clock timestamp, best-effort.

    Not every kernel version has every field, so the known candidates are
    tried in order; a read failure moves on to the next candidate.

    :param runqueue: ``struct rq`` Object
    :returns: the first readable timestamp, or 0 if none is available
    """
    for field in ("clock", "most_recent_timestamp", "timestamp_last_tick"):
        if not has_member(runqueue, field):
            continue
        try:
            return getattr(runqueue, field).value_()
        except Exception:
            # Deliberate best-effort: fall through to the next candidate.
            continue
    return 0
| 166 | + |
| 167 | + |
def dump_rt_runq(runqueue: Object) -> None:
    """
    Dump the tasks queued in the RT scheduler's priority array.

    The currently running task is skipped; if nothing else is queued a
    placeholder line is printed.

    :param runqueue: ``struct rq`` Object
    """
    # NOTE(review): the -16 offset presumably rewinds from the ``active``
    # member to the start of an enclosing structure for display -- confirm
    # against the crash utility's output.
    prio_array = (
        hex(runqueue.rt.active.address_ - 16) if runqueue.rt.active.address_ else 0
    )
    print(" RT PRIO_ARRAY:", prio_array)
    queued = 0
    for queue in runqueue.rt.active.queue:
        for entity in list_for_each_entry(
            "struct sched_rt_entity", queue.address_of_(), "run_list"
        ):
            task = container_of(entity, "struct task_struct", "rt")
            if task == runqueue.curr:
                continue
            queued += 1
            print(
                " " * 4,
                '[{:3d}] PID: {:<6d} TASK: {} COMMAND: "{}"'.format(
                    task.prio.value_(),
                    task.pid.value_(),
                    hex(task),
                    escape_ascii_string(task.comm.string_()),
                ),
            )
    if not queued:
        print(" [no tasks queued]")
| 199 | + |
| 200 | + |
def dump_cfs_runq(runqueue: Object, task_group: bool = False) -> None:
    """
    Dump the tasks queued on the CFS scheduler's run queue.

    The currently running task is skipped; if nothing else is queued a
    placeholder line is printed.

    :param runqueue: ``struct rq`` Object
    :param task_group: when True, suppress the "CFS RB_ROOT" header line
    """
    if not task_group:
        cfs_root = hex(runqueue.cfs.tasks_timeline.address_of_().value_())
        print(" CFS RB_ROOT:", cfs_root)
    queued = 0
    rq = runqueue.address_of_()
    for task in list_for_each_entry(
        "struct task_struct", rq.cfs_tasks.address_of_(), "se.group_node"
    ):
        if task == runqueue.curr:
            continue
        queued += 1
        print(
            " " * 4,
            '[{:3d}] PID: {:<6d} TASK: {} COMMAND: "{}"'.format(
                task.prio.value_(),
                task.pid.value_(),
                hex(task),
                escape_ascii_string(task.comm.string_()),
            ),
        )
    if not queued:
        print(" [no tasks queued]")
| 229 | + |
| 230 | + |
def timestamp_str(ns: int) -> str:
    """
    Format a nanosecond duration as ``days HH:MM:SS.mmm``.

    :param ns: duration in nanoseconds
    :returns: formatted string, e.g. ``"1 02:03:04.003"``
    """
    total_ms = ns // 1000000
    total_secs, ms = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    total_hours, mins = divmod(total_mins, 60)
    days, hours = divmod(total_hours, 24)
    return "%d %02d:%02d:%02d.%03d" % (days, hours, mins, secs, ms)
| 243 | + |
| 244 | + |
def run_queue(prog: Program, args: argparse.Namespace) -> None:
    """
    Print run queue information for each selected CPU.

    Depending on the flags this prints a per-CPU runqueue-clock lag report
    (-T), a formatted table (-t / -m), or a detailed per-CPU listing of
    the current task plus the queued RT and CFS tasks.

    :param prog: drgn program
    :param args: argparse Namespace from the crash "runq" command
    """
    online_cpus = list(for_each_online_cpu(prog))
    max_cpu = max(online_cpus) + 1 if online_cpus else 0

    if args.cpus:
        selected_cpus = _parse_cpus_arg(args.cpus, max_cpu)
        cpus = [cpu for cpu in online_cpus if cpu in selected_cpus]
    else:
        cpus = online_cpus

    # Bug fix: the original loop was "for cpu, i in enumerate(cpus)", which
    # used the list *index* as the CPU number. With -c selecting a
    # non-contiguous subset, the wrong per-cpu runqueues were read, and the
    # -T end-of-list check ("i == len(cpus) - 1") compared a CPU id to an
    # index and could never fire.
    if args.show_lag:
        # -T: show how far each CPU's runqueue clock lags behind the newest
        # clock seen across the selected CPUs, smallest lag first.
        runq_clocks = {cpu: runq_clock(prog, cpu) for cpu in cpus}
        if runq_clocks:
            newest = max(runq_clocks.values())
            lags = {cpu: newest - clock for cpu, clock in runq_clocks.items()}
            for cpu, lag in sorted(lags.items(), key=lambda item: item[1]):
                print(f"CPU {cpu}: {lag/1e9:.2f} secs")
        return

    table_format = args.show_timestamps or args.pretty_runtime
    table = []
    for cpu in cpus:
        runqueue = per_cpu(prog["runqueues"], cpu)
        curr_task_addr = runqueue.curr.value_()
        curr_task = runqueue.curr[0]
        comm = escape_ascii_string(curr_task.comm.string_())
        pid = curr_task.pid.value_()
        prio = curr_task.prio.value_()

        if table_format:
            row = [
                CellFormat(cpu, ">"),
                CellFormat(pid, ">"),
                CellFormat(curr_task_addr, "x"),
                CellFormat(prio, ">"),
                CellFormat(comm, "<"),
            ]
            # Bug fix: append columns in the same order as the headers below
            # (timestamps first, then runtime). The original appended RUNTIME
            # before the timestamp columns, misaligning the table when -t and
            # -m were combined.
            if args.show_timestamps:
                rq_ts = _get_runqueue_timestamps(runqueue)
                task_ts = get_task_arrival_time(curr_task)
                row += [
                    CellFormat(f"{rq_ts:013d}", ">"),
                    CellFormat(f"{task_ts:013d}", "<"),
                ]
            if args.pretty_runtime:
                run_time = task_lastrun2now(curr_task)
                row.append(CellFormat(timestamp_str(run_time), ">"))
            table.append(row)
        else:
            print(f"CPU {cpu} RUNQUEUE: {hex(runqueue.address_of_().value_())}")
            print(
                f"  CURRENT: PID: {pid:<6d} TASK: {hex(curr_task_addr)} PRIO: {prio}"
                f' COMMAND: "{comm}"'
            )
            if args.group:
                root_task_group_addr = prog["root_task_group"].address_of_().value_()
                print(f" ROOT_TASK_GROUP: {hex(root_task_group_addr)}")
                print(
                    " " * 4,
                    '[{:3d}] PID: {:<6d} TASK: {} COMMAND: "{}" [CURRENT]'.format(
                        prio,
                        pid,
                        hex(curr_task_addr),
                        comm,
                    ),
                )
            else:
                # RT PRIO_ARRAY
                dump_rt_runq(runqueue)
                # CFS RB_ROOT
                dump_cfs_runq(runqueue, args.group)
                print()

    if table_format:
        headers = [
            CellFormat("CPU", "<"),
            CellFormat("PID", "<"),
            CellFormat("TASK", "<"),
            CellFormat("PRIO", "<"),
            CellFormat("COMMAND", "<"),
        ]
        if args.show_timestamps:
            headers += [
                CellFormat("RQ_TIMESTAMP", "<"),
                CellFormat("TASK_TIMESTAMP", "<"),
            ]
        if args.pretty_runtime:
            headers.append(CellFormat("RUNTIME", "<"))
        print_table([headers] + table)
| 355 | + |
| 356 | + |
@crash_command(
    description="Display the tasks on the run queues of each cpu.",
    arguments=(
        # -t: table output including RQ/task timestamp columns
        argument("-t", action="store_true", dest="show_timestamps"),
        # -T: print per-CPU runqueue clock lag instead of task listings
        argument("-T", action="store_true", dest="show_lag"),
        # -m: add a pretty-printed runtime column to the table
        argument("-m", action="store_true", dest="pretty_runtime"),
        # -g: show the current task under its root task group
        argument("-g", action="store_true", dest="group"),
        # -c: restrict output to these CPUs, e.g. "1,3,5-8"
        argument("-c", type=str, default="", dest="cpus"),
        drgn_argument,
    ),
)
def _crash_cmd_runq(
    prog: Program, name: str, args: argparse.Namespace, **kwargs: Any
) -> None:
    """Entry point for the crash "runq" command; delegates to run_queue()."""
    run_queue(prog, args)
0 commit comments