Skip to content

Commit 6cfa021

Browse files
fix: Update Profiler (#160)
* profiler hotfix: fix a bug where only the input state preprocessing was being profiled. Removed memory_timeline.html because it's slow to generate and the memory pickle shows the same info, but better. Change where the profile output is written to, so new runs don't overwrite old ones. Add more log output to explain how to view profiler output. Remove saving the stack trace from the PyTorch profiler; otherwise the trace files couldn't be opened. * [pre-commit.ci] auto fixes from pre-commit.com hooks. For more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 78571a0 commit 6cfa021

File tree

2 files changed

+12
-10
lines changed

2 files changed

+12
-10
lines changed

src/anemoi/inference/profiler.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010

1111
import logging
12+
import socket
13+
import time
1214
from contextlib import contextmanager
1315

1416
import torch
@@ -47,7 +49,7 @@ def ProfilingRunner(use_profiler: bool) -> None:
4749
Whether to profile the wrapped code (True) or not (False).
4850
4951
"""
50-
dirname = "profiling-output"
52+
dirname = f"profiling-output/{socket.gethostname()}-{int(time.time())}"
5153
if use_profiler:
5254
torch.cuda.memory._record_memory_history(max_entries=100000)
5355
activities = [torch.profiler.ProfilerActivity.CPU]
@@ -56,7 +58,6 @@ def ProfilingRunner(use_profiler: bool) -> None:
5658
with torch.profiler.profile(
5759
profile_memory=True,
5860
record_shapes=True,
59-
with_stack=True,
6061
activities=activities,
6162
with_flops=True,
6263
on_trace_ready=torch.profiler.tensorboard_trace_handler(dirname),
@@ -75,7 +76,8 @@ def ProfilingRunner(use_profiler: bool) -> None:
7576
f"Top {row_limit} kernels by runtime on CUDA:\n {prof.key_averages().table(sort_by='self_cuda_time_total', row_limit=row_limit)}"
7677
)
7778
LOG.info("Memory summary \n%s", torch.cuda.memory_summary())
78-
if torch.cuda.is_available():
79-
prof.export_memory_timeline(f"{dirname}/memory_timeline.html", device="cuda:0")
79+
LOG.info(
80+
f"Memory snapshot and trace file stored to '{dirname}'. To view the memory snapshot, upload the pickle file to 'https://pytorch.org/memory_viz'. To view the trace file, see 'https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html#use-tensorboard-to-view-results-and-analyze-model-performance'"
81+
)
8082
else:
8183
yield

src/anemoi/inference/runner.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,12 @@ def run(self, *, input_state, lead_time):
138138
with ProfilingLabel("Prepare input tensor", self.use_profiler):
139139
input_tensor = self.prepare_input_tensor(input_state)
140140

141-
try:
142-
yield from self.forecast(lead_time, input_tensor, input_state)
143-
except (TypeError, ModuleNotFoundError, AttributeError):
144-
if self.report_error:
145-
self.checkpoint.report_error()
146-
raise
141+
try:
142+
yield from self.forecast(lead_time, input_tensor, input_state)
143+
except (TypeError, ModuleNotFoundError, AttributeError):
144+
if self.report_error:
145+
self.checkpoint.report_error()
146+
raise
147147

148148
def add_initial_forcings_to_input_state(self, input_state):
149149
# Should that already be a list of dates

0 commit comments

Comments
 (0)