Skip to content
This repository was archived by the owner on Mar 19, 2024. It is now read-only.

Commit 1b4d93f

Browse files
prigoyal authored and facebook-github-bot committed
Adding a hook to profile memory usage (#175)
Summary: Pull Request resolved: #175. Recently, nvidia-smi was removed from the FB cluster, making it hard to see what the memory utilization was. Using PyTorch, we extract this information at various steps of training. The profiling hook is currently experimental, and we will use it and adapt it for better usability. Down the line, we can move it to Classy Vision, once the hook is trusted to be useful, and change it accordingly. Reviewed By: min-xu-ai Differential Revision: D26284304 fbshipit-source-id: 1a8c3cd12a498fc55999e982a6c072723d54d144
1 parent b8e30eb commit 1b4d93f

File tree

3 files changed

+75
-2
lines changed

3 files changed

+75
-2
lines changed

vissl/config/defaults.yaml

+15
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,21 @@ config:
8383
# valid for some systems.
8484
LOG_GPU_STATS: True
8585

86+
# ----------------------------------------------------------------------------------- #
87+
# HOOKS
88+
# ----------------------------------------------------------------------------------- #
89+
HOOKS:
90+
# ----------------------------------------------------------------------------------- #
91+
# torch.cuda.memory_summary()
92+
# ----------------------------------------------------------------------------------- #
93+
MEMORY_SUMMARY:
94+
# set this to true if you want to print memory summary. useful for profiling
95+
# memory consumption of model
96+
PRINT_MEMORY_SUMMARY: False
97+
# at what iteration number should the memory summary be printed. usually
98+
# set to 1 for very large models
99+
LOG_ITERATION_NUM: 0
100+
86101
# ----------------------------------------------------------------------------------- #
87102
# DATA
88103
# ----------------------------------------------------------------------------------- #

vissl/hooks/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from vissl.hooks.deepclusterv2_hooks import ClusterMemoryHook, InitMemoryHook # noqa
88
from vissl.hooks.log_hooks import ( # noqa
99
LogGpuStatsHook,
10+
LogGpuMemoryHook,
1011
LogLossLrEtaHook,
1112
LogLossMetricsCheckpointHook,
1213
LogPerfTimeMetricsHook,
@@ -100,6 +101,8 @@ def default_hook_generator(cfg: AttrDict) -> List[ClassyHook]:
100101
hooks.extend([SSLModelComplexityHook()])
101102
if cfg.LOG_GPU_STATS:
102103
hooks.extend([LogGpuStatsHook()])
104+
if cfg.HOOKS.MEMORY_SUMMARY.PRINT_MEMORY_SUMMARY:
105+
hooks.extend([LogGpuMemoryHook(cfg.HOOKS.MEMORY_SUMMARY.LOG_ITERATION_NUM)])
103106
if cfg.TENSORBOARD_SETUP.USE_TENSORBOARD:
104107
assert is_tensorboard_available(), "Tensorboard must be installed to use it."
105108
tb_hook = get_tensorboard_hook(cfg)

vissl/hooks/log_hooks.py

+57-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,61 @@
2222
from vissl.utils.perf_stats import PerfStats
2323

2424

25+
class LogGpuMemoryHook(ClassyHook):
    """
    Hook that prints ``torch.cuda.memory_summary()`` for the primary
    device at several stages of a single training iteration. Useful for
    profiling the memory consumption of a model.
    """

    on_start = ClassyHook._noop
    on_loss_and_meter = ClassyHook._noop
    on_step = ClassyHook._noop
    on_phase_end = ClassyHook._noop
    on_end = ClassyHook._noop

    def __init__(
        self,
        log_iteration_num: int = 1,
    ) -> None:
        """
        Args:
            log_iteration_num: iteration number at which the memory summary
                should be printed (usually 1 for very large models).
        """
        super().__init__()
        self.log_iteration_num = log_iteration_num

    def on_phase_start(self, task: "tasks.ClassyTask") -> None:
        """
        Print the stats just before the training epoch starts
        """
        self._print_memory_summary(task, "on_phase_start")

    def on_forward(self, task: "tasks.ClassyTask") -> None:
        """
        Print the stats after the model forward pass is done
        """
        self._print_memory_summary(task, "on_forward")

    def on_backward(self, task: "tasks.ClassyTask") -> None:
        """
        Print the stats just after model.backward() is done
        """
        self._print_memory_summary(task, "on_backward")

    def on_update(self, task: "tasks.ClassyTask") -> None:
        """
        Print the stats just after model params are updated
        """
        self._print_memory_summary(task, "on_update")

    def _print_memory_summary(self, task: "tasks.ClassyTask", stage_name: str) -> None:
        # Log only on the primary worker, only when training on CUDA, and
        # only at the single configured iteration to keep log volume low.
        if not is_primary():
            return
        if task.device.type != "cuda":
            return
        if task.local_iteration_num != self.log_iteration_num:
            return
        logging.info(
            f"========= Memory Summary at {stage_name} ======="
            f"\n{torch.cuda.memory_summary()}\n"
        )
2580
class LogGpuStatsHook(ClassyHook):
2681
"""
2782
Hook executed at the start of training and after every training iteration is done.
@@ -92,8 +147,8 @@ def on_update(self, task: "tasks.ClassyTask") -> None:
92147
monitoring the stats (optionally) for every N iterations to get better
93148
idea about the batch time and training eta.
94149
95-
Set the btime_freq input using cfg.PERF_STAT_FREQUENCY=N ensuring that
96-
cfg.MONITOR_PERF_STATS = True.
150+
Set the btime_freq input using cfg.HOOKS.PERF_STATS.PERF_STAT_FREQUENCY=N
151+
ensuring that cfg.HOOKS.PERF_STATS.MONITOR_PERF_STATS = True.
97152
"""
98153
phase_type = "train" if task.train else "test"
99154
if is_primary() and phase_type == "train":

0 commit comments

Comments
 (0)