
Commit 8fd9772
vreis authored and facebook-github-bot committed
Reduce data logged to tensorboard (#436)

Summary: Pull Request resolved: #436

We were logging the learning rate and loss for every single step, which makes tensorboard too slow to load in long training runs. Log only every 10th step instead, which should be enough for all cases: we always log at the end of every phase as well.

Reviewed By: mannatsingh
Differential Revision: D20441202
fbshipit-source-id: 3bc4470efcd7e0cb64f66cbc483a8e26be0aca5a

Parent: 15a89ea
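The change boils down to a per-phase step counter plus a modulo check. A minimal, self-contained sketch of the same periodic-logging pattern (the PeriodicLogger name and its API are hypothetical, not part of Classy Vision):

import time
from typing import List


class PeriodicLogger:
    """Record a value every `log_period`-th step instead of every step."""

    def __init__(self, log_period: int = 10) -> None:
        self.log_period = log_period
        self.step_idx = 0  # reset at the start of each phase
        self.values: List[float] = []
        self.wall_times: List[float] = []

    def on_step(self, value: float) -> None:
        # Keep only every log_period-th sample to bound log size.
        if self.step_idx % self.log_period == 0:
            self.values.append(value)
            self.wall_times.append(time.time())
        self.step_idx += 1

With log_period=10, a one-million-step run stores 100,000 points instead of a million; the end-of-phase logging path still records the final values, so nothing is lost at phase boundaries.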

2 files changed: +14 −8 lines

classy_vision/hooks/tensorboard_plot_hook.py (+13 −8)
@@ -35,7 +35,7 @@ class TensorboardPlotHook(ClassyHook):
     on_start = ClassyHook._noop
     on_end = ClassyHook._noop
 
-    def __init__(self, tb_writer) -> None:
+    def __init__(self, tb_writer, log_period: int = 10) -> None:
         """The constructor method of TensorboardPlotHook.
 
         Args:
@@ -52,13 +52,15 @@ def __init__(self, tb_writer) -> None:
         self.tb_writer = tb_writer
         self.learning_rates: Optional[List[float]] = None
         self.wall_times: Optional[List[float]] = None
-        self.num_steps_global: Optional[List[int]] = None
+        self.num_updates: Optional[List[int]] = None
+        self.log_period = log_period
 
     def on_phase_start(self, task: "tasks.ClassyTask") -> None:
         """Initialize losses and learning_rates."""
         self.learning_rates = []
         self.wall_times = []
-        self.num_steps_global = []
+        self.num_updates = []
+        self.step_idx = 0
 
         if not is_master():
             return
@@ -80,11 +82,14 @@ def on_step(self, task: "tasks.ClassyTask") -> None:
             # Only need to log the average loss during the test phase
             return
 
-        learning_rate_val = task.optimizer.parameters.lr
+        if self.step_idx % self.log_period == 0:
+            learning_rate_val = task.optimizer.parameters.lr
 
-        self.learning_rates.append(learning_rate_val)
-        self.wall_times.append(time.time())
-        self.num_steps_global.append(task.num_updates)
+            self.learning_rates.append(learning_rate_val)
+            self.wall_times.append(time.time())
+            self.num_updates.append(task.num_updates)
+
+        self.step_idx += 1
 
     def on_phase_end(self, task: "tasks.ClassyTask") -> None:
         """Add the losses and learning rates to tensorboard."""
@@ -106,7 +111,7 @@ def on_phase_end(self, task: "tasks.ClassyTask") -> None:
 
         if task.train:
             for learning_rate, global_step, wall_time in zip(
-                self.learning_rates, self.num_steps_global, self.wall_times
+                self.learning_rates, self.num_updates, self.wall_times
             ):
                 self.tb_writer.add_scalar(
                     learning_rate_key,
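For illustration, constructing the hook with a non-default period might look like the following. This is a sketch, not code from the commit: the SummaryWriter import (tensorboardX here) and the log directory are assumptions, and `task` stands for an existing ClassyTask as in the diff above.

from tensorboardX import SummaryWriter

from classy_vision.hooks import TensorboardPlotHook

# Plot the learning rate and loss every 50th step instead of the default 10.
writer = SummaryWriter(log_dir="/tmp/tb_logs")
hook = TensorboardPlotHook(writer, log_period=50)
task.set_hooks([hook])

Exposing log_period as a constructor argument, rather than hard-coding the value 10, is also what lets the test below drop back to per-step logging.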

test/manual/hooks_tensorboard_plot_hook_test.py (+1 −0)
@@ -147,6 +147,7 @@ def flush(self):
 
         writer = DummySummaryWriter()
         hook = TensorboardPlotHook(writer)
+        hook.log_period = 1
         task.set_hooks([hook])
         task.optimizer.param_schedulers["lr"] = mock_lr_scheduler
 
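The test sets log_period = 1 so the hook records every step and the existing per-step assertions keep passing. The DummySummaryWriter it uses is a stub that records calls instead of writing event files; a minimal sketch of such a stub (only add_scalar and flush are visible in the diff, the rest is assumed):

class DummySummaryWriter:
    """Stand-in for a SummaryWriter that records add_scalar calls."""

    def __init__(self) -> None:
        self.scalar_logs = []

    def add_scalar(self, tag, scalar_value, global_step=None, walltime=None) -> None:
        # Record the call instead of writing a tensorboard event file.
        self.scalar_logs.append((tag, scalar_value, global_step, walltime))

    def flush(self) -> None:
        pass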
