
Commit 9d3e5e0

Automatically use accelerator.print by default
This is achieved by setting the default of callbacks__print_log__sink to 'auto'. When this value is detected during callback initialization, it is replaced with self.accelerator.print if available. This way, we get the sane default even though we cannot set it directly as the parameter default (the accelerator instance does not exist yet at that point), while still giving the user the option to pass a different sink.
1 parent 9c22965 commit 9d3e5e0
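
The resolution logic is easiest to see in isolation. Below is a minimal, self-contained sketch of the sentinel pattern described above; it is not the actual skorch code, and the class names are invented for illustration:

class NetSketch:
    """Toy stand-in for a net class that defers resolving its print sink."""
    def __init__(self, accelerator, callbacks__print_log__sink='auto'):
        self.accelerator = accelerator
        self.callbacks__print_log__sink = callbacks__print_log__sink

    def _initialize_callbacks(self):
        # 'auto' is only resolved once the accelerator instance exists;
        # fall back to the built-in print if it has no print of its own
        if self.callbacks__print_log__sink == 'auto':
            self.callbacks__print_log__sink = getattr(
                self.accelerator, 'print', print
            )
        return self

class FakeAccelerator:
    """Toy accelerator exposing a print method."""
    def print(self, *args, **kwargs):
        print(*args, **kwargs)

net = NetSketch(FakeAccelerator())._initialize_callbacks()
assert net.callbacks__print_log__sink == net.accelerator.print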

File tree: 3 files changed (+51, -11 lines)


docs/user/helper.rst

Lines changed: 1 addition & 3 deletions

@@ -81,13 +81,11 @@ the desired parameters and you're good to go:
     MyModule,
     accelerator=accelerator,
     device=None,
-    callbacks__print_log__sink=accelerator.print)
+)
 net.fit(X, y)
 
 accelerate_ recommends to leave the device handling to the Accelerator_, which
 is why we set ``device=None`` (thus telling skorch not to change the device).
-Furthermore, using ``accelerator.print`` should avoid printing the same output
-multiple times when training concurrently on multiple machines.
 
 To install accelerate_, run the following command inside your Python environment:
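
For context, the full example on that docs page now reads roughly as follows. This is a reconstruction, not part of the diff, and it assumes the AcceleratedNet subclass (built from skorch's AccelerateMixin) and MyModule defined earlier on the page:

from accelerate import Accelerator

accelerator = Accelerator()
net = AcceleratedNet(   # AccelerateMixin subclass from earlier in the docs
    MyModule,
    accelerator=accelerator,
    device=None,        # leave device placement to the Accelerator
)
net.fit(X, y)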

skorch/helper.py

Lines changed: 18 additions & 3 deletions

@@ -551,7 +551,7 @@ class was added: Using this mixin in conjunction with the accelerate library
     ...     MyModule,
     ...     accelerator=accelerator,
     ...     device=None,
-    ...     callbacks__print_log__sink=accelerator.print)
+    ... )
     >>> net.fit(X, y)
 
     The same approach works with all the other skorch net classes.
@@ -562,9 +562,17 @@ class was added: Using this mixin in conjunction with the accelerate library
     In addition to the usual parameters, pass an instance of
     ``accelerate.Accelerator`` with the desired settings.
 
+    callbacks__print_log__sink : 'auto' or callable
+      If 'auto', uses the ``print`` function of the accelerator, if it has one.
+      This avoids printing the same output multiple times when training
+      concurrently on multiple machines. If the accelerator does not have a
+      ``print`` function, use Python's ``print`` function instead.
+
     """
-    def __init__(self, *args, accelerator, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, accelerator, callbacks__print_log__sink='auto', **kwargs):
+        super().__init__(
+            *args, callbacks__print_log__sink=callbacks__print_log__sink, **kwargs
+        )
         self.accelerator = accelerator
 
     def _check_kwargs(self, kwargs):
@@ -575,6 +583,13 @@ def _check_kwargs(self, kwargs):
             "When device placement is performed by the accelerator, set device=None"
         )
 
+    def _initialize_callbacks(self):
+        if self.callbacks__print_log__sink == 'auto':
+            print_func = getattr(self.accelerator, 'print', print)
+            self.callbacks__print_log__sink = print_func
+        super()._initialize_callbacks()
+        return self
+
     def _initialize_criterion(self, *args, **kwargs):
         super()._initialize_criterion(*args, **kwargs)
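
Since the new default is an ordinary parameter, passing any callable still overrides 'auto' (this is exercised by test_print_log_sink_can_be_overwritten below). A hedged sketch, with a logger-based sink as a purely hypothetical choice and AcceleratedNet/MyModule as in the docs example above:

import logging

logger = logging.getLogger('train')
net = AcceleratedNet(
    MyModule,
    accelerator=accelerator,
    device=None,
    # any callable replaces the 'auto' default for the print_log callback
    callbacks__print_log__sink=logger.info,
)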

skorch/tests/test_helper.py

Lines changed: 32 additions & 5 deletions

@@ -2,6 +2,7 @@
 import pickle
 from distutils.version import LooseVersion
 from functools import partial
+from unittest.mock import Mock
 
 import numpy as np
 import pytest
@@ -760,10 +761,7 @@ def test_mixed_precision(self, net_cls, accelerator_cls, data, mixed_precision):
         fp16 = mixed_precision == 'fp16'
         accelerator = accelerator_cls(fp16=fp16)
 
-        net = net_cls(
-            accelerator=accelerator,
-            callbacks__print_log__sink=accelerator.print,
-        )
+        net = net_cls(accelerator=accelerator)
         X, y = data
         net.fit(X, y)  # does not raise
 
@@ -781,6 +779,36 @@ def test_device_placement(self, net_cls, accelerator_cls, data):
         with pytest.raises(ValueError, match=msg):
             net.fit(*data)
 
+    def test_print_log_sink_auto_uses_accelerator_print(self, net_cls, accelerator_cls):
+        # the net defaults to using the accelerator's print function
+        accelerator = accelerator_cls()
+        net = net_cls(accelerator=accelerator)
+        net.initialize()
+        print_log = dict(net.callbacks_)['print_log']
+        assert print_log.sink == accelerator.print
+
+    def test_print_log_sink_can_be_overwritten(self, net_cls, accelerator_cls):
+        # users can still set their own sinks for print log
+        accelerator = accelerator_cls()
+        net = net_cls(accelerator=accelerator, callbacks__print_log__sink=123)
+        net.initialize()
+        print_log = dict(net.callbacks_)['print_log']
+        assert print_log.sink == 123
+
+    def test_print_log_sink_uses_print_if_accelerator_has_no_print(
+            self, net_cls, accelerator_cls
+    ):
+        # we should not depend on the accelerator having a print function
+
+        # we need to use Mock here because Accelerator does not allow attr
+        # deletion
+        accelerator = Mock(spec=accelerator_cls())
+        delattr(accelerator, 'print')
+        net = net_cls(accelerator=accelerator)
+        net.initialize()
+        print_log = dict(net.callbacks_)['print_log']
+        assert print_log.sink is print
+
     def test_all_components_prepared(self, module_cls, data):
         # We cannot test whether accelerate is really performing its job.
         # Instead, we test that all modules and optimizers, even custom
@@ -858,7 +886,6 @@ def train_step_single(self, *args, **kwargs):
             device=None,
             accelerator=accelerator,
             max_epochs=2,
-            callbacks__print_log__sink=accelerator.print,
         )
         X, y = data
         # does not raise
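
The last new test relies on a Mock detail worth spelling out: a real Accelerator does not allow attribute deletion, but a spec'd Mock does, and afterwards the attribute access raises AttributeError, so getattr falls back to the built-in print. A self-contained illustration, with FakeAccelerator as an invented stand-in:

from unittest.mock import Mock

class FakeAccelerator:
    def print(self, *args, **kwargs):
        pass

accelerator = Mock(spec=FakeAccelerator())
delattr(accelerator, 'print')  # Mock records the deletion
assert getattr(accelerator, 'print', print) is print  # falls back to built-in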
