Add logger to Weights & Biases #607

Merged
merged 14 commits on Apr 5, 2020
1 change: 1 addition & 0 deletions CHANGES.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Added `NeptuneLogger` callback for logging experiment metadata to neptune.ai
- Added `DataFrameTransformer`, an sklearn-compatible transformer that helps working with pandas DataFrames by transforming the DataFrame into a representation that works well with neural networks (#507)
- Added `WandbLogger` callback for logging to Weights & Biases

### Changed

1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -14,3 +14,4 @@ pytest-cov
sphinx
sphinx_rtd_theme
tensorboard>=1.14.0
wandb>=0.8.30
1 change: 1 addition & 0 deletions skorch/callbacks/__init__.py
@@ -33,5 +33,6 @@
    'TrainEndCheckpoint',
    'TensorBoard',
    'Unfreezer',
    'WandbLogger',
    'WarmRestartLR',
]
86 changes: 85 additions & 1 deletion skorch/callbacks/logging.py
@@ -5,6 +5,7 @@
from contextlib import suppress
from numbers import Number
from itertools import cycle
from pathlib import Path

import numpy as np
import tqdm
@@ -14,7 +15,8 @@
from skorch.dataset import get_len
from skorch.callbacks import Callback

__all__ = ['EpochTimer', 'NeptuneLogger', 'PrintLog', 'ProgressBar', 'TensorBoard']
__all__ = ['EpochTimer', 'NeptuneLogger', 'WandbLogger', 'PrintLog', 'ProgressBar',
           'TensorBoard']


def filter_log_keys(keys, keys_ignored=None):
@@ -205,6 +207,88 @@ def on_train_end(self, net, **kwargs):
        if self.close_after_train:
            self.experiment.stop()

class WandbLogger(Callback):
    """Logs best model and metrics to `Weights & Biases <https://docs.wandb.com/>`_

    Use this callback to automatically log the best trained model, all metrics
    from your net's history, the model topology, and compute resources to
    Weights & Biases after each epoch.

    Every file saved in `wandb_run.dir` is automatically logged to W&B servers.

    See `example run
    <https://app.wandb.ai/borisd13/skorch/runs/s20or4ct/overview?workspace=user-borisd13>`_

    Examples
    --------
    >>> # Install wandb
    ... pip install wandb

    >>> import wandb
    >>> from skorch.callbacks import WandbLogger

    >>> # Create a wandb Run
    ... wandb_run = wandb.init()
    >>> # Alternative: Create a wandb Run without having a W&B account
    ... wandb_run = wandb.init(anonymous="allow")

    >>> # Log hyper-parameters (optional)
    ... wandb_run.config.update({"learning rate": 1e-3, "batch size": 32})

    >>> net = NeuralNet(..., callbacks=[WandbLogger(wandb_run)])
    >>> net.fit(X, y)

    Parameters
    ----------
    wandb_run : wandb.wandb_run.Run
        wandb Run used to log data.

    save_model : bool (default=True)
        Whether to save a checkpoint of the best model and upload it
        to your Run on W&B servers.

    keys_ignored : str or list of str (default=None)
        Key or list of keys that should not be logged to
        W&B. Note that in addition to the keys provided by the
        user, keys such as those starting with 'event_' or ending on
        '_best' are ignored by default.
    """

    def __init__(
            self,
            wandb_run,
            save_model=True,
Collaborator

We already provide a checkpoint callback; I think this functionality is redundant.

Contributor Author

This is to log the trained model to W&B.

Collaborator

Interesting. How does that work? In this code, I don't see any interaction with W&B:

        # save best model
        if self.save_model and hist['valid_loss_best']:
            model_path = Path(self.wandb_run.dir) / 'best_model.pth'
            with model_path.open('wb') as model_file:
                net.save_params(f_params=model_file)

Is this some code working in the background, or is it simply the fact that the model parameters are stored in `wandb_run.dir`?

Contributor Author

All files stored in `wandb_run.dir` are automatically saved. You can see this in my example run, on the "Files" tab.

Member

Please leave a comment stating that the files in `wandb_run.dir` are automatically saved in `on_epoch_end`.
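
For context, here is a minimal, self-contained sketch of the behavior discussed in this thread, assuming `wandb`, `skorch`, `torch`, and `scikit-learn` are installed; the toy module and data below are placeholders, not part of this PR. Anything written under `wandb_run.dir` is picked up by wandb's background sync, so saving a checkpoint into that directory is enough for it to show up on the run's "Files" tab:

# Sketch only: wandb syncs files placed under wandb_run.dir in the background,
# so no explicit upload call is needed after net.save_params().
import os
from pathlib import Path

import numpy as np
import torch
import wandb
from sklearn.datasets import make_classification
from skorch import NeuralNetClassifier

os.environ['WANDB_MODE'] = 'dryrun'  # run offline, as in the tests below
wandb_run = wandb.init(anonymous="allow")

# toy data and module, stand-ins for a real training setup
X, y = make_classification(n_samples=100, n_features=20, random_state=0)
X, y = X.astype(np.float32), y.astype(np.int64)
module = torch.nn.Sequential(
    torch.nn.Linear(20, 10), torch.nn.ReLU(), torch.nn.Linear(10, 2))

net = NeuralNetClassifier(module, criterion=torch.nn.CrossEntropyLoss,
                          max_epochs=2)
net.fit(X, y)

# writing the checkpoint into wandb_run.dir is the only step required;
# the file is then uploaded (or staged, in dryrun mode) automatically
model_path = Path(wandb_run.dir) / 'best_model.pth'
with model_path.open('wb') as model_file:
    net.save_params(f_params=model_file)

# the checkpoint can later be restored with net.load_params
net.load_params(f_params=str(model_path))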

            keys_ignored=None,
    ):
        self.wandb_run = wandb_run
        self.save_model = save_model
        self.keys_ignored = keys_ignored

    def initialize(self):
        keys_ignored = self.keys_ignored
        if isinstance(keys_ignored, str):
            keys_ignored = [keys_ignored]
        self.keys_ignored_ = set(keys_ignored or [])
        self.keys_ignored_.add('batches')
        return self

    def on_train_begin(self, net, **kwargs):
        """Log model topology and add a hook for gradients"""
        self.wandb_run.watch(net.module_)

    def on_epoch_end(self, net, **kwargs):
        """Log values from the last history step and save best model"""
        hist = net.history[-1]
        keys_kept = filter_log_keys(hist, keys_ignored=self.keys_ignored_)
        logged_vals = {k: hist[k] for k in keys_kept}
        self.wandb_run.log(logged_vals)

        # save best model; every file written to wandb_run.dir is
        # automatically uploaded to W&B servers
        if self.save_model and hist['valid_loss_best']:
            model_path = Path(self.wandb_run.dir) / 'best_model.pth'
            with model_path.open('wb') as model_file:
                net.save_params(f_params=model_file)


class PrintLog(Callback):
"""Print useful information from the model's history as a table.
81 changes: 81 additions & 0 deletions skorch/tests/callbacks/test_logging.py
@@ -11,6 +11,7 @@
from torch import nn

from skorch.tests.conftest import neptune_installed
from skorch.tests.conftest import wandb_installed
from skorch.tests.conftest import tensorboard_installed


@@ -190,6 +191,86 @@ def test_first_batch_flag(
        npt.on_batch_end(net)
        assert npt.first_batch_ is False

@pytest.mark.skipif(
    not wandb_installed, reason='wandb is not installed')
class TestWandb:
    @pytest.fixture
    def net_cls(self):
        from skorch import NeuralNetClassifier
        return NeuralNetClassifier

    @pytest.fixture
    def data(self, classifier_data):
        X, y = classifier_data
        # accelerate training since we don't care for the loss
        X, y = X[:40], y[:40]
        return X, y

    @pytest.fixture
    def wandb_logger_cls(self):
        from skorch.callbacks import WandbLogger
        return WandbLogger

    @pytest.fixture
    def wandb_run_cls(self):
        import wandb
        os.environ['WANDB_MODE'] = 'dryrun'  # run offline
        with wandb.init(anonymous="allow") as run:
            return run

    @pytest.fixture
    def mock_run(self):
        mock = Mock()
        mock.log = Mock()
        mock.watch = Mock()
        mock.dir = '.'
        return mock

    def test_ignore_keys(
            self,
            net_cls,
            classifier_module,
            data,
            wandb_logger_cls,
            mock_run,
    ):
        # ignore 'dur' and 'valid_loss', 'unknown' doesn't exist but
        # this should not cause a problem
        wandb_cb = wandb_logger_cls(
            mock_run, keys_ignored=['dur', 'valid_loss', 'unknown'])
        net_cls(
            classifier_module,
            callbacks=[wandb_cb],
            max_epochs=3,
        ).fit(*data)

        # 3 epochs = 3 calls
        assert mock_run.log.call_count == 3
        assert mock_run.watch.call_count == 1
        logged_keys = {
            key for call in mock_run.log.call_args_list for key in call[0][0]}
        assert 'valid_loss' not in logged_keys
        assert 'dur' not in logged_keys

    def test_keys_ignored_is_string(self, wandb_logger_cls, mock_run):
        wandb_cb = wandb_logger_cls(
            mock_run, keys_ignored='a-key').initialize()
        expected = {'a-key', 'batches'}
        assert wandb_cb.keys_ignored_ == expected

    def test_fit_with_real_experiment(
            self,
            net_cls,
            classifier_module,
            data,
            wandb_logger_cls,
            wandb_run_cls,
    ):
        net = net_cls(
            classifier_module,
            callbacks=[wandb_logger_cls(wandb_run_cls)],
            max_epochs=5,
        )
        net.fit(*data)

class TestPrintLog:
    @pytest.fixture
    def print_log_cls(self):
9 changes: 9 additions & 0 deletions skorch/tests/conftest.py
@@ -142,6 +142,15 @@ def data():
except ImportError:
    pass

wandb_installed = False
try:
    # pylint: disable=unused-import
    import wandb

    wandb_installed = True
except ImportError:
    pass

pandas_installed = False
try:
    # pylint: disable=unused-import