Commit aaa5403

Merge pull request #257 from cpnota/release/0.7.2
Release/0.7.2
2 parents 9c84581 + bb4fc1e commit aaa5403

File tree

6 files changed: +38 -19 lines changed


.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ jobs:
       run: |
         sudo apt-get install swig
         sudo apt-get install unrar
-        pip install torch==1.8.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torch==1.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
         make install
         AutoROM -v
         python -m atari_py.import_roms $(python -c 'import site; print(site.getsitepackages()[0])')/multi_agent_ale_py/ROM

all/approximation/approximation.py

Lines changed: 14 additions & 6 deletions
@@ -103,7 +103,7 @@ def eval(self, *inputs):
         with torch.no_grad():
             # check current mode
             mode = self.model.training
-            # switch to eval mode
+            # switch model to eval mode
             self.model.eval()
             # run forward pass
             result = self.model(*inputs)
@@ -144,14 +144,11 @@ def step(self):
         Returns:
             self: The current Approximation object
         '''
-        if self._clip_grad != 0:
-            utils.clip_grad_norm_(self.model.parameters(), self._clip_grad)
+        self._clip_grad_norm()
         self._optimizer.step()
         self._optimizer.zero_grad()
+        self._step_lr_scheduler()
         self._target.update()
-        if self._scheduler:
-            self._writer.add_schedule(self._name + '/lr', self._optimizer.param_groups[0]['lr'])
-            self._scheduler.step()
         self._checkpointer()
         return self

@@ -164,3 +161,14 @@ def zero_grad(self):
         '''
         self._optimizer.zero_grad()
         return self
+
+    def _clip_grad_norm(self):
+        '''Clip the gradient norm if set. Raises RuntimeError if the norm is non-finite.'''
+        if self._clip_grad != 0:
+            utils.clip_grad_norm_(self.model.parameters(), self._clip_grad, error_if_nonfinite=True)
+
+    def _step_lr_scheduler(self):
+        '''Step the learning rate scheduler, if one is set, and log the current learning rate.'''
+        if self._scheduler:
+            self._writer.add_schedule(self._name + '/lr', self._optimizer.param_groups[0]['lr'])
+            self._scheduler.step()
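The refactor above splits gradient clipping and learning-rate scheduling out of step() into the private helpers _clip_grad_norm() and _step_lr_scheduler(), and clipping now passes error_if_nonfinite=True so that a non-finite gradient norm raises a RuntimeError instead of silently corrupting the weights (that keyword requires PyTorch 1.9, consistent with the torch version bumps elsewhere in this commit). As a rough sketch of the same clip-then-step pattern in plain PyTorch, using a toy model, optimizer, and scheduler that are illustrative only and not part of the library:

# Illustrative only: a toy model and synthetic loss, not the library's Approximation class.
import torch
from torch import nn
from torch.nn import utils
from torch.optim.lr_scheduler import StepLR

model = nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
clip_grad = 0.5  # 0 disables clipping, mirroring the _clip_grad check above

for _ in range(20):
    loss = model(torch.randn(8, 4)).pow(2).mean()
    loss.backward()
    if clip_grad != 0:
        # error_if_nonfinite=True (PyTorch >= 1.9) raises on a NaN/inf gradient norm
        utils.clip_grad_norm_(model.parameters(), clip_grad, error_if_nonfinite=True)
    optimizer.step()
    optimizer.zero_grad()
    scheduler.step()  # the library also logs optimizer.param_groups[0]['lr'] at this point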

all/policies/soft_deterministic.py

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ def _log_prob(self, normal, raw):
         '''
         log_prob = normal.log_prob(raw)
         log_prob -= torch.log(1 - torch.tanh(raw).pow(2) + 1e-6)
-        log_prob /= self._tanh_scale
+        log_prob -= torch.log(self._tanh_scale)
         return log_prob.sum(-1)

     def _squash(self, x):
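This one-line change fixes the density correction for rescaled actions: dividing the log-probability by _tanh_scale has no probabilistic meaning, whereas an affine rescaling of a tanh-squashed sample by a factor of scale requires subtracting log(scale) from each dimension's log-density (the usual change-of-variables term). Below is a small sketch that checks this correction against the built-in transforms in torch.distributions; the scale and loc values are illustrative stand-ins for the policy's _tanh_scale and the center of its Box space, not values taken from the library.

# Verify the subtract-log(scale) correction against TransformedDistribution.
import torch
from torch.distributions import Normal, TransformedDistribution
from torch.distributions.transforms import AffineTransform, TanhTransform

torch.manual_seed(0)
scale = torch.tensor([2.0, 1.0])  # half-widths of a hypothetical Box([-2, -1], [2, 1])
loc = torch.zeros(2)              # center of that Box

normal = Normal(torch.zeros(2), torch.ones(2))
raw = normal.sample()
action = loc + scale * torch.tanh(raw)

# Manual correction, mirroring _log_prob after this change:
log_prob = normal.log_prob(raw)
log_prob -= torch.log(1 - torch.tanh(raw).pow(2) + 1e-6)
log_prob -= torch.log(scale)  # the fix: subtract log(scale) rather than divide by it
log_prob = log_prob.sum(-1)

# Reference log-density of the squashed-and-scaled action:
reference = TransformedDistribution(
    Normal(torch.zeros(2), torch.ones(2)),
    [TanhTransform(), AffineTransform(loc, scale)],
).log_prob(action).sum(-1)

print(torch.allclose(log_prob, reference, atol=1e-3))  # True, up to the 1e-6 epsilon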

all/policies/soft_deterministic_test.py

Lines changed: 18 additions & 6 deletions
@@ -56,16 +56,28 @@ def test_converge(self):
         self.assertLess(loss, 0.2)

     def test_scaling(self):
-        self.space = Box(np.array([-10, -5, 100]), np.array([10, -2, 200]))
-        self.policy = SoftDeterministicPolicy(
+        torch.manual_seed(0)
+        state = State(torch.randn(1, STATE_DIM))
+        policy1 = SoftDeterministicPolicy(
             self.model,
             self.optimizer,
-            self.space
+            Box(np.array([-1., -1., -1.]), np.array([1., 1., 1.]))
         )
+        action1, log_prob1 = policy1(state)
+
+        # reset seed and sample same thing, but with different scaling
+        torch.manual_seed(0)
         state = State(torch.randn(1, STATE_DIM))
-        action, log_prob = self.policy(state)
-        tt.assert_allclose(action, torch.tensor([[-3.09055, -4.752777, 188.98222]]))
-        tt.assert_allclose(log_prob, torch.tensor([-0.397002]), rtol=1e-4)
+        policy2 = SoftDeterministicPolicy(
+            self.model,
+            self.optimizer,
+            Box(np.array([-2., -1., -1.]), np.array([2., 1., 1.]))
+        )
+        action2, log_prob2 = policy2(state)
+
+        # check scaling was correct
+        tt.assert_allclose(action1 * torch.tensor([2, 1, 1]), action2)
+        tt.assert_allclose(log_prob1 - np.log(2), log_prob2)


 if __name__ == '__main__':

docs/source/conf.py

Lines changed: 1 addition & 2 deletions
@@ -22,8 +22,7 @@
 author = 'Chris Nota'

 # The full version, including alpha/beta/rc tags
-release = '0.7.1'
-
+release = '0.7.2'

 # -- General configuration ---------------------------------------------------

setup.py

Lines changed: 3 additions & 3 deletions
@@ -38,7 +38,7 @@

 setup(
     name="autonomous-learning-library",
-    version="0.7.1",
+    version="0.7.2",
     description=("A library for building reinforcement learning agents in Pytorch"),
     packages=find_packages(),
     url="https://github.com/cpnota/autonomous-learning-library.git",
@@ -61,8 +61,8 @@
         "gym~=0.18.0", # common environment interface
         "numpy>=1.18.0", # math library
         "matplotlib>=3.3.0", # plotting library
-        "opencv-python~=3.4.0", # used by atari wrappers
-        "torch~=1.8.0", # core deep learning library
+        "opencv-python~=3.4.0", # used by atari wrappers
+        "torch~=1.9.0", # core deep learning library
         "tensorboard>=2.3.0", # logging and visualization
         "tensorboardX>=2.1.0", # tensorboard/pytorch compatibility
         "cloudpickle>=1.2.0", # used to copy environments
