[REQ] Support PyTorch 2.x (#307)

f-dangel · web-flow · commit 726453223d40 · 2023-06-26T17:58:53.000-04:00
* [REQ] Remove upper version restrictions for `torch` and `torchvision` * [REQ] Bump python to 3.8+ * [REF] Replace `Tensor.symeig` with `torch.linalg.eigh` * [CI] Replace `python3.7` with `python3.8` * [REF] Try fixing syntax for `flake8` in `setup.cfg` * [TEST] Skip double-backward of LSTM for PyTorch2.0.1 See pytorch/pytorch#99413 * [FIX] flake8 * [TEST] Skip `jac_mat_prod` for LSTM in PyTorch2.0.1 double-backward not supported pytorch/pytorch#99413 * [CI] Use python3.8 in RTD build * [CI] Skip LSTM for PyTorch2.0.1 in DiagGGN tests * [FIX] Imports * [FIX] Turn off MKLDNN in RNN example --------- Co-authored-by: Felix Dangel <felix.dangel@vectorinstitute.ai>
diff --git a/.conda_env.yml b/.conda_env.yml
@@ -3,10 +3,7 @@ channels:
   - pytorch
   - defaults
 dependencies:
-  - pip=19.3.1
-  - python=3.7.6
+  - pip=21.2.4
+  - python=3.8.5
   - pip:
-    - -e .
-    - -e .[lint]
-    - -e .[test]
-    - -e .[docs]
+    - -e .[lint,test,docs]
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
@@ -15,10 +15,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v1
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v1
       with:
-        python-version: 3.7
+        python-version: 3.8
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -30,10 +30,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v1
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v1
       with:
-        python-version: 3.7
+        python-version: 3.8
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -45,10 +45,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v1
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v1
       with:
-        python-version: 3.7
+        python-version: 3.8
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -61,10 +61,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v1
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v1
       with:
-        python-version: 3.7
+        python-version: 3.8
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -77,10 +77,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v1
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v1
       with:
-        python-version: 3.7
+        python-version: 3.8
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -92,10 +92,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v1
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v1
       with:
-        python-version: 3.7
+        python-version: 3.8
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -107,10 +107,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v1
-      - name: Set up Python 3.7
+      - name: Set up Python 3.8
         uses: actions/setup-python@v1
         with:
-          python-version: 3.7
+          python-version: 3.8
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -15,17 +15,18 @@ jobs:
     name: "py${{ matrix.python-version }} torch${{  matrix.pytorch-version}}"
     runs-on: ubuntu-latest
     env:
-      USING_COVERAGE: '3.7,3.9'
+      USING_COVERAGE: '3.9'
 
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: [3.8, 3.9]
         pytorch-version:
           - "==1.9.1"
           - "==1.10.1"
           - "==1.11.0"
           - "==1.12.1"
           - "==1.13.1"
+          - "==2.0.1"
           - "" # latest
     steps:
     - uses: actions/checkout@v1
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -7,7 +7,7 @@ sphinx:
   configuration: docs_src/rtd/conf.py
 
 python:
-  version: 3.7
+  version: 3.8
   install:
     - method: pip
       path: .
diff --git a/README-dev.md b/README-dev.md
@@ -1,7 +1,7 @@
 # <img alt="BackPACK" src="./logo/backpack_logo_torch.svg" height="90"> BackPACK developer manual
 
 ## General standards
-- Python version: support 3.7+, use 3.7 for development
+- Python version: support 3.8+, use 3.8 for development
 - `git` [branching model](https://nvie.com/posts/a-successful-git-branching-model/)
 - Docstring style:  [Google](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html)
 - Test runner: [`pytest`](https://docs.pytest.org/en/latest/)
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 [![Travis](https://travis-ci.org/f-dangel/backpack.svg?branch=master)](https://travis-ci.org/f-dangel/backpack)
 [![Coveralls](https://coveralls.io/repos/github/f-dangel/backpack/badge.svg?branch=master)](https://coveralls.io/github/f-dangel/backpack)
-[![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370/)
+[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/release/python-380/)
 
 BackPACK is built on top of [PyTorch](https://github.com/pytorch/pytorch). It efficiently computes quantities other than the gradient.
 
diff --git a/backpack/utils/kroneckers.py b/backpack/utils/kroneckers.py
@@ -1,4 +1,5 @@
 from torch import einsum
+from torch.linalg import eigh
 
 from backpack.utils.unsqueeze import kfacmp_unsqueeze_if_missing_dim
 
@@ -101,7 +102,7 @@ def sym_mat_inv(mat, shift, truncate=1e-8):
         Computed by eigenvalue decomposition. Eigenvalues with small
         absolute values are truncated.
         """
-        eigvals, eigvecs = mat.symeig(eigenvectors=True)
+        eigvals, eigvecs = eigh(mat)
         eigvals.add_(shift)
         inv_eigvals = 1.0 / eigvals
         inv_truncate = 1.0 / truncate
diff --git a/docs_src/examples/use_cases/example_rnn.py b/docs_src/examples/use_cases/example_rnn.py
@@ -22,9 +22,12 @@
 #    Not all extensions support RNNs (yet). Please create a feature request in the
 #    repository if the extension you need is not supported.
 
+from pkg_resources import packaging
+
 # %%
 # Let's get the imports out of the way.
 from torch import (
+    _C,
     allclose,
     cat,
     device,
@@ -41,11 +44,20 @@
 from backpack.custom_module.permute import Permute
 from backpack.custom_module.reduce_tuple import ReduceTuple
 from backpack.extensions import BatchGrad, DiagGGNExact
+from backpack.utils import TORCH_VERSION
 from backpack.utils.examples import autograd_diag_ggn_exact
 
 manual_seed(0)
 DEVICE = device("cpu")  # Verification via autograd only works on CPU
 
+# %%
+#
+# .. note::
+#    Due to `#99413 <https://github.com/pytorch/pytorch/issues/99413>`_, we have to disable
+#    MKLDNN for PyTorch 2.0.1 to get the double-backward through LSTMs working.
+if TORCH_VERSION == packaging.version.parse("2.0.1"):
+    _C._set_mkldnn_enabled(False)
+
 
 # %%
 # For this demo, we will use the Tolstoi Char RNN from
diff --git a/setup.cfg b/setup.cfg
@@ -22,9 +22,9 @@ classifiers =
     Development Status :: 4 - Beta
     License :: OSI Approved :: MIT License
     Operating System :: OS Independent
-    Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
 
 [options]
 zip_safe = False
@@ -34,12 +34,12 @@ setup_requires =
   setuptools_scm
 # Dependencies of the project (semicolon/line-separated):
 install_requires =
-    torch >= 1.9.0, < 1.13.0
-    torchvision >= 0.7.0, < 1.0.0
+    torch >= 1.9.0
+    torchvision >= 0.7.0
     einops >= 0.3.0, < 1.0.0
     unfoldNd >= 0.2.0, < 1.0.0
 # Require a specific Python version, e.g. Python 2.7 or >= 3.4
-python_requires = >=3.7
+python_requires = >=3.8
 
 [options.packages.find]
 exclude = test*
@@ -96,19 +96,28 @@ use_parentheses=True
 select = B,C,E,F,P,W,B9
 max-line-length = 88
 max-complexity = 10
+
+# E501, # max-line-length, replaced by B950 (max-line-length + 10%)
+# # ignored because pytorch uses dict
+# C408, # use {} instead of dict()
+# # Not Black-compatible
+# E203, # whitespace before :
+# E231, # missing whitespace after ','
+# W291, # trailing whitespace
+# W503, # line break before binary operator
+# W504, # line break after binary operator
+# B905, # 'zip()' without an explicit 'strict=' parameter
+# B028, # No explicit stacklevel keyword argument found (warn)
 ignore =
-	# replaced by B950 (max-line-length + 10%)
-	E501, # max-line-length
-	# ignored because pytorch uses dict
-	C408, # use {} instead of dict()
-	# Not Black-compatible
-	E203, # whitespace before :
-	E231, # missing whitespace after ','
-	W291, # trailing whitespace
-	W503, # line break before binary operator
-	W504, # line break after binary operator
-  B905, # 'zip()' without an explicit 'strict=' parameter
-  B028, # No explicit stacklevel keyword argument found (warn)
+	E501,
+	C408,
+	E203,
+	E231,
+	W291,
+	W503,
+	W504,
+  B905,
+  B028,
 exclude = docs, build, .git, docs_src/rtd, docs_src/rtd_output, .eggs
 
 # Differences with pytorch
diff --git a/test/converter/test_converter.py b/test/converter/test_converter.py
@@ -5,6 +5,7 @@
 """
 from test.converter.converter_cases import CONVERTER_MODULES, ConverterModule
 from test.core.derivatives.utils import classification_targets, regression_targets
+from test.utils.skip_test import skip_torch_2_0_1_lstm
 from typing import Tuple
 
 from pytest import fixture
@@ -31,6 +32,7 @@ def model_and_input(request) -> Tuple[Module, Tensor, Module]:
     """
     manual_seed(0)
     model: ConverterModule = request.param()
+    skip_torch_2_0_1_lstm(model)
     inputs: Tensor = model.input_fn()
     loss_fn: Module = model.loss_fn()
     yield model, inputs, loss_fn
diff --git a/test/core/derivatives/derivatives_test.py b/test/core/derivatives/derivatives_test.py
@@ -6,6 +6,7 @@
 - Jacobian-matrix products with respect to layer parameters
 - Transposed Jacobian-matrix products with respect to layer parameters
 """
+
 from contextlib import nullcontext
 from test.automated_test import check_sizes, check_sizes_and_values
 from test.core.derivatives.batch_norm_settings import BATCH_NORM_SETTINGS
@@ -27,6 +28,7 @@
     skip_BCEWithLogitsLoss,
     skip_BCEWithLogitsLoss_non_binary_labels,
     skip_subsampling_conflict,
+    skip_torch_2_0_1_lstm,
 )
 from typing import List, Union
 from warnings import warn
@@ -136,6 +138,7 @@ def test_jac_mat_prod(problem: DerivativesTestProblem, V: int = 3) -> None:
         V: Number of vectorized Jacobian-vector products. Default: ``3``.
     """
     problem.set_up()
+    skip_torch_2_0_1_lstm(problem.module)
     mat = rand(V, *problem.input_shape).to(problem.device)
 
     backpack_res = BackpackDerivatives(problem).jac_mat_prod(mat)
diff --git a/test/extensions/problem.py b/test/extensions/problem.py
@@ -227,7 +227,7 @@ def collect_data(self, savefield: str) -> List[Any]:
             else:
                 if hasattr(p, savefield):
                     raise RuntimeError(
-                        f"Found non-differentiable parameter with attribute '{savefield}'."
+                        f"Found non-differentiable parameter with attribute {savefield}."
                     )
 
         return data
diff --git a/test/extensions/secondorder/diag_ggn/test_batch_diag_ggn.py b/test/extensions/secondorder/diag_ggn/test_batch_diag_ggn.py
@@ -5,7 +5,7 @@
 from test.extensions.problem import make_test_problems
 from test.extensions.secondorder.diag_ggn.diag_ggn_settings import DiagGGN_SETTINGS
 from test.utils.skip_extension_test import skip_BCEWithLogitsLoss_non_binary_labels
-from test.utils.skip_test import skip_adaptive_avg_pool3d_cuda
+from test.utils.skip_test import skip_adaptive_avg_pool3d_cuda, skip_torch_2_0_1_lstm
 
 import pytest
 
@@ -23,6 +23,7 @@ def test_diag_ggn_exact_batch(problem, request):
     """
     skip_adaptive_avg_pool3d_cuda(request)
     problem.set_up()
+    skip_torch_2_0_1_lstm(problem.model)
 
     backpack_res = BackpackExtensions(problem).diag_ggn_exact_batch()
     autograd_res = AutogradExtensions(problem).diag_ggn_exact_batch()
@@ -47,6 +48,7 @@ def test_diag_ggn_mc_batch_light(problem):
     """
     problem.set_up()
     skip_BCEWithLogitsLoss_non_binary_labels(problem)
+    skip_torch_2_0_1_lstm(problem.model)
 
     backpack_res = BackpackExtensions(problem).diag_ggn_exact_batch()
     mc_samples = 6000
@@ -70,6 +72,7 @@ def test_diag_ggn_mc_batch(problem):
     """
     problem.set_up()
     skip_BCEWithLogitsLoss_non_binary_labels(problem)
+    skip_torch_2_0_1_lstm(problem.model)
 
     backpack_res = BackpackExtensions(problem).diag_ggn_exact_batch()
     mc_samples = 300000
diff --git a/test/extensions/secondorder/diag_ggn/test_diag_ggn.py b/test/extensions/secondorder/diag_ggn/test_diag_ggn.py
@@ -5,7 +5,7 @@
 from test.extensions.problem import make_test_problems
 from test.extensions.secondorder.diag_ggn.diag_ggn_settings import DiagGGN_SETTINGS
 from test.utils.skip_extension_test import skip_BCEWithLogitsLoss_non_binary_labels
-from test.utils.skip_test import skip_adaptive_avg_pool3d_cuda
+from test.utils.skip_test import skip_adaptive_avg_pool3d_cuda, skip_torch_2_0_1_lstm
 
 import pytest
 
@@ -23,6 +23,7 @@ def test_diag_ggn(problem, request):
     """
     skip_adaptive_avg_pool3d_cuda(request)
     problem.set_up()
+    skip_torch_2_0_1_lstm(problem.model)
 
     backpack_res = BackpackExtensions(problem).diag_ggn()
     autograd_res = AutogradExtensions(problem).diag_ggn()
@@ -47,6 +48,7 @@ def test_diag_ggn_mc_light(problem):
     """
     problem.set_up()
     skip_BCEWithLogitsLoss_non_binary_labels(problem)
+    skip_torch_2_0_1_lstm(problem.model)
 
     backpack_res = BackpackExtensions(problem).diag_ggn()
     mc_samples = 3000
@@ -70,6 +72,7 @@ def test_diag_ggn_mc(problem):
     """
     problem.set_up()
     skip_BCEWithLogitsLoss_non_binary_labels(problem)
+    skip_torch_2_0_1_lstm(problem.model)
 
     backpack_res = BackpackExtensions(problem).diag_ggn()
     mc_samples = 300000
diff --git a/test/utils/skip_test.py b/test/utils/skip_test.py

Original file line number	Diff line number	Diff line change
`@@ -227,7 +227,7 @@ def collect_data(self, savefield: str) -> List[Any]:`
`227`	`227`	`else:`
`228`	`228`	`if hasattr(p, savefield):`
`229`	`229`	`raise RuntimeError(`
`230`		`- f"Found non-differentiable parameter with attribute '{savefield}'."`
	`230`	`+ f"Found non-differentiable parameter with attribute {savefield}."`
`231`	`231`	`)`
`232`	`232`
`233`	`233`	`return data`