From db71e37e0dd3cee47078357f00c2a0d64af3db01 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 22 Nov 2022 05:59:21 -0800 Subject: [PATCH 1/4] Use linux_job for linux workers Test Testing Test testing Tetsing testing Change linux binary action test Simplify version check --- .github/workflows/validate-linux-binaries.yml | 142 ++++++------------ test/smoke_test/smoke_test.py | 45 +----- 2 files changed, 56 insertions(+), 131 deletions(-) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 082497b0f..d431e9d74 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -7,6 +7,11 @@ on: description: "Channel to use (nightly, test, release, all)" required: true type: string + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string workflow_dispatch: inputs: channel: @@ -18,108 +23,57 @@ on: - nightly - test - all + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string jobs: - generate-linux-conda-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: conda - os: linux - channel: ${{ inputs.channel }} - generate-linux-wheel-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux - channel: ${{ inputs.channel }} - with-py311: enable - generate-linux-libtorch-matrix: + generate-linux-matrix: uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main with: - package-type: libtorch + package-type: all os: linux channel: ${{ inputs.channel }} - linux-conda: - needs: generate-linux-conda-matrix + linux: + needs: generate-linux-matrix strategy: - matrix: - ${{ fromJson(needs.generate-linux-conda-matrix.outputs.matrix) }} + matrix: ${{ fromJson(needs.generate-linux-matrix.outputs.matrix) }} fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Validate binary conda - uses: pytorch/builder/.github/actions/validate-binary@main - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - python_version: ${{ matrix.python_version }} - desired_cuda: ${{ matrix.desired_cuda }} - dev_toolset: '' - package_type: conda - target_os: linux - - linux-wheel: - needs: generate-linux-wheel-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-linux-wheel-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - steps: - - name: Checkout PyTorch builder - uses: actions/checkout@v2 - - name: Validate binary wheel - uses: ./.github/actions/validate-binary - with: - gpu_arch_type: ${{ matrix.gpu_arch_type }} - gpu_arch_ver: ${{ matrix.gpu_arch_version }} - installation: ${{ matrix.installation }} - installation_pypi: ${{ matrix.installation_pypi }} - python_version: ${{ matrix.python_version }} - desired_cuda: ${{ matrix.desired_cuda }} - dev_toolset: '' - package_type: wheel - target_os: linux + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + name: ${{ matrix.build_name }} + with: + runner: ${{ matrix.validation_runner }} + repository: "pytorch/builder" + ref: ${{ inputs.ref || github.ref }} + job-name: ${{ matrix.build_name }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export GPU_ARCH_VER="${{ matrix.gpu_arch_version }}" + export GPU_ARCH_TYPE="${{ matrix.gpu_arch_type }}" + export INSTALLATION="${{ matrix.installation }}" + export CUDA_VER="${{ matrix.desired_cuda }}" + export DESIRED_PYTHON="${{ matrix.python_version }}" + export DESIRED_CUDA="${{ matrix.desired_cuda }}" + export DESIRED_DEVTOOLSET="${{ matrix.dev_toolset }}" + export PACKAGE_TYPE="${{ matrix.package_type }}" + export TARGET_OS="linux" - linux-libt: - needs: generate-linux-libtorch-matrix - strategy: - matrix: - ${{ fromJson(needs.generate-linux-libtorch-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.validation_runner }} - env: - PYTHON_VERSION: ${{ matrix.python_version }} - steps: - - name: Checkout PyTorch builder - uses: actions/checkout@v2 - - name: Install Conda - uses: conda-incubator/setup-miniconda@v2 - with: - python-version: ${{ matrix.python_version }} - auto-update-conda: true - miniconda-version: "latest" - activate-environment: testenv - - name: Install pytorch and smoke test - shell: bash - env: - INSTALLATION: ${{ matrix.installation }} - ENV_NAME: conda-env-${{ github.run_id }} - DESIRED_PYTHON: ${{ matrix.python_version }} - DESIRED_CUDA: ${{ matrix.desired_cuda }} - DESIRED_DEVTOOLSET: ${{ matrix.devtoolset }} - PACKAGE_TYPE: libtorch - run: | - sudo apt-get install unzip -y - set -ex - curl ${INSTALLATION} -o libtorch.zip + if [[ ${{ matrix.package_type }} == "libtorch" ]]; then + curl ${{ matrix.installation }} -o libtorch.zip unzip libtorch.zip - conda create -yp ${ENV_NAME} python=${{ matrix.python_version }} numpy - export LD_LIBRARY_PATH="$(dirname $(which python))/lib" - export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib - if [[ ${{ matrix.libtorch_variant }} == "static-with-deps" ]]; then - conda run -p ${ENV_NAME} --cwd libtorch env LD_LIBRARY_PATH=${LD_LIBRARY_PATH} bash ./../check_binary.sh - fi - conda env remove -p ${ENV_NAME} + else + conda create -y -n ${ENV_NAME} python=${{ matrix.python_version }} numpy pillow + conda activate ${ENV_NAME} + eval $INSTALLATION + export CONDA_LIBRARY_PATH="$(dirname $(which python))/../lib" + export LD_LIBRARY_PATH=$CONDA_LIBRARY_PATH:$LD_LIBRARY_PATH + + python --version + python ./test/smoke_test/smoke_test.py + + ${PWD}/check_binary.sh + fi diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 2c80aad04..bec9e86ac 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -14,26 +14,6 @@ SCRIPT_DIR = Path(__file__).parent NIGHTLY_ALLOWED_DELTA = 3 -# helper function to return the conda installed packages -# and return package we are insterseted in -def get_anaconda_output_for_package(pkg_name_str): - import subprocess as sp - - # If we are installing using conda just list package name - if installation_str.find("conda install") != -1: - cmd = "conda list --explicit" - output = sp.getoutput(cmd) - for item in output.split("\n"): - if pkg_name_str in item: - return item - return f"{pkg_name_str} can't be found" - else: - cmd = "conda list -f " + pkg_name_str - output = sp.getoutput(cmd) - # Get the last line only - return output.strip().split('\n')[-1] - - def check_nightly_binaries_date(package: str) -> None: from datetime import datetime, timedelta format_dt = '%Y%m%d' @@ -79,23 +59,14 @@ def smoke_test_cuda(package: str) -> None: if(package == 'all'): import torchaudio import torchvision - # There is an issue with current windows runners calling conda from python - # https://github.com/pytorch/test-infra/issues/1054 - if installation_str.find("nightly") != -1 or platform.system() == "Windows" : - # just print out cuda version, as version check were already performed during import - print(f"torchvision cuda: {torch.ops.torchvision._cuda_version()}") - print(f"torchaudio cuda: {torch.ops.torchaudio.cuda_version()}") - else: - # torchaudio runtime added the cuda verison check on 09/23/2022 via - # https://github.com/pytorch/audio/pull/2707 - # so relying on anaconda output for pytorch-test and pytorch channel - torchaudio_allstr = get_anaconda_output_for_package(torchaudio.__name__) - if ( - is_cuda_system - and "cu" + str(gpu_arch_ver).replace(".", "") not in torchaudio_allstr - ): - raise RuntimeError( - f"CUDA version issue. Loaded: {torchaudio_allstr} Expected: {gpu_arch_ver}" + + print(f"torchvision cuda: {torch.ops.torchvision._cuda_version()}") + print(f"torchaudio cuda: {torch.ops.torchaudio.cuda_version()}") + if gpu_arch_ver != torch.ops.torchvision._cuda_version() or + gpu_arch_ver != torch.ops.torchaudio.cuda_version(): + raise RuntimeError( + f"Wrong CUDA version. Vision: {torch.ops.torchvision._cuda_version()} \ + Audio: {ttorch.ops.torchaudio.cuda_version()} Expected: {gpu_arch_ver}" ) def smoke_test_conv2d() -> None: From f86d58bedf027176caa26c7c77bbaf82118d1912 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 22 Nov 2022 10:32:12 -0800 Subject: [PATCH 2/4] Fix if statement --- test/smoke_test/smoke_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index bec9e86ac..a509ff03a 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -62,8 +62,10 @@ def smoke_test_cuda(package: str) -> None: print(f"torchvision cuda: {torch.ops.torchvision._cuda_version()}") print(f"torchaudio cuda: {torch.ops.torchaudio.cuda_version()}") - if gpu_arch_ver != torch.ops.torchvision._cuda_version() or + if ( + gpu_arch_ver != torch.ops.torchvision._cuda_version() or gpu_arch_ver != torch.ops.torchaudio.cuda_version(): + ) raise RuntimeError( f"Wrong CUDA version. Vision: {torch.ops.torchvision._cuda_version()} \ Audio: {ttorch.ops.torchaudio.cuda_version()} Expected: {gpu_arch_ver}" From 0b8c2f377e9c1cd32feff4ecd25fbd2a2a6d5d3b Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 22 Nov 2022 10:39:40 -0800 Subject: [PATCH 3/4] Fix typo --- test/smoke_test/smoke_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index a509ff03a..826fb40cd 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -64,8 +64,8 @@ def smoke_test_cuda(package: str) -> None: print(f"torchaudio cuda: {torch.ops.torchaudio.cuda_version()}") if ( gpu_arch_ver != torch.ops.torchvision._cuda_version() or - gpu_arch_ver != torch.ops.torchaudio.cuda_version(): - ) + gpu_arch_ver != torch.ops.torchaudio.cuda_version() + ): raise RuntimeError( f"Wrong CUDA version. Vision: {torch.ops.torchvision._cuda_version()} \ Audio: {ttorch.ops.torchaudio.cuda_version()} Expected: {gpu_arch_ver}" From 8cae5af62897afbb4dbdb07c8b885b019aa1366c Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 22 Nov 2022 10:43:59 -0800 Subject: [PATCH 4/4] Fix cuda version check Fix Audio and Vision version check Add check binary to libtorch test test testing testing testing Testing Testing testing --- .github/workflows/validate-linux-binaries.yml | 13 ++++------ test/smoke_test/smoke_test.py | 26 ++++++++++++------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index d431e9d74..f3c1a8264 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -58,22 +58,19 @@ jobs: export CUDA_VER="${{ matrix.desired_cuda }}" export DESIRED_PYTHON="${{ matrix.python_version }}" export DESIRED_CUDA="${{ matrix.desired_cuda }}" - export DESIRED_DEVTOOLSET="${{ matrix.dev_toolset }}" + export DESIRED_DEVTOOLSET="${{ matrix.devtoolset }}" export PACKAGE_TYPE="${{ matrix.package_type }}" export TARGET_OS="linux" + conda create -y -n ${ENV_NAME} python=${{ matrix.python_version }} numpy pillow + conda activate ${ENV_NAME} + export CONDA_LIBRARY_PATH="$(dirname $(which python))/../lib" + export LD_LIBRARY_PATH=$CONDA_LIBRARY_PATH:$LD_LIBRARY_PATH if [[ ${{ matrix.package_type }} == "libtorch" ]]; then curl ${{ matrix.installation }} -o libtorch.zip unzip libtorch.zip else - conda create -y -n ${ENV_NAME} python=${{ matrix.python_version }} numpy pillow - conda activate ${ENV_NAME} eval $INSTALLATION - export CONDA_LIBRARY_PATH="$(dirname $(which python))/../lib" - export LD_LIBRARY_PATH=$CONDA_LIBRARY_PATH:$LD_LIBRARY_PATH - - python --version python ./test/smoke_test/smoke_test.py - ${PWD}/check_binary.sh fi diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 826fb40cd..a8de58bd6 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -42,6 +42,18 @@ def check_nightly_binaries_date(package: str) -> None: f"Expected torchaudio, torchvision to be less then {NIGHTLY_ALLOWED_DELTA} days. But they are from {date_ta_str}, {date_tv_str} respectively" ) +def check_cuda_version(version: str, dlibary: str): + version = torch.ops.torchaudio.cuda_version() + if version is not None and torch.version.cuda is not None: + version_str = str(version) + ta_version = f"{version_str[:-3]}.{version_str[-2]}" + t_version = torch.version.cuda.split(".") + t_version = f"{t_version[0]}.{t_version[1]}" + if ta_version != t_version: + raise RuntimeError( + "Detected that PyTorch and {dlibary} were compiled with different CUDA versions. " + f"PyTorch has CUDA version {t_version} whereas {dlibary} has CUDA version {ta_version}. " + ) def smoke_test_cuda(package: str) -> None: if not torch.cuda.is_available() and is_cuda_system: @@ -56,20 +68,14 @@ def smoke_test_cuda(package: str) -> None: print(f"torch cudnn: {torch.backends.cudnn.version()}") print(f"cuDNN enabled? {torch.backends.cudnn.enabled}") - if(package == 'all'): + if(package == 'all' and is_cuda_system): import torchaudio import torchvision - print(f"torchvision cuda: {torch.ops.torchvision._cuda_version()}") print(f"torchaudio cuda: {torch.ops.torchaudio.cuda_version()}") - if ( - gpu_arch_ver != torch.ops.torchvision._cuda_version() or - gpu_arch_ver != torch.ops.torchaudio.cuda_version() - ): - raise RuntimeError( - f"Wrong CUDA version. Vision: {torch.ops.torchvision._cuda_version()} \ - Audio: {ttorch.ops.torchaudio.cuda_version()} Expected: {gpu_arch_ver}" - ) + check_cuda_version(torch.ops.torchvision._cuda_version(), "TorchVision") + check_cuda_version(torch.ops.torchaudio.cuda_version(), "TorchAudio") + def smoke_test_conv2d() -> None: import torch.nn as nn