From 02e7d96d5495d06e7eb250581820b0b2ce2366a8 Mon Sep 17 00:00:00 2001 From: atalman Date: Mon, 19 Dec 2022 15:31:11 -0800 Subject: [PATCH] Revert "Adds infra to use nvidia dependencies from pypi and cleans up patches (#1196)" This reverts commit ee59264c887b08514463894460e3803892ae5c3c. --- common/install_cuda.sh | 24 +------- manywheel/build_cuda.sh | 86 +++++++++++++++++++++------- release/pypi/prep_binary_for_pypi.sh | 14 +++++ 3 files changed, 80 insertions(+), 44 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index c9b9c9d1a..de64d43df 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -24,7 +24,7 @@ function install_116 { } function install_117 { - echo "Installing CUDA 11.7 and CuDNN 8.5 and NCCL 2.14" + echo "Installing CUDA 11.7 and CuDNN 8.5" rm -rf /usr/local/cuda-11.7 /usr/local/cuda # install CUDA 11.7.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run @@ -42,20 +42,10 @@ function install_117 { cd .. rm -rf tmp_cudnn ldconfig - - # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses - mkdir tmp_nccl && cd tmp_nccl - wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.14/nccl_2.14.3-1+cuda11.7_x86_64.txz - tar xf nccl_2.14.3-1+cuda11.7_x86_64.txz - cp -a nccl_2.14.3-1+cuda11.7_x86_64/include/* /usr/local/cuda/include/ - cp -a nccl_2.14.3-1+cuda11.7_x86_64/lib/* /usr/local/cuda/lib64/ - cd .. - rm -rf tmp_nccl - ldconfig } function install_118 { - echo "Installing CUDA 11.8 and cuDNN 8.5 and NCCL 2.15" + echo "Installing CUDA 11.8 and cuDNN 8.5" rm -rf /usr/local/cuda-11.8 /usr/local/cuda # install CUDA 11.8.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run @@ -73,16 +63,6 @@ function install_118 { cd .. rm -rf tmp_cudnn ldconfig - - # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses - mkdir tmp_nccl && cd tmp_nccl - wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz - tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz - cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/ - cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/ - cd .. 
- rm -rf tmp_nccl - ldconfig } function prune_116 { diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index d389b066c..2498f7a22 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -142,14 +142,22 @@ DEPS_SONAME=( "libcublasLt.so.11" "libgomp.so.1" ) -elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then +elif [[ $CUDA_VERSION == "11.7" ]]; then export USE_STATIC_CUDNN=0 # Try parallelizing nvcc as well export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" DEPS_LIST=( + "/usr/local/cuda/lib64/libcudart.so.11.0" + "/usr/local/cuda/lib64/libnvToolsExt.so.1" + "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50 + "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" "$LIBGOMP_PATH" ) DEPS_SONAME=( + "libcudart.so.11.0" + "libnvToolsExt.so.1" + "libnvrtc.so.11.2" + "libnvrtc-builtins.so.11.7" "libgomp.so.1" ) @@ -165,10 +173,6 @@ elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then "/usr/local/cuda/lib64/libcudnn.so.8" "/usr/local/cuda/lib64/libcublas.so.11" "/usr/local/cuda/lib64/libcublasLt.so.11" - "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50 - "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" - "/usr/local/cuda/lib64/libcudart.so.11.0" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" ) DEPS_SONAME+=( "libcudnn_adv_infer.so.8" @@ -182,31 +186,69 @@ elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then "libcublasLt.so.11" ) else - echo "Using nvidia libs from pypi." + echo "Using cudnn and cublas from pypi." + CUDA_RPATHS=( + '$ORIGIN/../../nvidia/cublas/lib' + '$ORIGIN/../../nvidia/cudnn/lib' + ) + CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") + export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' + export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' + export FORCE_RPATH="--force-rpath" + fi +elif [[ $CUDA_VERSION == "11.8" ]]; then + export USE_STATIC_CUDNN=0 + # Try parallelizing nvcc as well + export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" + DEPS_LIST=( + "/usr/local/cuda/lib64/libcudart.so.11.0" + "/usr/local/cuda/lib64/libnvToolsExt.so.1" + "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.8, it links to 11.8.89 + "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8" + "$LIBGOMP_PATH" + ) + DEPS_SONAME=( + "libcudart.so.11.0" + "libnvToolsExt.so.1" + "libnvrtc.so.11.2" + "libnvrtc-builtins.so.11.8" + "libgomp.so.1" + ) + + if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then + echo "Bundling with cudnn and cublas." + DEPS_LIST+=( + "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" + "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" + "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" + "/usr/local/cuda/lib64/libcudnn.so.8" + "/usr/local/cuda/lib64/libcublas.so.11" + "/usr/local/cuda/lib64/libcublasLt.so.11" + ) + DEPS_SONAME+=( + "libcudnn_adv_infer.so.8" + "libcudnn_adv_train.so.8" + "libcudnn_cnn_infer.so.8" + "libcudnn_cnn_train.so.8" + "libcudnn_ops_infer.so.8" + "libcudnn_ops_train.so.8" + "libcudnn.so.8" + "libcublas.so.11" + "libcublasLt.so.11" + ) + else + echo "Using cudnn and cublas from pypi." 
CUDA_RPATHS=( '$ORIGIN/../../nvidia/cublas/lib' - '$ORIGIN/../../nvidia/cuda_cupti/lib' - '$ORIGIN/../../nvidia/cuda_nvrtc/lib' - '$ORIGIN/../../nvidia/cuda_runtime/lib' '$ORIGIN/../../nvidia/cudnn/lib' - '$ORIGIN/../../nvidia/cufft/lib' - '$ORIGIN/../../nvidia/curand/lib' - '$ORIGIN/../../nvidia/cusolver/lib' - '$ORIGIN/../../nvidia/cusparse/lib' - '$ORIGIN/../../nvidia/nccl/lib' - '$ORIGIN/../../nvidia/nvtx/lib' ) CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' export FORCE_RPATH="--force-rpath" - export USE_STATIC_NCCL=0 - export USE_SYSTEM_NCCL=1 - export ATEN_STATIC_CUDA=0 - export USE_CUDA_STATIC_LINK=0 - export USE_CUPTI_SO=1 - export NCCL_INCLUDE_DIR="/usr/local/cuda/include/" - export NCCL_LIB_DIR="/usr/local/cuda/lib64/" fi else echo "Unknown cuda version $CUDA_VERSION" diff --git a/release/pypi/prep_binary_for_pypi.sh b/release/pypi/prep_binary_for_pypi.sh index fdd9bf4a0..e3b2b28c6 100755 --- a/release/pypi/prep_binary_for_pypi.sh +++ b/release/pypi/prep_binary_for_pypi.sh @@ -56,8 +56,22 @@ for whl_file in "$@"; do if [[ $whl_file == *"with.pypi.cudnn"* ]]; then rm -rf "${whl_dir}/caffe2" rm -rf "${whl_dir}"/torch/lib/libnvrtc* + sed -i -e "s/Requires-Dist: nvidia-cuda-runtime-cu11/Requires-Dist: nvidia-cuda-runtime-cu11 (==11.7.99)/" "${whl_dir}"/*/METADATA + sed -i -e "/^Requires-Dist: nvidia-cublas-cu11 (==11.10.3.66).*/a Requires-Dist: nvidia-cuda-nvrtc-cu11 (==11.7.99) ; platform_system == \"Linux\"" "${whl_dir}"/*/METADATA sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py" + find "${whl_dir}/torch/" -maxdepth 1 -type f -name "*.so*" | while read sofile; do + patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN:$ORIGIN/lib' \ + --force-rpath $sofile + patchelf --print-rpath $sofile + done + + find "${whl_dir}/torch/lib" -maxdepth 1 -type f -name "*.so*" | while read sofile; do + patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN' \ + --force-rpath $sofile + patchelf --print-rpath $sofile + done + patchelf --replace-needed libnvrtc-d833c4f3.so.11.2 libnvrtc.so.11.2 "${whl_dir}/torch/lib/libcaffe2_nvrtc.so" fi find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \;
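
Note: the restored prep_binary_for_pypi.sh hunk makes a "with.pypi.cudnn" wheel resolve cuBLAS, cuDNN and NVRTC from the nvidia-* PyPI packages by rewriting each shared object's RPATH to $ORIGIN-relative paths under site-packages/nvidia/. A minimal spot-check of an already-prepped wheel is sketched below; it is illustrative only, the wheel filename is a placeholder, and it assumes unzip and patchelf are on PATH.

    # Sketch: unpack a prepped "with.pypi.cudnn" wheel and print the RPATHs
    # that prep_binary_for_pypi.sh wrote into the torch shared objects.
    whl="torch-x.y.z+cu117.with.pypi.cudnn-cp310-cp310-linux_x86_64.whl"  # placeholder name
    scratch=$(mktemp -d)
    unzip -q "$whl" -d "$scratch"
    find "$scratch/torch" -type f -name "*.so*" | while read -r sofile; do
      echo "$(basename "$sofile"): $(patchelf --print-rpath "$sofile")"
    done
    rm -rf "$scratch"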