Skip to content

Commit 02e7d96

Browse files
committed
Revert "Adds infra to use nvidia dependencies from pypi and cleans up patches (pytorch#1196)"
This reverts commit ee59264.
1 parent b0587d7 commit 02e7d96

File tree

3 files changed

+80 -44 lines changed

common/install_cuda.sh

+2 -22
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ function install_116 {
2424
}
2525

2626
function install_117 {
27-
echo "Installing CUDA 11.7 and CuDNN 8.5 and NCCL 2.14"
27+
echo "Installing CUDA 11.7 and CuDNN 8.5"
2828
rm -rf /usr/local/cuda-11.7 /usr/local/cuda
2929
# install CUDA 11.7.0 in the same container
3030
wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run
@@ -42,20 +42,10 @@ function install_117 {
4242
cd ..
4343
rm -rf tmp_cudnn
4444
ldconfig
45-
46-
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
47-
mkdir tmp_nccl && cd tmp_nccl
48-
wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.14/nccl_2.14.3-1+cuda11.7_x86_64.txz
49-
tar xf nccl_2.14.3-1+cuda11.7_x86_64.txz
50-
cp -a nccl_2.14.3-1+cuda11.7_x86_64/include/* /usr/local/cuda/include/
51-
cp -a nccl_2.14.3-1+cuda11.7_x86_64/lib/* /usr/local/cuda/lib64/
52-
cd ..
53-
rm -rf tmp_nccl
54-
ldconfig
5545
}
5646

5747
function install_118 {
58-
echo "Installing CUDA 11.8 and cuDNN 8.5 and NCCL 2.15"
48+
echo "Installing CUDA 11.8 and cuDNN 8.5"
5949
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
6050
# install CUDA 11.8.0 in the same container
6151
wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
@@ -73,16 +63,6 @@ function install_118 {
7363
cd ..
7464
rm -rf tmp_cudnn
7565
ldconfig
76-
77-
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
78-
mkdir tmp_nccl && cd tmp_nccl
79-
wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz
80-
tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz
81-
cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/
82-
cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/
83-
cd ..
84-
rm -rf tmp_nccl
85-
ldconfig
8666
}
8767

8868
function prune_116 {

manywheel/build_cuda.sh

+64 -22
Original file line number | Diff line number | Diff line change
@@ -142,14 +142,22 @@ DEPS_SONAME=(
142142
"libcublasLt.so.11"
143143
"libgomp.so.1"
144144
)
145-
elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then
145+
elif [[ $CUDA_VERSION == "11.7" ]]; then
146146
export USE_STATIC_CUDNN=0
147147
# Try parallelizing nvcc as well
148148
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
149149
DEPS_LIST=(
150+
"/usr/local/cuda/lib64/libcudart.so.11.0"
151+
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
152+
"/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50
153+
"/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7"
150154
"$LIBGOMP_PATH"
151155
)
152156
DEPS_SONAME=(
157+
"libcudart.so.11.0"
158+
"libnvToolsExt.so.1"
159+
"libnvrtc.so.11.2"
160+
"libnvrtc-builtins.so.11.7"
153161
"libgomp.so.1"
154162
)
155163

@@ -165,10 +173,6 @@ elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then
165173
"/usr/local/cuda/lib64/libcudnn.so.8"
166174
"/usr/local/cuda/lib64/libcublas.so.11"
167175
"/usr/local/cuda/lib64/libcublasLt.so.11"
168-
"/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50
169-
"/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7"
170-
"/usr/local/cuda/lib64/libcudart.so.11.0"
171-
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
172176
)
173177
DEPS_SONAME+=(
174178
"libcudnn_adv_infer.so.8"
@@ -182,31 +186,69 @@ elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then
182186
"libcublasLt.so.11"
183187
)
184188
else
185-
echo "Using nvidia libs from pypi."
189+
echo "Using cudnn and cublas from pypi."
190+
CUDA_RPATHS=(
191+
'$ORIGIN/../../nvidia/cublas/lib'
192+
'$ORIGIN/../../nvidia/cudnn/lib'
193+
)
194+
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
195+
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
196+
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
197+
export FORCE_RPATH="--force-rpath"
198+
fi
199+
elif [[ $CUDA_VERSION == "11.8" ]]; then
200+
export USE_STATIC_CUDNN=0
201+
# Try parallelizing nvcc as well
202+
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
203+
DEPS_LIST=(
204+
"/usr/local/cuda/lib64/libcudart.so.11.0"
205+
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
206+
"/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.8, it links to 11.8.89
207+
"/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8"
208+
"$LIBGOMP_PATH"
209+
)
210+
DEPS_SONAME=(
211+
"libcudart.so.11.0"
212+
"libnvToolsExt.so.1"
213+
"libnvrtc.so.11.2"
214+
"libnvrtc-builtins.so.11.8"
215+
"libgomp.so.1"
216+
)
217+
218+
if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
219+
echo "Bundling with cudnn and cublas."
220+
DEPS_LIST+=(
221+
"/usr/local/cuda/lib64/libcudnn_adv_infer.so.8"
222+
"/usr/local/cuda/lib64/libcudnn_adv_train.so.8"
223+
"/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8"
224+
"/usr/local/cuda/lib64/libcudnn_cnn_train.so.8"
225+
"/usr/local/cuda/lib64/libcudnn_ops_infer.so.8"
226+
"/usr/local/cuda/lib64/libcudnn_ops_train.so.8"
227+
"/usr/local/cuda/lib64/libcudnn.so.8"
228+
"/usr/local/cuda/lib64/libcublas.so.11"
229+
"/usr/local/cuda/lib64/libcublasLt.so.11"
230+
)
231+
DEPS_SONAME+=(
232+
"libcudnn_adv_infer.so.8"
233+
"libcudnn_adv_train.so.8"
234+
"libcudnn_cnn_infer.so.8"
235+
"libcudnn_cnn_train.so.8"
236+
"libcudnn_ops_infer.so.8"
237+
"libcudnn_ops_train.so.8"
238+
"libcudnn.so.8"
239+
"libcublas.so.11"
240+
"libcublasLt.so.11"
241+
)
242+
else
243+
echo "Using cudnn and cublas from pypi."
186244
CUDA_RPATHS=(
187245
'$ORIGIN/../../nvidia/cublas/lib'
188-
'$ORIGIN/../../nvidia/cuda_cupti/lib'
189-
'$ORIGIN/../../nvidia/cuda_nvrtc/lib'
190-
'$ORIGIN/../../nvidia/cuda_runtime/lib'
191246
'$ORIGIN/../../nvidia/cudnn/lib'
192-
'$ORIGIN/../../nvidia/cufft/lib'
193-
'$ORIGIN/../../nvidia/curand/lib'
194-
'$ORIGIN/../../nvidia/cusolver/lib'
195-
'$ORIGIN/../../nvidia/cusparse/lib'
196-
'$ORIGIN/../../nvidia/nccl/lib'
197-
'$ORIGIN/../../nvidia/nvtx/lib'
198247
)
199248
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
200249
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
201250
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
202251
export FORCE_RPATH="--force-rpath"
203-
export USE_STATIC_NCCL=0
204-
export USE_SYSTEM_NCCL=1
205-
export ATEN_STATIC_CUDA=0
206-
export USE_CUDA_STATIC_LINK=0
207-
export USE_CUPTI_SO=1
208-
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
209-
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
210252
fi
211253
else
212254
echo "Unknown cuda version $CUDA_VERSION"

release/pypi/prep_binary_for_pypi.sh

+14
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,22 @@ for whl_file in "$@"; do
5656
if [[ $whl_file == *"with.pypi.cudnn"* ]]; then
5757
rm -rf "${whl_dir}/caffe2"
5858
rm -rf "${whl_dir}"/torch/lib/libnvrtc*
59+
sed -i -e "s/Requires-Dist: nvidia-cuda-runtime-cu11/Requires-Dist: nvidia-cuda-runtime-cu11 (==11.7.99)/" "${whl_dir}"/*/METADATA
60+
sed -i -e "/^Requires-Dist: nvidia-cublas-cu11 (==11.10.3.66).*/a Requires-Dist: nvidia-cuda-nvrtc-cu11 (==11.7.99) ; platform_system == \"Linux\"" "${whl_dir}"/*/METADATA
5961

6062
sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py"
63+
find "${whl_dir}/torch/" -maxdepth 1 -type f -name "*.so*" | while read sofile; do
64+
patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN:$ORIGIN/lib' \
65+
--force-rpath $sofile
66+
patchelf --print-rpath $sofile
67+
done
68+
69+
find "${whl_dir}/torch/lib" -maxdepth 1 -type f -name "*.so*" | while read sofile; do
70+
patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN' \
71+
--force-rpath $sofile
72+
patchelf --print-rpath $sofile
73+
done
74+
patchelf --replace-needed libnvrtc-d833c4f3.so.11.2 libnvrtc.so.11.2 "${whl_dir}/torch/lib/libcaffe2_nvrtc.so"
6175
fi
6276

6377
find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \;

0 commit comments

Comments
 (0)