@@ -113,13 +113,13 @@ case "$gpu_info" in
113
113
printf " Experimental support for Renoir: make sure to have at least 4GB of VRAM and 10GB of RAM or enable cpu mode: --use-cpu all --no-half"
114
114
printf " \n%s\n" " ${delimiter} "
115
115
;;
116
- * )
116
+ * )
117
117
;;
118
118
esac
119
119
# Pick a default PyTorch install command for AMD hardware.
# Applies only when gpu_info mentions "AMD" and TORCH_COMMAND is empty or
# unset, so a value supplied by the caller is never overridden.
# The substring match is done by the shell itself; no echo | grep pipeline
# is needed for a simple "contains" check.
if [[ "${gpu_info}" == *"AMD"* ]] && [[ -z "${TORCH_COMMAND}" ]]
then
    export TORCH_COMMAND="pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/rocm5.2"
fi
123
123
124
124
for preq in " ${GIT} " " ${python_cmd} "
125
125
do
@@ -172,15 +172,30 @@ else
172
172
exit 1
173
173
fi
174
174
175
# Try using TCMalloc on Linux.
#
# Looks up a libtcmalloc shared object in the dynamic linker cache and, when
# one is found, exports it through LD_PRELOAD so that processes started
# afterwards load it.
#
# Globals:
#   OSTYPE       (read)    - only values starting with "linux" are handled
#   NO_TCMALLOC  (read)    - any non-empty value disables the lookup
#   LD_PRELOAD   (read)    - an existing non-empty value is left untouched
#   LD_PRELOAD   (written) - exported with the library name when found
#   TCMALLOC     (written) - library name found, or empty
# Arguments: none
# Outputs:   a status line on stdout (library in use, or a warning)
prepare_tcmalloc() {
    if [[ "${OSTYPE}" == "linux"* ]] && [[ -z "${NO_TCMALLOC}" ]] && [[ -z "${LD_PRELOAD}" ]]; then
        # Dots are escaped so they only match literally, and the version may
        # have more than one digit. -E avoids depending on PCRE (-P) support.
        TCMALLOC="$(ldconfig -p | grep -Eo 'libtcmalloc\.so\.[0-9]+' | head -n 1)"
        if [[ -n "${TCMALLOC}" ]]; then
            echo "Using TCMalloc: ${TCMALLOC}"
            export LD_PRELOAD="${TCMALLOC}"
        else
            printf "\e[1m\e[31mCannot locate TCMalloc (improves CPU memory usage)\e[0m\n"
        fi
    fi
}
187
+
175
188
# Start the launch script, replacing this shell process via exec.
# When ACCELERATE is exactly "True" and an executable `accelerate` command is
# on PATH, the script is started through `accelerate launch`; otherwise it is
# run directly with the selected Python interpreter. In both cases
# prepare_tcmalloc runs immediately before exec so that any LD_PRELOAD it
# exports is inherited by the launched process.
# All script arguments are forwarded unchanged ("$@" keeps each one intact).
if [[ "${ACCELERATE}" == "True" ]] && [[ -x "$(command -v accelerate)" ]]
then
    printf "\n%s\n" "${delimiter}"
    printf "Accelerating launch.py..."
    printf "\n%s\n" "${delimiter}"
    prepare_tcmalloc
    exec accelerate launch --num_cpu_threads_per_process=6 "${LAUNCH_SCRIPT}" "$@"
else
    printf "\n%s\n" "${delimiter}"
    printf "Launching launch.py..."
    printf "\n%s\n" "${delimiter}"
    prepare_tcmalloc
    exec "${python_cmd}" "${LAUNCH_SCRIPT}" "$@"
fi
0 commit comments