@@ -1305,17 +1305,54 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
+        # url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
+        #     backends[be]
+        # )
+
+        # response = requests.get(url)
+        # spec = importlib.util.spec_from_loader(
+        #     "trtllm_buildscript", loader=None, origin=url
+        # )
+        # trtllm_buildscript = importlib.util.module_from_spec(spec)
+        # exec(response.content, trtllm_buildscript.__dict__)
+        # df += trtllm_buildscript.create_postbuild(backends[be])
+
+        df += """
+WORKDIR /workspace
+# Remove previous TRT installation
+RUN apt-get remove --purge -y tensorrt* libnvinfer*
+RUN pip uninstall -y tensorrt
+# Install new version of TRT using the script from TRT-LLM
+RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
+RUN git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend.git tensorrtllm_backend
+RUN cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git
+RUN cd tensorrtllm_backend && git submodule sync
+RUN cd tensorrtllm_backend && git submodule update --init --recursive
+RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
+RUN rm -fr tensorrtllm_backend
+    """.format(
             backends[be]
         )
 
-        response = requests.get(url)
-        spec = importlib.util.spec_from_loader(
-            "trtllm_buildscript", loader=None, origin=url
-        )
-        trtllm_buildscript = importlib.util.module_from_spec(spec)
-        exec(response.content, trtllm_buildscript.__dict__)
-        df += trtllm_buildscript.create_postbuild(backends[be])
+        df += """
+RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
+ENV TRT_ROOT=/usr/local/tensorrt
+# Remove TRT contents that are not needed in runtime
+RUN ARCH="$(uname -i)" && \
+    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
+    rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
+    rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
+# Install required packages for TRT-LLM models
+RUN python3 -m pip install --upgrade pip && \
+    pip3 install transformers && \
+    pip3 install torch
+# Uninstall unused nvidia packages
+RUN if pip freeze | grep -q "nvidia.*"; then \
+        pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
+    fi
+RUN pip cache purge
+ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
+"""
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend
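Note on the change above: the first df += """...""" fragment is passed through .format() so that the {} placeholder is replaced with the tensorrtllm branch or tag stored in backends[be], while the second fragment is appended verbatim, presumably because its ${TRT_ROOT} and ${ARCH} shell expansions contain braces that str.format would misinterpret as placeholders. As a rough, hypothetical sketch of how such an accumulated Dockerfile string could be consumed (not the actual build.py flow; the helper name, base image, and tag below are illustrative assumptions):

import os
import subprocess
import tempfile


def build_from_fragment(df_fragment, base_image, tag):
    # Prepend a FROM line so the accumulated fragment forms a complete Dockerfile.
    dockerfile = "FROM {}\n{}".format(base_image, df_fragment)
    build_dir = tempfile.mkdtemp()
    dockerfile_path = os.path.join(build_dir, "Dockerfile")
    with open(dockerfile_path, "w") as f:
        f.write(dockerfile)
    # Standard docker CLI invocation; assumes docker is available on PATH.
    subprocess.run(
        ["docker", "build", "-t", tag, "-f", dockerfile_path, build_dir],
        check=True,
    )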