@@ -1319,18 +1319,50 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
1319
1319
# Add dependencies needed for tensorrtllm backend
1320
1320
if "tensorrtllm" in backends :
1321
1321
be = "tensorrtllm"
1322
- url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py" .format (
1323
- backends [be ]
1324
- )
1325
-
1326
- response = requests .get (url )
1327
- spec = importlib .util .spec_from_loader (
1328
- "trtllm_buildscript" , loader = None , origin = url
1322
+ df += """
1323
+ WORKDIR /workspace
1324
+ RUN apt-get update && apt-get install -y --no-install-recommends python3-pip
1325
+
1326
+ # Remove previous TRT installation
1327
+ RUN apt-get remove --purge -y tensorrt* libnvinfer*
1328
+ RUN pip uninstall -y tensorrt
1329
+ # Install new version of TRT using the script from TRT-LLM
1330
+ RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
1331
+ RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
1332
+ RUN cd tensorrtllm_backend && git submodule update --init --recursive
1333
+ ENV TRT_VER=9.2.0.4
1334
+ ENV CUDA_VER=12.3
1335
+ ENV CUDNN_VER=8.9.6.50-1+cuda12.2
1336
+ ENV NCCL_VER=2.19.3-1+cuda12.3
1337
+ ENV CUBLAS_VER=12.3.2.9-1
1338
+ RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
1339
+ RUN rm -fr tensorrtllm_backend
1340
+ """ .format (
1341
+ backends [be ],
1342
+ os .environ ["REMOVE_ME_TRTLLM_USERNAME" ],
1343
+ os .environ ["REMOVE_ME_TRTLLM_TOKEN" ],
1329
1344
)
1330
- trtllm_buildscript = importlib .util .module_from_spec (spec )
1331
- exec (response .content , trtllm_buildscript .__dict__ )
1332
- df += trtllm_buildscript .create_postbuild (backends [be ])
1333
1345
1346
+ df += """
1347
+ RUN bash /tmp/install_tensorrt.sh --CUDA_VER=$CUDA_VER --CUDNN_VER=$CUDNN_VER --NCCL_VER=$NCCL_VER --CUBLAS_VER=$CUBLAS_VER && rm /tmp/install_tensorrt.sh
1348
+ ENV TRT_ROOT=/usr/local/tensorrt
1349
+ # Remove TRT contents that are not needed in runtime
1350
+ RUN ARCH="$(uname -i)" && \
1351
+ rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
1352
+ rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
1353
+ rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
1354
+
1355
+ # Install required packages for TRT-LLM models
1356
+ RUN python3 -m pip install --upgrade pip && \
1357
+ pip3 install transformers
1358
+
1359
+ # Uninstall unused nvidia packages
1360
+ RUN if pip freeze | grep -q "nvidia.*"; then \
1361
+ pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
1362
+ fi
1363
+ RUN pip cache purge
1364
+ ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
1365
+ """
1334
1366
if "vllm" in backends :
1335
1367
# [DLIS-5606] Build Conda environment for vLLM backend
1336
1368
# Remove Pip install once vLLM backend moves to Conda environment.
@@ -1790,6 +1822,12 @@ def core_build(
1790
1822
1791
1823
def tensorrtllm_prebuild (cmake_script ):
1792
1824
# Export the pinned TensorRT/CUDA dependency versions and the TRT_ROOT environment variable
1825
+ cmake_script .cmd ("export TRT_VER=9.2.0.4" )
1826
+ cmake_script .cmd ("export CUDA_VER=12.3" )
1827
+ cmake_script .cmd ("export CUDNN_VER=8.9.6.50-1+cuda12.2" )
1828
+ cmake_script .cmd ("export NCCL_VER=2.19.3-1+cuda12.3" )
1829
+ cmake_script .cmd ("export CUBLAS_VER=12.3.2.9-1" )
1830
+
1793
1831
cmake_script .cmd ("export TRT_ROOT=/usr/local/tensorrt" )
1794
1832
cmake_script .cmd ("export ARCH=$(uname -m)" )
1795
1833
@@ -1820,10 +1858,18 @@ def backend_build(
1820
1858
cmake_script .comment ()
1821
1859
cmake_script .mkdir (build_dir )
1822
1860
cmake_script .cwd (build_dir )
1823
- cmake_script .gitclone (backend_repo (be ), tag , be , github_organization )
1824
1861
1825
1862
if be == "tensorrtllm" :
1863
+ cmake_script .cmd (
1864
+ "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm" .format (
1865
+ tag ,
1866
+ os .environ ["REMOVE_ME_TRTLLM_USERNAME" ],
1867
+ os .environ ["REMOVE_ME_TRTLLM_TOKEN" ],
1868
+ )
1869
+ )
1826
1870
tensorrtllm_prebuild (cmake_script )
1871
+ else :
1872
+ cmake_script .gitclone (backend_repo (be ), tag , be , github_organization )
1827
1873
1828
1874
cmake_script .mkdir (repo_build_dir )
1829
1875
cmake_script .cwd (repo_build_dir )
0 commit comments