@@ -1302,12 +1302,10 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     pip3 install --upgrade numpy && \
     rm -rf /var/lib/apt/lists/*
 """
-    # FIXME: Use the postbuild script here
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        # # FIXME: Update the url
-        # url = "https://gitlab-master.nvidia.com/ftp/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
+        # url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
         #     backends[be]
         # )
@@ -1317,49 +1315,42 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         # )
         # trtllm_buildscript = importlib.util.module_from_spec(spec)
         # exec(response.content, trtllm_buildscript.__dict__)
-        # df += trtllm_buildscript.create_postbuild(
-        #     backends[be]  # repo tag
-        # )
+        # df += trtllm_buildscript.create_postbuild(backends[be])
+
         df += """
 WORKDIR /workspace
-
 # Remove previous TRT installation
 RUN apt-get remove --purge -y tensorrt* libnvinfer*
 RUN pip uninstall -y tensorrt
-
 # Install new version of TRT using the script from TRT-LLM
 RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
-RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
+RUN git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend.git tensorrtllm_backend
+RUN cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git
+RUN cd tensorrtllm_backend && git submodule sync
 RUN cd tensorrtllm_backend && git submodule update --init --recursive
 RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
 RUN rm -fr tensorrtllm_backend
 """.format(
-            backends[be],
-            os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-            os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+            backends[be]
         )
 
         df += """
 RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
 ENV TRT_ROOT=/usr/local/tensorrt
-
 # Remove TRT contents that are not needed in runtime
 RUN ARCH="$(uname -i)" && \
     rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
     rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
     rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
-
 # Install required packages for TRT-LLM models
 RUN python3 -m pip install --upgrade pip && \
     pip3 install transformers && \
     pip3 install torch
-
 # Uninstall unused nvidia packages
 RUN if pip freeze | grep -q "nvidia.*"; then \
         pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
     fi
 RUN pip cache purge
-
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
 """
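For context, the commented-out path above would download `gen_trtllm_dockerfile.py` from the backend repository at the pinned tag and use its `create_postbuild()` to emit these Dockerfile steps instead of hard-coding them. A minimal sketch of what that could look like once the script is published (hypothetical; assumes the raw URL from the comment is reachable and that the module defines `create_postbuild(repo_tag)`):

```python
import importlib.util

import requests


def trtllm_postbuild(repo_tag):
    # Hypothetical sketch of the commented-out path above: download the
    # generator script pinned to the backend tag, load it as an in-memory
    # module, and return the Dockerfile fragment it produces.
    url = (
        "https://raw.githubusercontent.com/triton-inference-server/"
        "tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(repo_tag)
    )
    response = requests.get(url)
    response.raise_for_status()
    spec = importlib.util.spec_from_loader("trtllm_buildscript", loader=None)
    trtllm_buildscript = importlib.util.module_from_spec(spec)
    # Execute the downloaded source in the module's namespace.
    exec(response.content, trtllm_buildscript.__dict__)
    # Assumes the script exposes create_postbuild(repo_tag), as in the comments above.
    return trtllm_buildscript.create_postbuild(repo_tag)
```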
@@ -1827,10 +1818,6 @@ def tensorrtllm_prebuild(cmake_script):
 
     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
-    # Uncomment the patch once moving to the GitHub repo
-    # cmake_script.cmd(
-    #     "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
-    # )
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
@@ -1856,24 +1843,23 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
-    # FIXME: Use GitHub repo
+
     if be == "tensorrtllm":
-        # cmake_script.gitclone(
-        #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
-        # )
         cmake_script.cmd(
-            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm".format(
-                tag,
-                os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-                os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+            "git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend tensorrtllm".format(
+                tag
             )
         )
+        cmake_script.cmd("cd tensorrtllm")
+        cmake_script.cmd(
+            "git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+        )
+        cmake_script.cmd("git submodule sync")
+        cmake_script.cmd("cd ..")
+        tensorrtllm_prebuild(cmake_script)
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
-    if be == "tensorrtllm":
-        tensorrtllm_prebuild(cmake_script)
-
     cmake_script.mkdir(repo_build_dir)
     cmake_script.cwd(repo_build_dir)
     cmake_script.cmake(
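Both hunks rely on the same clone-plus-submodule-rewrite pattern: clone the backend at the pinned tag, repoint the `tensorrt_llm` submodule at the public NVIDIA/TensorRT-LLM repository, sync, then fetch the submodules. A standalone sketch of the equivalent steps (a hypothetical helper, not part of build.py; it reuses the URLs from the diff and needs git >= 2.25 for `git submodule set-url`):

```python
import subprocess


def clone_tensorrtllm_backend(tag, dest="tensorrtllm_backend"):
    # Clone only the requested branch/tag of the backend repository.
    subprocess.run(
        [
            "git", "clone", "--single-branch", "--depth=1", "-b", tag,
            "https://github.com/triton-inference-server/tensorrtllm_backend.git",
            dest,
        ],
        check=True,
    )
    # Repoint the tensorrt_llm submodule at the public repo before syncing,
    # so the recursive update fetches from GitHub rather than the original remote.
    subprocess.run(
        [
            "git", "submodule", "set-url", "--", "tensorrt_llm",
            "https://github.com/NVIDIA/TensorRT-LLM.git",
        ],
        cwd=dest,
        check=True,
    )
    subprocess.run(["git", "submodule", "sync"], cwd=dest, check=True)
    subprocess.run(
        ["git", "submodule", "update", "--init", "--recursive"],
        cwd=dest,
        check=True,
    )
```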