
Commit c162205

Update TRT-LLM backend url (#6455)
* TRTLLM backend post release
* TRTLLM backend post release
* Update submodule url for permission issue
* Update submodule url
* Fix up
* Not using postbuild function to work around submodule url permission issue
1 parent e19cfe7 commit c162205

File tree

1 file changed: +17 −31 lines

build.py (+17 −31)
```diff
@@ -1302,12 +1302,10 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_machine):
     pip3 install --upgrade numpy && \
     rm -rf /var/lib/apt/lists/*
 """
-    # FIXME: Use the postbuild script here
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        # # FIXME: Update the url
-        # url = "https://gitlab-master.nvidia.com/ftp/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
+        # url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
         #     backends[be]
         # )

```
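The commented-out block above (continued in the next hunk) is the disabled "postbuild" path: fetch `gen_trtllm_dockerfile.py` from the backend repo and call its `create_postbuild()` helper to generate the Dockerfile steps. A minimal sketch of how that path would work, assuming the raw-URL layout in the comment, a `main` placeholder tag, and `spec_from_loader` to build the in-memory module (the real spec construction sits outside this hunk):

```python
import importlib.util

import requests

# Fetch the build script from the backend repo (placeholder tag "main").
url = (
    "https://raw.githubusercontent.com/triton-inference-server/"
    "tensorrtllm_backend/main/tools/gen_trtllm_dockerfile.py"
)
response = requests.get(url)
response.raise_for_status()

# Execute the fetched source as an in-memory module, as the commented
# code does, then call its create_postbuild() helper.
spec = importlib.util.spec_from_loader("trtllm_buildscript", loader=None)
trtllm_buildscript = importlib.util.module_from_spec(spec)
exec(response.content, trtllm_buildscript.__dict__)

df = trtllm_buildscript.create_postbuild("main")  # repo tag
```

As the commit message notes, this path stays unused; the steps are inlined below so the submodule URL can be rewritten before initialization.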

```diff
@@ -1317,49 +1315,42 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_machine):
         # )
         # trtllm_buildscript = importlib.util.module_from_spec(spec)
         # exec(response.content, trtllm_buildscript.__dict__)
-        # df += trtllm_buildscript.create_postbuild(
-        #     backends[be] # repo tag
-        # )
+        # df += trtllm_buildscript.create_postbuild(backends[be])
+
         df += """
 WORKDIR /workspace
-
 # Remove previous TRT installation
 RUN apt-get remove --purge -y tensorrt* libnvinfer*
 RUN pip uninstall -y tensorrt
-
 # Install new version of TRT using the script from TRT-LLM
 RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
-RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
+RUN git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend.git tensorrtllm_backend
+RUN cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git
+RUN cd tensorrtllm_backend && git submodule sync
 RUN cd tensorrtllm_backend && git submodule update --init --recursive
 RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
 RUN rm -fr tensorrtllm_backend
 """.format(
-            backends[be],
-            os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-            os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+            backends[be]
         )

         df += """
 RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
 ENV TRT_ROOT=/usr/local/tensorrt
-
 # Remove TRT contents that are not needed in runtime
 RUN ARCH="$(uname -i)" && \
     rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
     rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
     rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
-
 # Install required packages for TRT-LLM models
 RUN python3 -m pip install --upgrade pip && \
     pip3 install transformers && \
     pip3 install torch
-
 # Uninstall unused nvidia packages
 RUN if pip freeze | grep -q "nvidia.*"; then \
     pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
     fi
 RUN pip cache purge
-
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
 """
```
13651356

```diff
@@ -1827,10 +1818,6 @@ def tensorrtllm_prebuild(cmake_script):

     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
-    # Uncomment the patch once moving to the GitHub repo
-    # cmake_script.cmd(
-    #     "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
-    # )
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
```
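The surviving `mv` commands flatten `inflight_batcher_llm/` into the backend root, the layout Triton's build expects per the FIXME above; the patch step is dropped rather than uncommented now that the GitHub repo is in use. A hypothetical stand-alone equivalent of the same restructuring:

```python
import shutil
from pathlib import Path

# Move src/, cmake/ and CMakeLists.txt up from inflight_batcher_llm/
# to the backend root, mirroring the generated "mv" commands.
backend = Path("tensorrtllm")
for item in ("src", "cmake", "CMakeLists.txt"):
    shutil.move(str(backend / "inflight_batcher_llm" / item), str(backend / item))
```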
```diff
@@ -1856,24 +1843,23 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
-    # FIXME: Use GitHub repo
+
     if be == "tensorrtllm":
-        # cmake_script.gitclone(
-        #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
-        # )
         cmake_script.cmd(
-            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm".format(
-                tag,
-                os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-                os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+            "git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend tensorrtllm".format(
+                tag
             )
         )
+        cmake_script.cmd("cd tensorrtllm")
+        cmake_script.cmd(
+            "git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+        )
+        cmake_script.cmd("git submodule sync")
+        cmake_script.cmd("cd ..")
+        tensorrtllm_prebuild(cmake_script)
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)

-    if be == "tensorrtllm":
-        tensorrtllm_prebuild(cmake_script)
-
     cmake_script.mkdir(repo_build_dir)
     cmake_script.cwd(repo_build_dir)
     cmake_script.cmake(
```
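Two things to note here: the clone no longer needs credentials, and `tensorrtllm_prebuild(cmake_script)` now runs inside the `tensorrtllm` branch, right after the submodule rewrite, instead of in a separate `if` after the clone. The `cd tensorrtllm` / `cd ..` pairing implies each `cmd()` call appends one line to a single generated shell script, so the working directory persists between calls. A toy stand-in for that accumulator (illustrative names, not the real build.py class):

```python
class BuildScript:
    """Toy sketch: collects shell commands into one generated script."""

    def __init__(self):
        self.lines = ["#!/usr/bin/env bash", "set -e"]

    def cmd(self, command):
        # Each call appends one line, so a `cd` here affects every
        # command emitted after it.
        self.lines.append(command)

    def write(self, path):
        with open(path, "w") as fh:
            fh.write("\n".join(self.lines) + "\n")
```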
