@@ -1302,12 +1302,10 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     pip3 install --upgrade numpy && \
     rm -rf /var/lib/apt/lists/*
 """
-    # FIXME: Use the postbuild script here
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        # # FIXME: Update the url
-        # url = "https://gitlab-master.nvidia.com/ftp/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
+        # url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
         #     backends[be]
         # )
@@ -1317,49 +1315,42 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         # )
         # trtllm_buildscript = importlib.util.module_from_spec(spec)
         # exec(response.content, trtllm_buildscript.__dict__)
-        # df += trtllm_buildscript.create_postbuild(
-        #     backends[be]  # repo tag
-        # )
+        # df += trtllm_buildscript.create_postbuild(backends[be])
+
         df += """
 WORKDIR /workspace
-
 # Remove previous TRT installation
 RUN apt-get remove --purge -y tensorrt* libnvinfer*
 RUN pip uninstall -y tensorrt
-
 # Install new version of TRT using the script from TRT-LLM
 RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
-RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
+RUN git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend.git tensorrtllm_backend
+RUN cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git
+RUN cd tensorrtllm_backend && git submodule sync
 RUN cd tensorrtllm_backend && git submodule update --init --recursive
 RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
 RUN rm -fr tensorrtllm_backend
 """.format(
-            backends[be],
-            os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-            os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+            backends[be]
         )
 
         df += """
 RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
 ENV TRT_ROOT=/usr/local/tensorrt
-
 # Remove TRT contents that are not needed in runtime
 RUN ARCH="$(uname -i)" && \
     rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
     rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
     rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
-
 # Install required packages for TRT-LLM models
 RUN python3 -m pip install --upgrade pip && \
     pip3 install transformers && \
     pip3 install torch
-
 # Uninstall unused nvidia packages
 RUN if pip freeze | grep -q "nvidia.*"; then \
         pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
     fi
 RUN pip cache purge
-
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
 """
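For context, the commented-out path above would download `gen_trtllm_dockerfile.py` from the backend repository at the pinned tag and use its `create_postbuild()` to emit these Dockerfile steps instead of hard-coding them. A minimal sketch of what that could look like once the script is published (hypothetical; assumes the raw URL from the comment is reachable and that the module defines `create_postbuild(repo_tag)`):

```python
import importlib.util

import requests


def trtllm_postbuild(repo_tag):
    # Hypothetical sketch of the commented-out path above: download the
    # generator script pinned to the backend tag, load it as an in-memory
    # module, and return the Dockerfile fragment it produces.
    url = (
        "https://raw.githubusercontent.com/triton-inference-server/"
        "tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(repo_tag)
    )
    response = requests.get(url)
    response.raise_for_status()
    spec = importlib.util.spec_from_loader("trtllm_buildscript", loader=None)
    trtllm_buildscript = importlib.util.module_from_spec(spec)
    # Execute the downloaded source in the module's namespace.
    exec(response.content, trtllm_buildscript.__dict__)
    # Assumes the script exposes create_postbuild(repo_tag), as in the comments above.
    return trtllm_buildscript.create_postbuild(repo_tag)
```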
@@ -1827,10 +1818,6 @@ def tensorrtllm_prebuild(cmake_script):
 
     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
-    # Uncomment the patch once moving to the GitHub repo
-    # cmake_script.cmd(
-    #     "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
-    # )
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
@@ -1856,24 +1843,23 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
-    # FIXME: Use GitHub repo
+
     if be == "tensorrtllm":
-        # cmake_script.gitclone(
-        #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
-        # )
         cmake_script.cmd(
-            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm".format(
-                tag,
-                os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-                os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+            "git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend tensorrtllm".format(
+                tag
             )
         )
+        cmake_script.cmd("cd tensorrtllm")
+        cmake_script.cmd(
+            "git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+        )
+        cmake_script.cmd("git submodule sync")
+        cmake_script.cmd("cd ..")
+        tensorrtllm_prebuild(cmake_script)
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
-    if be == "tensorrtllm":
-        tensorrtllm_prebuild(cmake_script)
-
     cmake_script.mkdir(repo_build_dir)
     cmake_script.cwd(repo_build_dir)
     cmake_script.cmake(
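Both hunks rely on the same clone-plus-submodule-rewrite pattern: clone the backend at the pinned tag, repoint the `tensorrt_llm` submodule at the public NVIDIA/TensorRT-LLM repository, sync, then fetch the submodules. A standalone sketch of the equivalent steps (a hypothetical helper, not part of build.py; it reuses the URLs from the diff and needs git >= 2.25 for `git submodule set-url`):

```python
import subprocess


def clone_tensorrtllm_backend(tag, dest="tensorrtllm_backend"):
    # Clone only the requested branch/tag of the backend repository.
    subprocess.run(
        [
            "git", "clone", "--single-branch", "--depth=1", "-b", tag,
            "https://github.com/triton-inference-server/tensorrtllm_backend.git",
            dest,
        ],
        check=True,
    )
    # Repoint the tensorrt_llm submodule at the public repo before syncing,
    # so the recursive update fetches from GitHub rather than the original remote.
    subprocess.run(
        [
            "git", "submodule", "set-url", "--", "tensorrt_llm",
            "https://github.com/NVIDIA/TensorRT-LLM.git",
        ],
        cwd=dest,
        check=True,
    )
    subprocess.run(["git", "submodule", "sync"], cwd=dest, check=True)
    subprocess.run(
        ["git", "submodule", "update", "--init", "--recursive"],
        cwd=dest,
        check=True,
    )
```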