From 17e7b6d566bb3bc56c81f38e4577533d1ed39e92 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 10:24:57 -0700
Subject: [PATCH 1/6] TRTLLM backend post release

---
 build.py | 85 ++++++++------------------------------------------
 1 file changed, 12 insertions(+), 73 deletions(-)

diff --git a/build.py b/build.py
index 5ee3f08ee6..305bc237c7 100755
--- a/build.py
+++ b/build.py
@@ -1302,66 +1302,22 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     pip3 install --upgrade numpy && \
     rm -rf /var/lib/apt/lists/*
 """
-    # FIXME: Use the postbuild script here
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        # # FIXME: Update the url
-        # url = "https://gitlab-master.nvidia.com/ftp/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
-        #     backends[be]
-        # )
-
-        # response = requests.get(url)
-        # spec = importlib.util.spec_from_loader(
-        #     "trtllm_buildscript", loader=None, origin=url
-        # )
-        # trtllm_buildscript = importlib.util.module_from_spec(spec)
-        # exec(response.content, trtllm_buildscript.__dict__)
-        # df += trtllm_buildscript.create_postbuild(
-        #     backends[be]  # repo tag
-        # )
-        df += """
-WORKDIR /workspace
-
-# Remove previous TRT installation
-RUN apt-get remove --purge -y tensorrt* libnvinfer*
-RUN pip uninstall -y tensorrt
-
-# Install new version of TRT using the script from TRT-LLM
-RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
-RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
-RUN cd tensorrtllm_backend && git submodule update --init --recursive
-RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
-RUN rm -fr tensorrtllm_backend
-    """.format(
-            backends[be],
-            os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-            os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+        url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
+            backends[be]
         )
-        df += """
-RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
-ENV TRT_ROOT=/usr/local/tensorrt
-
-# Remove TRT contents that are not needed in runtime
-RUN ARCH="$(uname -i)" && \
-    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
-    rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
-    rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
-
-# Install required packages for TRT-LLM models
-RUN python3 -m pip install --upgrade pip && \
-    pip3 install transformers && \
-    pip3 install torch
-
-# Uninstall unused nvidia packages
-RUN if pip freeze | grep -q "nvidia.*"; then \
-    pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
-    fi
-RUN pip cache purge
-
-ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
-"""
+        response = requests.get(url)
+        spec = importlib.util.spec_from_loader(
+            "trtllm_buildscript", loader=None, origin=url
+        )
+        trtllm_buildscript = importlib.util.module_from_spec(spec)
+        exec(response.content, trtllm_buildscript.__dict__)
+        df += trtllm_buildscript.create_postbuild(
+            backends[be]  # repo tag
+        )
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend
@@ -1827,10 +1783,6 @@ def tensorrtllm_prebuild(cmake_script):
     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
-    # Uncomment the patch once moving to the GitHub repo
-    # cmake_script.cmd(
-    #     "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
-    # )
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
 
@@ -1856,20 +1808,7 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
-    # FIXME: Use GitHub repo
-    if be == "tensorrtllm":
-        # cmake_script.gitclone(
-        #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
-        # )
-        cmake_script.cmd(
-            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm".format(
-                tag,
-                os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-                os.environ["REMOVE_ME_TRTLLM_TOKEN"],
-            )
-        )
-    else:
-        cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
+    cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     if be == "tensorrtllm":
         tensorrtllm_prebuild(cmake_script)

From 2c86f6ee53a1d8b23fe183377418b935726b9c8b Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 10:26:10 -0700
Subject: [PATCH 2/6] TRTLLM backend post release

---
 build.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/build.py b/build.py
index 305bc237c7..274fa68292 100755
--- a/build.py
+++ b/build.py
@@ -1315,9 +1315,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         )
         trtllm_buildscript = importlib.util.module_from_spec(spec)
         exec(response.content, trtllm_buildscript.__dict__)
-        df += trtllm_buildscript.create_postbuild(
-            backends[be]  # repo tag
-        )
+        df += trtllm_buildscript.create_postbuild(backends[be])
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend

From e89ee4439b693576206d9f4c798b2cf320ba73b1 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 11:43:29 -0700
Subject: [PATCH 3/6] Update submodule url for permission issue

---
 build.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/build.py b/build.py
index 274fa68292..9a45470570 100755
--- a/build.py
+++ b/build.py
@@ -1785,6 +1785,11 @@ def tensorrtllm_prebuild(cmake_script):
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
 
+    cmake_script.cmd(
+        "cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+    )
+    cmake_script.cmd("cd tensorrtllm_backend && git submodule sync")
+
 
 def backend_build(
     be,

From b30a7951b6ff40d449a9fccc90642fadb275c8e4 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 14:00:10 -0700
Subject: [PATCH 4/6] Update submodule url

---
 build.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/build.py b/build.py
index 9a45470570..de6300396c 100755
--- a/build.py
+++ b/build.py
@@ -1785,11 +1785,6 @@ def tensorrtllm_prebuild(cmake_script):
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
 
-    cmake_script.cmd(
-        "cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
-    )
-    cmake_script.cmd("cd tensorrtllm_backend && git submodule sync")
-
 
 def backend_build(
     be,
@@ -1811,10 +1806,20 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
-    cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     if be == "tensorrtllm":
+        cmake_script.cmd(
+            "git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend tensorrtllm".format(
+                tag
+            )
+        )
+        cmake_script.cmd(
+            "cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+        )
+        cmake_script.cmd("cd tensorrtllm_backend && git submodule sync")
         tensorrtllm_prebuild(cmake_script)
+    else:
+        cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     cmake_script.mkdir(repo_build_dir)
     cmake_script.cwd(repo_build_dir)

From eb8046085e10cd3a515af3133691a813dff19b18 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 14:18:03 -0700
Subject: [PATCH 5/6] Fix up

---
 build.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/build.py b/build.py
index de6300396c..8668a0d4f7 100755
--- a/build.py
+++ b/build.py
@@ -1813,10 +1813,12 @@ def backend_build(
                 tag
             )
         )
+        cmake_script.cmd("cd tensorrtllm")
         cmake_script.cmd(
-            "cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+            "git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
        )
-        cmake_script.cmd("cd tensorrtllm_backend && git submodule sync")
+        cmake_script.cmd("git submodule sync")
+        cmake_script.cmd("cd ..")
         tensorrtllm_prebuild(cmake_script)
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)

From 5bcc3270dc376b112049eea19c3746a103ca2074 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 15:39:41 -0700
Subject: [PATCH 6/6] Not using postbuild function to workaround submodule url permission issue

---
 build.py | 53 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/build.py b/build.py
index 8668a0d4f7..1c98c2f00a 100755
--- a/build.py
+++ b/build.py
@@ -1305,17 +1305,54 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 """
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
+        # url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
+        #     backends[be]
+        # )
+
+        # response = requests.get(url)
+        # spec = importlib.util.spec_from_loader(
+        #     "trtllm_buildscript", loader=None, origin=url
+        # )
+        # trtllm_buildscript = importlib.util.module_from_spec(spec)
+        # exec(response.content, trtllm_buildscript.__dict__)
+        # df += trtllm_buildscript.create_postbuild(backends[be])
+
+        df += """
+WORKDIR /workspace
+# Remove previous TRT installation
+RUN apt-get remove --purge -y tensorrt* libnvinfer*
+RUN pip uninstall -y tensorrt
+# Install new version of TRT using the script from TRT-LLM
+RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
+RUN git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend.git tensorrtllm_backend
+RUN cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git
+RUN cd tensorrtllm_backend && git submodule sync
+RUN cd tensorrtllm_backend && git submodule update --init --recursive
+RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
+RUN rm -fr tensorrtllm_backend
+    """.format(
             backends[be]
         )
-        response = requests.get(url)
-        spec = importlib.util.spec_from_loader(
-            "trtllm_buildscript", loader=None, origin=url
-        )
-        trtllm_buildscript = importlib.util.module_from_spec(spec)
-        exec(response.content, trtllm_buildscript.__dict__)
-        df += trtllm_buildscript.create_postbuild(backends[be])
+        df += """
+RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
+ENV TRT_ROOT=/usr/local/tensorrt
+# Remove TRT contents that are not needed in runtime
+RUN ARCH="$(uname -i)" && \
+    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
+    rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
+    rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
+# Install required packages for TRT-LLM models
+RUN python3 -m pip install --upgrade pip && \
+    pip3 install transformers && \
+    pip3 install torch
+# Uninstall unused nvidia packages
+RUN if pip freeze | grep -q "nvidia.*"; then \
+    pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
+    fi
+RUN pip cache purge
+ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
+"""
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend
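
Series note: patches 1-2, and the block that patch 6 comments back out, all rely on the same
dynamic-import pattern: fetch tools/gen_trtllm_dockerfile.py from the tensorrtllm_backend repo
at the pinned tag, execute it into a synthetic in-memory module, and call its create_postbuild()
with the repo tag to generate the Dockerfile snippet. Isolated from build.py, the pattern looks
roughly like the sketch below; the load_remote_module helper name and the raise_for_status()
check are illustrative additions, not code from this series:

    import importlib.util

    import requests


    def load_remote_module(name, url):
        # Download the script source; fail loudly on HTTP errors.
        response = requests.get(url)
        response.raise_for_status()
        # Build a bare module spec with no loader, recording the URL as its origin.
        spec = importlib.util.spec_from_loader(name, loader=None, origin=url)
        module = importlib.util.module_from_spec(spec)
        # Execute the downloaded source inside the module's own namespace.
        exec(response.content, module.__dict__)
        return module

Usage mirroring patches 1-2 ("main" here is a placeholder for the backend repo tag):

    url = (
        "https://raw.githubusercontent.com/triton-inference-server/"
        "tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format("main")
    )
    trtllm_buildscript = load_remote_module("trtllm_buildscript", url)
    df = trtllm_buildscript.create_postbuild("main")  # repo tag

Patch 6 then abandons the helper again: per its subject line, the submodule URL permission issue
required rewriting the tensorrt_llm submodule URL (git submodule set-url, then sync) before
git submodule update, so the equivalent Dockerfile commands are inlined into build.py instead.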