From 17e7b6d566bb3bc56c81f38e4577533d1ed39e92 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 10:24:57 -0700
Subject: [PATCH 1/6] TRTLLM backend post release

---
 build.py | 85 ++++++++------------------------------------------
 1 file changed, 12 insertions(+), 73 deletions(-)

diff --git a/build.py b/build.py
index 5ee3f08ee6..305bc237c7 100755
--- a/build.py
+++ b/build.py
@@ -1302,66 +1302,22 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     pip3 install --upgrade numpy && \
     rm -rf /var/lib/apt/lists/*
 """
-    # FIXME: Use the postbuild script here
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        # # FIXME: Update the url
-        # url = "https://gitlab-master.nvidia.com/ftp/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
-        #     backends[be]
-        # )
-
-        # response = requests.get(url)
-        # spec = importlib.util.spec_from_loader(
-        #     "trtllm_buildscript", loader=None, origin=url
-        # )
-        # trtllm_buildscript = importlib.util.module_from_spec(spec)
-        # exec(response.content, trtllm_buildscript.__dict__)
-        # df += trtllm_buildscript.create_postbuild(
-        #     backends[be]  # repo tag
-        # )
-        df += """
-WORKDIR /workspace
-
-# Remove previous TRT installation
-RUN apt-get remove --purge -y tensorrt* libnvinfer*
-RUN pip uninstall -y tensorrt
-
-# Install new version of TRT using the script from TRT-LLM
-RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
-RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
-RUN cd tensorrtllm_backend && git submodule update --init --recursive
-RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
-RUN rm -fr tensorrtllm_backend
-    """.format(
-            backends[be],
-            os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-            os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+        url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
+            backends[be]
         )
-        df += """
-RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
-ENV TRT_ROOT=/usr/local/tensorrt
-
-# Remove TRT contents that are not needed in runtime
-RUN ARCH="$(uname -i)" && \
-    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
-    rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
-    rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
-
-# Install required packages for TRT-LLM models
-RUN python3 -m pip install --upgrade pip && \
-    pip3 install transformers && \
-    pip3 install torch
-
-# Uninstall unused nvidia packages
-RUN if pip freeze | grep -q "nvidia.*"; then \
-    pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
-    fi
-RUN pip cache purge
-
-ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
-"""
+        response = requests.get(url)
+        spec = importlib.util.spec_from_loader(
+            "trtllm_buildscript", loader=None, origin=url
+        )
+        trtllm_buildscript = importlib.util.module_from_spec(spec)
+        exec(response.content, trtllm_buildscript.__dict__)
+        df += trtllm_buildscript.create_postbuild(
+            backends[be]  # repo tag
+        )
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend
@@ -1827,10 +1783,6 @@ def tensorrtllm_prebuild(cmake_script):
     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
-    # Uncomment the patch once moving to the GitHub repo
-    # cmake_script.cmd(
-    #     "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
-    # )
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
 
@@ -1856,20 +1808,7 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
-    # FIXME: Use GitHub repo
-    if be == "tensorrtllm":
-        # cmake_script.gitclone(
-        #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
-        # )
-        cmake_script.cmd(
-            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm".format(
-                tag,
-                os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-                os.environ["REMOVE_ME_TRTLLM_TOKEN"],
-            )
-        )
-    else:
-        cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
+    cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     if be == "tensorrtllm":
         tensorrtllm_prebuild(cmake_script)

From 2c86f6ee53a1d8b23fe183377418b935726b9c8b Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 10:26:10 -0700
Subject: [PATCH 2/6] TRTLLM backend post release

---
 build.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/build.py b/build.py
index 305bc237c7..274fa68292 100755
--- a/build.py
+++ b/build.py
@@ -1315,9 +1315,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         )
         trtllm_buildscript = importlib.util.module_from_spec(spec)
         exec(response.content, trtllm_buildscript.__dict__)
-        df += trtllm_buildscript.create_postbuild(
-            backends[be]  # repo tag
-        )
+        df += trtllm_buildscript.create_postbuild(backends[be])
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend

From e89ee4439b693576206d9f4c798b2cf320ba73b1 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 11:43:29 -0700
Subject: [PATCH 3/6] Update submodule url for permission issue

---
 build.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/build.py b/build.py
index 274fa68292..9a45470570 100755
--- a/build.py
+++ b/build.py
@@ -1785,6 +1785,11 @@ def tensorrtllm_prebuild(cmake_script):
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
 
+    cmake_script.cmd(
+        "cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+    )
+    cmake_script.cmd("cd tensorrtllm_backend && git submodule sync")
+
 
 def backend_build(
     be,

From b30a7951b6ff40d449a9fccc90642fadb275c8e4 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 14:00:10 -0700
Subject: [PATCH 4/6] Update submodule url

---
 build.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/build.py b/build.py
index 9a45470570..de6300396c 100755
--- a/build.py
+++ b/build.py
@@ -1785,11 +1785,6 @@ def tensorrtllm_prebuild(cmake_script):
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
 
-    cmake_script.cmd(
-        "cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
-    )
-    cmake_script.cmd("cd tensorrtllm_backend && git submodule sync")
-
 
 def backend_build(
     be,
@@ -1811,10 +1806,20 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
-    cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     if be == "tensorrtllm":
+        cmake_script.cmd(
+            "git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend tensorrtllm".format(
+                tag
+            )
+        )
+        cmake_script.cmd(
+            "cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+        )
+        cmake_script.cmd("cd tensorrtllm_backend && git submodule sync")
         tensorrtllm_prebuild(cmake_script)
+    else:
+        cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     cmake_script.mkdir(repo_build_dir)
     cmake_script.cwd(repo_build_dir)

From eb8046085e10cd3a515af3133691a813dff19b18 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 14:18:03 -0700
Subject: [PATCH 5/6] Fix up

---
 build.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/build.py b/build.py
index de6300396c..8668a0d4f7 100755
--- a/build.py
+++ b/build.py
@@ -1813,10 +1813,12 @@ def backend_build(
                 tag
             )
         )
+        cmake_script.cmd("cd tensorrtllm")
         cmake_script.cmd(
-            "cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
+            "git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
        )
-        cmake_script.cmd("cd tensorrtllm_backend && git submodule sync")
+        cmake_script.cmd("git submodule sync")
+        cmake_script.cmd("cd ..")
         tensorrtllm_prebuild(cmake_script)
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)

From 5bcc3270dc376b112049eea19c3746a103ca2074 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 19 Oct 2023 15:39:41 -0700
Subject: [PATCH 6/6] Not using postbuild function to workaround submodule url permission issue

---
 build.py | 53 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/build.py b/build.py
index 8668a0d4f7..1c98c2f00a 100755
--- a/build.py
+++ b/build.py
@@ -1305,17 +1305,54 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 """
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
+        # url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
+        #     backends[be]
+        # )
+
+        # response = requests.get(url)
+        # spec = importlib.util.spec_from_loader(
+        #     "trtllm_buildscript", loader=None, origin=url
+        # )
+        # trtllm_buildscript = importlib.util.module_from_spec(spec)
+        # exec(response.content, trtllm_buildscript.__dict__)
+        # df += trtllm_buildscript.create_postbuild(backends[be])
+
+        df += """
+WORKDIR /workspace
+# Remove previous TRT installation
+RUN apt-get remove --purge -y tensorrt* libnvinfer*
+RUN pip uninstall -y tensorrt
+# Install new version of TRT using the script from TRT-LLM
+RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
+RUN git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend.git tensorrtllm_backend
+RUN cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git
+RUN cd tensorrtllm_backend && git submodule sync
+RUN cd tensorrtllm_backend && git submodule update --init --recursive
+RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
+RUN rm -fr tensorrtllm_backend
+    """.format(
             backends[be]
         )
-        response = requests.get(url)
-        spec = importlib.util.spec_from_loader(
-            "trtllm_buildscript", loader=None, origin=url
-        )
-        trtllm_buildscript = importlib.util.module_from_spec(spec)
-        exec(response.content, trtllm_buildscript.__dict__)
-        df += trtllm_buildscript.create_postbuild(backends[be])
+        df += """
+RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
+ENV TRT_ROOT=/usr/local/tensorrt
+# Remove TRT contents that are not needed in runtime
+RUN ARCH="$(uname -i)" && \
+    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
+    rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
+    rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
+# Install required packages for TRT-LLM models
+RUN python3 -m pip install --upgrade pip && \
+    pip3 install transformers && \
+    pip3 install torch
+# Uninstall unused nvidia packages
+RUN if pip freeze | grep -q "nvidia.*"; then \
+    pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
+    fi
+RUN pip cache purge
+ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
+"""
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend
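
Series note: patches 1-2, and the block that patch 6 comments back out, all rely on the same
dynamic-import pattern: fetch tools/gen_trtllm_dockerfile.py from the tensorrtllm_backend repo
at the pinned tag, execute it into a synthetic in-memory module, and call its create_postbuild()
with the repo tag to generate the Dockerfile snippet. Isolated from build.py, the pattern looks
roughly like the sketch below; the load_remote_module helper name and the raise_for_status()
check are illustrative additions, not code from this series:

    import importlib.util

    import requests


    def load_remote_module(name, url):
        # Download the script source; fail loudly on HTTP errors.
        response = requests.get(url)
        response.raise_for_status()
        # Build a bare module spec with no loader, recording the URL as its origin.
        spec = importlib.util.spec_from_loader(name, loader=None, origin=url)
        module = importlib.util.module_from_spec(spec)
        # Execute the downloaded source inside the module's own namespace.
        exec(response.content, module.__dict__)
        return module

Usage mirroring patches 1-2 ("main" here is a placeholder for the backend repo tag):

    url = (
        "https://raw.githubusercontent.com/triton-inference-server/"
        "tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format("main")
    )
    trtllm_buildscript = load_remote_module("trtllm_buildscript", url)
    df = trtllm_buildscript.create_postbuild("main")  # repo tag

Patch 6 then abandons the helper again: per its subject line, the submodule URL permission issue
required rewriting the tensorrt_llm submodule URL (git submodule set-url, then sync) before
git submodule update, so the equivalent Dockerfile commands are inlined into build.py instead.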