Commit 2fc47c8

Merge branch 'main' into zhanga5-fix-sdk-dockerfile

2 parents ded1c20 + 69d768d
50 files changed, +903 -215 lines changed


Dockerfile.sdk (+39 -11)

@@ -29,12 +29,14 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.06-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.07-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
+ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
 ARG TRITON_COMMON_REPO_TAG=main
 ARG TRITON_CORE_REPO_TAG=main
+ARG TRITON_CLIENT_REPO_TAG=main
 ARG TRITON_THIRD_PARTY_REPO_TAG=main
 ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON
@@ -104,8 +106,10 @@ RUN rm -f /usr/bin/python && \
 # Build the client library and examples
 ARG TRITON_REPO_ORGANIZATION
 ARG TRITON_CLIENT_REPO_SUBDIR
+ARG TRITON_PA_REPO_SUBDIR
 ARG TRITON_COMMON_REPO_TAG
 ARG TRITON_CORE_REPO_TAG
+ARG TRITON_CLIENT_REPO_TAG
 ARG TRITON_THIRD_PARTY_REPO_TAG
 ARG TRITON_ENABLE_GPU
 ARG JAVA_BINDINGS_MAVEN_VERSION
@@ -115,26 +119,53 @@ ARG TARGETPLATFORM
 WORKDIR /workspace
 COPY TRITON_VERSION .
 COPY ${TRITON_CLIENT_REPO_SUBDIR} client
+COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer
 
-WORKDIR /workspace/build
+WORKDIR /workspace/client_build
 RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
       -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
       -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
      -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
      -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
+      -DTRITON_ENABLE_PERF_ANALYZER=OFF \
       -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-      -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
+      -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \
       -DTRITON_ENABLE_JAVA_HTTP=ON \
-      -DTRITON_ENABLE_PERF_ANALYZER=ON \
+      -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
+      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
+RUN make -j16 cc-clients java-clients && \
+    rm -fr ~/.m2
+
+# TODO: PA will rebuild the CC clients since it depends on it.
+# This should be optimized so that we do not have to build
+# the CC clients twice. Similarly, because the SDK expectation is
+# that PA is packaged with the python client, we hold off on building
+# the python client until now. Post-migration we should focus
+# effort on de-tangling these flows.
+WORKDIR /workspace/pa_build
+RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+      -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
+      -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
+      -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
+      -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
+      -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
       -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
-      -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
-      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN make -j16 cc-clients python-clients java-clients && \
-    rm -fr ~/.m2
+      -DTRITON_ENABLE_CC_HTTP=ON \
+      -DTRITON_ENABLE_CC_GRPC=ON \
+      -DTRITON_ENABLE_PYTHON_HTTP=ON \
+      -DTRITON_ENABLE_PYTHON_GRPC=ON \
+      -DTRITON_PACKAGE_PERF_ANALYZER=ON \
+      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
+      /workspace/perf_analyzer
+RUN make -j16 perf-analyzer python-clients
+
+RUN pip3 install build \
+    && cd /workspace/perf_analyzer/genai-perf \
+    && python3 -m build --wheel --outdir /workspace/install/python
 
 # Install Java API Bindings
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
@@ -145,9 +176,6 @@ RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
         --jar-install-path /workspace/install/java-api-bindings; \
     fi
 
-RUN pip3 install build \
-    && cd /workspace/client/src/c++/perf_analyzer/genai-perf \
-    && python3 -m build --wheel --outdir /workspace/install/python
 ############################################################################
 ## Create sdk container
 ############################################################################
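
The net effect of this diff: Perf Analyzer is no longer built inside the client build (TRITON_ENABLE_PERF_ANALYZER=OFF), and a second cmake pass in /workspace/pa_build builds PA from the new perf_analyzer checkout, packaging it with the Python clients and the genai-perf wheel. A minimal sketch of building the SDK image with the new arguments, assuming local client/ and perf_analyzer/ checkouts sit next to Dockerfile.sdk; the subdirectory names and image tag below are assumptions, not part of the commit:

```bash
# Sketch only: subdir names and the tag are placeholders for illustration.
docker build -f Dockerfile.sdk \
    --build-arg TRITON_CLIENT_REPO_SUBDIR=client \
    --build-arg TRITON_PA_REPO_SUBDIR=perf_analyzer \
    --build-arg TRITON_CLIENT_REPO_TAG=main \
    -t tritonserver-sdk:local .
```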

Dockerfile.win10.min (+10 -10)

@@ -1,4 +1,4 @@
-# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -37,9 +37,9 @@ RUN choco install unzip -y
 #
 # Installing TensorRT
 #
-ARG TENSORRT_VERSION=10.0.1.6
-ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.4.zip"
-ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip
+ARG TENSORRT_VERSION=10.2.0.19
+ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.5.zip"
+ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/zip/TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5.zip
 # COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
 ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
 RUN unzip /tmp/%TENSORRT_ZIP%
@@ -51,9 +51,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 #
 # Installing cuDNN
 #
-ARG CUDNN_VERSION=9.1.0.70
+ARG CUDNN_VERSION=9.2.1.18
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
-ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.1.0.70_cuda12-archive.zip
+ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.2.1.18_cuda12-archive.zip
 ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
 RUN unzip /tmp/%CUDNN_ZIP%
 RUN move cudnn-* cudnn
@@ -88,7 +88,7 @@ LABEL PYTHON_VERSION=${PYTHON_VERSION}
 #
 # Installing CMake
 #
-ARG CMAKE_VERSION=3.29.3
+ARG CMAKE_VERSION=3.30.0
 RUN pip install cmake==%CMAKE_VERSION%
 
 ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
@@ -150,7 +150,7 @@ WORKDIR /
 #
 ARG CUDA_MAJOR=12
 ARG CUDA_MINOR=5
-ARG CUDA_PATCH=0
+ARG CUDA_PATCH=1
 ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
 ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
         cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
@@ -175,15 +175,15 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi
 
 RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
 
-ARG CUDNN_VERSION=9.1.0.70
+ARG CUDNN_VERSION=9.2.1.18
 ENV CUDNN_VERSION ${CUDNN_VERSION}
 COPY --from=dependency_base /cudnn /cudnn
 RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
 RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
 RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
 LABEL CUDNN_VERSION="${CUDNN_VERSION}"
 
-ARG TENSORRT_VERSION=10.0.1.6
+ARG TENSORRT_VERSION=10.2.0.19
 ENV TRT_VERSION ${TENSORRT_VERSION}
 COPY --from=dependency_base /TensorRT /TensorRT
 RUN setx PATH "c:\TensorRT\lib;%PATH%"
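
Because this Dockerfile records the dependency versions as image labels (LABEL TENSORRT_VERSION and LABEL CUDNN_VERSION), the bumps can be spot-checked on a built image. A sketch, assuming a hypothetical local tag triton-win10-min:local:

```bash
# Hypothetical tag; after this change, expect 10.2.0.19 and 9.2.1.18.
docker inspect --format '{{ index .Config.Labels "TENSORRT_VERSION" }}' triton-win10-min:local
docker inspect --format '{{ index .Config.Labels "CUDNN_VERSION" }}' triton-win10-min:local
```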

README.md (+9 -8)

@@ -1,5 +1,5 @@
 <!--
-# Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -30,10 +30,11 @@
 
 [![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
 
-> [!WARNING]
-> ##### LATEST RELEASE
-> You are currently on the `main` branch which tracks under-development progress towards the next release.
-> The current release is version [2.47.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.06 container release on NVIDIA GPU Cloud (NGC).
+[!WARNING]
+
+##### LATEST RELEASE
+You are currently on the `main` branch which tracks under-development progress towards the next release.
+The current release is version [2.48.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.07 container release on NVIDIA GPU Cloud (NGC).
 
 Triton Inference Server is an open source inference serving software that
 streamlines AI inferencing. Triton enables teams to deploy any AI model from
@@ -91,16 +92,16 @@ Inference Server with the
 
 ```bash
 # Step 1: Create the example model repository
-git clone -b r24.06 https://github.com/triton-inference-server/server.git
+git clone -b r24.07 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.06-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.07-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.06-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.07-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following

TRITON_VERSION (+1 -1)

@@ -1 +1 @@
-2.48.0dev
+2.49.0dev

build.py (+28 -15)

@@ -69,14 +69,14 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    "2.48.0dev": (
-        "24.06dev",  # triton container
-        "24.06",  # upstream container
+    "2.49.0dev": (
+        "24.08dev",  # triton container
+        "24.07",  # upstream container
         "1.18.1",  # ORT
         "2024.0.0",  # ORT OpenVINO
         "2024.0.0",  # Standalone OpenVINO
         "3.2.6",  # DCGM version
-        "0.5.0.post1",  # vLLM version
+        "0.5.3.post1",  # vLLM version
     )
 }
@@ -1086,18 +1086,23 @@ def create_dockerfile_linux(
 # Remove contents that are not needed in runtime
 # Setuptools has breaking changes in version 70.0.0, so fix it to 69.5.1
 # The generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.0, so fix it to 1.64.0
-RUN ldconfig && \
-    ARCH="$(uname -i)" && \
-    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
-    rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
-    rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples && \
-    python3 -m pip install --upgrade pip && \
-    pip3 install --no-cache-dir transformers && \
-    find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf && \
-    find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf && \
-    pip3 install --no-cache-dir setuptools==69.5.1 grpcio-tools==1.64.0
-
+RUN ldconfig && \\
+    ARCH="$(uname -i)" && \\
+    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \\
+    rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \\
+    rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples && \\
+    python3 -m pip install --upgrade pip && \\
+    pip3 install --no-cache-dir transformers && \\
+    find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf && \\
+    find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf && \\
+    pip3 install --no-cache-dir grpcio-tools==1.64.0 && \\
+    pip3 uninstall -y setuptools
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
+
+# There are some ucc issues when spawning mpi processes with ompi v4.1.7a1.
+# Downgrade to ompi v4.1.5rc2 to avoid the issue.
+RUN rm -fr /opt/hpcx/ompi
+COPY --from=nvcr.io/nvidia/tritonserver:24.02-py3-min /opt/hpcx/ompi /opt/hpcx/ompi
 """
     with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
         dfile.write(df)
@@ -1229,6 +1234,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
             virtualenv \\
             && rm -rf /var/lib/apt/lists/*
 """
+    if "tensorrtllm" in backends:
+        df += """
+# Updating the openssh-client to fix for the CVE-2024-6387. This can be removed when trtllm uses a later CUDA container(12.5 or later)
+RUN apt-get update \\
+    && apt-get install -y --no-install-recommends \\
+        openssh-client \\
+    && rm -rf /var/lib/apt/lists/*
+"""
 
     if "vllm" in backends:
         df += """
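
Beyond the version-map bump, this diff pins grpcio-tools, removes setuptools from the generated TensorRT-LLM image, downgrades ompi to avoid ucc issues, and updates openssh-client for CVE-2024-6387. A sketch of spot-checking a built image; the tritonserver:local tag is an assumption, and the checks only apply to an image built with the vllm and tensorrtllm backends:

```bash
# All names below are assumptions for illustration.
docker run --rm tritonserver:local pip3 show vllm | grep '^Version'  # expect 0.5.3.post1
docker run --rm tritonserver:local ls /opt/hpcx/ompi                 # ompi tree copied from 24.02-py3-min
docker run --rm tritonserver:local ssh -V                            # updated openssh-client
```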

deploy/aws/values.yaml (+3 -3)

@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
@@ -38,4 +38,4 @@ service:
 secret:
   region: AWS_REGION
   id: AWS_SECRET_KEY_ID
-  key: AWS_SECRET_ACCESS_KEY
\ No newline at end of file
+  key: AWS_SECRET_ACCESS_KEY
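
A sketch of rolling the new image out with this chart, assuming an existing Helm release created from deploy/aws; the release name "example" is a placeholder, not part of the commit:

```bash
# Upgrade a hypothetical release to the 24.07 image defined in values.yaml.
helm upgrade example ./deploy/aws \
    --set image.imageName=nvcr.io/nvidia/tritonserver:24.07-py3
```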

deploy/fleetcommand/Chart.yaml (+2 -2)

@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.47.0"
+appVersion: "2.48.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart

deploy/fleetcommand/values.yaml (+4 -4)

@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
   #
   # To set model control mode, uncomment and configure below
   # TODO: Fix the following url, it is invalid
-  # See https://github.com/triton-inference-server/server/blob/r24.06/docs/model_management.md
+  # See https://github.com/triton-inference-server/server/blob/r24.07/docs/model_management.md
   # for more details
   #- --model-control-mode=explicit|poll|none
   #
   # Additional server args
   #
-  # see https://github.com/triton-inference-server/server/blob/r24.06/README.md
+  # see https://github.com/triton-inference-server/server/blob/r24.07/README.md
   # for more details
 
 service:

deploy/gcp/values.yaml (+3 -3)

@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -27,10 +27,10 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
 
 service:
-  type: LoadBalancer
\ No newline at end of file
+  type: LoadBalancer

deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml (+2 -2)

@@ -1,4 +1,4 @@
-# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:24.06-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:24.07-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
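
A sketch of exercising the updated client pod; the pod name comes from metadata not shown in this hunk, so it is left as a placeholder, and the perf_analyzer invocation assumes the tool is on PATH in the SDK image:

```bash
kubectl apply -f triton_client.yaml
kubectl get pods -n default
# Exec into the SDK client container (container name from the manifest above).
kubectl exec -it <pod-name> -c nv-triton-client -- perf_analyzer --help
```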
