Commit 8d5c7df

Update README and versions for 23.09 branch (#6280)

1 parent 6930d00 · commit 8d5c7df

24 files changed: +50 -56 lines changed

Dockerfile.sdk (+1 -1)

@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.08-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.09-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main

README.md (+3 -9)

@@ -30,12 +30,6 @@
 
 [![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
 
-**LATEST RELEASE: You are currently on the main branch which tracks
-under-development progress towards the next release. The current release is
-version [2.37.0](https://github.com/triton-inference-server/server/tree/r23.08)
-and corresponds to the 23.08 container release on
-[NVIDIA GPU Cloud (NGC)](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver).**
-
 ----
 Triton Inference Server is an open source inference serving software that
 streamlines AI inferencing. Triton enables teams to deploy any AI model from
@@ -91,16 +85,16 @@ Inference Server with the
 
 ```bash
 # Step 1: Create the example model repository
-git clone -b r23.08 https://github.com/triton-inference-server/server.git
+git clone -b r23.09 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.08-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.09-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.08-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.09-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following

TRITON_VERSION (+1 -1)

@@ -1 +1 @@
-2.38.0dev
+2.38.0
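
Dropping the `dev` suffix here means the version file and the `TRITON_VERSION_MAP` key in build.py (the next diff) must agree. A minimal consistency-check sketch, run from the repository root; this helper is an assumption for illustration, not part of the commit:

```bash
# Hypothetical sanity check: verify build.py has a TRITON_VERSION_MAP entry
# matching the version recorded in the TRITON_VERSION file (expected "2.38.0").
version=$(cat TRITON_VERSION)
if grep -q "\"${version}\":" build.py; then
    echo "build.py defines a TRITON_VERSION_MAP entry for ${version}"
else
    echo "No TRITON_VERSION_MAP entry for ${version}" >&2
    exit 1
fi
```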

build.py (+4 -4)

@@ -67,10 +67,10 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    "2.38.0dev": (
-        "23.09dev", # triton container
-        "23.08", # upstream container
-        "1.15.1", # ORT
+    "2.38.0": (
+        "23.09", # triton container
+        "23.09", # upstream container
+        "1.16.0", # ORT
         "2023.0.0", # ORT OpenVINO
         "2023.0.0", # Standalone OpenVINO
         "2.4.7", # DCGM version

deploy/aws/values.yaml (+1 -1)

@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.08-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.09-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1

deploy/fleetcommand/Chart.yaml (+1 -1)

@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.37.0"
+appVersion: "2.38.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart

deploy/fleetcommand/values.yaml (+3 -3)

@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.08-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.09-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
 # Model Control Mode (Optional, default: none)
 #
 # To set model control mode, uncomment and configure below
-# See https://github.com/triton-inference-server/server/blob/r23.08/docs/model_management.md
+# See https://github.com/triton-inference-server/server/blob/r23.09/docs/model_management.md
 # for more details
 #- --model-control-mode=explicit|poll|none
 #
 # Additional server args
 #
-# see https://github.com/triton-inference-server/server/blob/r23.08/README.md
+# see https://github.com/triton-inference-server/server/blob/r23.09/README.md
 # for more details
 
 service:

deploy/gcp/values.yaml (+1 -1)

@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.08-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.09-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1

deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml (+1 -1)

@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:23.08-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:23.09-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:

deploy/gke-marketplace-app/server-deployer/build_and_push.sh (+3 -3)

@@ -27,9 +27,9 @@
 
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
-export MAJOR_VERSION=2.37
-export MINOR_VERSION=2.37.0
-export NGC_VERSION=23.08-py3
+export MAJOR_VERSION=2.38
+export MINOR_VERSION=2.38.0
+export NGC_VERSION=23.09-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION

deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml (+2 -2)

@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 apiVersion: v1
-appVersion: "2.37"
+appVersion: "2.38"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.37.0
+version: 2.38.0

deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml (+3 -3)

@@ -31,14 +31,14 @@ maxReplicaCount: 3
 tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
-modelRepositoryPath: gs://triton_sample_models/23_08
-publishedVersion: '2.37.0'
+modelRepositoryPath: gs://triton_sample_models/23_09
+publishedVersion: '2.38.0'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 23.08-py3
+  tag: 23.09-py3
   pullPolicy: IfNotPresent
 # modify the model repository here to match your GCP storage bucket
 numGpus: 1

deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml (+1 -1)

@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.37.0'
+  publishedVersion: '2.38.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.

deploy/gke-marketplace-app/server-deployer/schema.yaml (+2 -2)

@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.37.0'
+  publishedVersion: '2.38.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
@@ -89,7 +89,7 @@ properties:
   modelRepositoryPath:
     type: string
    title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
-    default: gs://triton_sample_models/23_08
+    default: gs://triton_sample_models/23_09
   image.ldPreloadPath:
     type: string
     title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.

deploy/gke-marketplace-app/trt-engine/README.md (+3 -3)

@@ -33,7 +33,7 @@
 ```
 docker run --gpus all -it --network host \
 --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
--v ~:/scripts nvcr.io/nvidia/tensorrt:23.08-py3
+-v ~:/scripts nvcr.io/nvidia/tensorrt:23.09-py3
 
 pip install onnx six torch tf2onnx tensorflow
 
@@ -57,7 +57,7 @@ mkdir -p engines
 
 python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh
 
-gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/23_08/bert/1/model.plan
+gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/23_09/bert/1/model.plan
 ```
 
-For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/23_08/` should be updated accordingly with the correct version.
+For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/23_09/` should be updated accordingly with the correct version.

deploy/k8s-onprem/values.yaml (+1 -1)

@@ -29,7 +29,7 @@ tags:
   loadBalancing: true
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.08-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.09-py3
   pullPolicy: IfNotPresent
   modelRepositoryServer: < Replace with the IP Address of your file server >
   modelRepositoryPath: /srv/models

docs/customization_guide/build.md (+3 -3)

@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container ta
 
 If you are building on a release branch then `<container tag>` will
 default to the branch name. For example, if you are building on the
-r23.08 branch, `<container tag>` will default to r23.08. If you are
+r23.09 branch, `<container tag>` will default to r23.09. If you are
 building on any other branch (including the *main* branch) then
 `<container tag>` will default to "main". Therefore, you typically do
 not need to provide `<container tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=<path/to/repo>/build --build-dir=/tmp/citritonbuild
 If you are building on *main* branch then '<container tag>' will
 default to "main". If you are building on a release branch then
 '<container tag>' will default to the branch name. For example, if you
-are building on the r23.08 branch, '<container tag>' will default to
-r23.08. Therefore, you typically do not need to provide '<container
+are building on the r23.09 branch, '<container tag>' will default to
+r23.09. Therefore, you typically do not need to provide '<container
 tag>' at all (nor the preceding colon). You can use a different
 '<container tag>' for a component to instead use the corresponding
 branch/tag in the build. For example, if you have a branch called
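
Relating to the excerpt above: if you prefer to pin the tags rather than rely on the branch-name default, a sketch of an explicit invocation (combining the flags shown in the two hunk contexts, with the r23.09 branch assumed) might look like:

```bash
# Sketch only: pin the common and core component repos to the release branch
# instead of letting <container tag> default to the current branch name.
python build.py --cmake-dir=<path/to/repo>/build --build-dir=/tmp/citritonbuild \
    --repo-tag=common:r23.09 --repo-tag=core:r23.09
```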

docs/customization_guide/compose.md (+7 -7)

@@ -44,8 +44,8 @@ from source to get more exact customization.
 The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server).
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
-For example branch [r23.08](https://github.com/triton-inference-server/server/tree/r23.08)
-should be used to create a image based on the NGC 23.08 Triton release.
+For example branch [r23.09](https://github.com/triton-inference-server/server/tree/r23.09)
+should be used to create a image based on the NGC 23.09 Triton release.
 
 `compose.py` provides `--backend`, `--repoagent` options that allow you to
 specify which backends and repository agents to include in the custom image.
@@ -76,19 +76,19 @@ For example, running
 ```
 python3 compose.py --backend tensorflow1 --repoagent checksum
 ```
-on branch [r23.08](https://github.com/triton-inference-server/server/tree/r23.08) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:23.08-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:23.08-py3`
+on branch [r23.09](https://github.com/triton-inference-server/server/tree/r23.09) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:23.09-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:23.09-py3`
 
 Alternatively, users can specify the version of Triton container to pull from any branch by either:
 1. Adding flag `--container-version <container version>` to branch
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 23.08
+python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 23.09
 ```
 2. Specifying `--image min,<min container image name> --image full,<full container image name>`.
 The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:23.08-py3-min --image full,nvcr.io/nvidia/tritonserver:23.08-py3
+python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:23.09-py3-min --image full,nvcr.io/nvidia/tritonserver:23.09-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore, `--image` flag overrides the `--container-version` flag when both are specified.
docs/customization_guide/test.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops
4949
```
5050

5151
This will create multiple model repositories in /tmp/<version>/qa_*
52-
(for example /tmp/23.08/qa_model_repository). The TensorRT models
52+
(for example /tmp/23.09/qa_model_repository). The TensorRT models
5353
will be created for the GPU on the system that CUDA considers device 0
5454
(zero). If you have multiple GPUs on your system see the documentation
5555
in the scripts for how to target a specific GPU.

docs/user_guide/custom_operations.md (+3 -3)

@@ -64,7 +64,7 @@ simple way to ensure you are using the correct version of TensorRT is
 to use the [NGC TensorRT
 container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt)
 corresponding to the Triton container. For example, if you are using
-the 23.08 version of Triton, use the 23.08 version of the TensorRT
+the 23.09 version of Triton, use the 23.09 version of the TensorRT
 container.
 
 ## TensorFlow
@@ -123,7 +123,7 @@ simple way to ensure you are using the correct version of TensorFlow
 is to use the [NGC TensorFlow
 container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
 corresponding to the Triton container. For example, if you are using
-the 23.08 version of Triton, use the 23.08 version of the TensorFlow
+the 23.09 version of Triton, use the 23.09 version of the TensorFlow
 container.
 
 ## PyTorch
@@ -167,7 +167,7 @@ simple way to ensure you are using the correct version of PyTorch is
 to use the [NGC PyTorch
 container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch)
 corresponding to the Triton container. For example, if you are using
-the 23.08 version of Triton, use the 23.08 version of the PyTorch
+the 23.09 version of Triton, use the 23.09 version of the PyTorch
 container.
 
 ## ONNX

docs/user_guide/performance_tuning.md (+2 -2)

@@ -235,7 +235,7 @@ with a `tritonserver` binary.
 
 ```bash
 # Start server container
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:23.08-py3
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:23.09-py3
 
 # Start serving your models
 tritonserver --model-repository=/mnt/models
@@ -284,7 +284,7 @@ by setting the `-u` flag, such as `perf_analyzer -m densenet_onnx -u
 
 ```bash
 # Start the SDK container interactively
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:23.08-py3-sdk
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:23.09-py3-sdk
 
 # Benchmark model being served from step 3
 perf_analyzer -m densenet_onnx --concurrency-range 1:4

qa/common/gen_jetson_trt_models (+1 -1)

@@ -50,7 +50,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.08}
+TRITON_VERSION=${TRITON_VERSION:=23.09}
 CUDA_DEVICE=${RUNNER_ID:=0}
 
 HOST_BUILD_DIR=${HOST_BUILD_DIR:=/tmp}

qa/common/gen_qa_custom_ops (+1 -1)

@@ -37,7 +37,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.08}
+TRITON_VERSION=${TRITON_VERSION:=23.09}
 NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION}
 TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3}
 PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3}

qa/common/gen_qa_model_repository (+1 -1)

@@ -48,7 +48,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.08}
+TRITON_VERSION=${TRITON_VERSION:=23.09}
 
 # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version
 ONNX_VERSION=1.13.0
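
The three QA scripts above all use the `${TRITON_VERSION:=23.09}` default-assignment idiom, so the baked-in 23.09 default can still be overridden from the environment. A usage sketch, assuming the scripts are invoked from `qa/common`:

```bash
# Use the new 23.09 default baked into the script ...
./gen_qa_model_repository

# ... or override TRITON_VERSION for a one-off run against another release.
TRITON_VERSION=23.08 ./gen_qa_model_repository
```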
