From 4652f7aedea0ee4fdd0c235828fa42935f547960 Mon Sep 17 00:00:00 2001
From: pvijayakrish
Date: Tue, 23 Jul 2024 13:22:09 -0700
Subject: [PATCH 01/12] Update NGC versions post-24.07 release

---
 README.md | 8 ++++----
 build.py | 6 +++---
 deploy/aws/values.yaml | 4 ++--
 deploy/fleetcommand/Chart.yaml | 2 +-
 deploy/fleetcommand/values.yaml | 6 +++---
 deploy/gcp/values.yaml | 4 ++--
 .../perf-analyzer-script/triton_client.yaml | 2 +-
 .../server-deployer/build_and_push.sh | 6 +++---
 .../server-deployer/chart/triton/Chart.yaml | 4 ++--
 .../server-deployer/chart/triton/values.yaml | 8 ++++----
 .../server-deployer/data-test/schema.yaml | 2 +-
 .../server-deployer/schema.yaml | 2 +-
 .../gke-marketplace-app/trt-engine/README.md | 2 +-
 deploy/k8s-onprem/values.yaml | 4 ++--
 deploy/oci/values.yaml | 4 ++--
 docs/customization_guide/build.md | 6 +++---
 docs/customization_guide/compose.md | 18 +++++++++---------
 docs/customization_guide/test.md | 2 +-
 docs/generate_docs.py | 4 ++--
 docs/user_guide/custom_operations.md | 6 +++---
 docs/user_guide/performance_tuning.md | 4 ++--
 qa/common/gen_jetson_trt_models | 2 +-
 qa/common/gen_qa_custom_ops | 2 +-
 qa/common/gen_qa_model_repository | 2 +-
 24 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index 38b4759c48..dcf0a3420f 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@
 > [!WARNING]
 > ##### LATEST RELEASE
 > You are currently on the `main` branch which tracks under-development progress towards the next release.
-> The current release is version [2.47.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.06 container release on NVIDIA GPU Cloud (NGC).
+> The current release is version [2.48.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.07 container release on NVIDIA GPU Cloud (NGC).
 
 Triton Inference Server is an open source inference serving software that
 streamlines AI inferencing. Triton enables teams to deploy any AI model from
@@ -91,16 +91,16 @@ Inference Server with the
 
 ```bash
 # Step 1: Create the example model repository
-git clone -b r24.06 https://github.com/triton-inference-server/server.git
+git clone -b r24.07 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.06-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.07-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.06-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.07-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following
diff --git a/build.py b/build.py
index 24bde0f3a4..7b59680151 100755
--- a/build.py
+++ b/build.py
@@ -69,9 +69,9 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    "2.48.0dev": (
-        "24.06dev",  # triton container
-        "24.06",  # upstream container
+    "2.49.0dev": (
+        "24.07",  # triton container
+        "24.07",  # upstream container
         "1.18.1",  # ORT
         "2024.0.0",  # ORT OpenVINO
         "2024.0.0",  # Standalone OpenVINO
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
index 7fd88c5a04..85c70799af 100644
--- a/deploy/aws/values.yaml
+++ b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
@@ -38,4 +38,4 @@ service:
 secret:
   region: AWS_REGION
   id: AWS_SECRET_KEY_ID
-  key: AWS_SECRET_ACCESS_KEY
\ No newline at end of file
+  key: AWS_SECRET_ACCESS_KEY
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
index cca541167c..a9668dcf4c 100644
--- a/deploy/fleetcommand/Chart.yaml
+++ b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.47.0"
+appVersion: "2.48.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
index 3f7d95ea45..9b86a5a495 100644
--- a/deploy/fleetcommand/values.yaml
+++ b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
    #
    # To set model control mode, uncomment and configure below
    # TODO: Fix the following url, it is invalid
-   # See https://github.com/triton-inference-server/server/blob/r24.06/docs/model_management.md
+   # See https://github.com/triton-inference-server/server/blob/r24.07/docs/model_management.md
    # for more details
    #- --model-control-mode=explicit|poll|none
    #
    # Additional server args
    #
-   # see https://github.com/triton-inference-server/server/blob/r24.06/README.md
+   # see https://github.com/triton-inference-server/server/blob/r24.07/README.md
    # for more details
 
 service:
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
index cd45058c9d..6e53efc103 100644
--- a/deploy/gcp/values.yaml
+++ b/deploy/gcp/values.yaml
@@ -27,10 +27,10 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
 
 service:
-  type: LoadBalancer
\ No newline at end of file
+  type: LoadBalancer
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
index ddbfeeda1f..d101fa910b 100644
--- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:24.06-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:24.07-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
    securityContext:
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
index 04b7eb9b7f..1091d961b4 100755
--- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -27,9 +27,9 @@
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
-export MAJOR_VERSION=2.45
-export MINOR_VERSION=2.45.0
-export NGC_VERSION=24.06-py3
+export MAJOR_VERSION=2.48
+export MINOR_VERSION=2.48.0
+export NGC_VERSION=24.07-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
 
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
index 356f25efa3..295271466d 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 apiVersion: v1
-appVersion: "2.47"
+appVersion: "2.48"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.47.0
+version: 2.48.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
index ef88d0109b..3d460f8aa0 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -31,14 +31,14 @@ maxReplicaCount: 3
 tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
-modelRepositoryPath: gs://triton_sample_models/24.06
-publishedVersion: '2.47.0'
+modelRepositoryPath: gs://triton_sample_models/24.07
+publishedVersion: '2.48.0'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 24.06-py3
+  tag: 24.07-py3
   pullPolicy: IfNotPresent
   # modify the model repository here to match your GCP storage bucket
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
index 979bfe15a9..8136ad5834 100644
--- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.47.0'
+  publishedVersion: '2.48.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
index 43ad5c8535..1324f4cc3d 100644
--- a/deploy/gke-marketplace-app/server-deployer/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.47.0'
+  publishedVersion: '2.48.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md
index 6fc22d1e9a..b23b405cb9 100644
--- a/deploy/gke-marketplace-app/trt-engine/README.md
+++ b/deploy/gke-marketplace-app/trt-engine/README.md
@@ -33,7 +33,7 @@
 ```
 docker run --gpus all -it --network host \
   --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-  -v ~:/scripts nvcr.io/nvidia/tensorrt:24.06-py3
+  -v ~:/scripts nvcr.io/nvidia/tensorrt:24.07-py3
 
 pip install onnx six torch tf2onnx tensorflow
 
diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml
index f3e275c196..23d5b2581c 100644
--- a/deploy/k8s-onprem/values.yaml
+++ b/deploy/k8s-onprem/values.yaml
@@ -29,7 +29,7 @@ tags:
 loadBalancing: true
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryServer: < Replace with the IP Address of your file server >
   modelRepositoryPath: /srv/models
@@ -80,4 +80,4 @@ prometheus-adapter:
       name:
         matches: "nv_inference_queue_duration_us"
         as: "avg_time_queue_us"
-      metricsQuery: 'avg(delta(nv_inference_queue_duration_us{<<.LabelMatchers>>}[30s])/(1+delta(nv_inference_request_success{<<.LabelMatchers>>}[30s]))) by (<<.GroupBy>>)'
\ No newline at end of file
+      metricsQuery: 'avg(delta(nv_inference_queue_duration_us{<<.LabelMatchers>>}[30s])/(1+delta(nv_inference_request_success{<<.LabelMatchers>>}[30s]))) by (<<.GroupBy>>)'
diff --git a/deploy/oci/values.yaml b/deploy/oci/values.yaml
index 466bb18a3b..045d1c6768 100644
--- a/deploy/oci/values.yaml
+++ b/deploy/oci/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://https://.compat.objectstorage..oraclecloud.com:443/triton-inference-server-repository
   numGpus: 1
@@ -38,4 +38,4 @@ service:
 secret:
   region: OCI_REGION
   id: OCI_SECRET_KEY_ID
-  key: OCI_SECRET_ACCESS_KEY
\ No newline at end of file
+  key: OCI_SECRET_ACCESS_KEY
diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md
index bdcdad09ec..b5fe0dbf43 100644
--- a/docs/customization_guide/build.md
+++ b/docs/customization_guide/build.md
@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container tag>
 
 If you are building on a release branch then `<container tag>` will
 default to the branch name. For example, if you are
-building on the r24.06 branch, `<container tag>` will default to r24.06. If you are
+building on the r24.07 branch, `<container tag>` will default to r24.07. If you are
 building on any other branch (including the *main* branch) then
 `<container tag>` will default to "main". Therefore, you typically do
 not need to provide `<container tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=/build --build-dir=/tmp/citritonbuild
 If you are building on *main* branch then '<container tag>' will
 default to "main". If you are building on a release branch then
 '<container tag>' will default to the branch name. For example, if you
-are building on the r24.06 branch, '<container tag>' will default to
-r24.06. Therefore, you typically do not need to provide '<container
+are building on the r24.07 branch, '<container tag>' will default to
+r24.07. Therefore, you typically do not need to provide '<container
 tag>' at all (nor the preceding colon). You can use a different
 '<container tag>' for a component to instead use the corresponding
 branch/tag in the build. For example, if you have a branch called
diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
index 40562603bf..48ecea5ea7 100644
--- a/docs/customization_guide/compose.md
+++ b/docs/customization_guide/compose.md
@@ -46,8 +46,8 @@ The `compose.py` script can be found in the
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
 For example branch
- [r24.06](https://github.com/triton-inference-server/server/tree/r24.06)
-should be used to create a image based on the NGC 24.06 Triton release.
+ [r24.07](https://github.com/triton-inference-server/server/tree/r24.07)
+should be used to create a image based on the NGC 24.07 Triton release.
 
 `compose.py` provides `--backend`, `--repoagent` options that allow you to
 specify which backends and repository agents to include in the custom image.
@@ -79,20 +79,20 @@ For example, running
 ```
 python3 compose.py --backend pytorch --repoagent checksum
 ```
-on branch [r24.06](https://github.com/triton-inference-server/server/tree/r24.06) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:24.06-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:24.06-py3`
+on branch [r24.07](https://github.com/triton-inference-server/server/tree/r24.07) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:24.07-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:24.07-py3`
 
 Alternatively, users can specify the version of Triton container
 to pull from any branch by either:
 1. Adding flag `--container-version <container_version>` to branch
 ```
-python3 compose.py --backend pytorch --repoagent checksum --container-version 24.06
+python3 compose.py --backend pytorch --repoagent checksum --container-version 24.07
 ```
 2. Specifying `--image min,<min_container_image> --image full,<full_container_image>`.
    The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.06-py3-min --image full,nvcr.io/nvidia/tritonserver:24.06-py3
+python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.07-py3-min --image full,nvcr.io/nvidia/tritonserver:24.07-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore,
 `--image` flag overrides the `--container-version` flag when both are specified.
@@ -103,8 +103,8 @@ Note:
 2. vLLM and TensorRT-LLM backends are currently not supported backends for
 `compose.py`. If you want to build additional backends on top of these backends,
 it would be better to [build it yourself](#build-it-yourself) by using
-`nvcr.io/nvidia/tritonserver:24.06-vllm-python-py3` or
-`nvcr.io/nvidia/tritonserver:24.06-trtllm-python-py3` as a `min` container.
+`nvcr.io/nvidia/tritonserver:24.07-vllm-python-py3` or
+`nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3` as a `min` container.
 
 ### CPU-only container composition
 
diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md
index a1b10dcf35..7ee68fd6b2 100644
--- a/docs/customization_guide/test.md
+++ b/docs/customization_guide/test.md
@@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops
 ```
 
 This will create multiple model repositories in /tmp/<version>/qa_*
-(for example /tmp/24.06/qa_model_repository). The TensorRT models
+(for example /tmp/24.07/qa_model_repository). The TensorRT models
 will be created for the GPU on the system that CUDA considers device 0 (zero).
 If you have multiple GPUs on your system see the documentation
 in the scripts for how to target a specific GPU.
diff --git a/docs/generate_docs.py b/docs/generate_docs.py
index 9c7dd5931e..1cc6644fde 100755
--- a/docs/generate_docs.py
+++ b/docs/generate_docs.py
@@ -43,11 +43,11 @@
 """
 TODO: Needs to handle cross-branch linkage.
-For example, server/docs/user_guide/architecture.md on branch 24.06 links to
+For example, server/docs/user_guide/architecture.md on branch 24.07 links to
 server/docs/user_guide/model_analyzer.md on main branch. In this case, the
 hyperlink of model_analyzer.md should be a URL instead of relative path.
-Another example can be server/docs/user_guide/model_analyzer.md on branch 24.06
+Another example can be server/docs/user_guide/model_analyzer.md on branch 24.07
 links to a file in server repo with relative path. Currently all URLs are
 hardcoded to main branch. We need to make sure that the URL actually points to
 the correct branch. We also need to handle cases like deprecated or removed files from
diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md
index 89e6216011..263d013ff2 100644
--- a/docs/user_guide/custom_operations.md
+++ b/docs/user_guide/custom_operations.md
@@ -64,7 +64,7 @@ simple way to ensure you are using the correct version of TensorRT is
 to use the
 [NGC TensorRT container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt)
 corresponding to the Triton container. For example, if you are using
-the 24.06 version of Triton, use the 24.06 version of the TensorRT
+the 24.07 version of Triton, use the 24.07 version of the TensorRT
 container.
 
 ## TensorFlow
@@ -123,7 +123,7 @@ simple way to ensure you are using the correct version of TensorFlow is
 to use the
 [NGC TensorFlow container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
 corresponding to the Triton container. For example, if you are using
-the 24.06 version of Triton, use the 24.06 version of the TensorFlow
+the 24.07 version of Triton, use the 24.07 version of the TensorFlow
 container.
 
 ## PyTorch
@@ -167,7 +167,7 @@ simple way to ensure you are using the correct version of PyTorch is
 to use the
 [NGC PyTorch container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch)
 corresponding to the Triton container. For example, if you are using
-the 24.06 version of Triton, use the 24.06 version of the PyTorch
+the 24.07 version of Triton, use the 24.07 version of the PyTorch
 container.
 
 ## ONNX
diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md
index f67e238c6d..2a3ee09d2a 100644
--- a/docs/user_guide/performance_tuning.md
+++ b/docs/user_guide/performance_tuning.md
@@ -235,7 +235,7 @@ with a `tritonserver` binary.
 
 ```bash
 # Start server container
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.06-py3
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.07-py3
 
 # Start serving your models
 tritonserver --model-repository=/mnt/models
@@ -284,7 +284,7 @@ by setting the `-u` flag, such as `perf_analyzer -m densenet_onnx -u
 
 ```bash
 # Start the SDK container interactively
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.06-py3-sdk
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.07-py3-sdk
 
 # Benchmark model being served from step 3
 perf_analyzer -m densenet_onnx --concurrency-range 1:4
diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models
index 8c5a74a3ec..160282240c 100755
--- a/qa/common/gen_jetson_trt_models
+++ b/qa/common/gen_jetson_trt_models
@@ -34,7 +34,7 @@
 # Make all generated files accessible outside of container
 umask 0000
 # Set the version of the models
-TRITON_VERSION=${TRITON_VERSION:=24.06}
+TRITON_VERSION=${TRITON_VERSION:=24.07}
 # Set the CUDA device to use
 CUDA_DEVICE=${RUNNER_ID:=0}
 # Set TensorRT image
diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops
index d8ca748f8a..d8a41e9f55 100755
--- a/qa/common/gen_qa_custom_ops
+++ b/qa/common/gen_qa_custom_ops
@@ -37,7 +37,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=24.06}
+TRITON_VERSION=${TRITON_VERSION:=24.07}
 NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION}
 TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3}
 PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3}
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 96131107d3..cab497aa86 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -48,7 +48,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=24.06}
+TRITON_VERSION=${TRITON_VERSION:=24.07}
 
 # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version
 ONNX_VERSION=1.13.0

From b93cb6b921178bf927aca1c5a7ca59a4adc43702 Mon Sep 17 00:00:00 2001
From: Francesco Petrini
Date: Mon, 8 Jul 2024 15:39:08 -0700
Subject: [PATCH 02/12] Update README and versions for 2.48.0 / 24.07 (#7425)

* Update README and versions for 2.48.0 / 24.07

---
 Dockerfile.sdk | 2 +-
 Dockerfile.win10.min | 20 +++++++++----------
 README.md | 12 ++++++-----
 TRITON_VERSION | 2 +-
 build.py | 2 +-
 deploy/aws/values.yaml | 2 +-
 deploy/fleetcommand/Chart.yaml | 2 +-
 deploy/fleetcommand/values.yaml | 2 +-
 deploy/gcp/values.yaml | 2 +-
 .../perf-analyzer-script/triton_client.yaml | 2 +-
 .../server-deployer/build_and_push.sh | 2 +-
 .../server-deployer/chart/triton/Chart.yaml | 2 +-
 .../server-deployer/data-test/schema.yaml | 2 +-
 .../server-deployer/schema.yaml | 4 ++--
 .../gke-marketplace-app/trt-engine/README.md | 6 +++---
 deploy/k8s-onprem/values.yaml | 2 +-
 deploy/oci/values.yaml | 2 +-
 docs/customization_guide/build.md | 2 +-
 docs/customization_guide/compose.md | 2 +-
 docs/customization_guide/test.md | 2 +-
 docs/user_guide/custom_operations.md | 2 +-
 docs/user_guide/performance_tuning.md | 2 +-
 qa/common/gen_jetson_trt_models | 2 +-
 qa/common/gen_qa_custom_ops | 2 +-
 24 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 9e83ecca47..e92b4bcb89 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.06-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.07-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main
diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min
index 7d954d62de..0a554fbcf4 100644
--- a/Dockerfile.win10.min
+++ b/Dockerfile.win10.min
@@ -1,4 +1,4 @@
-# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -37,9 +37,9 @@ RUN choco install unzip -y
 #
 # Installing TensorRT
 #
-ARG TENSORRT_VERSION=10.0.1.6
-ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.4.zip"
-ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip
+ARG TENSORRT_VERSION=10.2.0.19
+ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.5.zip"
+ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/zip/TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5.zip
 # COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
 ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
 RUN unzip /tmp/%TENSORRT_ZIP%
@@ -51,9 +51,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 #
 # Installing cuDNN
 #
-ARG CUDNN_VERSION=9.1.0.70
+ARG CUDNN_VERSION=9.2.1.18
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
-ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.1.0.70_cuda12-archive.zip
+ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.2.1.18_cuda12-archive.zip
 ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
 RUN unzip /tmp/%CUDNN_ZIP%
 RUN move cudnn-* cudnn
@@ -88,7 +88,7 @@ LABEL PYTHON_VERSION=${PYTHON_VERSION}
 #
 # Installing CMake
 #
-ARG CMAKE_VERSION=3.29.3
+ARG CMAKE_VERSION=3.30.0
 RUN pip install cmake==%CMAKE_VERSION%
 
 ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
@@ -150,7 +150,7 @@ WORKDIR /
 #
 ARG CUDA_MAJOR=12
 ARG CUDA_MINOR=5
-ARG CUDA_PATCH=0
+ARG CUDA_PATCH=1
 ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
 ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
                    cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
@@ -175,7 +175,7 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi
 
 RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
 
-ARG CUDNN_VERSION=9.1.0.70
+ARG CUDNN_VERSION=9.2.1.18
 ENV CUDNN_VERSION ${CUDNN_VERSION}
 COPY --from=dependency_base /cudnn /cudnn
 RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
@@ -183,7 +183,7 @@ RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
 RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
 LABEL CUDNN_VERSION="${CUDNN_VERSION}"
 
-ARG TENSORRT_VERSION=10.0.1.6
+ARG TENSORRT_VERSION=10.2.0.19
 ENV TRT_VERSION ${TENSORRT_VERSION}
 COPY --from=dependency_base /TensorRT /TensorRT
 RUN setx PATH "c:\TensorRT\lib;%PATH%"
diff --git a/README.md b/README.md
index dcf0a3420f..8bb2302dea 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@