Add end-to-end CI test for decoupled model support (#6131) (#6184)

matthewkotila · web-flow · commit d0599dabac99 · 2023-08-15T09:20:17.000-07:00
* Add end-to-end CI test for decoupled model support

* Address feedback
diff --git a/Dockerfile.QA b/Dockerfile.QA
@@ -337,7 +337,8 @@ RUN rm -f /usr/bin/python && \
 
 RUN pip3 install --upgrade wheel setuptools && \
     pip3 install --upgrade numpy pillow attrdict future grpcio requests gsutil \
-                           awscli six grpcio-channelz prettytable virtualenv
+                           awscli six grpcio-channelz prettytable virtualenv \
+                           check-jsonschema
 
 # go needed for example go client test.
 RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
diff --git a/qa/L0_perf_analyzer/perf_analyzer_profile_export_schema.json b/qa/L0_perf_analyzer/perf_analyzer_profile_export_schema.json
@@ -0,0 +1,93 @@
+{
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "$id": "https://github.com/triton-inference-server/client/blob/main/src/c%2B%2B/perf_analyzer/examples/schema.json",
+    "title": "Perf Analyzer output data",
+    "description": "A json file describing the output from a Perf Analyzer run.",
+    "type": "object",
+    "required": [
+        "experiments",
+        "version"
+    ],
+    "properties": {
+        "experiments": {
+            "description": "The array of all experiments run by Perf Analyzer.",
+            "type": "array",
+            "minItems": 1,
+            "uniqueItems": true,
+            "items": {
+                "type": "object",
+                "required": [
+                    "experiment",
+                    "requests",
+                    "window_boundaries"
+                ],
+                "properties": {
+                    "experiment": {
+                        "description": "A single experiment run by Perf Analyzer.",
+                        "type": "object",
+                        "required": [
+                            "mode",
+                            "value"
+                        ],
+                        "properties": {
+                            "mode": {
+                                "description": "Operating mode of Perf Analyzer: For example, 'concurrency' or 'request rate'.",
+                                "type": "string"
+                            },
+                            "value": {
+                                "description": "Concurrency or request rate for the current experiment.",
+                                "type": "integer"
+                            }
+                        }
+                    },
+                    "requests": {
+                        "description": "The array of requests sent by Perf Analyzer for this experiment.",
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/$defs/request"
+                        }
+                    },
+                    "window_boundaries": {
+                        "description": "An array of time stamps describing window boundaries.",
+                        "type": "array",
+                        "items": {
+                            "type": "integer"
+                        },
+                        "uniqueItems": true
+                    }
+                }
+            }
+        },
+        "version": {
+            "description": "The version of Perf Analyzer that generated the report.",
+            "type": "string"
+        }
+    },
+    "$defs": {
+        "request": {
+            "description": "Info for a single request.",
+            "type": "object",
+            "required": [
+                "timestamp",
+                "response_timestamps"
+            ],
+            "properties": {
+                "timestamp": {
+                    "description": "Time stamp of the request.",
+                    "type": "integer"
+                },
+                "sequence_id": {
+                    "description": "The sequence_id of the request.",
+                    "type": "integer"
+                },
+                "response_timestamps": {
+                    "description": "All associated responses to this request.",
+                    "type": "array",
+                    "items": {
+                        "type": "integer"
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/qa/L0_perf_analyzer/test.sh b/qa/L0_perf_analyzer/test.sh
@@ -64,6 +64,8 @@ WRONG_OUTPUT_2_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/wrong_
 SEQ_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_output.json
 SEQ_WRONG_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_wrong_output.json
 
+REPEAT_INT32_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/repeat_int32_data.json
+
 SERVER=/opt/tritonserver/bin/tritonserver
 SERVER_ARGS="--model-repository=${DATADIR}"
 SERVER_LOG="./inference_server.log"
@@ -121,6 +123,12 @@ cp -r ../python_models/optional $DATADIR && \
   mv $DATADIR/optional/model.py $DATADIR/optional/1 && \
   sed -i 's/max_batch_size: 0/max_batch_size: 2/g' $DATADIR/optional/config.pbtxt
 
+# Fetch python_backend and install its decoupled "repeat" example under $DATADIR/repeat_int32
+git clone --depth=1 https://github.com/triton-inference-server/python_backend
+mkdir -p $DATADIR/repeat_int32/1
+cp python_backend/examples/decoupled/repeat_config.pbtxt $DATADIR/repeat_int32/config.pbtxt
+cp python_backend/examples/decoupled/repeat_model.py $DATADIR/repeat_int32/1/model.py
+
 # Generating test data
 mkdir -p $TESTDATADIR
 for INPUT in INPUT0 INPUT1; do
@@ -871,6 +879,36 @@ if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
 fi
 set -e
 
+## Test decoupled model support
+# Every step below is checked through $?, so errexit is switched off here;
+# otherwise a failing step would abort the script before it is reported.
+set +e
+$PERF_ANALYZER -v -m repeat_int32 --input-data=$REPEAT_INT32_JSONDATAFILE \
+    --profile-export-file profile_export.json -i grpc --async --streaming -s \
+    ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Test Failed\n***"
+    RET=1
+fi
+# At least one request of the first experiment must carry more than one response.
+python3 -c "import json ; \
+    requests = json.load(open('profile_export.json'))['experiments'][0]['requests'] ; \
+    assert any(len(r['response_timestamps']) > 1 for r in requests)"
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Test Failed\n***"
+    RET=1
+fi
+# The exported profile must validate against perf_analyzer_profile_export_schema.json.
+check-jsonschema --schemafile perf_analyzer_profile_export_schema.json profile_export.json
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Test Failed\n***"
+    RET=1
+fi
+set -e
+
 ## Test perf_analyzer with MPI / multiple models
 
 is_synchronized() {
diff --git a/qa/common/perf_analyzer_input_data_json/repeat_int32_data.json b/qa/common/perf_analyzer_input_data_json/repeat_int32_data.json
@@ -0,0 +1,31 @@
+{
+    "data": [
+        {
+            "IN": {
+                "content": [
+                    4,
+                    2,
+                    0,
+                    1
+                ],
+                "shape": [
+                    4
+                ]
+            },
+            "DELAY": {
+                "content": [
+                    1,
+                    2,
+                    3,
+                    4
+                ],
+                "shape": [
+                    4
+                ]
+            },
+            "WAIT": [
+                5
+            ]
+        }
+    ]
+}