
Commit 693bd2b

Improve testing
1 parent 852da22 commit 693bd2b

4 files changed: +183 -22 lines changed


qa/L0_implicit_state/implicit_state.py

+21 -3

@@ -115,29 +115,47 @@ def test_implicit_state_growable_memory(self):
         inputs[0].set_data_from_numpy(np.random.randint(5, size=[1], dtype=np.int32))
         inputs[1].set_data_from_numpy(np.asarray([3], dtype=np.int32))

-        triton_client.infer(
+        output = triton_client.infer(
             model_name="growable_memory",
             inputs=inputs,
             sequence_id=2,
             sequence_start=True,
             sequence_end=False,
         )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.zeros(output_state.shape, dtype=np.int8)
+        np.testing.assert_equal(output_state, expected_output_state)

-        triton_client.infer(
+        output = triton_client.infer(
             model_name="growable_memory",
             inputs=inputs,
             sequence_id=2,
             sequence_start=False,
             sequence_end=False,
         )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.concatenate(
+            [expected_output_state, np.ones(expected_output_state.shape, dtype=np.int8)]
+        )
+        np.testing.assert_equal(output_state, expected_output_state)

-        triton_client.infer(
+        output = triton_client.infer(
             model_name="growable_memory",
             inputs=inputs,
             sequence_id=2,
             sequence_start=False,
             sequence_end=True,
         )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.concatenate(
+            [
+                expected_output_state,
+                np.full(
+                    (expected_output_state.shape[0] // 2,), dtype=np.int8, fill_value=2
+                ),
+            ]
+        )
+        np.testing.assert_equal(output_state, expected_output_state)

     def test_no_update(self):
         # Test implicit state without updating any state
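The three checks above follow a single pattern: each step of the sequence appends one 1 MiB INT8 block to the accumulated state, filled with the zero-based request index (zeros, then ones, then twos). A generalized sketch of that check, assuming `results` holds the three InferResult objects in order (a hypothetical helper, not part of this commit):

import numpy as np


def check_growable_state(results):
    # Each OUTPUT_STATE should equal the previous state with one more
    # 1 MiB block appended; block i is filled with the value i, matching
    # what the backend writes for test case 3.
    expected = np.empty((0,), dtype=np.int8)
    for step, result in enumerate(results):
        block = np.full((1024 * 1024,), fill_value=step, dtype=np.int8)
        expected = np.concatenate([expected, block])
        np.testing.assert_equal(result.as_numpy("OUTPUT_STATE"), expected)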
config.pbtxt (new file for the growable_memory model)

+102 -0
@@ -0,0 +1,102 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions # are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: "growable_memory"
+backend: "implicit_state"
+max_batch_size: 0
+sequence_batching {
+  control_input [
+    {
+      name: "START"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_START
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "READY"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_READY
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "END"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_END
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    }
+  ]
+  state [
+    {
+      input_name: "INPUT_STATE"
+      output_name: "OUTPUT_STATE"
+      data_type: TYPE_INT8
+      dims: [ 1024, 1024 ]
+      use_single_buffer: true
+      use_growable_memory: true
+    }
+  ]
+}
+
+input [
+  {
+    name: "INPUT"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  },
+  {
+    name: "TEST_CASE"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  },
+  {
+    name: "OUTPUT_STATE"
+    data_type: TYPE_INT8
+    dims: [ 1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind : KIND_GPU
+  }
+]
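With this configuration, a minimal client call looks like the sketch below (assumes a running Triton server at localhost:8001 with the model loaded and the tritonclient package installed; the input values are illustrative). Setting TEST_CASE to 3 selects the growable-memory path exercised by the test above:

import numpy as np
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient("localhost:8001")

# The two inputs declared above: INPUT and TEST_CASE, both INT32 with dims [ 1 ].
inputs = [
    grpcclient.InferInput("INPUT", [1], "INT32"),
    grpcclient.InferInput("TEST_CASE", [1], "INT32"),
]
inputs[0].set_data_from_numpy(np.asarray([0], dtype=np.int32))
inputs[1].set_data_from_numpy(np.asarray([3], dtype=np.int32))  # test case 3

# First request of a sequence: maps to the START and READY controls above.
result = client.infer(
    model_name="growable_memory",
    inputs=inputs,
    sequence_id=2,
    sequence_start=True,
    sequence_end=False,
)
print(result.as_numpy("OUTPUT_STATE").shape)  # (1048576,) after the first step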

qa/L0_sequence_batcher/test.sh

+16 -15

@@ -337,21 +337,22 @@ for MODEL in $MODELS; do
             sed -i "s/$(basename $MODEL)/$(basename $MODEL)_full/" config.pbtxt && \
             sed -i "s/minimum_slot_utilization: 0/minimum_slot_utilization: 1/" config.pbtxt)

-        if [ "$USE_SINGLE_BUFFER" == "1" && "$IMPLICIT_STATE" == "1" ]; then
-            SED_REPLACE_PATTERN="N;N;N;N;N;/state.*dims:.*/a use_single_buffer: true"
-            (cd models0/$(basename $MODEL) && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd models1/$(basename $MODEL) && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd models2/$(basename $MODEL) && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd models4/$(basename $MODEL) && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd queue_delay_models/$(basename $MODEL)_full && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd queue_delay_models/$(basename $MODEL)_half && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-        fi
+        # TODO: Enable single state buffer testing for sequence batcher
+        # if [ "$USE_SINGLE_BUFFER" == "1" && "$IMPLICIT_STATE" == "1" ]; then
+        #     SED_REPLACE_PATTERN="N;N;N;N;N;/state.*dims:.*/a use_single_buffer: true"
+        #     (cd models0/$(basename $MODEL) && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd models1/$(basename $MODEL) && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd models2/$(basename $MODEL) && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd models4/$(basename $MODEL) && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd queue_delay_models/$(basename $MODEL)_full && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd queue_delay_models/$(basename $MODEL)_half && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        # fi
     else
         cp -r $MODEL queue_delay_models/$(basename $MODEL)_full && \
             (cd queue_delay_models/$(basename $MODEL)_full && \
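For reference, the disabled SED_REPLACE_PATTERN reads the config in multi-line windows (the N;N;N;N;N prefix) so that a state block and its dims: line can be matched together, then appends use_single_buffer: true after the match. One caveat for whoever re-enables it: && inside a single-bracket [ ] test is not valid Bash, so the guard would need [[ ... && ... ]] or two chained tests. A rough Python equivalent of the config edit (a hypothetical helper, not part of the test suite):

def enable_single_buffer(config_text: str) -> str:
    # Append "use_single_buffer: true" after the "dims:" line of a state
    # block, mimicking the in-place sed edit that is commented out above.
    out, in_state = [], False
    for line in config_text.splitlines():
        out.append(line)
        if line.lstrip().startswith("state"):
            in_state = True
        if in_state and "dims:" in line:
            out.append("use_single_buffer: true")
            in_state = False
    return "\n".join(out) + "\n"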

src/test/implicit_state/src/implicit_state.cc

+44 -4

@@ -1060,12 +1060,17 @@ TRITONBACKEND_ModelInstanceExecute(
         } break;
         case 3: {
           TRITONBACKEND_State* response_state;
-          std::vector<int64_t> shape{1};
+          size_t block_size = sizeof(int8_t) * 1024 * 1024;
+          int64_t current_elements =
+              (instance_state->request_index_ + 1) * 1024 * 1024;
+          std::cout << "current elements are "
+                    << (instance_state->request_index_ + 1) << std::endl;
+          std::vector<int64_t> shape{current_elements};
           GUARDED_RESPOND_IF_ERROR(
               responses, r, request,
               TRITONBACKEND_StateNew(
                   &response_state, request, "OUTPUT_STATE",
-                  TRITONSERVER_TYPE_INT32, shape.data() /* data */,
+                  TRITONSERVER_TYPE_INT8, shape.data() /* data */,
                   shape.size() /* dim_count */));

           if (responses[r] == nullptr) {
@@ -1081,8 +1086,6 @@ TRITONBACKEND_ModelInstanceExecute(
           int64_t actual_memory_type_id = 0;
           char* buffer;

-          size_t block_size = sizeof(int32_t) * 1024 * 1024;
-
           // Request an output buffer in GPU. This is only for testing purposes
           // to make sure that GPU output buffers can be requested.
           GUARDED_RESPOND_IF_ERROR(
@@ -1098,6 +1101,43 @@ TRITONBACKEND_ModelInstanceExecute(
               buffer + block_size * (instance_state->request_index_),
               instance_state->request_index_, block_size);

+          TRITONBACKEND_Output* response_output;
+          GUARDED_RESPOND_IF_ERROR(
+              responses, r, request,
+              TRITONBACKEND_ResponseOutput(
+                  responses[r], &response_output, "OUTPUT_STATE",
+                  TRITONSERVER_TYPE_INT8, shape.data() /* data */,
+                  shape.size() /* dim_count */));
+
+          actual_memory_type = TRITONSERVER_MEMORY_CPU;
+          actual_memory_type_id = 0;
+          char* output_buffer;
+          GUARDED_RESPOND_IF_ERROR(
+              responses, r, request,
+              TRITONBACKEND_OutputBuffer(
+                  response_output, reinterpret_cast<void**>(&output_buffer),
+                  block_size * (instance_state->request_index_ + 1),
+                  &actual_memory_type, &actual_memory_type_id));
+          if ((responses[r] == nullptr) ||
+              (actual_memory_type != TRITONSERVER_MEMORY_CPU)) {
+            GUARDED_RESPOND_IF_ERROR(
+                responses, r, request,
+                TRITONSERVER_ErrorNew(
+                    TRITONSERVER_ERROR_UNSUPPORTED,
+                    "the backend can only handle CPU tensors"));
+            LOG_MESSAGE(
+                TRITONSERVER_LOG_ERROR,
+                (std::string("request ") + std::to_string(r) +
+                 ": the backend can only handle CPU tensors, error "
+                 "response sent")
+                    .c_str());
+            continue;
+          }
+          cudaMemcpy(
+              output_buffer, buffer,
+              block_size * (instance_state->request_index_ + 1),
+              cudaMemcpyDeviceToHost);
+
           instance_state->state_ = buffer;
         } break;
       }
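Taken together, these backend changes make test case 3 report the accumulated state back to the client: the state lives in a growable GPU buffer, each request fills one more 1 MiB block with its zero-based request index, and the whole buffer is copied into a CPU OUTPUT_STATE response output with cudaMemcpy so the assertions in implicit_state.py can inspect it. (The cudaMemcpy return code is not checked; a failed copy would surface only through the numpy assertions.) The final layout after the third request, reconstructed as a sketch:

import numpy as np

MB = 1024 * 1024  # INT8 elements per block, matching block_size in the backend

# Three concatenated blocks: block 0 holds 0s, block 1 holds 1s, block 2 holds 2s.
final_state = np.concatenate(
    [np.full((MB,), fill_value=i, dtype=np.int8) for i in range(3)]
)
assert final_state.shape == (3 * MB,)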

0 commit comments