triton-inference-server · Tabrizian · Nov 15, 2023 · Nov 6, 2023 · Nov 6, 2023 · Nov 7, 2023
diff --git a/qa/L0_implicit_state/implicit_state.py b/qa/L0_implicit_state/implicit_state.py
@@ -83,6 +83,80 @@ def test_wrong_implicit_state_name(self):
         err_str = str(e.exception).lower()
         self.assertIn("state 'undefined_state' is not a valid state name", err_str)
 
+    def test_implicit_state_single_buffer(self):  # smoke test: 2-request sequence
+        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+        inputs = []  # [0]=INPUT (random int32 in [0, 5)), [1]=TEST_CASE (constant 2)
+        inputs.append(tritonhttpclient.InferInput("INPUT", [1], "INT32"))
+        inputs.append(tritonhttpclient.InferInput("TEST_CASE", [1], "INT32"))
+        inputs[0].set_data_from_numpy(np.random.randint(5, size=[1], dtype=np.int32))
+        inputs[1].set_data_from_numpy(np.asarray([2], dtype=np.int32))
+        # Request 1 opens sequence 2 on "single_state_buffer"; response is ignored.
+        triton_client.infer(
+            model_name="single_state_buffer",
+            inputs=inputs,
+            sequence_id=2,
+            sequence_start=True,
+            sequence_end=False,
+        )
+        # Request 2 closes the sequence; the test passes if neither call raises.
+        triton_client.infer(
+            model_name="single_state_buffer",
+            inputs=inputs,
+            sequence_id=2,
+            sequence_start=False,
+            sequence_end=True,
+        )
+
+    def test_implicit_state_growable_memory(self):  # OUTPUT_STATE grows per request
+        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+        inputs = []  # [0]=INPUT (random int32 in [0, 5)), [1]=TEST_CASE (constant 3)
+        inputs.append(tritonhttpclient.InferInput("INPUT", [1], "INT32"))
+        inputs.append(tritonhttpclient.InferInput("TEST_CASE", [1], "INT32"))
+        inputs[0].set_data_from_numpy(np.random.randint(5, size=[1], dtype=np.int32))
+        inputs[1].set_data_from_numpy(np.asarray([3], dtype=np.int32))
+        # Request 1 opens sequence 2; OUTPUT_STATE must be all zeros (any shape).
+        output = triton_client.infer(
+            model_name="growable_memory",
+            inputs=inputs,
+            sequence_id=2,
+            sequence_start=True,
+            sequence_end=False,
+        )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.zeros(output_state.shape, dtype=np.int8)
+        np.testing.assert_equal(output_state, expected_output_state)
+        # Request 2: state must be the previous content followed by as many ones.
+        output = triton_client.infer(
+            model_name="growable_memory",
+            inputs=inputs,
+            sequence_id=2,
+            sequence_start=False,
+            sequence_end=False,
+        )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.concatenate(
+            [expected_output_state, np.ones(expected_output_state.shape, dtype=np.int8)]
+        )
+        np.testing.assert_equal(output_state, expected_output_state)
+        # Request 3: a further block of twos, half the current length, is appended.
+        output = triton_client.infer(
+            model_name="growable_memory",
+            inputs=inputs,
+            sequence_id=2,
+            sequence_start=False,
+            sequence_end=False,  # NOTE(review): sequence is never ended in this test
+        )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.concatenate(
+            [
+                expected_output_state,
+                np.full(  # 1-D block: the concatenation needs a 1-D state
+                    (expected_output_state.shape[0] // 2,), dtype=np.int8, fill_value=2
+                ),
+            ]
+        )
+        np.testing.assert_equal(output_state, expected_output_state)
+
     def test_no_update(self):
         # Test implicit state without updating any state
         triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")

diff --git a/qa/L0_implicit_state/models/growable_memory/config.pbtxt b/qa/L0_implicit_state/models/growable_memory/config.pbtxt
@@ -0,0 +1,102 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: "growable_memory"
+backend: "implicit_state"
+max_batch_size: 0
+sequence_batching {
+  control_input [
+    {
+      name: "START"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_START
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "READY"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_READY
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "END"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_END
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    }
+  ]
+  state [
+    {
+        input_name: "INPUT_STATE"
+        output_name: "OUTPUT_STATE"
+        data_type: TYPE_INT8
+        dims: [1024, 1024]
+        use_same_buffer_for_input_output: true
+        use_growable_memory: true
+    }
+  ]
+}
+
+input [
+  {
+    name: "INPUT"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  },
+  {
+    name: "TEST_CASE"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  },
+  {
+    name: "OUTPUT_STATE"
+    data_type: TYPE_INT8
+    dims: [ 1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind : KIND_GPU
+  }
+]
diff --git a/qa/L0_implicit_state/models/single_state_buffer/config.pbtxt b/qa/L0_implicit_state/models/single_state_buffer/config.pbtxt
@@ -0,0 +1,96 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: "single_state_buffer"
+backend: "implicit_state"
+max_batch_size: 0
+sequence_batching {
+  control_input [
+    {
+      name: "START"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_START
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "READY"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_READY
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "END"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_END
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    }
+  ]
+  state [
+    {
+        input_name: "INPUT_STATE"
+        output_name: "OUTPUT_STATE"
+        data_type: TYPE_INT32
+        dims: 1
+        use_same_buffer_for_input_output: true
+    }
+  ]
+}
+
+input [
+  {
+    name: "INPUT"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  },
+  {
+    name: "TEST_CASE"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind : KIND_CPU
+  }
+]
diff --git a/qa/L0_implicit_state/test.sh b/qa/L0_implicit_state/test.sh
@@ -46,9 +46,11 @@ export BACKENDS
 export IMPLICIT_STATE=1
 INITIAL_STATE_ZERO=${INITIAL_STATE_ZERO:="0"}
 INITIAL_STATE_FILE=${INITIAL_STATE_FILE:="0"}
+SINGLE_STATE_BUFFER=${SINGLE_STATE_BUFFER:="0"}
 
 export INITIAL_STATE_ZERO
 export INITIAL_STATE_FILE
+export SINGLE_STATE_BUFFER
 
 MODELDIR=${MODELDIR:=`pwd`/models}
 TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
@@ -60,10 +62,14 @@ source ../common/util.sh
 cp ./libtriton_implicit_state.so models/no_implicit_state/
 cp ./libtriton_implicit_state.so models/no_state_update/
 cp ./libtriton_implicit_state.so models/wrong_internal_state/
+cp ./libtriton_implicit_state.so models/single_state_buffer/
+cp ./libtriton_implicit_state.so models/growable_memory/
 
 mkdir -p models/no_implicit_state/1/
 mkdir -p models/no_state_update/1/
 mkdir -p models/wrong_internal_state/1/
+mkdir -p models/single_state_buffer/1/
+mkdir -p models/growable_memory/1/
 
 for BACKEND in $BACKENDS; do
     dtype="int32"
@@ -90,9 +96,9 @@ for BACKEND in $BACKENDS; do
 done
 
 CLIENT_LOG=`pwd`/client.log
-SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
+SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --cuda-virtual-address-size=0:$((1024*1024*4))"
 IMPLICIT_STATE_CLIENT='implicit_state.py'
-EXPECTED_TEST_NUM=5
+EXPECTED_TEST_NUM=7
 rm -rf $CLIENT_LOG
 
 run_server

diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh
@@ -169,6 +169,10 @@ export INITIAL_STATE_FILE
 INITIAL_STATE_ZERO=${INITIAL_STATE_ZERO:="0"}
 export INITIAL_STATE_ZERO
 
+# If USE_SINGLE_BUFFER is not specified, set to 0
+USE_SINGLE_BUFFER=${USE_SINGLE_BUFFER:="0"}
+export USE_SINGLE_BUFFER
+
 # Setup non-variable-size model repositories. The same models are in each
 # repository but they are configured as:
 #   models0 - four instances with non-batching model
@@ -332,6 +336,23 @@ for MODEL in $MODELS; do
       (cd queue_delay_models/$(basename $MODEL)_full && \
         sed -i "s/$(basename $MODEL)/$(basename $MODEL)_full/" config.pbtxt && \
         sed -i "s/minimum_slot_utilization: 0/minimum_slot_utilization: 1/" config.pbtxt)
+
+    # TODO: Enable single state buffer testing for sequence batcher
+    # if [ "$USE_SINGLE_BUFFER" == "1" ] && [ "$IMPLICIT_STATE" == "1" ]; then
+    #   SED_REPLACE_PATTERN="N;N;N;N;N;/state.*dims:.*/a use_same_buffer_for_input_output: true"
+    #   (cd models0/$(basename $MODEL) && \
+    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+    #   (cd models1/$(basename $MODEL) && \
+    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+    #   (cd models2/$(basename $MODEL) && \
+    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+    #   (cd models4/$(basename $MODEL) && \
+    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+    #   (cd queue_delay_models/$(basename $MODEL)_full && \
+    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+    #   (cd queue_delay_models/$(basename $MODEL)_half && \
+    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+    # fi
   else
     cp -r $MODEL queue_delay_models/$(basename $MODEL)_full && \
       (cd queue_delay_models/$(basename $MODEL)_full && \