Skip to content

Commit 0783940

Browse files
kthuirmccorm4
andauthored
Add test for sending instance update config via load API (#5937)
* Add test for passing config via load api * Add more docs on instance update behavior * Update to suggested docs Co-authored-by: Ryan McCormick <[email protected]> * Use dictionary for json config * Modify the config fetched from Triton instead --------- Co-authored-by: Ryan McCormick <[email protected]>
1 parent cfa1efe commit 0783940

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

docs/user_guide/model_management.md

+6
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,12 @@ request is received under
218218
[Model Control Mode EXPLICIT](#model-control-mode-explicit) or change to the
219219
'config.pbtxt' is detected under
220220
[Model Control Mode POLL](#model-control-mode-poll).
221+
* The new model configuration may also be passed to Triton via the
222+
[load API](../protocol/extension_model_repository.md#load).
223+
* Some text editors create a swap file in the model directory when the
224+
'config.pbtxt' is modified in place. The swap file is not part of the model
225+
configuration, so its presence in the model directory may be detected as a new file
226+
and cause the model to fully reload when only an update is expected.
221227

222228
* If a sequence model is updated with in-flight sequence(s), Triton does not
223229
guarantee any remaining request(s) from the in-flight sequence(s) will be routed

qa/L0_model_update/instance_update_test.py

+26
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import random
3030
import time
3131
import concurrent.futures
32+
import json
3233
import numpy as np
3334
import tritonclient.grpc as grpcclient
3435
from tritonclient.utils import InferenceServerException
@@ -278,6 +279,31 @@ def test_non_instance_config_update(self):
278279
batching=True)
279280
self.__unload_model(batching=True)
280281

282+
# Test passing new instance config via load API
def test_load_api_with_config(self):
    """Verify that a modified model config sent through the load API
    adds an instance without tearing down the existing one."""
    # Start with a single instance
    self.__load_model(1)
    # Fetch the live config from Triton and unwrap the "config" payload
    full_config = self.__triton.get_model_config(self.__model_name, as_json=True)
    self.assertIn("config", full_config)
    self.assertIsInstance(full_config["config"], dict)
    model_config = full_config["config"]
    # Sanity-check the instance_group section before mutating it
    self.assertIn("instance_group", model_config)
    self.assertIsInstance(model_config["instance_group"], list)
    self.assertEqual(len(model_config["instance_group"]), 1)
    group = model_config["instance_group"][0]
    self.assertIn("count", group)
    self.assertIsInstance(group["count"], int)
    # Bump the instance count by one in the fetched config
    group["count"] += 1
    self.assertEqual(group["count"], 2)
    # Reload the model with the updated config attached to the load request
    self.__triton.load_model(self.__model_name, config=json.dumps(model_config))
    self.__check_count("initialize", 2)  # 2 instances in total
    self.__check_count("finalize", 0)  # no instance is removed
    self.__infer()
    # Clean up
    self.__unload_model()
306+
281307
# Test instance update with an ongoing inference
282308
def test_update_while_inferencing(self):
283309
# Load model with 1 instance

0 commit comments

Comments
 (0)