Add test for sending instance update config via load API #5937


Merged · 5 commits · Jun 13, 2023
Changes from 2 commits
6 changes: 6 additions & 0 deletions docs/user_guide/model_management.md
@@ -218,6 +218,12 @@ request is received under
[Model Control Mode EXPLICIT](#model-control-mode-explicit) or change to the
'config.pbtxt' is detected under
[Model Control Mode POLL](#model-control-mode-poll).
* The new model configuration may also be passed to Triton via the
[load API](../protocol/extension_model_repository.md#load).
* Some text editors create a swap file in the model directory when the
'config.pbtxt' is modified in place. The swap file is not part of the model
configuration, so its presence in the model directory may cause the model to
reload unexpectedly.

* If a sequence model is updated with in-flight sequence(s), Triton does not
guarantee any remaining request(s) from the in-flight sequence(s) will be routed
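The doc addition above notes that a new model configuration can be sent through the load API. A minimal sketch of building such a request, assuming the HTTP endpoint layout from the model repository extension (`v2/repository/models/<name>/load` with the configuration as a JSON string in the `config` parameter); the model name and config fields here are illustrative, not taken from this PR's model repository:

```python
import json


def build_load_request(model_name: str, model_config: dict):
    """Return (url_path, body) for a load request carrying a config override.

    The new configuration travels as a JSON *string* inside the request
    body's "parameters" object, per the model repository extension.
    """
    path = f"v2/repository/models/{model_name}/load"
    body = {"parameters": {"config": json.dumps(model_config)}}
    return path, body


path, body = build_load_request(
    "my_model",  # illustrative name
    {"backend": "python", "instance_group": [{"count": 2, "kind": "KIND_CPU"}]},
)
# With a running server, this body would be POSTed to http://<server>:8000/<path>.
print(path)
print(json.dumps(body))
```

When a config override is supplied this way, Triton uses it in place of the 'config.pbtxt' stored in the model repository for that load.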
18 changes: 18 additions & 0 deletions qa/L0_model_update/instance_update_test.py
@@ -278,6 +278,24 @@ def test_non_instance_config_update(self):
batching=True)
self.__unload_model(batching=True)

# Test passing new instance config via load API
def test_load_api_with_config(self):
# Load model with 1 instance
self.__load_model(1)
# Add 1 instance via the load API
new_config = "{\"name\": \"" + self.__model_name + "\",\n"
new_config += "\"backend\": \"python\",\n"
new_config += "\"max_batch_size\": 0,\n"
new_config += "\"input\": {\"name\": \"INPUT0\", \"data_type\": \"TYPE_FP32\", \"dims\": -1},\n"
new_config += "\"output\": {\"name\": \"OUTPUT0\", \"data_type\": \"TYPE_FP32\", \"dims\": -1},\n"
new_config += "\"instance_group\": {\"count\": 2, \"kind\": \"KIND_CPU\"}}"
@rmccorm4 (Contributor) commented on Jun 12, 2023:
Can you use dict and json.dumps(dict) instead? Should be easier to read and update in future
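The suggested refactor could look like the following sketch, which produces the same JSON as the string concatenation above (the model name is a placeholder for the test's `self.__model_name`):

```python
import json

# Build the instance-update config as a dict and let json.dumps handle
# quoting, instead of concatenating escaped strings by hand.
model_name = "my_model"  # placeholder for self.__model_name
new_config = json.dumps({
    "name": model_name,
    "backend": "python",
    "max_batch_size": 0,
    "input": {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": -1},
    "output": {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": -1},
    "instance_group": {"count": 2, "kind": "KIND_CPU"},
})
print(new_config)
```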

self.__triton.load_model(self.__model_name, config=new_config)
self.__check_count("initialize", 2) # 2 instances in total
self.__check_count("finalize", 0) # no instance is removed
self.__infer()
# Unload model
self.__unload_model()

# Test instance update with an ongoing inference
def test_update_while_inferencing(self):
# Load model with 1 instance