Skip to content

Commit 0783940

Browse files
kthuirmccorm4
andauthored
Add test for sending instance update config via load API (#5937)
* Add test for passing config via load api * Add more docs on instance update behavior * Update to suggested docs Co-authored-by: Ryan McCormick <[email protected]> * Use dictionary for json config * Modify the config fetched from Triton instead --------- Co-authored-by: Ryan McCormick <[email protected]>
1 parent cfa1efe commit 0783940

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

docs/user_guide/model_management.md

+6
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,12 @@ request is received under
218218
[Model Control Mode EXPLICIT](#model-control-mode-explicit) or change to the
219219
'config.pbtxt' is detected under
220220
[Model Control Mode POLL](#model-control-mode-poll).
221+
* The new model configuration may also be passed to Triton via the
222+
[load API](../protocol/extension_model_repository.md#load).
223+
* Some text editors create a swap file in the model directory when the
224+
'config.pbtxt' is modified in place. The swap file is not part of the model
225+
configuration, so its presence in the model directory may be detected as a new file
226+
and cause the model to fully reload when only an update is expected.
221227

222228
* If a sequence model is updated with in-flight sequence(s), Triton does not
223229
guarantee any remaining request(s) from the in-flight sequence(s) will be routed

qa/L0_model_update/instance_update_test.py

+26
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import random
3030
import time
3131
import concurrent.futures
32+
import json
3233
import numpy as np
3334
import tritonclient.grpc as grpcclient
3435
from tritonclient.utils import InferenceServerException
@@ -278,6 +279,31 @@ def test_non_instance_config_update(self):
278279
batching=True)
279280
self.__unload_model(batching=True)
280281

282+
# Test passing new instance config via load API
def test_load_api_with_config(self):
    """Verify that a modified model config sent through the load API
    adds an instance without tearing down the existing one."""
    # Start with a single instance
    self.__load_model(1)
    # Fetch the live config from Triton and unwrap the "config" payload
    full_config = self.__triton.get_model_config(self.__model_name, as_json=True)
    self.assertIn("config", full_config)
    self.assertIsInstance(full_config["config"], dict)
    model_config = full_config["config"]
    # Sanity-check the instance_group section before mutating it
    self.assertIn("instance_group", model_config)
    self.assertIsInstance(model_config["instance_group"], list)
    self.assertEqual(len(model_config["instance_group"]), 1)
    group = model_config["instance_group"][0]
    self.assertIn("count", group)
    self.assertIsInstance(group["count"], int)
    # Bump the instance count by one in the fetched config
    group["count"] += 1
    self.assertEqual(group["count"], 2)
    # Reload the model with the updated config attached to the load request
    self.__triton.load_model(self.__model_name, config=json.dumps(model_config))
    self.__check_count("initialize", 2)  # 2 instances in total
    self.__check_count("finalize", 0)  # no instance is removed
    self.__infer()
    # Clean up
    self.__unload_model()
306+
281307
# Test instance update with an ongoing inference
282308
def test_update_while_inferencing(self):
283309
# Load model with 1 instance

0 commit comments

Comments
 (0)