@@ -29,6 +29,7 @@
 import random
 import time
 import concurrent.futures
+import json
 import numpy as np
 import tritonclient.grpc as grpcclient
 from tritonclient.utils import InferenceServerException
@@ -278,6 +279,31 @@ def test_non_instance_config_update(self):
                                       batching=True)
         self.__unload_model(batching=True)
 
+    # Test passing new instance config via load API
+    def test_load_api_with_config(self):
+        # Load model with 1 instance
+        self.__load_model(1)
+        # Get the model config from Triton
+        config = self.__triton.get_model_config(self.__model_name, as_json=True)
+        self.assertIn("config", config)
+        self.assertIsInstance(config["config"], dict)
+        config = config["config"]
+        self.assertIn("instance_group", config)
+        self.assertIsInstance(config["instance_group"], list)
+        self.assertEqual(len(config["instance_group"]), 1)
+        self.assertIn("count", config["instance_group"][0])
+        self.assertIsInstance(config["instance_group"][0]["count"], int)
+        # Add an extra instance into the model config
+        config["instance_group"][0]["count"] += 1
+        self.assertEqual(config["instance_group"][0]["count"], 2)
+        # Load the extra instance via the load API
+        self.__triton.load_model(self.__model_name, config=json.dumps(config))
+        self.__check_count("initialize", 2)  # 2 instances in total
+        self.__check_count("finalize", 0)  # no instance is removed
+        self.__infer()
+        # Unload model
+        self.__unload_model()
+
     # Test instance update with an ongoing inference
     def test_update_while_inferencing(self):
         # Load model with 1 instance