@@ -338,10 +338,10 @@ def test_infer_while_updating(self):
338
338
# Unload model
339
339
self .__unload_model ()
340
340
341
- # Test instance resource requirement update
342
- @unittest .skipUnless (os .environ ["RATE_LIMIT_MODE" ] == "execution_count" ,
341
+ # Test instance resource requirement increase
342
+ @unittest .skipUnless ("execution_count" in os .environ ["RATE_LIMIT_MODE" ],
343
343
"Rate limiter precondition not met for this test" )
344
- def test_instance_resource_update (self ):
344
+ def test_instance_resource_increase (self ):
345
345
# Load model
346
346
self .__load_model (
347
347
1 ,
@@ -365,19 +365,46 @@ def infer():
365
365
time .sleep (infer_count / 2 ) # each infer should take < 0.5 seconds
366
366
self .assertNotIn (False , infer_complete , "Infer possibly stuck" )
367
367
infer_thread .result ()
368
- # Decrease the resource requirement
368
+ # Unload model
369
+ self .__unload_model ()
370
+
371
# Test that an instance resource requirement increased above the explicit
# resource is rejected, and that a corrected requirement is then accepted.
# Only runs when RATE_LIMIT_MODE is exactly
# "execution_count_with_explicit_resource".
@unittest.skipUnless(
    os.environ["RATE_LIMIT_MODE"] == "execution_count_with_explicit_resource",
    "Rate limiter precondition not met for this test")
def test_instance_resource_increase_above_explicit(self):
    # Instance group configs used by this test; they differ only in the
    # "R1" resource count requested by the single KIND_CPU instance.
    initial_config = "{\n count: 1\n kind: KIND_CPU\n rate_limiter {\n resources [\n {\n name: \" R1\" \n count: 2\n }\n ]\n }\n }"
    excessive_config = "{\n count: 1\n kind: KIND_CPU\n rate_limiter {\n resources [\n {\n name: \" R1\" \n count: 32\n }\n ]\n }\n }"
    corrected_config = "{\n count: 1\n kind: KIND_CPU\n rate_limiter {\n resources [\n {\n name: \" R1\" \n count: 10 \n }\n ]\n }\n }"

    # Start from a model whose instance requires 2 of "R1"
    self.__load_model(1, initial_config)

    # Requesting 32 of "R1" must fail with InferenceServerException
    # NOTE(review): presumably 32 exceeds the explicit resource configured
    # on the server outside this file - confirm against the launch script.
    self.assertRaises(InferenceServerException,
                      self.__update_instance_count, 0, 0, excessive_config)

    # A requirement of 10 is expected to match the explicit resource, so
    # this update must succeed
    self.__update_instance_count(1, 1, corrected_config)

    # Clean up
    self.__unload_model()
394
+
395
# Test that an instance resource requirement can be decreased.
# Runs for any RATE_LIMIT_MODE value containing "execution_count" (a
# substring check, so "execution_count_with_explicit_resource" qualifies).
@unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"],
                     "Rate limiter precondition not met for this test")
def test_instance_resource_decrease(self):
    # Instance group configs: same single KIND_CPU instance, with the "R1"
    # resource count lowered from 4 to 2.
    starting_config = "{\n count: 1\n kind: KIND_CPU\n rate_limiter {\n resources [\n {\n name: \" R1\" \n count: 4\n }\n ]\n }\n }"
    reduced_config = "{\n count: 1\n kind: KIND_CPU\n rate_limiter {\n resources [\n {\n name: \" R1\" \n count: 2 \n }\n ]\n }\n }"

    # Bring the model up with the higher requirement
    self.__load_model(1, starting_config)

    # Apply the lower requirement; the update is expected to succeed
    self.__update_instance_count(1, 1, reduced_config)

    # Clean up
    self.__unload_model()
0 commit comments