Skip to content

Add test when unload/load requests for same model is received at the same time #6150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 14, 2023
75 changes: 75 additions & 0 deletions qa/L0_lifecycle/lifecycle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2917,6 +2917,81 @@ def test_concurrent_model_load_unload(self):
for model_name in model_names:
self.assertEqual(is_load, triton_client.is_model_ready(model_name))

def test_concurrent_same_model_load_unload_stress(self):
model_name = "identity_zero_1_int32"
num_threads = 16
num_iterations = 1024
try:
triton_client = grpcclient.InferenceServerClient(
"localhost:8001", verbose=True
)
except Exception as ex:
self.assertTrue(False, "unexpected error {}".format(ex))

load_fail_reasons = [
"unexpected miss in global map",
"no version is available",
"failed to poll from model repository",
]
unload_fail_reasons = ["versions that are still available: 1"]
load_fail_messages = [
("failed to load '" + model_name + "', " + reason)
for reason in load_fail_reasons
]
unload_fail_messages = [
("failed to unload '" + model_name + "', " + reason)
for reason in unload_fail_reasons
]
global_exception_stats = {} # { "exception message": number of occurrence }
load_before_unload_finish = [False] # use list to access by reference

def _load_unload():
exception_stats = {} # { "exception message": number of occurrence }
for i in range(num_iterations):
try:
triton_client.load_model(model_name)
except InferenceServerException as ex:
# Acceptable for an unload to happen after a load completes, but
# before the load can verify its load state.
error_message = ex.message()
self.assertIn(error_message, load_fail_messages)
if error_message not in exception_stats:
exception_stats[error_message] = 0
exception_stats[error_message] += 1
try:
triton_client.unload_model(model_name)
except InferenceServerException as ex:
# Acceptable for a load to happen during an async unload
error_message = ex.message()
self.assertIn(error_message, unload_fail_messages)
if error_message not in exception_stats:
exception_stats[error_message] = 0
exception_stats[error_message] += 1
load_before_unload_finish[0] = True
return exception_stats

with concurrent.futures.ThreadPoolExecutor() as pool:
threads = []
for i in range(num_threads):
threads.append(pool.submit(_load_unload))
for t in threads:
exception_stats = t.result()
for key, count in exception_stats.items():
if key not in global_exception_stats:
global_exception_stats[key] = 0
global_exception_stats[key] += count

self.assertTrue(triton_client.is_server_live())
self.assertTrue(triton_client.is_server_ready())
self.assertTrue(
load_before_unload_finish[0],
"The test case did not replicate a load while async unloading. Consider increase concurrency.",
)

stats_path = "./test_concurrent_same_model_load_unload_stress.statistics.log"
with open(stats_path, mode="w", encoding="utf-8") as f:
f.write(str(global_exception_stats) + "\n")

def test_concurrent_model_instance_load_speedup(self):
# Initialize client
try:
Expand Down
38 changes: 35 additions & 3 deletions qa/L0_lifecycle/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1824,6 +1824,41 @@ set -e
kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_concurrent_same_model_load_unload_stress
# Build a fresh single-model repository for the stress test.
rm -rf models
mkdir models
# Copy the identity model, create its version-1 directory, and patch the
# config's string_value "10" -> "0" (presumably zeroing a delay parameter so
# load/unload cycles are fast -- TODO confirm against config.pbtxt).
cp -r identity_zero_1_int32 models && \
    (cd models/identity_zero_1_int32 && \
    mkdir 1 && \
    sed -i "s/string_value: \"10\"/string_value: \"0\"/" config.pbtxt)

# Explicit model control so the client drives load/unload; 16 load threads
# matches num_threads in the python test; verbose logs aid post-mortem.
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --model-load-thread-count=16 --log-verbose=2"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so a failing test records RET=1 instead of aborting the run.
set +e
python $LC_TEST LifeCycleTest.test_concurrent_same_model_load_unload_stress >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # On success, surface the exception statistics file written by the test.
    cat ./test_concurrent_same_model_load_unload_stress.statistics.log
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_concurrent_model_instance_load_speedup
rm -rf models
mkdir models
Expand Down Expand Up @@ -1897,9 +1932,6 @@ set -e
kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))


if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
fi
Expand Down