Skip to content

Commit 731109e

Browse files
committed
Allow changing ping behavior based on env variable in SageMaker
1 parent ab7b70a commit 731109e

File tree

4 files changed

+12
-7
lines changed

4 files changed

+12
-7
lines changed

docker/sagemaker/serve

+7-2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727

2828
SAGEMAKER_SINGLE_MODEL_REPO=/opt/ml/model/
2929

30+
# Use 'ready' for ping check in single-model endpoint mode, and use 'live' for ping check in multi-model endpoint mode
31+
# https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/rest_predict_v2.yaml#L10-L26
32+
SAGEMAKER_TRITON_PING_MODE="ready"
33+
3034
# Note: in Triton on SageMaker, each model url is registered as a separate repository
3135
# e.g., /opt/ml/models/<hash>/model. Specifying MME model repo path as /opt/ml/models causes Triton
3236
# to treat it as an additional empty repository and changes
@@ -42,8 +46,9 @@ if [ -n "$SAGEMAKER_MULTI_MODEL" ]; then
4246
if [ "$SAGEMAKER_MULTI_MODEL" == "true" ]; then
4347
mkdir -p ${SAGEMAKER_MULTI_MODEL_REPO}
4448
SAGEMAKER_MODEL_REPO=${SAGEMAKER_MULTI_MODEL_REPO}
49+
SAGEMAKER_TRITON_PING_MODE="live"
4550
is_mme_mode=true
46-
echo "Triton is running in SageMaker MME mode."
51+
echo -e "Triton is running in SageMaker MME mode. Using Triton ping mode: \"${SAGEMAKER_TRITON_PING_MODE}\""
4752
fi
4853
fi
4954

@@ -134,4 +139,4 @@ elif [ "${is_mme_mode}" = false ]; then
134139
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME}"
135140
fi
136141

137-
tritonserver --allow-sagemaker=true --allow-grpc=false --allow-http=false --allow-metrics=false --model-control-mode=explicit $SAGEMAKER_ARGS
142+
tritonserver --allow-sagemaker=true --allow-grpc=true --allow-http=false --allow-metrics=true --model-control-mode=explicit $SAGEMAKER_ARGS

qa/L0_sagemaker/test.sh

+2-3
Original file line numberDiff line numberDiff line change
@@ -353,12 +353,11 @@ if [ "$SERVER_PID" == "0" ]; then
353353
exit 1
354354
fi
355355

356-
# Ping and expect server to still be running (using 'live' instead of 'ready')
357-
# https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/rest_predict_v2.yaml#L10-L26
356+
# Ping and expect a non-200 (error) status code in SME mode.
358357
set +e
359358
code=`curl -s -w %{http_code} -o ./ping.out localhost:8080/ping`
360359
set -e
361-
if [ "$code" != "200" ]; then
360+
if [ "$code" == "200" ]; then
362361
cat ./ping.out
363362
echo -e "\n***\n*** Test Failed\n***"
364363
RET=1

src/sagemaker_server.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -904,7 +904,8 @@ SagemakerAPIServer::SageMakerMMECheckOOMError(TRITONSERVER_Error* err)
904904
"CUBLAS_STATUS_ALLOC_FAILED",
905905
"CUBLAS_STATUS_NOT_INITIALIZED",
906906
"Failed to allocate memory",
907-
"failed to allocate memory"};
907+
"failed to allocate memory",
908+
"No space left on device"};
908909

909910
/*
910911
TODO: Improve the search to do pattern match on whole words only

src/sagemaker_server.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class SagemakerAPIServer : public HTTPAPIServer {
7878
model_path_regex_(
7979
R"((\/opt\/ml\/models\/[0-9A-Za-z._]+)\/(model)\/?([0-9A-Za-z._]+)?)"),
8080
platform_ensemble_regex_(R"(platform:(\s)*\"ensemble\")"),
81-
ping_mode_("live"),
81+
ping_mode_(GetEnvironmentVariableOrDefault("SAGEMAKER_TRITON_PING_MODE", "ready")),
8282
model_name_(GetEnvironmentVariableOrDefault(
8383
"SAGEMAKER_TRITON_DEFAULT_MODEL_NAME",
8484
"unspecified_SAGEMAKER_TRITON_DEFAULT_MODEL_NAME")),

0 commit comments

Comments
 (0)