File tree 4 files changed +12
-7
lines changed
4 files changed +12
-7
lines changed Original file line number Diff line number Diff line change 27
27
28
28
SAGEMAKER_SINGLE_MODEL_REPO=/opt/ml/model/
29
29
30
+ # Use 'ready' for ping check in single-model endpoint mode, and use 'live' for ping check in multi-model endpoint mode
31
+ # https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/rest_predict_v2.yaml#L10-L26
32
+ SAGEMAKER_TRITON_PING_MODE=" ready"
33
+
30
34
# Note: in Triton on SageMaker, each model url is registered as a separate repository
31
35
# e.g., /opt/ml/models/<hash>/model. Specifying MME model repo path as /opt/ml/models causes Triton
32
36
# to treat it as an additional empty repository and changes
@@ -42,8 +46,9 @@ if [ -n "$SAGEMAKER_MULTI_MODEL" ]; then
42
46
if [ " $SAGEMAKER_MULTI_MODEL " == " true" ]; then
43
47
mkdir -p ${SAGEMAKER_MULTI_MODEL_REPO}
44
48
SAGEMAKER_MODEL_REPO=${SAGEMAKER_MULTI_MODEL_REPO}
49
+ SAGEMAKER_TRITON_PING_MODE=" live"
45
50
is_mme_mode=true
46
- echo " Triton is running in SageMaker MME mode."
51
+ echo -e " Triton is running in SageMaker MME mode. Using Triton ping mode: \" ${SAGEMAKER_TRITON_PING_MODE} \" "
47
52
fi
48
53
fi
49
54
@@ -134,4 +139,4 @@ elif [ "${is_mme_mode}" = false ]; then
134
139
SAGEMAKER_ARGS=" ${SAGEMAKER_ARGS} --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME} "
135
140
fi
136
141
137
- tritonserver --allow-sagemaker=true --allow-grpc=false --allow-http=false --allow-metrics=false --model-control-mode=explicit $SAGEMAKER_ARGS
142
+ tritonserver --allow-sagemaker=true --allow-grpc=true --allow-http=false --allow-metrics=true --model-control-mode=explicit $SAGEMAKER_ARGS
Original file line number Diff line number Diff line change @@ -353,12 +353,11 @@ if [ "$SERVER_PID" == "0" ]; then
353
353
exit 1
354
354
fi
355
355
356
- # Ping and expect server to still be running (using 'live' instead of 'ready')
357
- # https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/rest_predict_v2.yaml#L10-L26
356
+ # Ping and expect an error code (anything other than 200) in SME (single-model endpoint) mode.
358
357
set +e
359
358
code=` curl -s -w %{http_code} -o ./ping.out localhost:8080/ping`
360
359
set -e
361
- if [ " $code " ! = " 200" ]; then
360
+ if [ " $code " = = " 200" ]; then
362
361
cat ./ping.out
363
362
echo -e " \n***\n*** Test Failed\n***"
364
363
RET=1
Original file line number Diff line number Diff line change @@ -904,7 +904,8 @@ SagemakerAPIServer::SageMakerMMECheckOOMError(TRITONSERVER_Error* err)
904
904
" CUBLAS_STATUS_ALLOC_FAILED" ,
905
905
" CUBLAS_STATUS_NOT_INITIALIZED" ,
906
906
" Failed to allocate memory" ,
907
- " failed to allocate memory" };
907
+ " failed to allocate memory" ,
908
+ " No space left on device" };
908
909
909
910
/*
910
911
TODO: Improve the search to do pattern match on whole words only
Original file line number Diff line number Diff line change @@ -78,7 +78,7 @@ class SagemakerAPIServer : public HTTPAPIServer {
78
78
model_path_regex_(
79
79
R"( (\/opt\/ml\/models\/[0-9A-Za-z._]+)\/(model)\/?([0-9A-Za-z._]+)?)" ),
80
80
platform_ensemble_regex_(R"( platform:(\s)*\"ensemble\")" ),
81
- ping_mode_(" live " ),
81
+ ping_mode_(GetEnvironmentVariableOrDefault( " SAGEMAKER_TRITON_PING_MODE " , " ready " ) ),
82
82
model_name_(GetEnvironmentVariableOrDefault(
83
83
" SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ,
84
84
" unspecified_SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" )),
You can’t perform that action at this time.
0 commit comments