Add Redis cache build, tests, and docs (#5916)

rmccorm4 · mc-nv · commit fa4aca4b158d · 2023-06-08T16:20:05.000-07:00
diff --git a/build.py b/build.py
@@ -1769,8 +1769,7 @@ def enable_all():
             'tensorrt'
         ]
         all_repoagents = ['checksum']
-        # DLIS-4491: Add redis cache to build
-        all_caches = ['local']
+        all_caches = ['local', 'redis']
         all_filesystems = ['gcs', 's3', 'azure_storage']
         all_endpoints = ['http', 'grpc', 'sagemaker', 'vertex-ai']
 
@@ -1788,8 +1787,7 @@ def enable_all():
             'openvino', 'tensorrt'
         ]
         all_repoagents = ['checksum']
-        # DLIS-4491: Add redis cache to build
-        all_caches = ['local']
+        all_caches = ['local', 'redis']
         all_filesystems = []
         all_endpoints = ['http', 'grpc']
 
diff --git a/docs/user_guide/response_cache.md b/docs/user_guide/response_cache.md
@@ -101,10 +101,12 @@ that are used to communicate with a cache implementation of the user's choice.
 
 A cache implementation is a shared library that implements the required
 TRITONCACHE APIs and is dynamically loaded on server startup, if enabled. 
-For tags `>=23.03`, 
+
+Triton's most recent
 [tritonserver release containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
 come with the following cache implementations out of the box:
 - [local](https://github.com/triton-inference-server/local_cache): `/opt/tritonserver/caches/local/libtritoncache_local.so`
+- [redis](https://github.com/triton-inference-server/redis_cache): `/opt/tritonserver/caches/redis/libtritoncache_redis.so`
 
 With these TRITONCACHE APIs, `tritonserver` exposes a new `--cache-config` 
 CLI flag that gives the user flexible customization of which cache implementation
@@ -124,18 +126,44 @@ When `--cache-config local,size=SIZE` is specified with a non-zero `SIZE`,
 Triton allocates the requested size in CPU memory and **shares the
 cache across all inference requests and across all models**. 
 
+#### Redis Cache
+
+The `redis` cache implementation exposes the ability for Triton to communicate
+with a Redis server for caching. The `redis_cache` implementation is essentially
+a Redis client that acts as an intermediary between Triton and Redis. 
+
+To list a few benefits of the `redis` cache compared to the `local` cache in
+the context of Triton:
+- The Redis server can be hosted remotely as long as it is accessible by Triton,
+  so it is not tied directly to the Triton process lifetime. 
+  - This means Triton can be restarted and still have access to previously cached entries.
+  - This also means that Triton doesn't have to compete with the cache for memory/resource usage.
+- Multiple Triton instances can share a cache by configuring each Triton instance
+  to communicate with the same Redis server.
+- The Redis server can be updated/restarted independently of Triton, and
+  Triton will fall back to operating as it would with no cache access during
+  any Redis server downtime, and log appropriate errors.
+
+In general, the Redis server can be configured/deployed as needed for your use 
+case, and Triton's `redis` cache will simply act as a client of your Redis 
+deployment. The [Redis docs](https://redis.io/docs/) should be consulted for 
+questions and details about configuring the Redis server.
+
+For Triton-specific `redis` cache implementation details/configuration, see the
+[redis cache implementation](https://github.com/triton-inference-server/redis_cache).
+
 #### Custom Cache
 
-With the new the TRITONCACHE API interface, it is now possible for
+With the TRITONCACHE API interface, it is now possible for
 users to implement their own cache to suit any use-case specific needs.
 To see the required interface that must be implemented by a cache
 developer, see the 
 [TRITONCACHE API header](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritoncache.h).
-The `local` cache implementation may be used as a reference implementation.
+The `local` and `redis` cache implementations may be used as references.
 
 Upon successfully developing and building a custom cache, the resulting shared
 library (ex: `libtritoncache_<name>.so`) must be placed in the cache directory
-similar to where the `local` cache implementation lives. By default,
+similar to where the `local` and `redis` cache implementations live. By default,
 this directory is `/opt/tritonserver/caches`, but a custom directory may be
 specified with `--cache-dir` as needed. 
 
@@ -184,9 +212,10 @@ a response.
 For cases where cache hits are common and computation is expensive, 
 the cache can significantly improve overall performance.
 
-For cases where all or most requests are unique (cache misses), the
-cache may negatively impact the overall performance due to the overhead
-of managing the cache.
+For cases where most requests are unique (cache misses) or the compute is
+fast/cheap (the model is not compute-bound), the cache can negatively impact
+the overall performance due to the overhead of managing and communicating with
+the cache.
 
 ## Known Limitations
 
diff --git a/qa/L0_response_cache/test.sh b/qa/L0_response_cache/test.sh
@@ -29,12 +29,69 @@ RET=0
 
 TEST_LOG="./response_cache_test.log"
 UNIT_TEST=./response_cache_test
+export CUDA_VISIBLE_DEVICES=0
+
+# Only localhost supported in this test for now, but in future could make
+# use of a persistent remote redis server, or similarly use --replicaof arg.
+export TRITON_REDIS_HOST="localhost"
+export TRITON_REDIS_PORT="6379"
 
 rm -fr *.log
 
-# UNIT TEST
+function install_redis() {
+  ## Install redis if not already installed
+  if ! command -v redis-server >/dev/null 2>&1; then
+    apt update -y && apt install -y redis
+  fi
+}
+
+function start_redis() {
+  # Run redis server in background
+  redis-server --daemonize yes --port "${TRITON_REDIS_PORT}"
+
+  # Check redis server is running
+  REDIS_PING_RESPONSE=$(redis-cli -h ${TRITON_REDIS_HOST} -p ${TRITON_REDIS_PORT} ping)
+  if [ "${REDIS_PING_RESPONSE}" == "PONG" ]; then
+    echo "Redis successfully started in background"
+  else
+    echo -e "\n***\n*** Failed: Redis server did not start successfully\n***"
+    RET=1
+  fi
+}
+
+function stop_redis() {
+  echo "Stopping Redis server..."
+  redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" shutdown || true
+  echo "Redis server shutdown"
+}
+
+function set_redis_auth() {
+  # NOTE: Per-user auth [Access Control List (ACL)] is only supported in 
+  #       Redis >= 6.0 and is more comprehensive in what can be configured. 
+  #       For simplicity and wider range of Redis version support, use
+  #       server-wide password via "requirepass" for now.
+  redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" config set requirepass "${REDIS_PW}"
+  export REDISCLI_AUTH="${REDIS_PW}"
+}
+
+function unset_redis_auth() {
+  # Authenticate implicitly via REDISCLI_AUTH env var, then unset password/var
+  redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" config set requirepass ""
+  unset REDISCLI_AUTH
+}
+
+# UNIT TESTS
 set +e
-export CUDA_VISIBLE_DEVICES=0
+
+## Unit tests currently run for both Local and Redis cache implementations
+## by default. However, we could break out the unit tests for each
+## into separate runs with gtest filters if needed in the future:
+## - `${UNIT_TEST} --gtest_filter=*Local*`
+## - `${UNIT_TEST} --gtest_filter=*Redis*`
+install_redis
+# Stop any existing redis server first for good measure
+stop_redis
+start_redis
 LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $UNIT_TEST >>$TEST_LOG 2>&1
 if [ $? -ne 0 ]; then
     cat $TEST_LOG
@@ -48,10 +105,33 @@ function check_server_success_and_kill {
     if [ "${SERVER_PID}" == "0" ]; then
         echo -e "\n***\n*** Failed to start ${SERVER}\n***"
         cat ${SERVER_LOG}
-        exit 1
+        RET=1
+    else
+        kill ${SERVER_PID}
+        wait ${SERVER_PID}
+    fi
+}
+
+function check_server_expected_failure {
+    EXPECTED_MESSAGE="${1}"
+    if [ "${SERVER_PID}" != "0" ]; then
+        echo -e "\n***\n*** Failed: ${SERVER} started successfully when it was expected to fail\n***"
+        cat ${SERVER_LOG}
+        RET=1
+
+        kill ${SERVER_PID}
+        wait ${SERVER_PID}
+    else
+        # Check that server fails with the correct error message
+        set +e
+        grep -i "${EXPECTED_MESSAGE}" ${SERVER_LOG} 
+        if [ $? -ne 0 ]; then
+            echo -e "\n***\n*** Failed: Expected [${EXPECTED_MESSAGE}] error message in output\n***"
+            cat $SERVER_LOG
+            RET=1
+        fi
+        set -e
     fi
-    kill $SERVER_PID
-    wait $SERVER_PID
 }
 
 MODEL_DIR="${PWD}/models"
@@ -102,46 +182,66 @@ check_server_success_and_kill
 # Test that specifying multiple cache types is not supported and should fail
 SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=local,size=8192 --cache-config=redis,key=value ${EXTRA_ARGS}"
 run_server
-if [ "$SERVER_PID" != "0" ]; then
-    echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
-    cat $SERVER_LOG
-    RET=1
-
-    kill $SERVER_PID
-    wait $SERVER_PID
-else
-    # Check that server fails with the correct error message
-    set +e
-    grep -i "multiple cache configurations" ${SERVER_LOG}
-    if [ $? -ne 0 ]; then
-        echo -e "\n***\n*** Failed: Expected multiple cache configuration error message in output\n***"
-        cat $SERVER_LOG
-        RET=1
-    fi
-    set -e
-fi
+check_server_expected_failure "multiple cache configurations"
 
 # Test that specifying both config styles is incompatible and should fail
 SERVER_ARGS="--model-repository=${MODEL_DIR} --response-cache-byte-size=12345 --cache-config=local,size=67890 ${EXTRA_ARGS}"
 run_server
-if [ "$SERVER_PID" != "0" ]; then
-    echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
-    cat $SERVER_LOG
-    RET=1
+check_server_expected_failure "incompatible flags"
 
-    kill $SERVER_PID
-    wait $SERVER_PID
-else
-    # Check that server fails with the correct error message
-    set +e
-    grep -i "incompatible flags" ${SERVER_LOG}
-    if [ $? -ne 0 ]; then
-        echo -e "\n***\n*** Failed: Expected incompatible cache config flags error message in output\n***"
-        cat $SERVER_LOG
-        RET=1
-    fi
-    set -e
-fi
+## Redis Cache CLI tests
+REDIS_ENDPOINT="--cache-config redis,host=${TRITON_REDIS_HOST} --cache-config redis,port=${TRITON_REDIS_PORT}"
+
+# Test simple redis cache config succeeds
+SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}"
+run_server
+check_server_success_and_kill
+
+# Test triton fails to initialize if it can't connect to redis cache
+SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=localhost --cache-config=redis,port=nonexistent ${EXTRA_ARGS}"
+run_server
+check_server_expected_failure "Failed to connect to Redis: Connection refused"
+
+# Test triton fails to initialize if it can't resolve host for redis cache
+SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=nonexistent --cache-config=redis,port=nonexistent ${EXTRA_ARGS}"
+run_server
+# Either of these errors can be returned for bad hostname, so check for either.
+MSG1="Temporary failure in name resolution"
+MSG2="Name or service not known"
+check_server_expected_failure "${MSG1}\|${MSG2}"
+
+# Test triton fails to initialize if minimum required args (host & port) not all provided: host is omitted, only a valid port is given
+SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,port=${TRITON_REDIS_PORT} ${EXTRA_ARGS}"
+run_server
+check_server_expected_failure "Must at a minimum specify"
+
+## Redis Authentication tests
+
+# Automatically provide auth via REDISCLI_AUTH env var when set: https://redis.io/docs/ui/cli/
+REDIS_PW="redis123!"
+set_redis_auth
+
+# Test simple redis authentication succeeds with correct credentials
+REDIS_CACHE_AUTH="--cache-config redis,password=${REDIS_PW}"
+SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${REDIS_CACHE_AUTH} ${EXTRA_ARGS}"
+run_server
+check_server_success_and_kill
+
+# Test simple redis authentication fails with wrong credentials
+REDIS_CACHE_AUTH="--cache-config redis,password=wrong"
+SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${REDIS_CACHE_AUTH} ${EXTRA_ARGS}"
+run_server
+check_server_expected_failure "WRONGPASS"
+
+
+# Test simple redis authentication fails with no credentials
+SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}"
+run_server
+check_server_expected_failure "NOAUTH Authentication required"
+
+# Clean up redis server before exiting test
+unset_redis_auth
+stop_redis
 
 if [ $RET -eq 0 ]; then
   echo -e "\n***\n*** Test Passed\n***"
diff --git a/qa/common/util.sh b/qa/common/util.sh
@@ -66,7 +66,7 @@ function wait_for_server_ready() {
 
     local wait_secs=$wait_time_secs
     until test $wait_secs -eq 0 ; do
-        if ! kill -0 $spid; then
+        if ! kill -0 $spid > /dev/null 2>&1; then
             echo "=== Server not running."
             WAIT_RET=1
             return
@@ -147,13 +147,13 @@ function wait_for_model_stable() {
 }
 
 function gdb_helper () {
-  if ! command -v gdb; then
+  if ! command -v gdb > /dev/null 2>&1; then
     echo "=== WARNING: gdb not installed"
     return
   fi
 
   ### Server Hang ###
-  if kill -0 ${SERVER_PID}; then
+  if kill -0 ${SERVER_PID} > /dev/null 2>&1; then
     # If server process is still alive, try to get backtrace and core dump from it
     GDB_LOG="gdb_bt.${SERVER_PID}.log"
     echo -e "=== WARNING: SERVER HANG DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
@@ -166,7 +166,7 @@ function gdb_helper () {
 
   ### Server Segfaulted ###
   # If there are any core dumps locally from a segfault, load them and get a backtrace
-  for corefile in $(ls core.*); do
+  for corefile in $(ls core.* 2> /dev/null); do
     GDB_LOG="${corefile}.log"
     echo -e "=== WARNING: SEGFAULT DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
     gdb -batch ${SERVER} ${corefile} -ex "thread apply all bt" | tee "${corefile}.log" || true; 
@@ -204,7 +204,7 @@ function run_server () {
         gdb_helper || true
 
         # Cleanup
-        kill $SERVER_PID || true
+        kill $SERVER_PID > /dev/null 2>&1 || true
         SERVER_PID=0
     fi
 }