Skip to content

Commit 427d102

Browse files
yhwangruivieiraRobGeada
authored
sync: sync dev/lm-eval with main branch (#271)
* feat: Initial database support (#246) * Initial database support - Add status checking - Add better storage flags - Add spec.storage.format validation - Add DDL -Add HIBERNATE format to DB (test) - Update service image - Revert identifier to DATABASE - Update CR options (remove mandatory data) * Remove default DDL generation env var * Update service image to latest tag * Add migration awareness * Add updating pods for migration * Change JDBC url from mysql to mariadb * Fix TLS mount * Revert images * Remove redundant logic * Fix comments * feat: Add TLS certificate mount on ModelMesh (#255) * feat: Add TLS certificate mount on ModelMesh * Revert from http to https until kserve/modelmesh#147 is merged * Pin oc version, ubi version (#263) * Restore checkout of trustyai-exp (#265) * Add operator installation robustness (#266) * fix: Skip InferenceService patching for KServe RawDeployment (#262) * feat: ConfigMap key to disable KServe Serverless configuration (#267) * feat: Add support for custom certificates in database connection (#259) * Add TLS endpoint for ModelMesh payload processors. (#268) Keep non-TLS endpoint for KServe Serverless (disabled by default) --------- Signed-off-by: Yihong Wang <[email protected]> Co-authored-by: Rui Vieira <[email protected]> Co-authored-by: Rob Geada <[email protected]>
1 parent 2173aae commit 427d102

File tree

10 files changed

+177
-66
lines changed

10 files changed

+177
-66
lines changed

config/base/kustomization.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,10 @@ vars:
4141
apiVersion: v1
4242
fieldref:
4343
fieldpath: data.oauthProxyImage
44+
- name: kServeServerless
45+
objref:
46+
kind: ConfigMap
47+
name: config
48+
apiVersion: v1
49+
fieldref:
50+
fieldpath: data.kServeServerless

config/base/params.env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
trustyaiServiceImage=quay.io/trustyai/trustyai-service:latest
22
trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest
33
oauthProxyImage=quay.io/openshift/origin-oauth-proxy:4.14.0
4+
kServeServerless=disabled

controllers/tas/config_maps.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,44 @@ func (r *TrustyAIServiceReconciler) getImageFromConfigMap(ctx context.Context, k
4848
}
4949
}
5050

51+
// getKServeServerlessConfig checks the kServeServerless value in a ConfigMap in the operator's namespace
52+
func (r *TrustyAIServiceReconciler) getKServeServerlessConfig(ctx context.Context) (bool, error) {
53+
54+
if r.Namespace != "" {
55+
// Define the key for the ConfigMap
56+
configMapKey := types.NamespacedName{
57+
Namespace: r.Namespace,
58+
Name: constants.ConfigMap,
59+
}
60+
61+
// Create an empty ConfigMap object
62+
var cm corev1.ConfigMap
63+
64+
// Try to get the ConfigMap
65+
if err := r.Get(ctx, configMapKey, &cm); err != nil {
66+
if errors.IsNotFound(err) {
67+
// ConfigMap not found, return false as the default behavior
68+
return false, nil
69+
}
70+
// Other error occurred when trying to fetch the ConfigMap
71+
return false, fmt.Errorf("error reading configmap %s", configMapKey)
72+
}
73+
74+
// ConfigMap is found, extract the kServeServerless value
75+
kServeServerless, ok := cm.Data[configMapkServeServerlessKey]
76+
77+
if !ok || kServeServerless != "enabled" {
78+
// Key is missing or its value is not "enabled", return false
79+
return false, nil
80+
}
81+
82+
// kServeServerless is "enabled"
83+
return true, nil
84+
} else {
85+
return false, nil
86+
}
87+
}
88+
5189
// getConfigMapNamesWithLabel retrieves the names of ConfigMaps that have the specified label
5290
func (r *TrustyAIServiceReconciler) getConfigMapNamesWithLabel(ctx context.Context, namespace string, labelSelector client.MatchingLabels) ([]string, error) {
5391
configMapList := &corev1.ConfigMapList{}

controllers/tas/constants.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@ const (
2727

2828
// Configuration constants
2929
const (
30-
configMapOAuthProxyImageKey = "oauthProxyImage"
31-
configMapServiceImageKey = "trustyaiServiceImage"
30+
configMapOAuthProxyImageKey = "oauthProxyImage"
31+
configMapServiceImageKey = "trustyaiServiceImage"
32+
configMapkServeServerlessKey = "kServeServerless"
3233
)
3334

3435
// OAuth constants

controllers/tas/deployment.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ type DeploymentConfig struct {
4040
CustomCertificatesBundle CustomCertificatesBundle
4141
Version string
4242
BatchSize int
43+
UseDBTLSCerts bool
4344
}
4445

4546
// createDeploymentObject returns a Deployment for the TrustyAI Service instance
@@ -69,7 +70,20 @@ func (r *TrustyAIServiceReconciler) createDeploymentObject(ctx context.Context,
6970
PVCClaimName: pvcName,
7071
CustomCertificatesBundle: caBunble,
7172
Version: constants.Version,
72-
BatchSize: batchSize,
73+
}
74+
75+
if instance.Spec.Storage.IsStorageDatabase() {
76+
_, err := r.getSecret(ctx, instance.Name+"-db-tls", instance.Namespace)
77+
if err != nil {
78+
deploymentConfig.UseDBTLSCerts = false
79+
log.FromContext(ctx).Error(err, "Using insecure database connection. Certificates "+instance.Name+"-db-tls not found")
80+
} else {
81+
deploymentConfig.UseDBTLSCerts = true
82+
log.FromContext(ctx).Info("Using secure database connection with certificates " + instance.Name + "-db-tls")
83+
}
84+
} else {
85+
deploymentConfig.UseDBTLSCerts = false
86+
log.FromContext(ctx).Info("No need to check database secrets. Using PVC-mode.")
7387
}
7488

7589
var deployment *appsv1.Deployment

controllers/tas/inference_services.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ import (
1515
"sigs.k8s.io/controller-runtime/pkg/log"
1616
)
1717

18+
const (
19+
DEPLOYMENT_MODE_MODELMESH = "ModelMesh"
20+
DEPLOYMENT_MODE_RAW = "RawDeployment"
21+
DEPLOYMENT_MODE_SERVERLESS = "Serverless"
22+
)
23+
1824
// GetDeploymentsByLabel returns a list of Deployments that match a label key-value pair
1925
func (r *TrustyAIServiceReconciler) GetDeploymentsByLabel(ctx context.Context, namespace string, labelKey string, labelValue string) ([]appsv1.Deployment, error) {
2026
// Prepare a DeploymentList object
@@ -213,23 +219,37 @@ func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context,
213219
return false, err
214220
}
215221

222+
kServeServerlessEnabled, err := r.getKServeServerlessConfig(ctx)
223+
if err != nil {
224+
log.FromContext(ctx).Error(err, "Could not read KServeServerless configuration. Defaulting to disabled")
225+
kServeServerlessEnabled = false
226+
}
227+
216228
if len(inferenceServices.Items) == 0 {
217229
return true, nil
218230
}
219231

220232
for _, infService := range inferenceServices.Items {
221233
annotations := infService.GetAnnotations()
222-
// Check the annotation "serving.kserve.io/deploymentMode: ModelMesh"
223-
if val, ok := annotations["serving.kserve.io/deploymentMode"]; ok && val == "ModelMesh" {
224-
shouldContinue, err := r.patchEnvVarsByLabelForDeployments(ctx, instance, namespace, labelKey, labelValue, envVarName, crName, remove)
225-
if err != nil {
226-
log.FromContext(ctx).Error(err, "Could not patch environment variables for ModelMesh deployments.")
227-
return shouldContinue, err
234+
235+
// Check the annotation "serving.kserve.io/deploymentMode"
236+
if val, ok := annotations["serving.kserve.io/deploymentMode"]; ok {
237+
if val == DEPLOYMENT_MODE_RAW {
238+
log.FromContext(ctx).Info("RawDeployment mode not supported by TrustyAI")
239+
continue
240+
} else if val == DEPLOYMENT_MODE_MODELMESH {
241+
shouldContinue, err := r.patchEnvVarsByLabelForDeployments(ctx, instance, namespace, labelKey, labelValue, envVarName, crName, remove)
242+
if err != nil {
243+
log.FromContext(ctx).Error(err, "could not patch environment variables for ModelMesh deployments")
244+
return shouldContinue, err
245+
}
246+
continue
228247
}
229-
} else {
248+
}
249+
if kServeServerlessEnabled {
230250
err := r.patchKServe(ctx, instance, infService, namespace, crName, remove)
231251
if err != nil {
232-
log.FromContext(ctx).Error(err, "Could not path InferenceLogger for KServe deployment.")
252+
log.FromContext(ctx).Error(err, "could not patch InferenceLogger for KServe deployment")
233253
return false, err
234254
}
235255
}

controllers/tas/secrets.go

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,34 +10,42 @@ import (
1010
"sigs.k8s.io/controller-runtime/pkg/client"
1111
)
1212

13+
// getSecret retrieves a secret if it exists, returns an error if not
14+
func (r *TrustyAIServiceReconciler) getSecret(ctx context.Context, name, namespace string) (*corev1.Secret, error) {
15+
secret := &corev1.Secret{}
16+
err := r.Get(ctx, client.ObjectKey{Name: name, Namespace: namespace}, secret)
17+
if err != nil {
18+
if errors.IsNotFound(err) {
19+
return nil, fmt.Errorf("secret %s not found in namespace %s: %w", name, namespace, err)
20+
}
21+
return nil, fmt.Errorf("failed to get secret %s in namespace %s: %w", name, namespace, err)
22+
}
23+
return secret, nil
24+
}
25+
1326
// findDatabaseSecret finds the DB configuration secret named (specified or default) in the same namespace as the CR
1427
func (r *TrustyAIServiceReconciler) findDatabaseSecret(ctx context.Context, instance *trustyaiopendatahubiov1alpha1.TrustyAIService) (*corev1.Secret, error) {
1528

1629
databaseConfigurationsName := instance.Spec.Storage.DatabaseConfigurations
1730
defaultDatabaseConfigurationsName := instance.Name + dbCredentialsSuffix
1831

19-
secret := &corev1.Secret{}
20-
2132
if databaseConfigurationsName != "" {
22-
secret := &corev1.Secret{}
23-
err := r.Get(ctx, client.ObjectKey{Name: databaseConfigurationsName, Namespace: instance.Namespace}, secret)
24-
if err == nil {
25-
return secret, nil
33+
secret, err := r.getSecret(ctx, databaseConfigurationsName, instance.Namespace)
34+
if err != nil {
35+
return nil, err
2636
}
27-
if !errors.IsNotFound(err) {
28-
return nil, fmt.Errorf("failed to get secret %s in namespace %s: %w", databaseConfigurationsName, instance.Namespace, err)
37+
if secret != nil {
38+
return secret, nil
2939
}
3040
} else {
3141
// No secret name was specified, so try the default
32-
33-
err := r.Get(ctx, client.ObjectKey{Name: defaultDatabaseConfigurationsName, Namespace: instance.Namespace}, secret)
34-
if err == nil {
35-
return secret, nil
42+
secret, err := r.getSecret(ctx, defaultDatabaseConfigurationsName, instance.Namespace)
43+
if err != nil {
44+
return nil, err
3645
}
37-
if !errors.IsNotFound(err) {
38-
return nil, fmt.Errorf("failed to get secret %s in namespace %s: %w", defaultDatabaseConfigurationsName, instance.Namespace, err)
46+
if secret != nil {
47+
return secret, nil
3948
}
40-
4149
}
4250

4351
return nil, fmt.Errorf("neither secret %s nor %s found in namespace %s", databaseConfigurationsName, defaultDatabaseConfigurationsName, instance.Namespace)

controllers/tas/templates/service/deployment.tmpl.yaml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,11 @@ spec:
9494
name: {{ .Instance.Spec.Storage.DatabaseConfigurations }}
9595
key: databasePort
9696
- name: QUARKUS_DATASOURCE_JDBC_URL
97+
{{ if .UseDBTLSCerts }}
98+
value: "jdbc:${QUARKUS_DATASOURCE_DB_KIND}://${DATABASE_SERVICE}:${DATABASE_PORT}/trustyai_database?sslMode=verify-ca&serverSslCert=/etc/tls/db/tls.crt"
99+
{{ else }}
97100
value: "jdbc:${QUARKUS_DATASOURCE_DB_KIND}://${DATABASE_SERVICE}:${DATABASE_PORT}/trustyai_database"
101+
{{ end }}
98102
- name: SERVICE_DATA_FORMAT
99103
value: "HIBERNATE"
100104
- name: QUARKUS_DATASOURCE_GENERATION
@@ -121,7 +125,12 @@ spec:
121125
- name: {{ .VolumeMountName }}
122126
mountPath: {{ .Instance.Spec.Storage.Folder }}
123127
readOnly: false
124-
{{ end }}
128+
{{ end }}
129+
{{ if .UseDBTLSCerts }}
130+
- name: db-tls-certs
131+
mountPath: /etc/tls/db
132+
readOnly: true
133+
{{ end }}
125134
- resources:
126135
limits:
127136
cpu: 100m
@@ -209,3 +218,9 @@ spec:
209218
secret:
210219
secretName: {{ .Instance.Name }}-internal
211220
defaultMode: 420
221+
{{ if .UseDBTLSCerts }}
222+
- name: db-tls-certs
223+
secret:
224+
secretName: {{ .Instance.Name }}-db-tls
225+
defaultMode: 420
226+
{{ end }}

tests/Dockerfile

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
1-
FROM registry.access.redhat.com/ubi8:8.10-901.1716497712
1+
FROM registry.access.redhat.com/ubi8:8.10-1020
22

33
ARG ORG=trustyai-explainability
44
ARG BRANCH=main
55
ARG ODS_CI_REPO=https://github.com/red-hat-data-services/ods-ci
66
# This git reference should always reference a stable commit from ods-ci that supports ODH
77
# This hash corresponds to a March 24th, 2023 commit
8-
ARG ODS_CI_GITREF=867a617bc224726cf98fa3354293f8e50b4f5eb5
9-
ARG OC_CLI_URL=https://mirror.openshift.com/pub/openshift-v4/amd64/clients/ocp/latest/openshift-client-linux.tar.gz
8+
ARG ODS_CI_GITREF=a8cf770b37caa4ef7ce6596acc8bdd6866cc7772
9+
ARG OC_CLI_URL=https://mirror.openshift.com/pub/openshift-v4/amd64/clients/ocp/4.14.33/openshift-client-linux.tar.gz
1010

1111
ENV HOME /root
1212
WORKDIR /root
1313

14-
RUN dnf -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm &&\
15-
dnf install -y jq bc git go-toolset python3.11 python3.11-pip python3.11-devel unzip && \
14+
RUN dnf install -y jq bc git go-toolset python3.11 python3.11-devel python3.11-pip unzip && \
1615
dnf clean all && \
1716
git clone https://github.com/opendatahub-io/peak $HOME/peak && \
1817
cd $HOME/peak && \
@@ -25,9 +24,6 @@ RUN curl -L https://github.com/mikefarah/yq/releases/download/v4.25.1/yq_linux_a
2524
RUN mkdir -p $HOME/src && \
2625
cd $HOME/src && \
2726
git clone --depth=1 --branch ${BRANCH} https://github.com/${ORG}/trustyai-explainability && \
28-
# Clone ods-ci repo at specified git ref for the ODH Dashboard webUI tests
29-
git clone --depth=1 ${ODS_CI_REPO} ods-ci && cd ods-ci && \
30-
git fetch origin ${ODS_CI_GITREF} && git checkout FETCH_HEAD && \
3127
chmod -R 777 $HOME/src
3228

3329
# Use a specific destination file name in case the url download name changes
@@ -37,16 +33,6 @@ RUN tar -C /usr/local/bin -xvf $HOME/peak/oc-cli.tar.gz && \
3733

3834
COPY Pipfile Pipfile.lock $HOME/peak/
3935

40-
RUN pip3 install micropipenv &&\
41-
ln -s `which pip3` /usr/bin/pip &&\
42-
cd $HOME/peak &&\
43-
micropipenv install
44-
45-
# Install poetry to support the exeuction of ods-ci test framework
46-
RUN curl -sSL https://install.python-poetry.org | python3 -
47-
ENV PATH="${PATH}:$HOME/.local/bin"
48-
RUN cd $HOME/src/ods-ci && poetry install
49-
5036
## Grab CI scripts from single-source-of-truth
5137
RUN mkdir -p $HOME/peak/operator-tests/trustyai-explainability/ &&\
5238
mkdir $HOME/kfdef/ &&\

tests/scripts/install.sh

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,44 @@ if ! [ -z "${SKIP_OPERATOR_INSTALL}" ]; then
1414
./setup.sh -t ~/peak/operatorsetup 2>&1
1515
else
1616
echo "Installing operator from community marketplace"
17-
while [[ $retry -gt 0 ]]; do
1817

19-
# patch bug in peak setup script
20-
sed -i "s/path=\"{.status.channels.*/ | jq '.status.channels | .[0].currentCSVDesc.installModes | map(select(.type == \"AllNamespaces\")) | .[0].supported')/" setup.sh
21-
sed -i "s/csource=.*/echo \$3; csource=\$3/" setup.sh
22-
sed -i 's/installop \$.*/installop \${vals[0]} \${vals[1]} \${vals[3]}/' setup.sh
18+
start_t=$(date +%s) 2>&1
19+
ready=false 2>&1
20+
while ! $ready; do
21+
CATALOG_SOURCES=$(oc get catalogsources -n openshift-marketplace 2> /dev/null | grep 'community-operators')
22+
if [ ! -z "${CATALOG_SOURCES}" ]; then
23+
echo $CATALOG_SOURCES
24+
ready=true 2>&1
25+
else
26+
sleep 10
27+
fi
28+
if [ $(($(date +%s)-start_t)) -gt 300 ]; then
29+
echo "Marketplace pods never started"
30+
exit 1
31+
fi
32+
done
33+
34+
start_t=$(date +%s) 2>&1
35+
ready=false 2>&1
36+
while ! $ready; do
37+
MANIFESTS=$(oc get packagemanifests -n openshift-marketplace 2> /dev/null | grep 'opendatahub')
38+
echo $MANIFESTS
39+
if [ ! -z "${MANIFESTS}" ]; then
40+
echo $MANIFESTS
41+
ready=true 2>&1
42+
else
43+
sleep 10
44+
fi
45+
if [ $(($(date +%s)-start_t)) -gt 900 ]; then
46+
echo "Package manifests never downloaded"
47+
exit 1
48+
fi
49+
done
50+
51+
while [[ $retry -gt 0 ]]; do
52+
./setup.sh -o ~/peak/operatorsetup\
2353

24-
./setup.sh -o ~/peak/operatorsetup
54+
# approve installplans
2555
if [ $? -eq 0 ]; then
2656
retry=-1
2757
else
@@ -31,11 +61,16 @@ else
3161
fi
3262
retry=$(( retry - 1))
3363

64+
sleep 30
65+
echo "Approving Install Plans, if needed"
66+
oc patch installplan $(oc get installplan -n openshift-operators | grep $ODH_VERSION | awk '{print $1}') -n openshift-operators --type merge --patch '{"spec":{"approved":true}}' || true
67+
oc patch installplan $(oc get installplan -n openshift-operators | grep authorino | awk '{print $1}') -n openshift-operators --type merge --patch '{"spec":{"approved":true}}' || true
68+
3469
finished=false 2>&1
3570
start_t=$(date +%s) 2>&1
3671
echo "Verifying installation of ODH operator"
3772
while ! $finished; do
38-
if [ ! -z "$(oc get pods -n openshift-operators | grep 'opendatahub-operator-controller-manager' | grep '1/1')" ]; then
73+
if [ ! -z "$(oc get pods -n openshift-operators | grep 'opendatahub-operator-controller-manager' | grep '1/1')" ]; then
3974
finished=true 2>&1
4075
else
4176
sleep 10
@@ -50,20 +85,6 @@ else
5085
done
5186
fi
5287

53-
#popd
54-
### Grabbing and applying the patch in the PR we are testing
55-
#pushd ~/src/${REPO_NAME}
56-
#if [ -z "$PULL_NUMBER" ]; then
57-
# echo "No pull number, assuming nightly run"
58-
#else
59-
# if [ $REPO_OWNER == "trustyai-explainability" ]; then
60-
# curl -O -L https://github.com/${REPO_OWNER}/${REPO_NAME}/pull/${PULL_NUMBER}.patch
61-
# echo "Applying followng patch:"
62-
# cat ${PULL_NUMBER}.patch > ${ARTIFACT_DIR}/github-pr-${PULL_NUMBER}.patch
63-
# git apply ${PULL_NUMBER}.patch
64-
# fi
65-
#fi
66-
6788
popd
6889
## Point manifests repo uri in the KFDEF to the manifests in the PR
6990
pushd ~/kfdef

0 commit comments

Comments
 (0)