Skip to content
This repository was archived by the owner on Jun 23, 2025. It is now read-only.

Commit c985e95

Browse files
authored
Reverting the TGI image version for LLAMA multiple GPUs in GKE samples (#931)
1 parent 434a149 commit c985e95

File tree

7 files changed

+23
-9
lines changed

7 files changed

+23
-9
lines changed

modules/inference-service/main.tf

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ resource "kubernetes_deployment" "inference_deployment" {
8989
}
9090
}
9191
container {
92-
image = "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310"
92+
image = "ghcr.io/huggingface/text-generation-inference:1.4.3"
9393
name = "mistral-7b-instruct"
9494

9595
port {
@@ -130,7 +130,9 @@ resource "kubernetes_deployment" "inference_deployment" {
130130
mount_path = "/dev/shm"
131131
name = "dshm"
132132
}
133-
133+
# mountPath is set to /data as it's the path where the HF_HOME environment
134+
# variable points to in the TGI container image i.e. where the downloaded model from the Hub will be
135+
# stored
134136
volume_mount {
135137
mount_path = "/data"
136138
name = "data"

tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ Pod Template:
104104
Labels: app=mistral-7b
105105
Containers:
106106
mistral-7b:
107-
Image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
107+
Image: ghcr.io/huggingface/text-generation-inference:1.4.3
108108
Port: 8080/TCP
109109
Host Port: 0/TCP
110110
Limits:

tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/mistral-7b.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ spec:
2828
spec:
2929
containers:
3030
- name: mistral-7b
31-
image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
31+
image: ghcr.io/huggingface/text-generation-inference:1.4.3
3232
resources:
3333
limits:
3434
nvidia.com/gpu: 1
@@ -47,6 +47,9 @@ spec:
4747
volumeMounts:
4848
- mountPath: /dev/shm
4949
name: dshm
50+
# mountPath is set to /data as it's the path where the HF_HOME environment
51+
# variable points to in the TGI container image i.e. where the downloaded model from the Hub will be
52+
# stored
5053
- mountPath: /data
5154
name: data
5255
volumes:

tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ Create a node pool for deploying Mixtral 7B with quadpod deployment L4 GPU {4 x
7777
```bash
7878
gcloud container node-pools create mixtral-moe-gpu-pool \
7979
--cluster=mixtral8x7-cluster-gke \
80-
--project=gke-aishared-dev \
80+
--project=${PROJECT_ID} \
8181
--machine-type=g2-standard-48 \
8282
--ephemeral-storage-local-ssd=count=4 \
8383
--accelerator=type=nvidia-l4,count=4 \
@@ -127,7 +127,7 @@ Pod Template:
127127
Labels: app=mixtral8x7b
128128
Containers:
129129
mixtral8x7b:
130-
Image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
130+
Image: ghcr.io/huggingface/text-generation-inference:1.4.3
131131
Port: 8080/TCP
132132
Host Port: 0/TCP
133133
Limits:

tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/mixtral-8x7b.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ spec:
3030
cloud.google.com/gke-accelerator: "nvidia-l4"
3131
containers:
3232
- name: mixtral8x7b
33-
image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
33+
image: ghcr.io/huggingface/text-generation-inference:1.4.3
3434
ports:
3535
- name: server-port
3636
containerPort: 8080
@@ -53,6 +53,9 @@ spec:
5353
memory: "42Gi"
5454
nvidia.com/gpu: "2"
5555
volumeMounts:
56+
# mountPath is set to /data as it's the path where the HF_HOME environment
57+
# variable points to in the TGI container image i.e. where the downloaded model from the Hub will be
58+
# stored
5659
- mountPath: /data
5760
name: ephemeral-volume
5861
- mountPath: /dev/shm

tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ spec:
7676
spec:
7777
containers:
7878
- name: llama-2-70b
79-
image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
79+
image: ghcr.io/huggingface/text-generation-inference:1.4.3
8080
resources:
8181
limits:
8282
nvidia.com/gpu: 2
@@ -97,6 +97,9 @@ spec:
9797
volumeMounts:
9898
- mountPath: /dev/shm
9999
name: dshm
100+
# mountPath is set to /data as it's the path where the HF_HOME environment
101+
# variable points to in the TGI container image i.e. where the downloaded model from the Hub will be
102+
# stored
100103
- mountPath: /data
101104
name: data
102105
volumes:

tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/text-generation-interface.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ spec:
2828
spec:
2929
containers:
3030
- name: llama-2-70b
31-
image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
31+
image: ghcr.io/huggingface/text-generation-inference:1.4.3
3232
resources:
3333
limits:
3434
nvidia.com/gpu: 2
@@ -49,6 +49,9 @@ spec:
4949
volumeMounts:
5050
- mountPath: /dev/shm
5151
name: dshm
52+
# mountPath is set to /data as it's the path where the HF_HOME environment
53+
# variable points to in the TGI container image i.e. where the downloaded model from the Hub will be
54+
# stored
5255
- mountPath: /data
5356
name: data
5457
volumes:

0 commit comments

Comments (0)