Skip to content

Commit abc9360

Browse files
authored
feat(aio): entrypoint, update workflows (#1872)
1 parent 743095b commit abc9360

File tree

9 files changed

+191
-16
lines changed

9 files changed

+191
-16
lines changed

.github/workflows/image.yml

+5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ jobs:
2626
platforms: ${{ matrix.platforms }}
2727
runs-on: ${{ matrix.runs-on }}
2828
base-image: ${{ matrix.base-image }}
29+
aio: ${{ matrix.aio }}
2930
makeflags: "-j3"
3031
secrets:
3132
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -86,6 +87,7 @@ jobs:
8687
image-type: 'extras'
8788
runs-on: 'arc-runner-set'
8889
base-image: "ubuntu:22.04"
90+
aio: "-aio-gpu-nvidia-cuda-11"
8991
- build-type: 'cublas'
9092
cuda-major-version: "12"
9193
cuda-minor-version: "1"
@@ -96,6 +98,7 @@ jobs:
9698
image-type: 'extras'
9799
runs-on: 'arc-runner-set'
98100
base-image: "ubuntu:22.04"
101+
aio: "-aio-gpu-nvidia-cuda-12"
99102
- build-type: ''
100103
#platforms: 'linux/amd64,linux/arm64'
101104
platforms: 'linux/amd64'
@@ -199,6 +202,7 @@ jobs:
199202
cuda-minor-version: ${{ matrix.cuda-minor-version }}
200203
platforms: ${{ matrix.platforms }}
201204
runs-on: ${{ matrix.runs-on }}
205+
aio: ${{ matrix.aio }}
202206
base-image: ${{ matrix.base-image }}
203207
makeflags: "-j3"
204208
secrets:
@@ -217,6 +221,7 @@ jobs:
217221
image-type: 'core'
218222
base-image: "ubuntu:22.04"
219223
runs-on: 'ubuntu-latest'
224+
aio: "-aio-cpu"
220225
- build-type: 'cublas'
221226
cuda-major-version: "11"
222227
cuda-minor-version: "7"

.github/workflows/image_build.yml

+67-1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ on:
5151
required: false
5252
default: ''
5353
type: string
54+
aio:
55+
description: 'AIO Image Name'
56+
required: false
57+
default: ''
58+
type: string
5459
secrets:
5560
dockerUsername:
5661
required: true
@@ -129,7 +134,30 @@ jobs:
129134
flavor: |
130135
latest=${{ inputs.tag-latest }}
131136
suffix=${{ inputs.tag-suffix }}
132-
137+
- name: Docker meta AIO (quay.io)
138+
if: inputs.aio != ''
139+
id: meta_aio
140+
uses: docker/metadata-action@v5
141+
with:
142+
images: |
143+
quay.io/go-skynet/local-ai
144+
tags: |
145+
type=ref,event=branch
146+
type=semver,pattern={{raw}}
147+
flavor: |
148+
suffix=${{ inputs.aio }}
149+
- name: Docker meta AIO (dockerhub)
150+
if: inputs.aio != ''
151+
id: meta_aio_dockerhub
152+
uses: docker/metadata-action@v5
153+
with:
154+
images: |
155+
localai/localai
156+
tags: |
157+
type=ref,event=branch
158+
type=semver,pattern={{raw}}
159+
flavor: |
160+
suffix=${{ inputs.aio }}
133161
- name: Set up QEMU
134162
uses: docker/setup-qemu-action@master
135163
with:
@@ -172,6 +200,44 @@ jobs:
172200
push: ${{ github.event_name != 'pull_request' }}
173201
tags: ${{ steps.meta.outputs.tags }}
174202
labels: ${{ steps.meta.outputs.labels }}
203+
-
204+
name: Inspect image
205+
if: github.event_name != 'pull_request'
206+
run: |
207+
docker pull localai/localai:${{ steps.meta.outputs.version }}
208+
docker image inspect localai/localai:${{ steps.meta.outputs.version }}
209+
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
210+
docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
211+
- name: Build and push AIO image
212+
if: inputs.aio != ''
213+
uses: docker/build-push-action@v5
214+
with:
215+
builder: ${{ steps.buildx.outputs.name }}
216+
build-args: |
217+
BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
218+
context: .
219+
file: ./Dockerfile.aio
220+
platforms: ${{ inputs.platforms }}
221+
push: ${{ github.event_name != 'pull_request' }}
222+
tags: ${{ steps.meta_aio.outputs.tags }}
223+
labels: ${{ steps.meta_aio.outputs.labels }}
224+
- name: Build and push AIO image (dockerhub)
225+
if: inputs.aio != ''
226+
uses: docker/build-push-action@v5
227+
with:
228+
builder: ${{ steps.buildx.outputs.name }}
229+
build-args: |
230+
BASE_IMAGE=localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
231+
context: .
232+
file: ./Dockerfile.aio
233+
platforms: ${{ inputs.platforms }}
234+
push: ${{ github.event_name != 'pull_request' }}
235+
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
236+
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
175237
- name: job summary
176238
run: |
177239
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
240+
- name: job summary(AIO)
241+
if: inputs.aio != ''
242+
run: |
243+
echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

Dockerfile.aio

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
ARG BASE_IMAGE=ubuntu:22.04
22

33
FROM ${BASE_IMAGE}
4-
ARG SIZE=cpu
5-
ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml"
64

7-
COPY aio/${SIZE} /aio-models
5+
RUN apt-get update && apt-get install -y pciutils && apt-get clean
86

9-
ENTRYPOINT [ "/build/entrypoint.sh" ]
7+
COPY aio/ /aio
8+
ENTRYPOINT [ "/aio/entrypoint.sh" ]

Makefile

+1-4
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,6 @@ grpcs: prepare $(GRPC_BACKENDS)
536536

537537
DOCKER_IMAGE?=local-ai
538538
DOCKER_AIO_IMAGE?=local-ai-aio
539-
DOCKER_AIO_SIZE?=cpu
540539
IMAGE_TYPE?=core
541540
BASE_IMAGE?=ubuntu:22.04
542541

@@ -549,11 +548,9 @@ docker:
549548
-t $(DOCKER_IMAGE) .
550549

551550
docker-aio:
552-
@echo "Building AIO image with size $(DOCKER_AIO_SIZE)"
553-
@echo "Building AIO image with base image $(BASE_IMAGE)"
551+
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
554552
docker build \
555553
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
556-
--build-arg SIZE=$(DOCKER_AIO_SIZE) \
557554
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
558555

559556
docker-aio-all:

aio/cpu/README.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
## AIO CPU size
2+
3+
Use this image for CPU-only inference (no GPU acceleration).
4+
5+
Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).

aio/cpu/embeddings.yaml

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
1-
name: all-minilm-l6-v2
2-
backend: sentencetransformers
1+
backend: bert-embeddings
32
embeddings: true
3+
f16: true
4+
5+
gpu_layers: 90
6+
mmap: true
7+
name: text-embedding-ada-002
8+
49
parameters:
5-
model: all-MiniLM-L6-v2
10+
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
611

712
usage: |
813
You can test this model with curl like this:
914
1015
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
1116
"input": "Your text string goes here",
12-
"model": "all-minilm-l6-v2"
17+
"model": "text-embedding-ada-002"
1318
}'

aio/entrypoint.sh

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/bin/bash
# LocalAI All-in-One (AIO) container entrypoint.
#
# Detects the available GPU (vendor + driver presence), picks a model-profile
# directory (cpu / gpu-8g / ...) accordingly, exports MODELS for the selected
# profile, then delegates to the main LocalAI entrypoint.
#
# Environment overrides:
#   SIZE   - force a profile (cpu, gpu-8g, ...) instead of auto-detection
#   MODELS - force an explicit comma-separated list of model YAML files

echo "===> LocalAI All-in-One (AIO) container starting..."

GPU_ACCELERATION=false
GPU_VENDOR=""

# Detect a usable GPU and record the vendor in GPU_VENDOR.
# Acceleration is only enabled when the matching driver stack is present
# (nvidia-smi for NVIDIA, /opt/rocm for AMD, /opt/intel for Intel).
function detect_gpu() {
    case "$(uname -s)" in
        Linux)
            if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
                echo "NVIDIA GPU detected"
                # nvidia-smi should be installed in the container
                if nvidia-smi; then
                    GPU_ACCELERATION=true
                    GPU_VENDOR=nvidia
                else
                    echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
                fi
            elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
                echo "AMD GPU detected"
                # Check if ROCm is installed
                if [ -d /opt/rocm ]; then
                    GPU_ACCELERATION=true
                    GPU_VENDOR=amd
                else
                    echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
                fi
            elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
                echo "Intel GPU detected"
                if [ -d /opt/intel ]; then
                    GPU_ACCELERATION=true
                    # Record the vendor for consistency with the other branches.
                    GPU_VENDOR=intel
                else
                    echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
                fi
            fi
            ;;
        Darwin)
            if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
                echo "Apple Metal supported GPU detected"
                GPU_ACCELERATION=true
                GPU_VENDOR=apple
            fi
            ;;
    esac
}

# Choose a profile directory name based on the detection result.
# Sets GPU_SIZE; falls back to "cpu" when no accelerated GPU was found.
function detect_gpu_size() {
    if [ "$GPU_ACCELERATION" = true ]; then
        GPU_SIZE=gpu-8g
    fi

    # Attempting to find GPU memory size for NVIDIA GPUs.
    # FIX: this previously tested "$gpu_model", a variable that is never set
    # anywhere in the script, so this branch could never run; the detected
    # vendor lives in GPU_VENDOR.
    if echo "$GPU_VENDOR" | grep -iq nvidia; then
        echo "NVIDIA GPU detected. Attempting to find memory size..."
        nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits))
        if [ ! -z "$nvidia_sm" ]; then
            echo "Total GPU Memory: ${nvidia_sm[0]} MiB"
        else
            echo "Unable to determine NVIDIA GPU memory size."
        fi
        # if bigger than 8GB, use 16GB
        #if [ "$nvidia_sm" -gt 8192 ]; then
        #    GPU_SIZE=gpu-16g
        #fi
    else
        echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script."
    fi

    # default to cpu if GPU_SIZE is not set
    if [ -z "$GPU_SIZE" ]; then
        GPU_SIZE=cpu
    fi
}

# Sanity-check that the variables the rest of the stack depends on are set.
# With the defaults applied below these should always pass; they guard
# against future refactors removing the defaulting logic.
function check_vars() {
    if [ -z "$MODELS" ]; then
        echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
        exit 1
    fi

    if [ -z "$SIZE" ]; then
        echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
        exit 1
    fi
}

detect_gpu
detect_gpu_size

SIZE=${SIZE:-$GPU_SIZE} # default to cpu
MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}

check_vars

echo "Starting LocalAI with the following models: $MODELS"

/build/entrypoint.sh "$@"

aio/gpu-8g/embeddings.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: all-minilm-l6-v2
1+
name: text-embedding-ada-002
22
backend: sentencetransformers
33
embeddings: true
44
parameters:
@@ -9,5 +9,5 @@ usage: |
99
1010
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
1111
"input": "Your text string goes here",
12-
"model": "all-minilm-l6-v2"
12+
"model": "text-embedding-ada-002"
1313
}'

aio/gpu-8g/image-gen.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ parameters:
44
backend: diffusers
55
step: 25
66
f16: true
7-
cuda: true
7+
88
diffusers:
99
pipeline_type: StableDiffusionPipeline
1010
cuda: true

0 commit comments

Comments
 (0)