Skip to content

Commit abc9360

Browse files
authored
feat(aio): entrypoint, update workflows (#1872)
1 parent 743095b commit abc9360

File tree

9 files changed

+191
-16
lines changed

9 files changed

+191
-16
lines changed

.github/workflows/image.yml

+5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ jobs:
2626
platforms: ${{ matrix.platforms }}
2727
runs-on: ${{ matrix.runs-on }}
2828
base-image: ${{ matrix.base-image }}
29+
aio: ${{ matrix.aio }}
2930
makeflags: "-j3"
3031
secrets:
3132
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -86,6 +87,7 @@ jobs:
8687
image-type: 'extras'
8788
runs-on: 'arc-runner-set'
8889
base-image: "ubuntu:22.04"
90+
aio: "-aio-gpu-nvidia-cuda-11"
8991
- build-type: 'cublas'
9092
cuda-major-version: "12"
9193
cuda-minor-version: "1"
@@ -96,6 +98,7 @@ jobs:
9698
image-type: 'extras'
9799
runs-on: 'arc-runner-set'
98100
base-image: "ubuntu:22.04"
101+
aio: "-aio-gpu-nvidia-cuda-12"
99102
- build-type: ''
100103
#platforms: 'linux/amd64,linux/arm64'
101104
platforms: 'linux/amd64'
@@ -199,6 +202,7 @@ jobs:
199202
cuda-minor-version: ${{ matrix.cuda-minor-version }}
200203
platforms: ${{ matrix.platforms }}
201204
runs-on: ${{ matrix.runs-on }}
205+
aio: ${{ matrix.aio }}
202206
base-image: ${{ matrix.base-image }}
203207
makeflags: "-j3"
204208
secrets:
@@ -217,6 +221,7 @@ jobs:
217221
image-type: 'core'
218222
base-image: "ubuntu:22.04"
219223
runs-on: 'ubuntu-latest'
224+
aio: "-aio-cpu"
220225
- build-type: 'cublas'
221226
cuda-major-version: "11"
222227
cuda-minor-version: "7"

.github/workflows/image_build.yml

+67-1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ on:
5151
required: false
5252
default: ''
5353
type: string
54+
aio:
55+
description: 'AIO Image Name'
56+
required: false
57+
default: ''
58+
type: string
5459
secrets:
5560
dockerUsername:
5661
required: true
@@ -129,7 +134,30 @@ jobs:
129134
flavor: |
130135
latest=${{ inputs.tag-latest }}
131136
suffix=${{ inputs.tag-suffix }}
132-
137+
- name: Docker meta AIO (quay.io)
138+
if: inputs.aio != ''
139+
id: meta_aio
140+
uses: docker/metadata-action@v5
141+
with:
142+
images: |
143+
quay.io/go-skynet/local-ai
144+
tags: |
145+
type=ref,event=branch
146+
type=semver,pattern={{raw}}
147+
flavor: |
148+
suffix=${{ inputs.aio }}
149+
- name: Docker meta AIO (dockerhub)
150+
if: inputs.aio != ''
151+
id: meta_aio_dockerhub
152+
uses: docker/metadata-action@v5
153+
with:
154+
images: |
155+
localai/localai
156+
tags: |
157+
type=ref,event=branch
158+
type=semver,pattern={{raw}}
159+
flavor: |
160+
suffix=${{ inputs.aio }}
133161
- name: Set up QEMU
134162
uses: docker/setup-qemu-action@master
135163
with:
@@ -172,6 +200,44 @@ jobs:
172200
push: ${{ github.event_name != 'pull_request' }}
173201
tags: ${{ steps.meta.outputs.tags }}
174202
labels: ${{ steps.meta.outputs.labels }}
203+
-
204+
name: Inspect image
205+
if: github.event_name != 'pull_request'
206+
run: |
207+
docker pull localai/localai:${{ steps.meta.outputs.version }}
208+
docker image inspect localai/localai:${{ steps.meta.outputs.version }}
209+
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
210+
docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
211+
- name: Build and push AIO image
212+
if: inputs.aio != ''
213+
uses: docker/build-push-action@v5
214+
with:
215+
builder: ${{ steps.buildx.outputs.name }}
216+
build-args: |
217+
BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
218+
context: .
219+
file: ./Dockerfile.aio
220+
platforms: ${{ inputs.platforms }}
221+
push: ${{ github.event_name != 'pull_request' }}
222+
tags: ${{ steps.meta_aio.outputs.tags }}
223+
labels: ${{ steps.meta_aio.outputs.labels }}
224+
- name: Build and push AIO image (dockerhub)
225+
if: inputs.aio != ''
226+
uses: docker/build-push-action@v5
227+
with:
228+
builder: ${{ steps.buildx.outputs.name }}
229+
build-args: |
230+
BASE_IMAGE=localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
231+
context: .
232+
file: ./Dockerfile.aio
233+
platforms: ${{ inputs.platforms }}
234+
push: ${{ github.event_name != 'pull_request' }}
235+
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
236+
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
175237
- name: job summary
176238
run: |
177239
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
240+
- name: job summary(AIO)
241+
if: inputs.aio != ''
242+
run: |
243+
echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

Dockerfile.aio

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
ARG BASE_IMAGE=ubuntu:22.04
22

33
FROM ${BASE_IMAGE}
4-
ARG SIZE=cpu
5-
ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml"
64

7-
COPY aio/${SIZE} /aio-models
5+
RUN apt-get update && apt-get install -y pciutils && apt-get clean
86

9-
ENTRYPOINT [ "/build/entrypoint.sh" ]
7+
COPY aio/ /aio
8+
ENTRYPOINT [ "/aio/entrypoint.sh" ]

Makefile

+1-4
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,6 @@ grpcs: prepare $(GRPC_BACKENDS)
536536

537537
DOCKER_IMAGE?=local-ai
538538
DOCKER_AIO_IMAGE?=local-ai-aio
539-
DOCKER_AIO_SIZE?=cpu
540539
IMAGE_TYPE?=core
541540
BASE_IMAGE?=ubuntu:22.04
542541

@@ -549,11 +548,9 @@ docker:
549548
-t $(DOCKER_IMAGE) .
550549

551550
docker-aio:
552-
@echo "Building AIO image with size $(DOCKER_AIO_SIZE)"
553-
@echo "Building AIO image with base image $(BASE_IMAGE)"
551+
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
554552
docker build \
555553
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
556-
--build-arg SIZE=$(DOCKER_AIO_SIZE) \
557554
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
558555

559556
docker-aio-all:

aio/cpu/README.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
## AIO CPU size
2+
3+
Use this image for CPU-only inference (no GPU acceleration).
4+
5+
Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).

aio/cpu/embeddings.yaml

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
1-
name: all-minilm-l6-v2
2-
backend: sentencetransformers
1+
backend: bert-embeddings
32
embeddings: true
3+
f16: true
4+
5+
gpu_layers: 90
6+
mmap: true
7+
name: text-embedding-ada-002
8+
49
parameters:
5-
model: all-MiniLM-L6-v2
10+
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
611

712
usage: |
813
You can test this model with curl like this:
914
1015
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
1116
"input": "Your text string goes here",
12-
"model": "all-minilm-l6-v2"
17+
"model": "text-embedding-ada-002"
1318
}'

aio/entrypoint.sh

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/bin/bash
# LocalAI All-in-One (AIO) container entrypoint.
#
# Detects the available GPU (vendor + driver presence), picks a model-profile
# directory (cpu / gpu-8g / ...) accordingly, exports MODELS for the selected
# profile, then delegates to the main LocalAI entrypoint.
#
# Environment overrides:
#   SIZE   - force a profile (cpu, gpu-8g, ...) instead of auto-detection
#   MODELS - force an explicit comma-separated list of model YAML files

echo "===> LocalAI All-in-One (AIO) container starting..."

GPU_ACCELERATION=false
GPU_VENDOR=""

# Detect a usable GPU and record the vendor in GPU_VENDOR.
# Acceleration is only enabled when the matching driver stack is present
# (nvidia-smi for NVIDIA, /opt/rocm for AMD, /opt/intel for Intel).
function detect_gpu() {
    case "$(uname -s)" in
        Linux)
            if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
                echo "NVIDIA GPU detected"
                # nvidia-smi should be installed in the container
                if nvidia-smi; then
                    GPU_ACCELERATION=true
                    GPU_VENDOR=nvidia
                else
                    echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
                fi
            elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
                echo "AMD GPU detected"
                # Check if ROCm is installed
                if [ -d /opt/rocm ]; then
                    GPU_ACCELERATION=true
                    GPU_VENDOR=amd
                else
                    echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
                fi
            elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
                echo "Intel GPU detected"
                if [ -d /opt/intel ]; then
                    GPU_ACCELERATION=true
                    # Record the vendor for consistency with the other branches.
                    GPU_VENDOR=intel
                else
                    echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
                fi
            fi
            ;;
        Darwin)
            if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
                echo "Apple Metal supported GPU detected"
                GPU_ACCELERATION=true
                GPU_VENDOR=apple
            fi
            ;;
    esac
}

# Choose a profile directory name based on the detection result.
# Sets GPU_SIZE; falls back to "cpu" when no accelerated GPU was found.
function detect_gpu_size() {
    if [ "$GPU_ACCELERATION" = true ]; then
        GPU_SIZE=gpu-8g
    fi

    # Attempting to find GPU memory size for NVIDIA GPUs.
    # FIX: this previously tested "$gpu_model", a variable that is never set
    # anywhere in the script, so this branch could never run; the detected
    # vendor lives in GPU_VENDOR.
    if echo "$GPU_VENDOR" | grep -iq nvidia; then
        echo "NVIDIA GPU detected. Attempting to find memory size..."
        nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits))
        if [ ! -z "$nvidia_sm" ]; then
            echo "Total GPU Memory: ${nvidia_sm[0]} MiB"
        else
            echo "Unable to determine NVIDIA GPU memory size."
        fi
        # if bigger than 8GB, use 16GB
        #if [ "$nvidia_sm" -gt 8192 ]; then
        #    GPU_SIZE=gpu-16g
        #fi
    else
        echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script."
    fi

    # default to cpu if GPU_SIZE is not set
    if [ -z "$GPU_SIZE" ]; then
        GPU_SIZE=cpu
    fi
}

# Sanity-check that the variables the rest of the stack depends on are set.
# With the defaults applied below these should always pass; they guard
# against future refactors removing the defaulting logic.
function check_vars() {
    if [ -z "$MODELS" ]; then
        echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
        exit 1
    fi

    if [ -z "$SIZE" ]; then
        echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
        exit 1
    fi
}

detect_gpu
detect_gpu_size

SIZE=${SIZE:-$GPU_SIZE} # default to cpu
MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}

check_vars

echo "Starting LocalAI with the following models: $MODELS"

/build/entrypoint.sh "$@"

aio/gpu-8g/embeddings.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: all-minilm-l6-v2
1+
name: text-embedding-ada-002
22
backend: sentencetransformers
33
embeddings: true
44
parameters:
@@ -9,5 +9,5 @@ usage: |
99
1010
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
1111
"input": "Your text string goes here",
12-
"model": "all-minilm-l6-v2"
12+
"model": "text-embedding-ada-002"
1313
}'

aio/gpu-8g/image-gen.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ parameters:
44
backend: diffusers
55
step: 25
66
f16: true
7-
cuda: true
7+
88
diffusers:
99
pipeline_type: StableDiffusionPipeline
1010
cuda: true

0 commit comments

Comments
 (0)