Skip to content

Commit 8042e9a

Browse files
committed
Add docker-compose
Fixes #14 Signed-off-by: mudler <[email protected]>
1 parent 624092c commit 8042e9a

File tree

9 files changed

+110
-82
lines changed

9 files changed

+110
-82
lines changed

.dockerignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
models/*.bin

.env

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
THREADS=14

.github/workflows/image.yml

+27-39
Original file line numberDiff line numberDiff line change
@@ -12,78 +12,66 @@ jobs:
1212
docker:
1313
runs-on: ubuntu-latest
1414
steps:
15-
- name: Release space from worker
16-
run: |
17-
echo "Listing top largest packages"
18-
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
19-
head -n 30 <<< "${pkgs}"
20-
echo
21-
df -h
22-
echo
23-
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
24-
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
25-
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
26-
sudo rm -rf /usr/local/lib/android
27-
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
28-
sudo rm -rf /usr/share/dotnet
29-
sudo apt-get remove -y '^mono-.*' || true
30-
sudo apt-get remove -y '^ghc-.*' || true
31-
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
32-
sudo apt-get remove -y 'php.*' || true
33-
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
34-
sudo apt-get remove -y '^google-.*' || true
35-
sudo apt-get remove -y azure-cli || true
36-
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
37-
sudo apt-get remove -y '^gfortran-.*' || true
38-
sudo apt-get autoremove -y
39-
sudo apt-get clean
40-
echo
41-
echo "Listing top largest packages"
42-
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
43-
head -n 30 <<< "${pkgs}"
44-
echo
45-
sudo rm -rfv build || true
46-
df -h
4715
- name: Checkout
4816
uses: actions/checkout@v3
17+
4918
- name: Prepare
5019
id: prep
5120
run: |
5221
DOCKER_IMAGE=quay.io/go-skynet/llama-cli
53-
VERSION=latest
22+
VERSION=master
5423
SHORTREF=${GITHUB_SHA::8}
24+
5525
# If this is git tag, use the tag name as a docker tag
5626
if [[ $GITHUB_REF == refs/tags/* ]]; then
5727
VERSION=${GITHUB_REF#refs/tags/}
5828
fi
5929
TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
30+
6031
# If the VERSION looks like a version number, assume that
6132
# this is the most recent version of the image and also
6233
# tag it 'latest'.
6334
if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
6435
TAGS="$TAGS,${DOCKER_IMAGE}:latest"
6536
fi
37+
6638
# Set output parameters.
6739
echo ::set-output name=tags::${TAGS}
6840
echo ::set-output name=docker_image::${DOCKER_IMAGE}
69-
echo ::set-output name=image::${DOCKER_IMAGE}:${VERSION}
41+
7042
- name: Set up QEMU
7143
uses: docker/setup-qemu-action@master
7244
with:
7345
platforms: all
46+
7447
- name: Set up Docker Buildx
7548
id: buildx
7649
uses: docker/setup-buildx-action@master
50+
7751
- name: Login to DockerHub
7852
if: github.event_name != 'pull_request'
7953
uses: docker/login-action@v2
8054
with:
8155
registry: quay.io
8256
username: ${{ secrets.QUAY_USERNAME }}
8357
password: ${{ secrets.QUAY_PASSWORD }}
84-
- uses: earthly/actions/setup-earthly@v1
58+
- name: Build PRs
59+
if: github.event_name != 'pull_request'
60+
uses: docker/build-push-action@v4
61+
with:
62+
builder: ${{ steps.buildx.outputs.name }}
63+
context: .
64+
file: ./Dockerfile
65+
platforms: linux/amd64,linux/arm64,linux/arm
66+
push: true
67+
tags: ${{ steps.prep.outputs.tags }}
8568
- name: Build
86-
run: |
87-
earthly config "global.conversion_parallelism" "1"
88-
earthly config "global.buildkit_max_parallelism" "1"
89-
earthly --push +image-all --IMAGE=${{ steps.prep.outputs.image }}
69+
if: github.event_name == 'pull_request'
70+
uses: docker/build-push-action@v4
71+
with:
72+
builder: ${{ steps.buildx.outputs.name }}
73+
context: .
74+
file: ./Dockerfile
75+
platforms: linux/amd64
76+
push: false
77+
tags: ${{ steps.prep.outputs.tags }}

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
llama-cli
1+
llama-cli
2+
models/*.bin

Dockerfile

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
ARG GO_VERSION=1.20
2+
ARG DEBIAN_VERSION=11
3+
4+
FROM golang:$GO_VERSION as builder
5+
6+
WORKDIR /build
7+
RUN git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp
8+
RUN cd go-llama.cpp && make libbinding.a
9+
COPY go.mod ./
10+
COPY go.sum ./
11+
RUN go mod download
12+
RUN apt-get update
13+
COPY . .
14+
RUN go mod edit -replace github.com/go-skynet/go-llama.cpp=/build/go-llama.cpp
15+
RUN C_INCLUDE_PATH=/build/go-llama.cpp LIBRARY_PATH=/build/go-llama.cpp go build -o llama-cli ./
16+
17+
FROM debian:$DEBIAN_VERSION
18+
COPY --from=builder /build/llama-cli /usr/bin/llama-cli
19+
ENTRYPOINT [ "/usr/bin/llama-cli" ]

Earthfile

+2-29
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,5 @@
11
VERSION 0.7
22

3-
go-deps:
4-
ARG GO_VERSION=1.20
5-
FROM golang:$GO_VERSION
6-
WORKDIR /build
7-
COPY go.mod ./
8-
COPY go.sum ./
9-
RUN go mod download
10-
RUN apt-get update
11-
SAVE ARTIFACT go.mod AS LOCAL go.mod
12-
SAVE ARTIFACT go.sum AS LOCAL go.sum
13-
143
build:
15-
FROM +go-deps
16-
WORKDIR /build
17-
RUN git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp
18-
RUN cd go-llama.cpp && make libbinding.a
19-
COPY . .
20-
RUN go mod edit -replace github.com/go-skynet/go-llama.cpp=/build/go-llama.cpp
21-
RUN C_INCLUDE_PATH=$GOPATH/src/github.com/go-skynet/go-llama.cpp LIBRARY_PATH=$GOPATH/src/github.com/go-skynet/go-llama.cpp go build -o llama-cli ./
22-
SAVE ARTIFACT llama-cli AS LOCAL llama-cli
23-
24-
image:
25-
FROM +go-deps
26-
ARG IMAGE=alpaca-cli-nomodel
27-
COPY +build/llama-cli /llama-cli
28-
ENTRYPOINT [ "/llama-cli" ]
29-
SAVE IMAGE --push $IMAGE
30-
31-
image-all:
32-
BUILD --platform=linux/amd64 --platform=linux/arm64 +image
4+
FROM DOCKERFILE -f Dockerfile .
5+
SAVE ARTIFACT /usr/bin/llama-cli AS LOCAL llama-cli

README.md

+43-13
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,47 @@ It is compatible with the models supported by `llama.cpp`. You might need to con
77

88
`llama-cli` doesn't shell-out, it uses https://github.com/go-skynet/go-llama.cpp, which is a golang binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
99

10+
## Usage
11+
12+
You can use `docker-compose`:
13+
14+
```bash
15+
16+
git clone https://github.com/go-skynet/llama-cli
17+
cd llama-cli
18+
19+
# copy your models to models/
20+
cp your-model.bin models/
21+
22+
# (optional) Edit the .env file to set the number of concurrent threads used for inference
23+
# echo "THREADS=14" > .env
24+
25+
# start with docker-compose
26+
docker compose up -d --build
27+
28+
# Now API is accessible at localhost:8080
29+
curl http://localhost:8080/v1/models
30+
# {"object":"list","data":[{"id":"your-model.bin","object":"model"}]}
31+
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
32+
"model": "your-model.bin",
33+
"prompt": "A long time ago in a galaxy far, far away",
34+
"temperature": 0.7
35+
}'
36+
37+
38+
```
39+
40+
Note: You can use a default template for every model in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as a default prompt. For instance, this can be used with alpaca:
41+
42+
```
43+
Below is an instruction that describes a task. Write a response that appropriately completes the request.
44+
45+
### Instruction:
46+
{{.Input}}
47+
48+
### Response:
49+
```
50+
1051
## Container images
1152

1253
`llama-cli` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/llama-cli?tab=tags&tag=latest)
@@ -158,16 +199,6 @@ Below is an instruction that describes a task. Write a response that appropriate
158199
### Response:
159200
```
160201

161-
Note: You can use a default template for every model in your model path, by creating a corresponding file with the `.tmpl` suffix. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as a default prompt, for instance:
162-
163-
```
164-
Below is an instruction that describes a task. Write a response that appropriately completes the request.
165-
166-
### Instruction:
167-
{{.Input}}
168-
169-
### Response:
170-
```
171202

172203
## Using other models
173204

@@ -229,9 +260,8 @@ In order to build the `llama-cli` container image locally you can use `docker`:
229260

230261
```
231262
# build the image as "alpaca-image"
232-
docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock --rm -t -v "$(pwd)":/workspace -v earthly-tmp:/tmp/earthly:rw earthly/earthly:v0.7.2 +image --IMAGE=alpaca-image
233-
# run the image
234-
docker run alpaca-image --instruction "What's an alpaca?"
263+
docker build -t llama-cli .
264+
docker run llama-cli --instruction "What's an alpaca?"
235265
```
236266

237267
Or build the binary with:

docker-compose.yaml

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
version: '3.6'
2+
3+
services:
4+
api:
5+
image: quay.io/go-skynet/llama-cli:latest
6+
build: .
7+
volumes:
8+
- ./models:/models
9+
ports:
10+
- 8080:8080
11+
environment:
12+
- MODELS_PATH=/models
13+
- CONTEXT_SIZE=700
14+
- THREADS=$THREADS
15+
command: api

models/.keep

Whitespace-only changes.

0 commit comments

Comments
 (0)