feat(sycl): Add support for Intel GPUs with sycl (#1647) #1660

Merged
merged 13 commits on Feb 1, 2024
7 changes: 7 additions & 0 deletions .github/workflows/image-pr.yml
@@ -75,6 +75,13 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
28 changes: 28 additions & 0 deletions .github/workflows/image.yml
@@ -122,6 +122,34 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f32-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f32-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
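These matrix entries add SYCL core image variants (f16 and f32, each with and without ffmpeg), built on the arc-runner-set runners. As a rough usage sketch only: the tag below is hypothetical and merely mirrors the tag-suffix values above, and exposing the Intel GPU via /dev/dri is an assumption about how the container reaches the device.

# Hypothetical registry and tag; substitute whatever the workflow actually publishes.
docker run -it --device /dev/dri -p 8080:8080 <registry>/local-ai:sycl-f16-ffmpeg-core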
9 changes: 8 additions & 1 deletion Dockerfile
@@ -2,7 +2,6 @@ ARG GO_VERSION=1.21-bullseye
ARG IMAGE_TYPE=extras
# extras or core


FROM golang:$GO_VERSION as requirements-core

ARG BUILD_TYPE
@@ -38,6 +37,14 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get update && \
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
; fi

# oneapi requirements
RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \
wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \
sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \
rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \
; fi

ENV PATH /usr/local/cuda/bin:${PATH}

# OpenBLAS requirements and stable diffusion
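The oneAPI Base Kit installer runs only for the sycl_f16 and sycl_f32 build types, so the other image flavors are unchanged. A minimal local build sketch, assuming the root Dockerfile and a hypothetical local tag; BUILD_TYPE and IMAGE_TYPE are the build args referenced above:

# Build a SYCL f16 core image locally (tag name is illustrative).
docker build \
  --build-arg BUILD_TYPE=sycl_f16 \
  --build-arg IMAGE_TYPE=core \
  -t local-ai:sycl-f16-core .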
2 changes: 1 addition & 1 deletion Makefile
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0

GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7

CPPLLAMA_VERSION?=e0085fdf7c758f0bc2746fc106fb29dd9df959de
CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
2 changes: 1 addition & 1 deletion backend/cpp/llama/CMakeLists.txt
@@ -70,7 +70,7 @@ add_library(hw_grpc_proto
${hw_proto_srcs}
${hw_proto_hdrs} )

add_executable(${TARGET} grpc-server.cpp json.hpp )
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
15 changes: 15 additions & 0 deletions backend/cpp/llama/Makefile
@@ -3,6 +3,7 @@ LLAMA_VERSION?=

CMAKE_ARGS?=
BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh

# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
@@ -19,6 +20,14 @@ else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
endif

ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
endif

ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif

llama.cpp:
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
if [ -z "$(LLAMA_VERSION)" ]; then \
@@ -31,6 +40,7 @@ llama.cpp/examples/grpc-server:
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
@@ -49,5 +59,10 @@ clean:
rm -rf grpc-server

grpc-server: llama.cpp llama.cpp/examples/grpc-server
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
else
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
endif
cp llama.cpp/build/bin/grpc-server .
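For reference, the sycl_f16 branch of this target amounts to the manual build below, assuming oneAPI is installed at the default location referenced by ONEAPI_VARS; the flags mirror the CMAKE_ARGS set earlier in this Makefile.

# Manual equivalent of the sycl_f16 grpc-server build (sketch).
source /opt/intel/oneapi/setvars.sh
cd llama.cpp && mkdir -p build && cd build
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
cmake --build . --config Release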