feat(sycl): Add support for Intel GPUs with sycl (#1647) #1660

Merged
merged 13 commits on Feb 1, 2024
7 changes: 7 additions & 0 deletions .github/workflows/image-pr.yml
@@ -75,6 +75,13 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
28 changes: 28 additions & 0 deletions .github/workflows/image.yml
@@ -122,6 +122,34 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f32-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f32-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
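These matrix entries add SYCL core image variants (f16 and f32, each with and without ffmpeg), built on the arc-runner-set runners. As a rough usage sketch only: the tag below is hypothetical and merely mirrors the tag-suffix values above, and exposing the Intel GPU via /dev/dri is an assumption about how the container reaches the device.

# Hypothetical registry and tag; substitute whatever the workflow actually publishes.
docker run -it --device /dev/dri -p 8080:8080 <registry>/local-ai:sycl-f16-ffmpeg-core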
9 changes: 8 additions & 1 deletion Dockerfile
@@ -2,7 +2,6 @@ ARG GO_VERSION=1.21-bullseye
ARG IMAGE_TYPE=extras
# extras or core


FROM golang:$GO_VERSION as requirements-core

ARG BUILD_TYPE
@@ -38,6 +37,14 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get update && \
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
; fi

# oneapi requirements
RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \
wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \
sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \
rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \
; fi

ENV PATH /usr/local/cuda/bin:${PATH}

# OpenBLAS requirements and stable diffusion
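The oneAPI Base Kit installer runs only for the sycl_f16 and sycl_f32 build types, so the other image flavors are unchanged. A minimal local build sketch, assuming the root Dockerfile and a hypothetical local tag; BUILD_TYPE and IMAGE_TYPE are the build args referenced above:

# Build a SYCL f16 core image locally (tag name is illustrative).
docker build \
  --build-arg BUILD_TYPE=sycl_f16 \
  --build-arg IMAGE_TYPE=core \
  -t local-ai:sycl-f16-core .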
2 changes: 1 addition & 1 deletion Makefile
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0

GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7

CPPLLAMA_VERSION?=e0085fdf7c758f0bc2746fc106fb29dd9df959de
CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
2 changes: 1 addition & 1 deletion backend/cpp/llama/CMakeLists.txt
@@ -70,7 +70,7 @@ add_library(hw_grpc_proto
${hw_proto_srcs}
${hw_proto_hdrs} )

add_executable(${TARGET} grpc-server.cpp json.hpp )
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
15 changes: 15 additions & 0 deletions backend/cpp/llama/Makefile
@@ -3,6 +3,7 @@ LLAMA_VERSION?=

CMAKE_ARGS?=
BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh

# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
@@ -19,6 +20,14 @@ else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
endif

ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
endif

ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif

llama.cpp:
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
if [ -z "$(LLAMA_VERSION)" ]; then \
@@ -31,6 +40,7 @@ llama.cpp/examples/grpc-server:
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
@@ -49,5 +59,10 @@ clean:
rm -rf grpc-server

grpc-server: llama.cpp llama.cpp/examples/grpc-server
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
else
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
endif
cp llama.cpp/build/bin/grpc-server .
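For reference, the sycl_f16 branch of this target amounts to the manual build below, assuming oneAPI is installed at the default location referenced by ONEAPI_VARS; the flags mirror the CMAKE_ARGS set earlier in this Makefile.

# Manual equivalent of the sycl_f16 grpc-server build (sketch).
source /opt/intel/oneapi/setvars.sh
cd llama.cpp && mkdir -p build && cd build
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
cmake --build . --config Release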