Vulkan Implementation #2059
Merged
Commits
Showing changes from 131 of 156 commits.
061246f  Vulkan loader code (0cc4m) [sketch below]
4a96d0e  Fix matmul kernel, continue implementation (0cc4m)
88d4ec0  Continue implementation (0cc4m)
a4004d4  Vulkan memory management (0cc4m)
b0e6585  Vulkan development (0cc4m)
fc4f207  Matmul call (0cc4m)
2471728  Add aligned malloc and free for VMA (0cc4m) [sketch below]
8ce84c2  Continue implementation (0cc4m)
a42376e  First matmul success (0cc4m)
baf9ff5  GEMM Kernel optimization (0cc4m)
1b4863c  1D Blocktiling (0cc4m)
7c6860b  2D Blocktiling (0cc4m) [sketch below]
0c9cca0  Write coalescing (0cc4m)
2c70df9  Continue vulkan implementation and optimization (0cc4m)
3adc7b1  First FP16 attempt, disabled for now (0cc4m)
fc5bb53  Code abstraction, FP16 implementation, fix kernel, add FP16 to FP32 k… (0cc4m)
c31e14b  Enable device extensions properly, restore fp16 matmul op (0cc4m)
40c8f84  Fix mulmat_f16 (0cc4m)
df3cdbd  Output FP32 in fp16 matmul shader (0cc4m)
cb5cb4d  Fix f16_to_f32 kernel (0cc4m)
c8ff09b  dequant_q4_0 kernel (0cc4m) [sketch below]
4ea9b2f  Add VMA library (0cc4m)
36cd5d8  Avoid requesting dedicated memory, VMA can decide that by itself (0cc4m)
24eeb97  Add bounds checking to matmul kernels, improve implementation, fix co… (0cc4m)
3d7d8d0  add cmake commands (SlyEcho)
ade9555  Add 2d write operation, profiling code (0cc4m)
ae7325f  Fix 2d write (0cc4m)
e35d28f  Fix queue selection for AMD RADV (0cc4m)
80b17e2  Fix trailing whitespace in vk_mem_alloc.h (0cc4m)
2449390  Add WIP warp tile mat mul shaders (0cc4m)
869ae76  Disable glslc optimization (0cc4m)
ea06a2c  Disable glslc optimization for CMake (0cc4m)
6d5a0ad  Merge pull request #2 from SlyEcho/vulkan (0cc4m)
c3d9475  Optimize warptile matmul shader, replace blocktile with it (0cc4m)
c7c761a  Add split-k optimization for small matrix multiplication (0cc4m) [sketch below]
0ef62f5  Fix validation errors, improve compatibility with AMD GPUs (0cc4m)
3bc7a80  Rework command buffer handling (0cc4m)
8dd585e  Variable matmul kernel using specialization constants (0cc4m) [sketch below]
0c4d841  Fix synchronization on AMD, add barriers for buffer ownership transfe… (0cc4m)
ad3d28e  Reuse semaphores (0cc4m)
22a4cb7  Handle stage flags during command buffer submission properly (0cc4m)
f58fa51  Increase matmul test runs for consistent results (0cc4m)
931a892  Fix F32 matmul (0cc4m)
8d351b8  Merge upstream changes, fix conflict (0cc4m)
e490395  Add vectorized loading and zeropadding for matrix multiplication (0cc4m)
105fd19  Use pinned memory for f16 preprocessing (0cc4m)
9e97cb0  Don't force aligned matmul (0cc4m)
b5b1337  Don't free before queue done (0cc4m)
3432e37  Replace VMA library with native Vulkan buffer management (0cc4m)
754ea68  Basic offloading support with mul_f32 and dmmv for q4_0 (0cc4m)
2859562  Run glslc commands in parallel (0cc4m) [sketch below]
3452095  Unroll loops in dmmv shader (0cc4m)
f2d4ca3  Reduce usage of waitIdle (0cc4m)
67843a3  Reuse pinned allocation for f16 conversion (0cc4m)
1ac8ff3  Handle devices with only a single queue (0cc4m)
53809c9  Fix trailing whitespace in CMakeLists.txt (0cc4m)
4e58028  Allow parallel execution of kernels, parallelize third and fourth dim… (0cc4m)
69554ce  Add fallback for devices only supporting one DescriptorSet per Descri… (0cc4m)
1b2ec1a  Move to graph function similar to CUDA implementation (0cc4m)
d0bd120  Use F16 kernel for most things, replace q_f32 with mul_mat_q_f16 func… (0cc4m)
44065df  Add F32 dmmv shaders (0cc4m)
f6b241e  Batch submissions (0cc4m)
6bd9bd9  Add .spv to gitignore (0cc4m)
2231618  Split off matrix vector multiplication for separate optimization (0cc4m)
582c825  Use single command buffer for matrix vector multiplication ops (0cc4m)
dc6e677  Reduce overhead of mul_f32 calls by using a single command buffer (0cc4m)
75788fe  Add submission batching to mul_f32 (0cc4m)
c638955  Fix tests (0cc4m)
44bbc85  Add missing barrier (0cc4m)
ccd2592  Add further missing barrier (0cc4m)
e660943  Add further ops (0cc4m)
a07f603  Replace vk::QueueFamilyIgnored with VK_QUEUE_FAMILY_IGNORED to suppor… (0cc4m)
7ac00de  Remove unnecessary cblas link (0cc4m)
1132941  Fix descriptor set pre-allocation assert (0cc4m)
a47ca7a  Add runtime shader compilation, start transferring shaders to this ap… (0cc4m)
592ebb0  Transfer remaining shaders to header and compile on runtime (0cc4m)
01d22a4  Merge upstream changes, fix conflict (0cc4m)
e9be24f  Fix fp32 fallback if device doesn't support fp16, add force disable e… (0cc4m)
7e88677  Add support for q4_1, q5_0, q5_1 and q8_0 (0cc4m)
5ae5d2b  Remove unnecessary scalar layout extension (0cc4m)
7f89e40  Parse graph early to pre-record command buffers (0cc4m)
b6591b5  Merge upstream changes, fix conflicts (0cc4m)
42bfa88  Add q6_k support (0cc4m)
da09a02  Add multi-submit for command buffers (0cc4m)
39bd512  Fix q6_k dequant shader for AMD (0cc4m)
85c1a63  Fix q6_k for GPUs without fp16 support (0cc4m)
dad1cdb  Simplify q6_k fp16 fix (0cc4m)
e2962e1  Minor fixes (0cc4m)
b447229  Fix wg_denom of m-mulmat shaders (0cc4m)
73d01d1  Add Python-based Vulkan shader generator (0cc4m)
de4b813  Replace shaderc dependency with precompiled shaders (0cc4m)
1e6e13f  Clean up code (0cc4m)
7efac61  Fix shader generator script Windows compatibility (0cc4m)
bd05447  Close file before deletion (0cc4m)
35b10d1  Merge upstream changes, fix conflict (0cc4m)
e90a651  Fix vulkan shader fp32 name (0cc4m)
a861879  Add q2_k and q3_k support (0cc4m)
4a97d2d  Add q4_k support (0cc4m)
0ec595f  Add q5_k support (0cc4m)
1b66b8b  Bake SPIR-V bytecode into the library instead of loading shaders from… (0cc4m) [sketch below]
a0db45f  Switch to signal semaphores for flexibility (0cc4m)
3de5ba4  Finish broadcasting mul mat support for GQA (0cc4m)
0230981  Clean up unused functions (0cc4m)
d130fe6  Merge remote-tracking branch 'origin/master' into vulkan (0cc4m)
1cb90e5  Add further ops, not yet enabled. Improve semaphore code (0cc4m)
2c7fa8d  Reduce number of used semaphores by utilizing timelines more properly (0cc4m)
80bfc59  Remove queue information (0cc4m)
2e01682  Reuse timeline semaphores, allow parallel operation with binary semap… (0cc4m) [sketch below]
4b7eccc  Add Vulkan to llama-bench (0cc4m)
20787d8  Merge upstream changes, fix conflicts (0cc4m)
00bea85  Remove cblas dependency (0cc4m)
bd7fa3f  Fix matmul k-split bug (0cc4m)
7f05c7f  Fix q4_k dmmv K_QUANTS_PER_ITERATION 1 shader (0cc4m)
e969445  Add RMS Norm shader, rework op_f32 shader setup, fix matmul bug (0cc4m)
39cd277  Fix issues with float16 overflows in shaders (0cc4m)
7551889  Merge upstream changes, fix conflicts (0cc4m)
471a1b0  Fix issues with older Vulkan headers on Ubuntu 22.04 (0cc4m)
d9ca456  Allow multi-op partial offloading by parsing the graph to preallocate… (0cc4m)
fc63f88  Implement further ops, rework op_f32 calls, fix bugs (0cc4m)
ff93769  Finish full offloading support, add last remaining ops, fix bugs, rem… (0cc4m)
0c708c1  Upload generated file ggml-vulkan-shaders.hpp, remove redundant shaders (0cc4m)
2f5529e  Merge upstream changes, fix conflicts, adapt per-layer kv (0cc4m)
2c8a156  Merge upstream changes, fix conflicts, adapt soft_max op (0cc4m)
cd34b87  Fix Python and shader header format (0cc4m)
c05883f  Free model gpu buffers on exit (0cc4m)
5fef0d6  Merge remote-tracking branch 'origin/master' into vulkan (0cc4m)
e9e2be3  Use single queue per device to simplify code (0cc4m)
7b36cea  Add matmul shader support for running multiple calculations in parallel (0cc4m)
918c333  Merge upstream changes, fix staging buffer usage (0cc4m)
542ae3b  Merge upstream changes, fix conflicts (0cc4m)
c3290d2  Switch from semaphore-synchronized multiple command buffers per op to… (0cc4m)
02d2e38  Fix missing event cast (0cc4m)
2d14b22  Merge upstream changes, implement basic vulkan backend (0cc4m)
1811c4e  Replace uint64_t(-1) with UINT64_MAX, rename function for clarity (0cc4m)
f84c54f  Fix warning about empty C function parameters (0cc4m)
c0f3474  Fix compiler warnings (0cc4m)
6e61742  Properly implement Vulkan backend buffer handling (0cc4m)
00f214c  Fix oversized host staging buffers (0cc4m)
1f55cd2  Simplify barrier synchronization calls (0cc4m)
7fa5ca9  Fix gcc warnings (0cc4m)
f652ebf  Implement max_size for backend buffer types to limit the size of a si… (0cc4m)
bcf2a44  Use min of maxMemoryAllocationSize and maxBufferSize for device max a… (0cc4m) [sketch below]
6b97c71  refactor multi buf (slaren)
f2c364a  Disable unsupported ops to fix tests (0cc4m)
1c953c1  Check for maintenance4 support before using it (0cc4m)
566a178  Handle devices with only a single queue (0cc4m)
3742b6c  Fix single queue logic (0cc4m)
bc5e64b  propagate buffer usage in multi buffers (slaren)
3a15a01  Implement rope_neox op (0cc4m)
82ce1c4  Cleanup header and other files (0cc4m)
5a8a07e  Simplify gpu_extras by removing events and putting staging memcpys in… (0cc4m)
a5cca6c  Move queue into context (0cc4m)
48ad459  Simplify context use, optimize matmul shader for warp size 64 (AMD GC… (0cc4m)
9c4c15a  Merge branch 'master' into vulkan (ggerganov)
e3acca3  Add get_max_size to SYCL backend. (0cc4m)
10fbb1f  llama : fix trailing whitespace (ggerganov)
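
The sketches below illustrate a few of the techniques named in the commit log. They are hedged illustrations written for this summary, not code taken from the PR; any helper names, parameters and file names they introduce are hypothetical.

Commit 061246f adds the Vulkan loader code. A minimal sketch of what instance creation and compute-queue-family selection look like with the C++ bindings (vulkan.hpp):

```cpp
// Minimal sketch: create a Vulkan instance and pick a compute-capable queue
// family with the C++ bindings. Illustrative only, not the PR's ggml-vulkan code.
#include <vulkan/vulkan.hpp>
#include <cstdio>
#include <vector>

int main() {
    vk::ApplicationInfo app_info("vk-sketch", 1, "none", 1, VK_API_VERSION_1_2);
    vk::InstanceCreateInfo instance_info({}, &app_info);
    vk::Instance instance = vk::createInstance(instance_info);

    for (const vk::PhysicalDevice& dev : instance.enumeratePhysicalDevices()) {
        vk::PhysicalDeviceProperties props = dev.getProperties();
        std::printf("device: %s\n", props.deviceName.data());

        // Pick the first queue family with compute support; a real backend also
        // weighs transfer-only queues and the number of queues per family.
        std::vector<vk::QueueFamilyProperties> families = dev.getQueueFamilyProperties();
        for (uint32_t i = 0; i < (uint32_t) families.size(); i++) {
            if (families[i].queueFlags & vk::QueueFlagBits::eCompute) {
                std::printf("  compute queue family %u (count %u)\n", i, families[i].queueCount);
                break;
            }
        }
    }

    instance.destroy();
    return 0;
}
```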
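
Commit 2471728 adds aligned host allocation helpers for the VMA library. A minimal sketch of portable aligned malloc/free wrappers; the function names are illustrative:

```cpp
// Portable aligned allocation helpers of the kind such an allocator can be
// pointed at. The names are illustrative.
#include <cstdio>
#include <cstdlib>
#if defined(_WIN32)
#include <malloc.h>
#endif

static void * aligned_malloc_sketch(size_t size, size_t alignment) {
#if defined(_WIN32)
    return _aligned_malloc(size, alignment);
#else
    // posix_memalign needs a power-of-two alignment that is a multiple of sizeof(void*)
    void * ptr = nullptr;
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return nullptr;
    }
    return ptr;
#endif
}

static void aligned_free_sketch(void * ptr) {
#if defined(_WIN32)
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}

int main() {
    void * buf = aligned_malloc_sketch(1024, 4096); // e.g. page-aligned staging memory
    std::printf("allocated at %p\n", buf);
    aligned_free_sketch(buf);
    return 0;
}
```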
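
Commits 1b4863c and 7c6860b (1D/2D blocktiling) optimize the GEMM shader by having each workgroup produce a block of the output while streaming slices of the K dimension, with each invocation additionally accumulating a small 2D tile in registers. The shader is GLSL; the following CPU-side sketch only illustrates the block-tiled loop structure, not the per-invocation register tiling:

```cpp
// CPU illustration of block-tiled matrix multiplication, C = A * B (row-major):
// the output is computed in BM x BN blocks while BK-wide slices of K are streamed.
#include <algorithm>
#include <cstdio>
#include <vector>

constexpr int BM = 64, BN = 64, BK = 16;

void gemm_tiled(const std::vector<float> & A, const std::vector<float> & B,
                std::vector<float> & C, int M, int N, int K) {
    for (int i0 = 0; i0 < M; i0 += BM) {
        for (int j0 = 0; j0 < N; j0 += BN) {
            for (int k0 = 0; k0 < K; k0 += BK) {
                // One block: on the GPU this is one workgroup's work, with the
                // A and B slices staged in shared memory.
                for (int i = i0; i < std::min(i0 + BM, M); i++) {
                    for (int j = j0; j < std::min(j0 + BN, N); j++) {
                        float sum = 0.0f;
                        for (int k = k0; k < std::min(k0 + BK, K); k++) {
                            sum += A[i * K + k] * B[k * N + j];
                        }
                        C[i * N + j] += sum;
                    }
                }
            }
        }
    }
}

int main() {
    const int M = 128, N = 128, K = 128;
    std::vector<float> A(M * K, 1.0f), B(K * N, 1.0f), C(M * N, 0.0f);
    gemm_tiled(A, B, C, M, N, K);
    std::printf("C[0] = %.1f (expected %d)\n", C[0], K);
    return 0;
}
```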
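
Commit c8ff09b adds a dequant_q4_0 kernel. The kernel itself is a GLSL shader; as a reference for the q4_0 layout it operates on (blocks of 32 weights: one fp16 scale plus 16 bytes of 4-bit quants, value = (quant - 8) * scale), here is a CPU sketch with the scale taken as float for simplicity:

```cpp
// CPU sketch of q4_0 dequantization, mirroring ggml's block layout:
// 32 weights per block, one scale d plus 16 bytes holding two 4-bit quants each.
#include <cstdint>
#include <cstdio>

struct block_q4_0_sketch {
    float   d;       // scale (stored as fp16 in the real format)
    uint8_t qs[16];  // 32 x 4-bit quants, two per byte
};

void dequantize_q4_0(const block_q4_0_sketch * blocks, float * out, int nblocks) {
    for (int i = 0; i < nblocks; i++) {
        const float d = blocks[i].d;
        for (int j = 0; j < 16; j++) {
            const int x0 = (blocks[i].qs[j] & 0x0F) - 8;  // low nibble
            const int x1 = (blocks[i].qs[j] >> 4)  - 8;   // high nibble
            out[i * 32 + j]      = x0 * d;
            out[i * 32 + j + 16] = x1 * d;
        }
    }
}

int main() {
    block_q4_0_sketch b{0.5f, {}};
    b.qs[0] = 0x19;  // low nibble 9 -> 0.5, high nibble 1 -> -3.5
    float out[32];
    dequantize_q4_0(&b, out, 1);
    std::printf("%f %f\n", out[0], out[16]);
    return 0;
}
```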
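
Commit c7c761a adds a split-k path for small matrix multiplications: when M and N are small there are too few output tiles to occupy the GPU, so the K dimension is split across several dispatches that each write a partial result, followed by a reduction. A CPU sketch of the idea (commit bd7fa3f later fixes a bug in the k-split path):

```cpp
// CPU sketch of split-k matrix multiplication: partition K into `splits` chunks,
// compute partial products independently (on the GPU, by separate workgroups),
// then reduce the partial results into C.
#include <cstdio>
#include <vector>

void matmul_split_k(const std::vector<float> & A, const std::vector<float> & B,
                    std::vector<float> & C, int M, int N, int K, int splits) {
    // One partial result per split; on the GPU this is a temporary buffer
    // of size splits * M * N.
    std::vector<float> partial((size_t) splits * M * N, 0.0f);

    for (int s = 0; s < splits; s++) {  // each split is an independent dispatch
        const int k_begin = s * K / splits;
        const int k_end   = (s + 1) * K / splits;
        for (int i = 0; i < M; i++) {
            for (int j = 0; j < N; j++) {
                float sum = 0.0f;
                for (int k = k_begin; k < k_end; k++) {
                    sum += A[i * K + k] * B[k * N + j];
                }
                partial[((size_t) s * M + i) * N + j] = sum;
            }
        }
    }

    // Reduction pass: sum the partial results.
    for (int i = 0; i < M * N; i++) {
        float acc = 0.0f;
        for (int s = 0; s < splits; s++) {
            acc += partial[(size_t) s * M * N + i];
        }
        C[i] = acc;
    }
}

int main() {
    const int M = 4, N = 4, K = 64;
    std::vector<float> A(M * K, 1.0f), B(K * N, 2.0f), C(M * N);
    matmul_split_k(A, B, C, M, N, K, 4);
    std::printf("C[0] = %.1f (expected %.1f)\n", C[0], 2.0f * K);
    return 0;
}
```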
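
Commit 8dd585e makes the matmul kernel variable through specialization constants, so a single SPIR-V module can be specialized to different tile and workgroup sizes at pipeline-creation time instead of compiling one shader per variant. A sketch of the host-side plumbing; the parameter struct and constant IDs are illustrative:

```cpp
// Sketch of feeding tile/workgroup parameters to a compute shader through
// specialization constants, so one SPIR-V module can be specialized per pipeline.
#include <vulkan/vulkan.hpp>
#include <array>
#include <cstddef>

struct MatmulParams {
    uint32_t BM, BN, BK;  // block tile sizes
    uint32_t warp_size;   // subgroup size to target
};

vk::SpecializationInfo make_spec_info(const MatmulParams & params,
                                      std::array<vk::SpecializationMapEntry, 4> & entries) {
    // layout(constant_id = N) in the shader <-> entry with constantID = N here
    entries = {
        vk::SpecializationMapEntry(0, (uint32_t) offsetof(MatmulParams, BM),        sizeof(uint32_t)),
        vk::SpecializationMapEntry(1, (uint32_t) offsetof(MatmulParams, BN),        sizeof(uint32_t)),
        vk::SpecializationMapEntry(2, (uint32_t) offsetof(MatmulParams, BK),        sizeof(uint32_t)),
        vk::SpecializationMapEntry(3, (uint32_t) offsetof(MatmulParams, warp_size), sizeof(uint32_t)),
    };
    // The returned struct is set as pSpecializationInfo on the
    // vk::PipelineShaderStageCreateInfo when the compute pipeline is created,
    // e.g. once with large tiles and once with small ones, without recompiling GLSL.
    return vk::SpecializationInfo((uint32_t) entries.size(), entries.data(),
                                  sizeof(MatmulParams), &params);
}

int main() {
    MatmulParams params{64, 64, 16, 32};
    std::array<vk::SpecializationMapEntry, 4> entries;
    vk::SpecializationInfo info = make_spec_info(params, entries);
    return info.mapEntryCount == 4 ? 0 : 1;
}
```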
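
Commit 2859562 runs the glslc shader-compilation commands in parallel to cut build times. The PR does this in its build scripts; purely as an illustration, the same idea in C++ with one worker per shader (the shader file names are placeholders):

```cpp
// Sketch of compiling GLSL compute shaders to SPIR-V in parallel by running
// glslc from worker threads.
#include <cstdio>
#include <cstdlib>
#include <future>
#include <string>
#include <vector>

int main() {
    const std::vector<std::string> shaders = {
        "matmul_f32.comp", "matmul_f16.comp", "dequant_q4_0.comp",  // placeholders
    };

    std::vector<std::future<int>> jobs;
    for (const std::string & src : shaders) {
        jobs.push_back(std::async(std::launch::async, [src]() {
            // optimization level, include paths, etc. omitted for brevity
            const std::string cmd = "glslc -fshader-stage=compute " + src + " -o " + src + ".spv";
            return std::system(cmd.c_str());
        }));
    }

    int failures = 0;
    for (auto & job : jobs) {
        if (job.get() != 0) {
            failures++;
        }
    }
    std::printf("%d shader(s) failed to compile\n", failures);
    return failures == 0 ? 0 : 1;
}
```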
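
Commit 1b66b8b bakes the SPIR-V bytecode into the library instead of loading shader files at runtime, and commit 0c708c1 checks in the generated ggml-vulkan-shaders.hpp. The PR generates that header with its Python shader generator; a stand-alone C++ sketch of the same embedding idea:

```cpp
// Sketch of "baking" a compiled .spv file into a C++ header as a byte array,
// so the library needs no shader files at runtime. Illustrative only.
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iterator>
#include <vector>

int main(int argc, char ** argv) {
    if (argc != 3) {
        std::fprintf(stderr, "usage: %s shader.spv array_name\n", argv[0]);
        return 1;
    }

    std::ifstream in(argv[1], std::ios::binary);
    std::vector<unsigned char> data((std::istreambuf_iterator<char>(in)),
                                    std::istreambuf_iterator<char>());

    // Emit a header with the bytecode and its length.
    std::printf("#include <cstdint>\n\n");
    std::printf("const uint8_t %s[] = {", argv[2]);
    for (size_t i = 0; i < data.size(); i++) {
        if (i % 12 == 0) std::printf("\n    ");
        std::printf("0x%02x,", data[i]);
    }
    std::printf("\n};\nconst uint64_t %s_len = %zu;\n", argv[2], data.size());
    return 0;
}
```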
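
Commits 2c7fa8d and 2e01682 move synchronization toward reusable timeline semaphores: one semaphore with monotonically increasing 64-bit values can stand in for many one-shot binary semaphores. A hedged sketch of creating and host-waiting on a timeline semaphore with vulkan.hpp, assuming a device created with the timelineSemaphore feature enabled:

```cpp
// Sketch of creating and waiting on a Vulkan timeline semaphore. Assumes
// `device` was created with Vulkan 1.2 (or VK_KHR_timeline_semaphore) and the
// timelineSemaphore feature enabled; not the PR's actual helpers.
#include <vulkan/vulkan.hpp>
#include <cstdint>

vk::Semaphore create_timeline_semaphore(vk::Device device, uint64_t initial_value = 0) {
    vk::SemaphoreTypeCreateInfo type_info(vk::SemaphoreType::eTimeline, initial_value);
    vk::SemaphoreCreateInfo create_info;
    create_info.pNext = &type_info;  // chain the timeline type onto the create info
    return device.createSemaphore(create_info);
}

// Block on the host until the semaphore reaches `value` (signalled by a queue
// submission elsewhere). Returns false on timeout.
bool wait_timeline_value(vk::Device device, vk::Semaphore sem, uint64_t value,
                         uint64_t timeout_ns = UINT64_MAX) {
    vk::SemaphoreWaitInfo wait_info({}, 1, &sem, &value);
    return device.waitSemaphores(wait_info, timeout_ns) == vk::Result::eSuccess;
}
```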
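
Commits f652ebf and bcf2a44 cap the backend's single-buffer size at the smaller of maxMemoryAllocationSize and maxBufferSize, and commit 1c953c1 only uses the latter when maintenance4 is available. A sketch of that query via the properties2 chain; the function name is illustrative and a Vulkan 1.1+ instance is assumed:

```cpp
// Sketch of querying a device's practical upper bound for a single buffer:
// min(maxMemoryAllocationSize from maintenance3, maxBufferSize from maintenance4).
// The maintenance4 struct is only chained when the caller says it is supported.
#include <vulkan/vulkan.hpp>
#include <algorithm>

vk::DeviceSize query_max_buffer_alloc(vk::PhysicalDevice dev, bool has_maintenance4) {
    vk::PhysicalDeviceProperties2 props2;
    vk::PhysicalDeviceMaintenance3Properties maint3;
    vk::PhysicalDeviceMaintenance4Properties maint4;

    props2.pNext = &maint3;
    if (has_maintenance4) {
        maint3.pNext = &maint4;  // extend the query chain only when supported
    }
    dev.getProperties2(&props2);

    vk::DeviceSize max_size = maint3.maxMemoryAllocationSize;
    if (has_maintenance4) {
        max_size = std::min(max_size, maint4.maxBufferSize);
    }
    return max_size;
}
```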