Skip to content

Commit 3b8f1ec

Browse files
authored
llamafile : tmp disable + build sgemm.o when needed (ggml-org#6716)
* build : sgemm.o only when needed
  ggml-ci
* llamafile : tmp disable due to MoE bug
  ggml-ci
1 parent 8dd1ec8 commit 3b8f1ec

File tree

2 files changed

+41
-25
lines changed

2 files changed

+41
-25
lines changed

CMakeLists.txt

+26 -12
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,18 @@ else()
4343
set(LLAMA_METAL_DEFAULT OFF)
4444
endif()
4545

46+
# TODO: fix this for Android CI
47+
# https://github.com/ggerganov/llama.cpp/pull/6716#issuecomment-2061509191
48+
#if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
49+
# set(LLAMA_LLAMAFILE_DEFAULT OFF)
50+
#else()
51+
# set(LLAMA_LLAMAFILE_DEFAULT ON)
52+
#endif()
53+
54+
# TODO: temporarily disabled until MoE is fixed
55+
# https://github.com/ggerganov/llama.cpp/pull/6716
56+
set(LLAMA_LLAMAFILE_DEFAULT OFF)
57+
4658
# general
4759
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
4860
option(LLAMA_STATIC "llama: static link libraries" OFF)
@@ -88,7 +100,7 @@ endif()
88100
# 3rd party libs
89101
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
90102
option(LLAMA_BLAS "llama: use BLAS" OFF)
91-
option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ON)
103+
option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
92104
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
93105
option(LLAMA_CUDA "llama: use CUDA" OFF)
94106
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
@@ -372,6 +384,9 @@ endif()
372384

373385
if (LLAMA_LLAMAFILE)
374386
add_compile_definitions(GGML_USE_LLAMAFILE)
387+
388+
set(GGML_HEADERS_LLAMAFILE sgemm.h)
389+
set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
375390
endif()
376391

377392
if (LLAMA_QKK_64)
@@ -1157,17 +1172,16 @@ add_library(ggml OBJECT
11571172
ggml-backend.h
11581173
ggml-quants.c
11591174
ggml-quants.h
1160-
sgemm.cpp
1161-
sgemm.h
1162-
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1163-
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1164-
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1165-
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1166-
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1167-
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1168-
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1169-
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1170-
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1175+
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1176+
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1177+
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1178+
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1179+
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1180+
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1181+
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1182+
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1183+
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1184+
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
11711185
)
11721186

11731187
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})

Makefile

+15 -13
Original file line number | Diff line number | Diff line change
@@ -219,13 +219,6 @@ ifdef LLAMA_DISABLE_LOGS
219219
MK_CPPFLAGS += -DLOG_DISABLE_LOGS
220220
endif # LLAMA_DISABLE_LOGS
221221

222-
# disable ggml.c's use of sgemm.cpp
223-
ifdef LLAMA_NO_LLAMAFILE
224-
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
225-
else
226-
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=1
227-
endif
228-
229222
# warnings
230223
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
231224
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
@@ -391,6 +384,15 @@ ifdef LLAMA_OPENBLAS
391384
MK_LDFLAGS += $(shell pkg-config --libs openblas)
392385
endif # LLAMA_OPENBLAS
393386

387+
# TODO: temporarily disabled until MoE is fixed
388+
# https://github.com/ggerganov/llama.cpp/pull/6716
389+
LLAMA_NO_LLAMAFILE := 1
390+
391+
ifndef LLAMA_NO_LLAMAFILE
392+
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
393+
OBJS += sgemm.o
394+
endif
395+
394396
ifdef LLAMA_BLIS
395397
MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
396398
MK_LDFLAGS += -lblis -L/usr/local/lib
@@ -487,11 +489,9 @@ ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/com
487489

488490
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
489491
$(NVCC_COMPILE)
490-
491492
endif # LLAMA_CUDA
492493

493494
ifdef LLAMA_CLBLAST
494-
495495
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
496496
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
497497
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
@@ -610,6 +610,11 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
610610
$(CC) $(CFLAGS) -c $< -o $@
611611
endif # LLAMA_MPI
612612

613+
ifndef LLAMA_NO_LLAMAFILE
614+
sgemm.o: sgemm.cpp sgemm.h ggml.h
615+
$(CXX) $(CXXFLAGS) -c $< -o $@
616+
endif
617+
613618
GF_CC := $(CC)
614619
include scripts/get-flags.mk
615620

@@ -683,16 +688,13 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
683688
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
684689
$(CC) $(CFLAGS) -c $< -o $@
685690

686-
sgemm.o: sgemm.cpp sgemm.h ggml.h
687-
$(CXX) $(CXXFLAGS) -c $< -o $@
688-
689691
unicode.o: unicode.cpp unicode.h
690692
$(CXX) $(CXXFLAGS) -c $< -o $@
691693

692694
unicode-data.o: unicode-data.cpp unicode-data.h
693695
$(CXX) $(CXXFLAGS) -c $< -o $@
694696

695-
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o sgemm.o
697+
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
696698

697699
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
698700
$(CXX) $(CXXFLAGS) -c $< -o $@

0 commit comments

Comments
 (0)