Skip to content

Commit b1f00fa

Browse files
authored
Fix the hordeconfig max context setting, and add Makefile flags for CUDA DMMV F16 and K-quants per iteration. (LostRuins#252)
* Fix the hordeconfig max context setting (declare maxhordectx as a global in main). * CUDA: bring the LLAMA_CUDA_DMMV_F16 and LLAMA_CUDA_KQUANTS_ITER Makefile flags over from upstream llama.cpp.
1 parent dfdd202 commit b1f00fa

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

Makefile

+8
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,14 @@ ifdef LLAMA_CUDA_DMMV_Y
149149
else
150150
NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
151151
endif # LLAMA_CUDA_DMMV_Y
152+
ifdef LLAMA_CUDA_DMMV_F16
153+
NVCCFLAGS += -DGGML_CUDA_DMMV_F16
154+
endif # LLAMA_CUDA_DMMV_F16
155+
ifdef LLAMA_CUDA_KQUANTS_ITER
156+
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
157+
else
158+
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
159+
endif
152160
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
153161
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
154162
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h

koboldcpp.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ def main(args):
724724
sys.exit(2)
725725

726726
if args.hordeconfig and args.hordeconfig[0]!="":
727-
global friendlymodelname, maxhordelen, showdebug
727+
global friendlymodelname, maxhordelen, maxhordectx, showdebug
728728
friendlymodelname = "koboldcpp/"+args.hordeconfig[0]
729729
if len(args.hordeconfig) > 1:
730730
maxhordelen = int(args.hordeconfig[1])

0 commit comments

Comments
 (0)