Skip to content

Commit 9e4475f

Browse files
Fixed OpenCL offloading prints (ggml-org#2082)
1 parent 7f0e9a7 commit 9e4475f

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

llama.cpp

+12-3
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,7 @@ static void llama_model_load_internal(
11561156
}
11571157
}
11581158
#endif // GGML_USE_CUBLAS
1159+
11591160
#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
11601161
const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
11611162

@@ -1164,6 +1165,10 @@ static void llama_model_load_internal(
11641165
fprintf(stderr, "%s: offloading non-repeating layers to GPU\n", __func__);
11651166
}
11661167
size_t vram_kv_cache = 0;
1168+
1169+
#ifdef GGML_USE_CUBLAS
1170+
const int max_backend_supported_layers = hparams.n_layer + 3;
1171+
const int max_offloadable_layers = low_vram ? hparams.n_layer + 1 : hparams.n_layer + 3;
11671172
if (n_gpu_layers > (int) hparams.n_layer + 1) {
11681173
if (low_vram) {
11691174
fprintf(stderr, "%s: cannot offload v cache to GPU due to low VRAM option\n", __func__);
@@ -1180,14 +1185,18 @@ static void llama_model_load_internal(
11801185
vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
11811186
}
11821187
}
1183-
const int max_offloadable_layers = low_vram ? hparams.n_layer + 1 : hparams.n_layer + 3;
1188+
#elif defined(GGML_USE_CLBLAST)
1189+
const int max_backend_supported_layers = hparams.n_layer + 1;
1190+
const int max_offloadable_layers = hparams.n_layer + 1;
1191+
#endif // GGML_USE_CUBLAS
1192+
11841193
fprintf(stderr, "%s: offloaded %d/%d layers to GPU\n",
1185-
__func__, std::min(n_gpu_layers, max_offloadable_layers), hparams.n_layer + 3);
1194+
__func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
11861195
fprintf(stderr, "%s: total VRAM used: %zu MB\n",
11871196
__func__, (vram_weights + vram_scratch + vram_kv_cache + MB - 1) / MB); // round up
11881197
#else
11891198
(void) n_gpu_layers;
1190-
#endif
1199+
#endif // defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
11911200
}
11921201

11931202
// populate `tensors_by_name`

0 commit comments

Comments (0)