vad : only use CPU backend for VAD processing [no ci]

danbev · danbev · commit 845233ee87e0 · 2025-05-04T13:46:43.000+02:00
This commit modifies the VAD code to only use the CPU backend for VAD
processing. There is currently an issue with the GPU backend that I
need to investigate further. It is also not clear to me whether running
the VAD processing on a GPU is actually beneficial.
diff --git a/src/whisper.cpp b/src/whisper.cpp
@@ -4879,12 +4879,15 @@ struct whisper_vad_context * whisper_vad_init_with_params_no_state(struct whispe
     buft_list_t buft_list = make_buft_list(wparams);
 
     auto create_tensor = [&](vad_tensor type, ggml_tensor * meta) -> ggml_tensor * {
+        /* TODO: Should GPU backend be used for VAD processing?
         ggml_op op = VAD_TENSOR_OPS.at(type);
         ggml_backend_buffer_type_t buft = select_weight_buft(hparams, meta, op, buft_list);
         if (!buft) {
             throw std::runtime_error(format("failed to find a compatible buffer type for tensor %s", VAD_TENSOR_NAMES.at(type)));
         }
-
+        */
+        // Only use CPU backend for now.
+        ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
         ggml_context * ctx = get_ctx(buft);
         ggml_tensor * tensor = ggml_dup_tensor(ctx, meta);
         model.tensors[VAD_TENSOR_NAMES.at(type)] = tensor;