@@ -654,6 +654,7 @@ static constexpr const qnn_op_caps ggmlqnn_k_op_caps[] = {
     {false, GGML_OP_CONV_TRANSPOSE_1D, 0, nullptr},
     {false, GGML_OP_IM2COL,            0, nullptr},
     {false, GGML_OP_IM2COL_BACK,       0, nullptr},
+    {false, GGML_OP_CONV_2D_DW,        0, nullptr},
     {false, GGML_OP_CONV_TRANSPOSE_2D, 0, nullptr},
     {false, GGML_OP_POOL_1D,           0, nullptr},
     {false, GGML_OP_POOL_2D,           0, nullptr},
@@ -760,6 +761,7 @@ static constexpr const hexagon_op_caps ggmlhexagon_k_op_caps[] = {
     {false, GGML_OP_CONV_TRANSPOSE_1D, 0, nullptr,             nullptr},
     {false, GGML_OP_IM2COL,            0, nullptr,             nullptr},
     {false, GGML_OP_IM2COL_BACK,       0, nullptr,             nullptr},
+    {false, GGML_OP_CONV_2D_DW,        0, nullptr,             nullptr},
     {false, GGML_OP_CONV_TRANSPOSE_2D, 0, nullptr,             nullptr},
     {false, GGML_OP_POOL_1D,           0, nullptr,             nullptr},
     {true,  GGML_OP_POOL_2D,           1, "ggmlop_dsp_pool2d", ggmlop_dsp_pool2d},
@@ -5574,13 +5576,21 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
     const ggml_tensor * src0 = op_tensor->src[0];
     const ggml_tensor * src1 = op_tensor->src[1];
     const int src0_rank = ggml_n_dims(src0);
+    const int64_t ne00  = src0->ne[0];
     int src1_rank = 0;
     if (nullptr != src1) {
         src1_rank = ggml_n_dims(src1);
     }
     switch (op_tensor->op) {
         case GGML_OP_ADD:
         {
+            // TODO: workaround for HWACCEL_CDSP not working with ASR inference and LLM inference
+            //       for some LLM models in a standard Android app; small tensors fall back to the
+            //       default path. The latest QNN SDK appears to use a similar approach internally.
+            if (ne00 < 1024) {
+                return false;
+            }
+
             if (!ggml_are_same_shape(src0, src1)) {
                 return false;
             }
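
Note on the new guard in ggmlhexagon_can_handle_op_through_cdsp: it is a size-threshold heuristic, rejecting GGML_OP_ADD tensors whose first dimension (ne00) is below 1024 so they stay on the CPU path instead of being offloaded to the cDSP, where the dispatch overhead would likely dominate. The sketch below is not part of the patch; the constant name and helper are hypothetical and only illustrate the gating pattern under that assumption.

// Standalone illustration of a threshold-gated offload decision.
// The cutoff mirrors the ne00 < 1024 check added in the patch; the
// names here (k_cdsp_add_min_row_width, should_offload_add_to_cdsp)
// are invented for this sketch and do not exist in the codebase.
#include <cstdint>
#include <cstdio>

static constexpr int64_t k_cdsp_add_min_row_width = 1024;

// Returns true when an elementwise ADD over rows of width `ne00` is
// considered large enough to be worth dispatching to the cDSP.
static bool should_offload_add_to_cdsp(int64_t ne00) {
    return ne00 >= k_cdsp_add_min_row_width;
}

int main() {
    const int64_t widths[] = {64, 512, 1024, 4096};
    for (int64_t w : widths) {
        std::printf("ne00=%5lld -> %s\n", (long long) w,
                    should_offload_add_to_cdsp(w) ? "offload to cDSP" : "keep on CPU");
    }
    return 0;
}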