@@ -654,6 +654,7 @@ static constexpr const qnn_op_caps ggmlqnn_k_op_caps[] = {
     {false, GGML_OP_CONV_TRANSPOSE_1D, 0, nullptr},
     {false, GGML_OP_IM2COL,            0, nullptr},
     {false, GGML_OP_IM2COL_BACK,       0, nullptr},
+    {false, GGML_OP_CONV_2D_DW,        0, nullptr},
     {false, GGML_OP_CONV_TRANSPOSE_2D, 0, nullptr},
     {false, GGML_OP_POOL_1D,           0, nullptr},
     {false, GGML_OP_POOL_2D,           0, nullptr},
@@ -760,6 +761,7 @@ static constexpr const hexagon_op_caps ggmlhexagon_k_op_caps[] = {
     {false, GGML_OP_CONV_TRANSPOSE_1D, 0, nullptr,             nullptr},
     {false, GGML_OP_IM2COL,            0, nullptr,             nullptr},
     {false, GGML_OP_IM2COL_BACK,       0, nullptr,             nullptr},
+    {false, GGML_OP_CONV_2D_DW,        0, nullptr,             nullptr},
     {false, GGML_OP_CONV_TRANSPOSE_2D, 0, nullptr,             nullptr},
     {false, GGML_OP_POOL_1D,           0, nullptr,             nullptr},
     {true,  GGML_OP_POOL_2D,           1, "ggmlop_dsp_pool2d", ggmlop_dsp_pool2d},
@@ -5574,13 +5576,21 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
     const ggml_tensor * src0 = op_tensor->src[0];
     const ggml_tensor * src1 = op_tensor->src[1];
     const int src0_rank = ggml_n_dims(src0);
+    const int64_t ne00  = src0->ne[0];
     int src1_rank = 0;
     if (nullptr != src1) {
         src1_rank = ggml_n_dims(src1);
     }
     switch (op_tensor->op) {
         case GGML_OP_ADD:
         {
+            // TODO: workaround for HWACCEL_CDSP not working with ASR inference and LLM inference
+            //       for some LLM models in a standard Android app; small tensors fall back to the
+            //       default path. The latest QNN SDK appears to use a similar approach internally.
+            if (ne00 < 1024) {
+                return false;
+            }
+
             if (!ggml_are_same_shape(src0, src1)) {
                 return false;
             }
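
Note on the new guard in ggmlhexagon_can_handle_op_through_cdsp: it is a size-threshold heuristic, rejecting GGML_OP_ADD tensors whose first dimension (ne00) is below 1024 so they stay on the CPU path instead of being offloaded to the cDSP, where the dispatch overhead would likely dominate. The sketch below is not part of the patch; the constant name and helper are hypothetical and only illustrate the gating pattern under that assumption.

// Standalone illustration of a threshold-gated offload decision.
// The cutoff mirrors the ne00 < 1024 check added in the patch; the
// names here (k_cdsp_add_min_row_width, should_offload_add_to_cdsp)
// are invented for this sketch and do not exist in the codebase.
#include <cstdint>
#include <cstdio>

static constexpr int64_t k_cdsp_add_min_row_width = 1024;

// Returns true when an elementwise ADD over rows of width `ne00` is
// considered large enough to be worth dispatching to the cDSP.
static bool should_offload_add_to_cdsp(int64_t ne00) {
    return ne00 >= k_cdsp_add_min_row_width;
}

int main() {
    const int64_t widths[] = {64, 512, 1024, 4096};
    for (int64_t w : widths) {
        std::printf("ne00=%5lld -> %s\n", (long long) w,
                    should_offload_add_to_cdsp(w) ? "offload to cDSP" : "keep on CPU");
    }
    return 0;
}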