@@ -2127,21 +2127,18 @@ inline void ggml_sycl_op_mul_mat_sycl(
2127
2127
const sycl::half *src1_ptr = src1->type == GGML_TYPE_F16
2128
2128
? (const sycl::half *)src1->data + src1_padded_row_size
2129
2129
: src1_as_f16.get ();
2130
- ggml_sycl_pool_alloc<sycl::half> dst_f16 (ctx.pool (), row_diff * src1_ncols);
2131
2130
2132
2131
#if GGML_SYCL_DNNL
2133
2132
if (!g_ggml_sycl_disable_dnn) {
2134
2133
DnnlGemmWrapper::row_gemm (ctx, src1_ncols, row_diff, ne10, src1_ptr,
2135
2134
DnnlGemmWrapper::to_dt<sycl::half>(), src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
2136
- dst_f16.get (), DnnlGemmWrapper::to_dt<sycl::half>(), stream);
2137
- scope_op_debug_print scope_dbg_print (__func__, " /to_fp32_sycl" , dst, /* num_src=*/ 2 ,
2138
- " : converting dst to fp32" );
2139
- const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (GGML_TYPE_F16, dst);
2140
- to_fp32_sycl (dst_f16.get (), dst_dd_i, row_diff* src1_ncols, stream);
2135
+ dst_dd_i, DnnlGemmWrapper::to_dt<float >(), stream);
2141
2136
}
2142
2137
else
2143
2138
#endif
2144
2139
{
2140
+ ggml_sycl_pool_alloc<sycl::half> dst_f16 (ctx.pool (), row_diff * src1_ncols);
2141
+
2145
2142
const sycl::half alpha_f16 = 1 .0f ;
2146
2143
const sycl::half beta_f16 = 0 .0f ;
2147
2144
SYCL_CHECK (CHECK_TRY_ERROR (dpct::gemm (
0 commit comments