Skip to content

Commit 2ab9d11

Browse files
committed
Merge 'origin/master' into hipblas
2 parents 3b4a531 + 04aaae1 commit 2ab9d11

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

ggml.c

+18
Original file line number | Diff line number | Diff line change
@@ -3626,6 +3626,24 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
3626 3626
}
3627 3627

3628 3628
*s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1);
3629+
#elif defined(__AVX2__)
3630+
// Initialize accumulator with zeros
3631+
__m256 acc = _mm256_setzero_ps();
3632+
3633+
// Main loop
3634+
for (int i = 0; i < nb; ++i) {
3635+
// Compute combined scale for the block
3636+
const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[i].d ), _mm256_broadcast_ss( &y[i].d ) );
3637+
__m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs);
3638+
__m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
3639+
3640+
const __m256 q = mul_sum_i8_pairs_float(bx, by);
3641+
3642+
// Multiply q with scale and accumulate
3643+
acc = _mm256_fmadd_ps( d, q, acc );
3644+
}
3645+
3646+
*s = hsum_float_8(acc);
3629 3647
#else
3630 3648
// scalar
3631 3649
float sumf = 0.0;

0 commit comments

Comments
 (0)