@@ -353,144 +353,7 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
353
353
354
354
#if defined(__VXE__ ) || defined(__VXE2__ )
355
355
#include <vecintrin.h>
356
-
357
- #define vec_neg (a ) (-(a)) // Vector Negate
358
- #define vec_add (a , b ) ((a) + (b)) // Vector Add
359
- #define vec_sub (a , b ) ((a) - (b)) // Vector Subtract
360
- #define vec_mul (a , b ) ((a) * (b)) // Vector Multiply
361
- #define vec_div (a , b ) ((a) / (b)) // Vector Divide
362
- #define vec_sl (a , b ) ((a) << (b)) // Vector Shift Left
363
- #define vec_sra (a , b ) ((a) >> (b)) // Vector Shift Right Algebraic
364
- #define vec_sr (a , b ) ((a) >> (b)) // Vector Shift Right
365
- #define vec_slo (a , b ) vec_slb(a, (b) << 64) // Vector Shift Left by Octet
366
- #define vec_sro (a , b ) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
367
-
368
- #ifndef vec_and // fallback: only defined when vec_and is not already a macro
369
- #define vec_and (a , b ) ((a) & (b)) // Vector AND
370
- #endif
371
-
372
- #ifndef vec_or // fallback: only defined when vec_or is not already a macro
373
- #define vec_or (a , b ) ((a) | (b)) // Vector OR
374
- #endif
375
-
376
- #ifndef vec_xor // fallback: only defined when vec_xor is not already a macro
377
- #define vec_xor (a , b ) ((a) ^ (b)) // Vector XOR
378
- #endif
379
-
380
- typedef signed char char8x16_t __attribute__((vector_size (16 ))); // 16-byte vector of signed char
381
- typedef unsigned char uchar8x16_t __attribute__((vector_size (16 ))); // 16-byte vector of unsigned char
382
-
383
- typedef int8_t int8x16_t __attribute__((vector_size (16 ))); // 16-byte vector: 16 lanes of int8_t
384
- typedef int16_t int16x8_t __attribute__((vector_size (16 ))); // 16-byte vector: 8 lanes of int16_t
385
- typedef int32_t int32x4_t __attribute__((vector_size (16 ))); // 16-byte vector: 4 lanes of int32_t
386
-
387
- typedef uint8_t uint8x16_t __attribute__((vector_size (16 ))); // 16-byte vector: 16 lanes of uint8_t
388
- typedef uint16_t uint16x8_t __attribute__((vector_size (16 ))); // 16-byte vector: 8 lanes of uint16_t
389
- typedef uint32_t uint32x4_t __attribute__((vector_size (16 ))); // 16-byte vector: 4 lanes of uint32_t
390
-
391
- typedef float float32x4_t __attribute__((vector_size (16 ))); // 16-byte vector of float (4 lanes, assuming 4-byte float)
392
- typedef double double64x2_t __attribute__((vector_size (16 ))); // 16-byte vector of double (2 lanes, assuming 8-byte double)
393
-
394
- typedef signed long long long64x2_t __attribute__((vector_size (16 ))); // 16-byte vector of signed long long
395
- typedef unsigned long long ulong64x2_t __attribute__((vector_size (16 ))); // 16-byte vector of unsigned long long
396
-
397
- typedef struct ggml_uint8x16x2_t { // bundle of two uint8x16_t vectors
398
- uint8x16_t val [2 ];
399
- } ggml_uint8x16x2_t ;
400
-
401
- inline static ggml_uint8x16x2_t ggml_vec_xl_u8x2 (const uint8_t * ptr ) { // loads two vectors from ptr with vec_xl and returns them by value
402
- ggml_uint8x16x2_t res ;
403
-
404
- res .val [0 ] = vec_xl ( 0 , ptr ); // first vector, offset 0
405
- res .val [1 ] = vec_xl (16 , ptr ); // second vector, offset 16 (presumably a byte offset, i.e. the next 16-byte vector — confirm)
406
-
407
- return res ;
408
- }
409
-
410
- typedef struct ggml_uint8x16x4_t { // bundle of four uint8x16_t vectors
411
- uint8x16_t val [4 ];
412
- } ggml_uint8x16x4_t ;
413
-
414
- inline static ggml_uint8x16x4_t ggml_vec_xl_u8x4 (const uint8_t * ptr ) { // loads four vectors from ptr with vec_xl and returns them by value
415
- ggml_uint8x16x4_t res ;
416
-
417
- res .val [0 ] = vec_xl ( 0 , ptr ); // offsets step by 16 per vector (presumably bytes — confirm)
418
- res .val [1 ] = vec_xl (16 , ptr );
419
- res .val [2 ] = vec_xl (32 , ptr );
420
- res .val [3 ] = vec_xl (48 , ptr ); // last load starts at offset 48
421
-
422
- return res ;
423
- }
424
-
425
- typedef struct ggml_int8x16x4_t { // bundle of four int8x16_t vectors
426
- int8x16_t val [4 ];
427
- } ggml_int8x16x4_t ;
428
-
429
- inline static ggml_int8x16x4_t ggml_vec_xl_s8x4 (const int8_t * ptr ) { // signed counterpart of the u8x4 loader: four vec_xl loads from ptr
430
- ggml_int8x16x4_t res ;
431
-
432
- res .val [0 ] = vec_xl ( 0 , ptr ); // offsets step by 16 per vector (presumably bytes — confirm)
433
- res .val [1 ] = vec_xl (16 , ptr );
434
- res .val [2 ] = vec_xl (32 , ptr );
435
- res .val [3 ] = vec_xl (48 , ptr ); // last load starts at offset 48
436
-
437
- return res ;
438
- }
439
-
440
- typedef struct ggml_int16x8x2_t { // bundle of two int16x8_t vectors
441
- int16x8_t val [2 ];
442
- } ggml_int16x8x2_t ;
443
-
444
- inline static ggml_int16x8x2_t ggml_vec_xl_s16x2 (const int16_t * ptr ) { // loads two int16x8_t vectors from ptr with vec_xl
445
- ggml_int16x8x2_t res ;
446
-
447
- res .val [0 ] = vec_xl ( 0 , ptr ); // first vector, offset 0
448
- res .val [1 ] = vec_xl (16 , ptr ); // offset 16 is presumably in bytes (8 int16_t elements), not elements — confirm
449
-
450
- return res ;
451
- }
452
-
453
- /*
454
- ! WARNING: Very slow. Use vec_perm if possible. Refer to iq4_xs
455
- ! or iq4_nl for example implementation.
456
- */
457
- inline static int8x16_t ggml_vec_tbl (int8x16_t a , uint8x16_t b ) { // per-lane table lookup: res[i] = a[b[i]] for i = 0..15
458
- int8x16_t res ;
459
-
460
- res [ 0 ] = a [b [ 0 ]]; // b[i] indexes a directly with no range check; an index past a's lanes reads out of bounds
461
- res [ 1 ] = a [b [ 1 ]];
462
- res [ 2 ] = a [b [ 2 ]];
463
- res [ 3 ] = a [b [ 3 ]];
464
- res [ 4 ] = a [b [ 4 ]];
465
- res [ 5 ] = a [b [ 5 ]];
466
- res [ 6 ] = a [b [ 6 ]];
467
- res [ 7 ] = a [b [ 7 ]];
468
- res [ 8 ] = a [b [ 8 ]];
469
- res [ 9 ] = a [b [ 9 ]];
470
- res [10 ] = a [b [10 ]];
471
- res [11 ] = a [b [11 ]];
472
- res [12 ] = a [b [12 ]];
473
- res [13 ] = a [b [13 ]];
474
- res [14 ] = a [b [14 ]];
475
- res [15 ] = a [b [15 ]];
476
-
477
- return res ;
478
- }
479
-
480
- inline static int16x8_t vec_padd_s16 (int16x8_t a , int16x8_t b ) { // NOTE(review): name suggests a pairwise add of adjacent int16 lanes of a and b — confirm against vec_pack/vec_perm semantics
481
- const uchar8x16_t v_maske = { 0 , 1 , 4 , 5 , 8 , 9 , 12 , 13 , // byte pairs (0,1), (4,5), ...: every other 2-byte element
482
- 16 , 17 , 20 , 21 , 24 , 25 , 28 , 29 }; // indices 16..29 presumably select the same pattern from b (second vec_perm operand) — confirm
483
-
484
- const int16x8_t v_abo = vec_pack ((int32x4_t )a , (int32x4_t )b ); // a and b reinterpreted as int32x4_t, then packed back down to int16x8_t
485
- const int16x8_t v_abe = vec_perm (a , b , v_maske ); // bytes of a and b gathered according to v_maske
486
- return v_abo + v_abe ; // lane-wise sum of the two rearranged vectors
487
- }
488
-
489
- inline static int32x4_t ggml_vec_dot (int32x4_t acc , int8x16_t a , int8x16_t b ) { // adds products of the int8 lanes of a and b onto acc and returns the result
490
- const int16x8_t p = vec_mule (a , b ) + vec_mulo (a , b ); // int16 lanes: vec_mule result plus vec_mulo result; NOTE(review): sum may wrap if both products are 16384 (all inputs -128) — confirm
491
- return acc + (vec_unpackh (p ) + vec_unpackl (p )); // both unpacked halves of p are summed and added to the int32 accumulator
492
- }
493
-
356
+ #include <ggml-cpu/arch/s390/typedef.h>
494
357
#endif
495
358
496
359
#if defined(__loongarch_asx )
0 commit comments