Skip to content

Commit 157f856

Browse files
committed
ggml-cpu: move s390x typedef to own header file
Signed-off-by: Aaron Teo <[email protected]>
1 parent e7910fc commit 157f856

File tree

3 files changed

+157
-178
lines changed

3 files changed

+157
-178
lines changed

ggml/src/ggml-cpu/arch/s390/typedef.h

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#ifndef GGML_S390X_TYPEDEF_H
2+
#define GGML_S390X_TYPEDEF_H
3+
4+
#include <stdlib.h>
5+
#include <vecintrin.h>
6+
7+
#ifdef __cplusplus
8+
extern "C" {
9+
#endif
10+
11+
#if defined(__s390x__) && defined(__VEC__)
12+
// Shorthand macros. vec_neg .. vec_sr expand to the plain C operator shown in
// each definition; vec_slo/vec_sro forward to vec_slb/vec_srb with the count
// shifted left by 64. vec_and/vec_or/vec_xor are only defined when no macro of
// that name already exists.
#define vec_neg(a) (-(a)) // Vector Negate
13+
#define vec_add(a, b) ((a) + (b)) // Vector Add
14+
#define vec_sub(a, b) ((a) - (b)) // Vector Subtract
15+
#define vec_mul(a, b) ((a) * (b)) // Vector Multiply
16+
#define vec_div(a, b) ((a) / (b)) // Vector Divide
17+
#define vec_sl(a, b) ((a) << (b)) // Vector Shift Left
18+
#define vec_sra(a, b) ((a) >> (b)) // Vector Shift Right Algebraic (same `>>` expansion as vec_sr)
19+
#define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right (same `>>` expansion as vec_sra)
20+
#define vec_slo(a, b) vec_slb(a, (b) << 64) // Vector Shift Left by Octet
21+
#define vec_sro(a, b) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
22+
23+
#ifndef vec_and
24+
#define vec_and(a, b) ((a) & (b)) // Vector AND
25+
#endif
26+
27+
#ifndef vec_or
28+
#define vec_or(a, b) ((a) | (b)) // Vector OR
29+
#endif
30+
31+
#ifndef vec_xor
32+
#define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
33+
#endif
34+
35+
// 16-byte vector types built with __attribute__((vector_size(16))). The names
// follow the pattern <element><bits>x<lanes>_t.
// NOTE(review): int8_t .. uint32_t come from <stdint.h>, which this header does
// not include itself (only <stdlib.h> and <vecintrin.h>) -- confirm every
// includer provides it first.
typedef signed char char8x16_t __attribute__((vector_size(16))); // signed char lanes
36+
typedef unsigned char uchar8x16_t __attribute__((vector_size(16))); // unsigned char lanes
37+
38+
typedef int8_t int8x16_t __attribute__((vector_size(16))); // 16 lanes of int8_t
39+
typedef int16_t int16x8_t __attribute__((vector_size(16))); // 8 lanes of int16_t
40+
typedef int32_t int32x4_t __attribute__((vector_size(16))); // 4 lanes of int32_t
41+
42+
typedef uint8_t uint8x16_t __attribute__((vector_size(16))); // 16 lanes of uint8_t
43+
typedef uint16_t uint16x8_t __attribute__((vector_size(16))); // 8 lanes of uint16_t
44+
typedef uint32_t uint32x4_t __attribute__((vector_size(16))); // 4 lanes of uint32_t
45+
46+
typedef float float32x4_t __attribute__((vector_size(16))); // float lanes
47+
typedef double double64x2_t __attribute__((vector_size(16))); // double lanes
48+
49+
typedef signed long long long64x2_t __attribute__((vector_size(16))); // signed long long lanes
50+
typedef unsigned long long ulong64x2_t __attribute__((vector_size(16))); // unsigned long long lanes
51+
52+
// Aggregate of two uint8x16_t vectors; the return type of ggml_vec_xl_u8x2().
typedef struct ggml_uint8x16x2_t {
53+
uint8x16_t val[2]; // val[0], val[1]: one 16-byte vector each
54+
} ggml_uint8x16x2_t;
55+
56+
/*
 * Fills both members of a ggml_uint8x16x2_t with vec_xl(0, ptr) and
 * vec_xl(16, ptr) and returns the struct by value.
 * NOTE(review): the 0/16 arguments are presumably byte displacements, which
 * would make this a load of 32 consecutive bytes from ptr -- confirm against
 * the vec_xl documentation.
 */
inline static ggml_uint8x16x2_t ggml_vec_xl_u8x2(const uint8_t * ptr) {
57+
ggml_uint8x16x2_t res;
58+
59+
res.val[0] = vec_xl( 0, ptr);
60+
res.val[1] = vec_xl(16, ptr);
61+
62+
return res;
63+
}
64+
65+
// Aggregate of four uint8x16_t vectors; the return type of ggml_vec_xl_u8x4().
typedef struct ggml_uint8x16x4_t {
66+
uint8x16_t val[4]; // val[0..3]: one 16-byte vector each
67+
} ggml_uint8x16x4_t;
68+
69+
/*
 * Fills the four members of a ggml_uint8x16x4_t with vec_xl at offsets
 * 0, 16, 32 and 48 from ptr and returns the struct by value.
 * NOTE(review): the offsets are presumably byte displacements, which would
 * make this a load of 64 consecutive bytes from ptr -- confirm against the
 * vec_xl documentation.
 */
inline static ggml_uint8x16x4_t ggml_vec_xl_u8x4(const uint8_t * ptr) {
70+
ggml_uint8x16x4_t res;
71+
72+
res.val[0] = vec_xl( 0, ptr);
73+
res.val[1] = vec_xl(16, ptr);
74+
res.val[2] = vec_xl(32, ptr);
75+
res.val[3] = vec_xl(48, ptr);
76+
77+
return res;
78+
}
79+
80+
// Aggregate of four int8x16_t vectors; the return type of ggml_vec_xl_s8x4().
typedef struct ggml_int8x16x4_t {
81+
int8x16_t val[4]; // val[0..3]: one 16-byte vector each
82+
} ggml_int8x16x4_t;
83+
84+
/*
 * Signed counterpart of ggml_vec_xl_u8x4(): fills the four members of a
 * ggml_int8x16x4_t with vec_xl at offsets 0, 16, 32 and 48 from ptr and
 * returns the struct by value.
 * NOTE(review): the offsets are presumably byte displacements (64 consecutive
 * bytes in total) -- confirm against the vec_xl documentation.
 */
inline static ggml_int8x16x4_t ggml_vec_xl_s8x4(const int8_t * ptr) {
85+
ggml_int8x16x4_t res;
86+
87+
res.val[0] = vec_xl( 0, ptr);
88+
res.val[1] = vec_xl(16, ptr);
89+
res.val[2] = vec_xl(32, ptr);
90+
res.val[3] = vec_xl(48, ptr);
91+
92+
return res;
93+
}
94+
95+
// Aggregate of two int16x8_t vectors; the return type of ggml_vec_xl_s16x2().
typedef struct ggml_int16x8x2_t {
96+
int16x8_t val[2]; // val[0], val[1]: one 16-byte vector each
97+
} ggml_int16x8x2_t;
98+
99+
/*
 * Fills both members of a ggml_int16x8x2_t with vec_xl(0, ptr) and
 * vec_xl(16, ptr) and returns the struct by value.
 * NOTE(review): the 0/16 arguments are presumably byte displacements (not
 * int16_t element counts), i.e. 16 consecutive int16_t values in total --
 * confirm against the vec_xl documentation.
 */
inline static ggml_int16x8x2_t ggml_vec_xl_s16x2(const int16_t * ptr) {
100+
ggml_int16x8x2_t res;
101+
102+
res.val[0] = vec_xl( 0, ptr);
103+
res.val[1] = vec_xl(16, ptr);
104+
105+
return res;
106+
}
107+
108+
/*
109+
! WARNING: Very slow. Use vec_perm if possible. Refer to iq4_xs
110+
! or iq4_nl for example implementation.
111+
*/
112+
// Per-lane table lookup: lane i of the result is a[b[i]] for i = 0..15, written
// out as sixteen scalar subscript operations.
// The index bytes in b are used as subscripts of the 16-lane vector a without
// any range check or masking, so callers are expected to supply indices 0..15.
inline static int8x16_t ggml_vec_tbl(int8x16_t a, uint8x16_t b) {
113+
int8x16_t res;
114+
115+
res[ 0] = a[b[ 0]];
116+
res[ 1] = a[b[ 1]];
117+
res[ 2] = a[b[ 2]];
118+
res[ 3] = a[b[ 3]];
119+
res[ 4] = a[b[ 4]];
120+
res[ 5] = a[b[ 5]];
121+
res[ 6] = a[b[ 6]];
122+
res[ 7] = a[b[ 7]];
123+
res[ 8] = a[b[ 8]];
124+
res[ 9] = a[b[ 9]];
125+
res[10] = a[b[10]];
126+
res[11] = a[b[11]];
127+
res[12] = a[b[12]];
128+
res[13] = a[b[13]];
129+
res[14] = a[b[14]];
130+
res[15] = a[b[15]];
131+
132+
return res;
133+
}
134+
135+
/*
 * Returns v_abo + v_abe, two int16x8_t vectors derived from a and b below.
 * NOTE(review): judging by the name and the mask, this is a pairwise add of
 * adjacent 16-bit lanes (a's pair sums in lanes 0..3, b's in lanes 4..7).
 * That reading depends on the vec_pack / vec_perm semantics hedged below --
 * confirm against the intrinsic documentation.
 */
inline static int16x8_t vec_padd_s16(int16x8_t a, int16x8_t b) {
136+
// Byte indices {0,1}, {4,5}, {8,9}, {12,13}: the two bytes of every other
// 16-bit lane, starting with lane 0. Indices 16..29 repeat the pattern 16
// bytes further on -- presumably addressing b in vec_perm's a:b concatenation.
const uchar8x16_t v_maske = { 0, 1, 4, 5, 8, 9, 12, 13,
137+
16, 17, 20, 21, 24, 25, 28, 29 };
138+
139+
// a and b are reinterpreted as 4 x 32-bit lanes and narrowed back to 16-bit by
// vec_pack -- presumably keeping the low half of each 32-bit lane, i.e. the
// lanes v_maske skips (TODO confirm).
const int16x8_t v_abo = vec_pack((int32x4_t)a, (int32x4_t)b);
140+
// The lanes of a and b selected by v_maske.
const int16x8_t v_abe = vec_perm(a, b, v_maske);
141+
return v_abo + v_abe;
142+
}
143+
144+
/*
 * Adds the widened products of a and b to acc and returns the new 4-lane
 * accumulator; the inputs are taken by value and not modified.
 * NOTE(review): the sum of the vec_mule and vec_mulo results is held in
 * 16-bit lanes (p is int16x8_t) before being widened. If both products in a
 * lane are (-128)*(-128) the sum would not fit in int16_t -- confirm callers
 * never pass such inputs.
 */
inline static int32x4_t ggml_vec_dot(int32x4_t acc, int8x16_t a, int8x16_t b) {
145+
// vec_mule / vec_mulo: presumably the products of the even- and odd-numbered
// 8-bit lanes; their sum is stored as eight 16-bit lanes.
const int16x8_t p = vec_mule(a, b) + vec_mulo(a, b);
146+
// vec_unpackh / vec_unpackl: presumably widen the high and low halves of p to
// 32-bit lanes; both halves are added into acc.
return acc + (vec_unpackh(p) + vec_unpackl(p));
147+
}
148+
149+
#endif // __s390x__ && __VEC__
150+
151+
#ifdef __cplusplus
152+
}
153+
#endif // __cplusplus
154+
155+
#endif // GGML_S390X_TYPEDEF_H

ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 1 addition & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -353,144 +353,7 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
353353

354354
#if defined(__VXE__) || defined(__VXE2__)
355355
#include <vecintrin.h>
356-
357-
#define vec_neg(a) (-(a)) // Vector Negate
358-
#define vec_add(a, b) ((a) + (b)) // Vector Add
359-
#define vec_sub(a, b) ((a) - (b)) // Vector Subtract
360-
#define vec_mul(a, b) ((a) * (b)) // Vector Multiply
361-
#define vec_div(a, b) ((a) / (b)) // Vector Divide
362-
#define vec_sl(a, b) ((a) << (b)) // Vector Shift Left
363-
#define vec_sra(a, b) ((a) >> (b)) // Vector Shift Right
364-
#define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right Algebraic
365-
#define vec_slo(a, b) vec_slb(a, (b) << 64) // Vector Shift Left by Octet
366-
#define vec_sro(a, b) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
367-
368-
#ifndef vec_and
369-
#define vec_and(a, b) ((a) & (b)) // Vector AND
370-
#endif
371-
372-
#ifndef vec_or
373-
#define vec_or(a, b) ((a) | (b)) // Vector OR
374-
#endif
375-
376-
#ifndef vec_xor
377-
#define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
378-
#endif
379-
380-
typedef signed char char8x16_t __attribute__((vector_size(16)));
381-
typedef unsigned char uchar8x16_t __attribute__((vector_size(16)));
382-
383-
typedef int8_t int8x16_t __attribute__((vector_size(16)));
384-
typedef int16_t int16x8_t __attribute__((vector_size(16)));
385-
typedef int32_t int32x4_t __attribute__((vector_size(16)));
386-
387-
typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
388-
typedef uint16_t uint16x8_t __attribute__((vector_size(16)));
389-
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
390-
391-
typedef float float32x4_t __attribute__((vector_size(16)));
392-
typedef double double64x2_t __attribute__((vector_size(16)));
393-
394-
typedef signed long long long64x2_t __attribute__((vector_size(16)));
395-
typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
396-
397-
typedef struct ggml_uint8x16x2_t {
398-
uint8x16_t val[2];
399-
} ggml_uint8x16x2_t;
400-
401-
inline static ggml_uint8x16x2_t ggml_vec_xl_u8x2(const uint8_t * ptr) {
402-
ggml_uint8x16x2_t res;
403-
404-
res.val[0] = vec_xl( 0, ptr);
405-
res.val[1] = vec_xl(16, ptr);
406-
407-
return res;
408-
}
409-
410-
typedef struct ggml_uint8x16x4_t {
411-
uint8x16_t val[4];
412-
} ggml_uint8x16x4_t;
413-
414-
inline static ggml_uint8x16x4_t ggml_vec_xl_u8x4(const uint8_t * ptr) {
415-
ggml_uint8x16x4_t res;
416-
417-
res.val[0] = vec_xl( 0, ptr);
418-
res.val[1] = vec_xl(16, ptr);
419-
res.val[2] = vec_xl(32, ptr);
420-
res.val[3] = vec_xl(48, ptr);
421-
422-
return res;
423-
}
424-
425-
typedef struct ggml_int8x16x4_t {
426-
int8x16_t val[4];
427-
} ggml_int8x16x4_t;
428-
429-
inline static ggml_int8x16x4_t ggml_vec_xl_s8x4(const int8_t * ptr) {
430-
ggml_int8x16x4_t res;
431-
432-
res.val[0] = vec_xl( 0, ptr);
433-
res.val[1] = vec_xl(16, ptr);
434-
res.val[2] = vec_xl(32, ptr);
435-
res.val[3] = vec_xl(48, ptr);
436-
437-
return res;
438-
}
439-
440-
typedef struct ggml_int16x8x2_t {
441-
int16x8_t val[2];
442-
} ggml_int16x8x2_t;
443-
444-
inline static ggml_int16x8x2_t ggml_vec_xl_s16x2(const int16_t * ptr) {
445-
ggml_int16x8x2_t res;
446-
447-
res.val[0] = vec_xl( 0, ptr);
448-
res.val[1] = vec_xl(16, ptr);
449-
450-
return res;
451-
}
452-
453-
/*
454-
! WARNING: Very slow. Use vec_perm if possible. Refer to iq4_xs
455-
! or iq4_nl for example implementation.
456-
*/
457-
inline static int8x16_t ggml_vec_tbl(int8x16_t a, uint8x16_t b) {
458-
int8x16_t res;
459-
460-
res[ 0] = a[b[ 0]];
461-
res[ 1] = a[b[ 1]];
462-
res[ 2] = a[b[ 2]];
463-
res[ 3] = a[b[ 3]];
464-
res[ 4] = a[b[ 4]];
465-
res[ 5] = a[b[ 5]];
466-
res[ 6] = a[b[ 6]];
467-
res[ 7] = a[b[ 7]];
468-
res[ 8] = a[b[ 8]];
469-
res[ 9] = a[b[ 9]];
470-
res[10] = a[b[10]];
471-
res[11] = a[b[11]];
472-
res[12] = a[b[12]];
473-
res[13] = a[b[13]];
474-
res[14] = a[b[14]];
475-
res[15] = a[b[15]];
476-
477-
return res;
478-
}
479-
480-
inline static int16x8_t vec_padd_s16(int16x8_t a, int16x8_t b) {
481-
const uchar8x16_t v_maske = { 0, 1, 4, 5, 8, 9, 12, 13,
482-
16, 17, 20, 21, 24, 25, 28, 29 };
483-
484-
const int16x8_t v_abo = vec_pack((int32x4_t)a, (int32x4_t)b);
485-
const int16x8_t v_abe = vec_perm(a, b, v_maske);
486-
return v_abo + v_abe;
487-
}
488-
489-
inline static int32x4_t ggml_vec_dot(int32x4_t acc, int8x16_t a, int8x16_t b) {
490-
const int16x8_t p = vec_mule(a, b) + vec_mulo(a, b);
491-
return acc + (vec_unpackh(p) + vec_unpackl(p));
492-
}
493-
356+
#include <ggml-cpu/arch/s390/typedef.h>
494357
#endif
495358

496359
#if defined(__loongarch_asx)

ggml/src/ggml-impl.h

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -30,46 +30,7 @@
3030

3131
#if defined(__s390x__) && defined(__VEC__)
3232
#include <vecintrin.h>
33-
34-
#define vec_neg(a) (-(a)) // Vector Negate
35-
#define vec_add(a, b) ((a) + (b)) // Vector Add
36-
#define vec_sub(a, b) ((a) - (b)) // Vector Subtract
37-
#define vec_mul(a, b) ((a) * (b)) // Vector Multiply
38-
#define vec_div(a, b) ((a) / (b)) // Vector Divide
39-
#define vec_sl(a, b) ((a) << (b)) // Vector Shift Left
40-
#define vec_sra(a, b) ((a) >> (b)) // Vector Shift Right
41-
#define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right Algebraic
42-
#define vec_slo(a, b) vec_slb(a, (b) << 64) // Vector Shift Left by Octet
43-
#define vec_sro(a, b) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
44-
45-
#ifndef vec_and
46-
#define vec_and(a, b) ((a) & (b)) // Vector AND
47-
#endif
48-
49-
#ifndef vec_or
50-
#define vec_or(a, b) ((a) | (b)) // Vector OR
51-
#endif
52-
53-
#ifndef vec_xor
54-
#define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
55-
#endif
56-
57-
typedef signed char char8x16_t __attribute__((vector_size(16)));
58-
typedef unsigned char uchar8x16_t __attribute__((vector_size(16)));
59-
60-
typedef int8_t int8x16_t __attribute__((vector_size(16)));
61-
typedef int16_t int16x8_t __attribute__((vector_size(16)));
62-
typedef int32_t int32x4_t __attribute__((vector_size(16)));
63-
64-
typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
65-
typedef uint16_t uint16x8_t __attribute__((vector_size(16)));
66-
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
67-
68-
typedef float float32x4_t __attribute__((vector_size(16)));
69-
typedef double double64x2_t __attribute__((vector_size(16)));
70-
71-
typedef signed long long long64x2_t __attribute__((vector_size(16)));
72-
typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
33+
#include <ggml-cpu/arch/s390/typedef.h>
7334

7435
#if defined(GGML_NNPA)
7536
#ifndef __NNPA__

0 commit comments

Comments
 (0)