Skip to content

Commit cae360d

Browse files
committed
Added MIPS32 assembly
1 parent 25a2e19 commit cae360d

File tree

4 files changed

+245
-4
lines changed

4 files changed

+245
-4
lines changed

Makefile

+6-4
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,13 @@ ifndef CC
44
CC = gcc
55
endif
66

7-
ssvb-membench: main.c util.o util.h asm-opt.h asm-opt.o x86-sse2.o arm-neon.o
8-
${CC} -O2 ${CFLAGS} -o ssvb-membench main.c util.o asm-opt.o \
9-
x86-sse2.o arm-neon.o
7+
ssvb-membench: main.c util.o util.h asm-opt.h asm-opt.o x86-sse2.o arm-neon.o mips-32.o
8+
${CC} -O2 ${CFLAGS} -o ssvb-membench main.c util.o asm-opt.o x86-sse2.o arm-neon.o mips-32.o
109

1110
util.o: util.c util.h
1211
${CC} -O2 ${CFLAGS} -c util.c
1312

14-
asm-opt.o: asm-opt.c asm-opt.h x86-sse2.h arm-neon.h
13+
asm-opt.o: asm-opt.c asm-opt.h x86-sse2.h arm-neon.h mips-32.h
1514
${CC} -O2 ${CFLAGS} -c asm-opt.c
1615

1716
x86-sse2.o: x86-sse2.S
@@ -20,6 +19,9 @@ x86-sse2.o: x86-sse2.S
2019
arm-neon.o: arm-neon.S
2120
${CC} -O2 ${CFLAGS} -c arm-neon.S
2221

22+
mips-32.o: mips-32.S
23+
${CC} -O2 ${CFLAGS} -c mips-32.S
24+
2325
clean:
2426
-rm ssvb-membench
2527
-rm *.o

asm-opt.c

+20
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,26 @@ bench_info *get_asm_benchmarks(void)
176176
return empty;
177177
}
178178

179+
#elif defined(__mips__) && defined(_ABIO32)
180+
181+
#include "mips-32.h"
182+
183+
/*
 * Table of the hand-written MIPS32 benchmark routines exported by mips-32.S.
 * Each entry is { description string, 0, routine }.
 * NOTE(review): the meaning of the middle field is not visible in this hunk;
 * confirm against the bench_info declaration.
 */
static bench_info mips_32[] =
184+
{
185+
{ "MIPS32 copy prefetched (32 bytes step)", 0, aligned_block_copy_pf32_mips32 },
186+
{ "MIPS32 fill prefetched (32 bytes step)", 0, aligned_block_fill_pf32_mips32 },
187+
{ NULL, 0, NULL } /* all-NULL entry; presumably the end-of-table marker -- confirm against the consumer */
188+
};
189+
190+
/*
 * MIPS o32 variant of get_asm_benchmarks(): returns the mips_32 table when
 * check_cpu_feature() accepts "24Kc" or "74K", otherwise returns 'empty'
 * (defined elsewhere in this file).
 * NOTE(review): check_cpu_feature() is defined outside this hunk; presumably
 * it matches the string against the CPU description -- confirm.
 */
bench_info *get_asm_benchmarks(void)
191+
{
192+
/* Offer these routines only on cores with a 32-byte cache line, which mips-32.S requires */
193+
if (check_cpu_feature("24Kc") || check_cpu_feature("74K"))
194+
return mips_32;
195+
else
196+
return empty;
197+
}
198+
179199
#else
180200

181201
bench_info *get_asm_benchmarks(void)

mips-32.S

+183
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/*
2+
* Copyright © 2011 Siarhei Siamashka <[email protected]>
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a
5+
* copy of this software and associated documentation files (the "Software"),
6+
* to deal in the Software without restriction, including without limitation
7+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8+
* and/or sell copies of the Software, and to permit persons to whom the
9+
* Software is furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice (including the next
12+
* paragraph) shall be included in all copies or substantial portions of the
13+
* Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18+
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21+
* DEALINGS IN THE SOFTWARE.
22+
*/
23+
24+
#if defined(__mips__) && defined(_ABIO32)
25+
26+
.text /* everything below is emitted into the code section */
27+
.align 2 /* 2^2 = 4-byte alignment, the size of one MIPS32 instruction */
28+
.set noreorder /* the assembler must not reorder code or fill delay slots; they are filled by hand below */
29+
.set nomips16 /* assemble as regular 32-bit instructions, not MIPS16 */
30+
.set mips32 /* allow MIPS32 ISA instructions (the routines below use 'pref') */
31+
32+
/*
 * asm_function <name>: open a globally visible function called <name>.
 * Each use must be closed with a matching '.endfunc', because the macro
 * issues '.func'.
 */
.macro asm_function function_name
33+
.global \function_name /* export the symbol so the C code can link against it */
34+
.type \function_name, @function /* mark the symbol as a function in the object file */
35+
.func \function_name /* start of function debug information; paired with .endfunc */
36+
\function_name: /* the entry point label itself */
37+
.endm
38+
39+
/*****************************************************************************/
40+
41+
/*
42+
* void aligned_block_fill_pf32_mips32(int64_t *dst, int64_t *src, int size)
43+
*
44+
* Fill the memory block at 'dst' with an 8-byte pattern loaded from 'src'.
45+
* The memory block must be 32-byte aligned and its size must be a multiple
46+
* of 64 bytes (the size is rounded down to 64; below 64 nothing is written).
47+
*
48+
* Important: the size of a cache line *must* be 32 bytes.
49+
*/
50+
asm_function aligned_block_fill_pf32_mips32
51+
52+
.set DST, $a0 /* 1st argument: destination pointer, advanced by 64 per iteration */
53+
.set SRC, $a1 /* 2nd argument: address of the 8-byte fill pattern */
54+
.set SIZE, $a2 /* 3rd argument: byte count */
55+
.set LIMIT, $a3 /* scratch: address of the last 64-byte chunk to fill */
56+
57+
slti $t0, SIZE, 64 /* $t0 = 1 when fewer than 64 bytes were requested */
58+
bnez $t0, 2f /* too small: return without writing anything */
59+
sra SIZE, SIZE, 6 /* branch delay slot, executed on both paths: SIZE /= 64 */
60+
lw $t0, 0(SRC) /* low word of the pattern */
61+
sll SIZE, SIZE, 6 /* SIZE *= 64, i.e. SIZE is now rounded down to a multiple of 64 */
62+
lw $t1, 4(SRC) /* high word of the pattern */
63+
add LIMIT, DST, SIZE /* one past the end of the block; NOTE(review): add/addi trap on signed overflow, addu/addiu is the usual choice for addresses */
64+
pref 30, 0(DST) /* hint 30 = PrepareForStore (MIPS32 ISA): claim the first cache line for writing */
65+
addi LIMIT, LIMIT, -64 /* LIMIT = start of the final 64-byte chunk */
66+
b 1f /* first chunk was prefetched above, so skip the in-loop prefetch */
67+
pref 30, 32(DST) /* branch delay slot: claim the second cache line of the first chunk */
68+
0:
69+
pref 30, 64(DST) /* claim both cache lines of the next chunk ... */
70+
pref 30, 96(DST)
71+
addiu DST, DST, 64 /* ... and step to it */
72+
1:
73+
nop /* NOTE(review): the purpose of these two nops is not evident from the code; confirm with the author */
74+
nop
75+
sw $t0, 0(DST) /* 16 word stores = 8 copies of the pattern = one 64-byte chunk */
76+
sw $t1, 4(DST)
77+
sw $t0, 8(DST)
78+
sw $t1, 12(DST)
79+
sw $t0, 16(DST)
80+
sw $t1, 20(DST)
81+
sw $t0, 24(DST)
82+
sw $t1, 28(DST)
83+
sw $t0, 32(DST)
84+
sw $t1, 36(DST)
85+
sw $t0, 40(DST)
86+
sw $t1, 44(DST)
87+
sw $t0, 48(DST)
88+
sw $t1, 52(DST)
89+
sw $t0, 56(DST)
90+
sw $t1, 60(DST)
91+
bne DST, LIMIT, 0b /* loop until the chunk just written was the final one */
92+
nop /* branch delay slot */
93+
2:
94+
jr $ra /* return to the caller */
95+
nop /* branch delay slot */
96+
.endfunc
97+
98+
/*
99+
* void aligned_block_copy_pf32_mips32(int64_t *dst, int64_t *src, int size)
100+
*
101+
* Copy a memory block from 'src' to 'dst'. The destination block must be
102+
* 32-byte aligned and its size must be a multiple of 64 bytes (the size is
103+
* rounded down to 64; below 64 nothing is copied). 'src' must be 4-byte aligned.
104+
*
105+
* Important: the size of a cache line *must* be 32 bytes.
106+
*/
107+
asm_function aligned_block_copy_pf32_mips32
108+
109+
.set DST, $a0 /* 1st argument: destination pointer, advanced by 64 per iteration */
110+
.set SRC, $a1 /* 2nd argument: source pointer, advanced by 64 per iteration */
111+
.set SIZE, $a2 /* 3rd argument: byte count */
112+
.set LIMIT, $a3 /* scratch: destination address of the last 64-byte chunk */
113+
114+
addi $sp, $sp, -32 /* reserve 32 bytes of stack for $s0-$s7, which are used as data registers below */
115+
sw $s0, 0($sp)
116+
sw $s1, 4($sp)
117+
sw $s2, 8($sp)
118+
sw $s3, 12($sp)
119+
sw $s4, 16($sp)
120+
sw $s5, 20($sp)
121+
sw $s6, 24($sp)
122+
sw $s7, 28($sp)
123+
124+
slti $v0, SIZE, 64 /* $v0 = 1 when fewer than 64 bytes were requested */
125+
bnez $v0, 2f /* too small: go straight to the register restore and return */
126+
sra SIZE, SIZE, 6 /* branch delay slot, executed on both paths: SIZE /= 64 */
127+
sll SIZE, SIZE, 6 /* SIZE *= 64, i.e. SIZE is now rounded down to a multiple of 64 */
128+
add LIMIT, DST, SIZE /* one past the end of the destination; NOTE(review): add/addi trap on signed overflow, addu/addiu is the usual choice for addresses */
129+
addi LIMIT, LIMIT, -64 /* LIMIT = start of the final 64-byte destination chunk */
130+
0:
131+
pref 4, 160(SRC) /* hint 4 = load_streamed (MIPS32 ISA): prefetch source data 160 bytes ahead */
132+
lw $t0, 0(SRC) /* load the 64-byte chunk into $t0-$t7 and $s0-$s7 */
133+
lw $t1, 4(SRC)
134+
lw $t2, 8(SRC)
135+
lw $t3, 12(SRC)
136+
pref 4, 192(SRC) /* ... and 192 bytes ahead */
137+
lw $t4, 16(SRC)
138+
lw $t5, 20(SRC)
139+
lw $t6, 24(SRC)
140+
lw $t7, 28(SRC)
141+
pref 30, 0(DST) /* hint 30 = PrepareForStore: claim the first destination cache line for writing */
142+
lw $s0, 32(SRC)
143+
lw $s1, 36(SRC)
144+
lw $s2, 40(SRC)
145+
lw $s3, 44(SRC)
146+
pref 30, 32(DST) /* claim the second destination cache line */
147+
lw $s4, 48(SRC)
148+
lw $s5, 52(SRC)
149+
lw $s6, 56(SRC)
150+
lw $s7, 60(SRC)
151+
addiu SRC, SRC, 64 /* source chunk fully loaded; step to the next one */
152+
sw $t0, 0(DST) /* write the 16 loaded words to the destination chunk */
153+
sw $t1, 4(DST)
154+
sw $t2, 8(DST)
155+
sw $t3, 12(DST)
156+
sw $t4, 16(DST)
157+
sw $t5, 20(DST)
158+
sw $t6, 24(DST)
159+
sw $t7, 28(DST)
160+
sw $s0, 32(DST)
161+
sw $s1, 36(DST)
162+
sw $s2, 40(DST)
163+
sw $s3, 44(DST)
164+
sw $s4, 48(DST)
165+
sw $s5, 52(DST)
166+
sw $s6, 56(DST)
167+
sw $s7, 60(DST)
168+
bne DST, LIMIT, 0b /* compares the not-yet-advanced DST: loop until the final chunk was written */
169+
addiu DST, DST, 64 /* branch delay slot: step DST to the next chunk */
170+
2:
171+
lw $s0, 0($sp) /* restore the registers saved in the prologue */
172+
lw $s1, 4($sp)
173+
lw $s2, 8($sp)
174+
lw $s3, 12($sp)
175+
lw $s4, 16($sp)
176+
lw $s5, 20($sp)
177+
lw $s6, 24($sp)
178+
lw $s7, 28($sp)
179+
jr $ra /* return to the caller */
180+
addi $sp, $sp, 32 /* branch delay slot: release the stack frame */
181+
.endfunc
182+
183+
#endif

mips-32.h

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright © 2011 Siarhei Siamashka <[email protected]>
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a
5+
* copy of this software and associated documentation files (the "Software"),
6+
* to deal in the Software without restriction, including without limitation
7+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8+
* and/or sell copies of the Software, and to permit persons to whom the
9+
* Software is furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice (including the next
12+
* paragraph) shall be included in all copies or substantial portions of the
13+
* Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18+
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21+
* DEALINGS IN THE SOFTWARE.
22+
*/
23+
24+
#ifndef __MIPS_32_H__
25+
#define __MIPS_32_H__
26+
27+
#include <stdint.h>
28+
29+
/*
 * Copy 'size' bytes from 'src' to 'dst' (implemented in mips-32.S).
 * 'dst' must be 32-byte aligned, 'src' 4-byte aligned, and 'size' a multiple
 * of 64 bytes. Requires a CPU with 32-byte cache lines.
 */
void aligned_block_copy_pf32_mips32(int64_t * __restrict dst,
30+
int64_t * __restrict src,
31+
int size);
32+
/*
 * Fill 'size' bytes at 'dst' with the 8-byte pattern read from 'src'
 * (implemented in mips-32.S). 'dst' must be 32-byte aligned and 'size' a
 * multiple of 64 bytes. Requires a CPU with 32-byte cache lines.
 */
void aligned_block_fill_pf32_mips32(int64_t * __restrict dst,
33+
int64_t * __restrict src,
34+
int size);
35+
36+
#endif

0 commit comments

Comments
 (0)