|
| 1 | +/* |
| 2 | + * Copyright © 2011 Siarhei Siamashka <[email protected]> |
| 3 | + * |
| 4 | + * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | + * copy of this software and associated documentation files (the "Software"), |
| 6 | + * to deal in the Software without restriction, including without limitation |
| 7 | + * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | + * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | + * Software is furnished to do so, subject to the following conditions: |
| 10 | + * |
| 11 | + * The above copyright notice and this permission notice (including the next |
| 12 | + * paragraph) shall be included in all copies or substantial portions of the |
| 13 | + * Software. |
| 14 | + * |
| 15 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 20 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| 21 | + * DEALINGS IN THE SOFTWARE. |
| 22 | + */ |
| 23 | + |
| 24 | +#if defined(__mips__) && defined(_ABIO32) |
| 25 | + |
| 26 | +.text /* everything below is emitted into the code section */ |
| 27 | +.align 2 /* argument is a power of two on MIPS: 4-byte alignment */ |
| 28 | +.set noreorder /* branch delay slots are filled by hand; the assembler must not reorder */ |
| 29 | +.set nomips16 /* use the regular 32-bit instruction encodings */ |
| 30 | +.set mips32 /* accept MIPS32 instructions (the code below uses pref) */ |
| 31 | + |
| 32 | +.macro asm_function function_name /* emits the directives that open one exported function */ |
| 33 | + .global \function_name /* make the symbol visible to the linker */ |
| 34 | + .type \function_name, @function /* mark the symbol as a function */ |
| 35 | + .func \function_name /* start of function debug info; each user closes it with .endfunc */ |
| 36 | +\function_name: /* the entry point label */ |
| 37 | +.endm |
| 38 | + |
| 39 | +/*****************************************************************************/ |
| 40 | + |
| 41 | +/* |
| 42 | + * void aligned_block_fill_pf32_mips32(int64_t *dst, int64_t *src, int size) |
| 43 | + * |
| 44 | + * Fill memory block at 'dst' with a 8 byte pattern loaded from 'src'. |
| 45 | + * Memory block must be 32 bytes aligned and its size must be a multiple |
| 46 | + * of 64 bytes. |
| 47 | + * |
| 48 | + * Important: the size of cache line *must* be 32 bytes. |
| 49 | + */ |
| 50 | +asm_function aligned_block_fill_pf32_mips32 |
| 51 | + |
| 52 | + .set DST, $a0 /* arg 0: current 64-byte destination block */ |
| 53 | + .set SRC, $a1 /* arg 1: address of the 8-byte fill pattern */ |
| 54 | + .set SIZE, $a2 /* arg 2: byte count, rounded down to a multiple of 64 below */ |
| 55 | + .set LIMIT, $a3 /* scratch: address of the last 64-byte block to fill */ |
| 56 | + |
| 57 | + slti $t0, SIZE, 64 /* t0 = 1 when SIZE < 64 (signed, so negative sizes also match) */ |
| 58 | + bnez $t0, 2f /* less than one full block: nothing to do */ |
| 59 | + sra SIZE, SIZE, 6 /* delay slot: SIZE /= 64; harmless when the branch is taken */ |
| 60 | + lw $t0, 0(SRC) /* t0 = first word of the pattern */ |
| 61 | + sll SIZE, SIZE, 6 /* SIZE = whole 64-byte blocks, expressed in bytes */ |
| 62 | + lw $t1, 4(SRC) /* t1 = second word of the pattern */ |
| 63 | + addu LIMIT, DST, SIZE /* addu, not add: address arithmetic must never raise an overflow trap */ |
| 64 | + pref 30, 0(DST) /* hint 30 (PrepareForStore): first cache line of the first block */ |
| 65 | + addiu LIMIT, LIMIT, -64 /* LIMIT = start of the final block (non-trapping form) */ |
| 66 | + b 1f /* first block is already prepared: enter the loop past its header */ |
| 67 | + pref 30, 32(DST) /* delay slot: second cache line of the first block */ |
| 68 | +0: |
| 69 | + pref 30, 64(DST) /* prepare both cache lines of the next block ... */ |
| 70 | + pref 30, 96(DST) |
| 71 | + addiu DST, DST, 64 /* ... then advance to it */ |
| 72 | +1: |
| 73 | + nop /* NOTE(review): purpose of these two nops is not evident from the code; */ |
| 74 | + nop /* presumably padding or scheduling - confirm before removing */ |
| 75 | + sw $t0, 0(DST) /* 16 word stores = 8 copies of the pattern = one 64-byte block */ |
| 76 | + sw $t1, 4(DST) |
| 77 | + sw $t0, 8(DST) |
| 78 | + sw $t1, 12(DST) |
| 79 | + sw $t0, 16(DST) |
| 80 | + sw $t1, 20(DST) |
| 81 | + sw $t0, 24(DST) |
| 82 | + sw $t1, 28(DST) |
| 83 | + sw $t0, 32(DST) |
| 84 | + sw $t1, 36(DST) |
| 85 | + sw $t0, 40(DST) |
| 86 | + sw $t1, 44(DST) |
| 87 | + sw $t0, 48(DST) |
| 88 | + sw $t1, 52(DST) |
| 89 | + sw $t0, 56(DST) |
| 90 | + sw $t1, 60(DST) |
| 91 | + bne DST, LIMIT, 0b /* repeat until the final block has been written */ |
| 92 | + nop /* delay slot */ |
| 93 | +2: |
| 94 | + jr $ra /* return; no stack frame was created */ |
| 95 | + nop /* delay slot */ |
| 96 | +.endfunc |
| 97 | + |
| 98 | +/* |
| 99 | + * void aligned_block_copy_pf32_mips32(int64_t *dst, int64_t *src, int size) |
| 100 | + * |
| 101 | + * Copy memory block from 'src' to 'dst'. Destination block must be 32 bytes |
| 102 | + * aligned and its size must be a multiple of 64 bytes. Source block must |
| 103 | + * be 4 bytes aligned. |
| 104 | + * |
| 105 | + * Important: the size of cache line *must* be 32 bytes. |
| 106 | + */ |
| 107 | +asm_function aligned_block_copy_pf32_mips32 |
| 108 | + |
| 109 | + .set DST, $a0 /* arg 0: current 64-byte destination block */ |
| 110 | + .set SRC, $a1 /* arg 1: current 64-byte source block */ |
| 111 | + .set SIZE, $a2 /* arg 2: byte count, rounded down to a multiple of 64 below */ |
| 112 | + .set LIMIT, $a3 /* scratch: address of the last destination block */ |
| 113 | + |
| 114 | + addiu $sp, $sp, -32 /* 32-byte frame for s0-s7; addiu so stack math can never trap */ |
| 115 | + sw $s0, 0($sp) /* s0-s7 serve as copy buffers in the loop, so they are saved here */ |
| 116 | + sw $s1, 4($sp) /* and restored at label 2 */ |
| 117 | + sw $s2, 8($sp) |
| 118 | + sw $s3, 12($sp) |
| 119 | + sw $s4, 16($sp) |
| 120 | + sw $s5, 20($sp) |
| 121 | + sw $s6, 24($sp) |
| 122 | + sw $s7, 28($sp) |
| 123 | + |
| 124 | + slti $v0, SIZE, 64 /* v0 = 1 when SIZE < 64 (signed, so negative sizes also match) */ |
| 125 | + bnez $v0, 2f /* less than one full block: just restore registers and return */ |
| 126 | + sra SIZE, SIZE, 6 /* delay slot: SIZE /= 64; harmless when the branch is taken */ |
| 127 | + sll SIZE, SIZE, 6 /* SIZE = whole 64-byte blocks, expressed in bytes */ |
| 128 | + addu LIMIT, DST, SIZE /* addu, not add: address arithmetic must never raise an overflow trap */ |
| 129 | + addiu LIMIT, LIMIT, -64 /* LIMIT = start of the final destination block (non-trapping form) */ |
| 130 | +0: |
| 131 | + pref 4, 160(SRC) /* hint 4 (load_streamed): prefetch source data 160 bytes ahead */ |
| 132 | + lw $t0, 0(SRC) /* load the whole 64-byte source block into t0-t7 and s0-s7 */ |
| 133 | + lw $t1, 4(SRC) |
| 134 | + lw $t2, 8(SRC) |
| 135 | + lw $t3, 12(SRC) |
| 136 | + pref 4, 192(SRC) /* ... and the following 32-byte line */ |
| 137 | + lw $t4, 16(SRC) |
| 138 | + lw $t5, 20(SRC) |
| 139 | + lw $t6, 24(SRC) |
| 140 | + lw $t7, 28(SRC) |
| 141 | + pref 30, 0(DST) /* hint 30 (PrepareForStore): first cache line of the destination block */ |
| 142 | + lw $s0, 32(SRC) |
| 143 | + lw $s1, 36(SRC) |
| 144 | + lw $s2, 40(SRC) |
| 145 | + lw $s3, 44(SRC) |
| 146 | + pref 30, 32(DST) /* second cache line of the destination block */ |
| 147 | + lw $s4, 48(SRC) |
| 148 | + lw $s5, 52(SRC) |
| 149 | + lw $s6, 56(SRC) |
| 150 | + lw $s7, 60(SRC) |
| 151 | + addiu SRC, SRC, 64 /* source pointer moves on once the block is fully loaded */ |
| 152 | + sw $t0, 0(DST) /* write the 16 buffered words to the destination block */ |
| 153 | + sw $t1, 4(DST) |
| 154 | + sw $t2, 8(DST) |
| 155 | + sw $t3, 12(DST) |
| 156 | + sw $t4, 16(DST) |
| 157 | + sw $t5, 20(DST) |
| 158 | + sw $t6, 24(DST) |
| 159 | + sw $t7, 28(DST) |
| 160 | + sw $s0, 32(DST) |
| 161 | + sw $s1, 36(DST) |
| 162 | + sw $s2, 40(DST) |
| 163 | + sw $s3, 44(DST) |
| 164 | + sw $s4, 48(DST) |
| 165 | + sw $s5, 52(DST) |
| 166 | + sw $s6, 56(DST) |
| 167 | + sw $s7, 60(DST) |
| 168 | + bne DST, LIMIT, 0b /* the compare sees DST before the delay-slot increment */ |
| 169 | + addiu DST, DST, 64 /* delay slot: runs whether or not the branch is taken */ |
| 170 | +2: |
| 171 | + lw $s0, 0($sp) /* restore the registers saved on entry */ |
| 172 | + lw $s1, 4($sp) |
| 173 | + lw $s2, 8($sp) |
| 174 | + lw $s3, 12($sp) |
| 175 | + lw $s4, 16($sp) |
| 176 | + lw $s5, 20($sp) |
| 177 | + lw $s6, 24($sp) |
| 178 | + lw $s7, 28($sp) |
| 179 | + jr $ra |
| 180 | + addiu $sp, $sp, 32 /* delay slot: release the frame (non-trapping form) */ |
| 181 | +.endfunc |
| 182 | + |
| 183 | +#endif |
0 commit comments