27
27
28
28
// intRes = longIn1 * longIn2 >> 24
29
29
// uses:
30
- // A[tmp] to store 0
31
- // B[tmp] to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result.
32
- // note that the lower two bytes and the upper byte of the 48bit result are not calculated.
33
- // this can cause the result to be out by one as the lower bytes may cause carries into the upper ones.
34
- // B A are bits 24-39 and are the returned value
35
- // C B A is longIn1
36
- // D C B A is longIn2
30
+ // r1, r0 for the result of mul.
31
+ // [tmp1] to store 0.
32
+ // [tmp2] to store bits 16-23 of the 56 bit result. The top bit of [tmp2] is used for rounding.
33
+ // Note that the lower two bytes and the upper two bytes of the 56 bit result are not calculated.
34
+ // This can cause the result to be out by one as the lower bytes may cause carries into the upper ones.
35
+ // [intRes] (B A) is bits 24-39 and is the returned value.
36
+ // [longIn1] (C B A) is a 24 bit parameter.
37
+ // [longIn2] (D C B A) is a 32 bit parameter.
37
38
//
38
39
FORCE_INLINE static uint16_t MultiU24X32toH16 (uint32_t longIn1 , uint32_t longIn2 ) {
39
40
uint8_t tmp1 ;
@@ -66,11 +67,9 @@ FORCE_INLINE static uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2
66
67
A ("add %[tmp2], r1" )
67
68
A ("adc %A[intRes], %[tmp1]" )
68
69
A ("adc %B[intRes], %[tmp1]" )
69
- A ("lsr %[tmp2]" )
70
- A ("adc %A[intRes], %[tmp1]" )
71
- A ("adc %B[intRes], %[tmp1]" )
72
70
A ("mul %D[longIn2], %A[longIn1]" )
73
- A ("add %A[intRes], r0" )
71
+ A ("lsl %[tmp2]" )
72
+ A ("adc %A[intRes], r0" )
74
73
A ("adc %B[intRes], r1" )
75
74
A ("mul %D[longIn2], %B[longIn1]" )
76
75
A ("add %B[intRes], r0" )
@@ -85,22 +84,25 @@ FORCE_INLINE static uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2
85
84
return intRes ;
86
85
}
87
86
88
- // intRes = intIn1 * intIn2 >> 16
87
+ // intRes = charIn1 * intIn2 >> 8
89
88
// uses:
90
- // r26 to store 0
91
- // r27 to store the byte 1 of the 24 bit result
92
- FORCE_INLINE static uint16_t MultiU16X8toH16 (uint8_t charIn1 , uint16_t intIn2 ) {
89
+ // r1, r0 for the result of mul. After the second mul, r0 holds bits 0-7 of the 24 bit result and
90
+ // the top bit of r0 is used for rounding.
91
+ // [tmp] to store 0.
92
+ // [intRes] (B A) is bits 8-23 of the 24 bit result and is the returned value.
93
+ // [charIn1] is an 8 bit parameter.
94
+ // [intIn2] (B A) is a 16 bit parameter.
95
+ //
96
+ FORCE_INLINE static uint16_t MultiU8X16toH16 (uint8_t charIn1 , uint16_t intIn2 ) {
93
97
uint8_t tmp ;
94
98
uint16_t intRes ;
95
99
__asm__ __volatile__ (
96
100
A ("clr %[tmp]" )
97
101
A ("mul %[charIn1], %B[intIn2]" )
98
102
A ("movw %A[intRes], r0" )
99
103
A ("mul %[charIn1], %A[intIn2]" )
100
- A ("add %A[intRes], r1" )
101
- A ("adc %B[intRes], %[tmp]" )
102
- A ("lsr r0" )
103
- A ("adc %A[intRes], %[tmp]" )
104
+ A ("lsl r0" )
105
+ A ("adc %A[intRes], r1" )
104
106
A ("adc %B[intRes], %[tmp]" )
105
107
A ("clr r1" )
106
108
: [intRes ] "= & r " (intRes),
0 commit comments