Skip to content

Commit fd45352

Browse files
committed
asm/ct_inverse_mod_256-*.pl: fix another corner case.
Thanks to Guido Vranken for report.
1 parent 482ce95 commit fd45352

File tree

2 files changed

+45
-19
lines changed

2 files changed

+45
-19
lines changed

src/asm/ct_inverse_mod_256-armv8.pl

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -230,19 +230,31 @@
230230
and @t[7], @acc[7], @t[0]
231231
adcs @acc[2], @acc[2], @t[6]
232232
adcs @acc[3], @t[3], @t[7]
233-
adc @t[1], @t[1], xzr // @t[1] is 1 or 0
233+
adc @t[1], @t[1], xzr // @t[1] is 1, 0 or -1
234234
235235
neg @t[0], @t[1]
236-
237-
and @acc[4], @acc[4], @t[0] // subtract mod<<256 conditionally
238-
and @acc[5], @acc[5], @t[0]
239-
subs @acc[0], @acc[0], @acc[4]
240-
and @acc[6], @acc[6], @t[0]
241-
sbcs @acc[1], @acc[1], @acc[5]
242-
and @acc[7], @acc[7], @t[0]
243-
sbcs @acc[2], @acc[2], @acc[6]
236+
orr @t[1], @t[1], @t[0] // excess bit or sign as mask
237+
asr @t[0], @t[0], #63 // excess bit as mask
238+
239+
and @acc[4], @acc[4], @t[1] // mask |mod|
240+
and @acc[5], @acc[5], @t[1]
241+
and @acc[6], @acc[6], @t[1]
242+
and @acc[7], @acc[7], @t[1]
243+
244+
eor @acc[4], @acc[4], @t[0] // conditionally negate |mod|
245+
eor @acc[5], @acc[5], @t[0]
246+
adds @acc[4], @acc[4], @t[0], lsr#63
247+
eor @acc[6], @acc[6], @t[0]
248+
adcs @acc[5], @acc[5], xzr
249+
eor @acc[7], @acc[7], @t[0]
250+
adcs @acc[6], @acc[6], xzr
251+
adc @acc[7], @acc[7], xzr
252+
253+
adds @acc[0], @acc[0], @acc[4] // final adjustment for |mod|<<256
254+
adcs @acc[1], @acc[1], @acc[5]
255+
adcs @acc[2], @acc[2], @acc[6]
244256
stp @acc[0], @acc[1], [$out_ptr,#8*4]
245-
sbcs @acc[3], @acc[3], @acc[7]
257+
adc @acc[3], @acc[3], @acc[7]
246258
stp @acc[2], @acc[3], [$out_ptr,#8*6]
247259
248260
add sp, sp, #$frame

src/asm/ct_inverse_mod_256-x86_64.pl

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -297,20 +297,34 @@
297297
adc %rdx, @acc[7]
298298
adc \$0, %rax
299299
300-
neg %rax # excess bit as mask
300+
mov %rax, %rdx
301+
neg %rax
302+
or %rax, %rdx # excess bit or sign as mask
303+
sar \$63, %rax # excess bit as mask
301304
302-
mov %rax, @acc[0] # mask |modulus|
303-
mov %rax, @acc[1]
305+
mov %rdx, @acc[0] # mask |modulus|
306+
mov %rdx, @acc[1]
304307
and 8*0($in_ptr), @acc[0]
305-
mov %rax, @acc[2]
308+
mov %rdx, @acc[2]
306309
and 8*1($in_ptr), @acc[1]
307310
and 8*2($in_ptr), @acc[2]
308-
and 8*3($in_ptr), %rax
311+
and 8*3($in_ptr), %rdx
309312
310-
sub @acc[0], @acc[4] # conditionally subtract |modulus|<<256
311-
sbb @acc[1], @acc[5]
312-
sbb @acc[2], @acc[6]
313-
sbb %rax, @acc[7]
313+
xor %rax, @acc[0] # conditionally negate |modulus|
314+
xor %rcx, %rcx
315+
xor %rax, @acc[1]
316+
sub %rax, %rcx
317+
xor %rax, @acc[2]
318+
xor %rax, %rdx
319+
add %rcx, @acc[0]
320+
adc \$0, @acc[1]
321+
adc \$0, @acc[2]
322+
adc \$0, %rdx
323+
324+
add @acc[0], @acc[4] # final adjustment for |modulus|<<256
325+
adc @acc[1], @acc[5]
326+
adc @acc[2], @acc[6]
327+
adc %rdx, @acc[7]
314328
315329
mov @acc[4], 8*4($out_ptr) # store absolute value
316330
mov @acc[5], 8*5($out_ptr)

0 commit comments

Comments
 (0)