Skip to content

Commit 4cc8f52

Browse files
committed
Merge bitcoin#437: Unroll secp256k1_fe_(get|set)_b32 to make them much faster.
Commits included in this merge:
- a2b6b19 Fix benchmark print_number infinite loop. (Gregory Maxwell)
- 8b7680a Unroll secp256k1_fe_(get|set)_b32 for 10x26. (Gregory Maxwell)
- aa84990 Unroll secp256k1_fe_(get|set)_b32 for 5x52. (CryptoGuru)

Tree-SHA512: b17fa454dc4ef614305e10575b0f87c3b37d398d6d3996d5bbbb8e27b0d9841aa13b3cffe93d13dc637c8c3071f8565919574ec0b07f02bf3b0eb6faf4b93251
2 parents 1199492 + a2b6b19 commit 4cc8f52

File tree

3 files changed

+110
-44
lines changed

3 files changed

+110
-44
lines changed

src/bench.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ void print_number(double x) {
2323
if (y < 0.0) {
2424
y = -y;
2525
}
26-
while (y < 100.0) {
26+
while (y > 0 && y < 100.0) {
2727
y *= 10.0;
2828
c++;
2929
}

src/field_10x26_impl.h

Lines changed: 43 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -321,17 +321,17 @@ static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
321321
}
322322

323323
static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
324-
int i;
325-
r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
326-
r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
327-
for (i=0; i<32; i++) {
328-
int j;
329-
for (j=0; j<4; j++) {
330-
int limb = (8*i+2*j)/26;
331-
int shift = (8*i+2*j)%26;
332-
r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
333-
}
334-
}
324+
r->n[0] = (uint32_t)a[31] | ((uint32_t)a[30] << 8) | ((uint32_t)a[29] << 16) | ((uint32_t)(a[28] & 0x3) << 24);
325+
r->n[1] = (uint32_t)((a[28] >> 2) & 0x3f) | ((uint32_t)a[27] << 6) | ((uint32_t)a[26] << 14) | ((uint32_t)(a[25] & 0xf) << 22);
326+
r->n[2] = (uint32_t)((a[25] >> 4) & 0xf) | ((uint32_t)a[24] << 4) | ((uint32_t)a[23] << 12) | ((uint32_t)(a[22] & 0x3f) << 20);
327+
r->n[3] = (uint32_t)((a[22] >> 6) & 0x3) | ((uint32_t)a[21] << 2) | ((uint32_t)a[20] << 10) | ((uint32_t)a[19] << 18);
328+
r->n[4] = (uint32_t)a[18] | ((uint32_t)a[17] << 8) | ((uint32_t)a[16] << 16) | ((uint32_t)(a[15] & 0x3) << 24);
329+
r->n[5] = (uint32_t)((a[15] >> 2) & 0x3f) | ((uint32_t)a[14] << 6) | ((uint32_t)a[13] << 14) | ((uint32_t)(a[12] & 0xf) << 22);
330+
r->n[6] = (uint32_t)((a[12] >> 4) & 0xf) | ((uint32_t)a[11] << 4) | ((uint32_t)a[10] << 12) | ((uint32_t)(a[9] & 0x3f) << 20);
331+
r->n[7] = (uint32_t)((a[9] >> 6) & 0x3) | ((uint32_t)a[8] << 2) | ((uint32_t)a[7] << 10) | ((uint32_t)a[6] << 18);
332+
r->n[8] = (uint32_t)a[5] | ((uint32_t)a[4] << 8) | ((uint32_t)a[3] << 16) | ((uint32_t)(a[2] & 0x3) << 24);
333+
r->n[9] = (uint32_t)((a[2] >> 2) & 0x3f) | ((uint32_t)a[1] << 6) | ((uint32_t)a[0] << 14);
334+
335335
if (r->n[9] == 0x3FFFFFUL && (r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) {
336336
return 0;
337337
}
@@ -345,21 +345,42 @@ static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
345345

346346
/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
347347
static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) {
348-
int i;
349348
#ifdef VERIFY
350349
VERIFY_CHECK(a->normalized);
351350
secp256k1_fe_verify(a);
352351
#endif
353-
for (i=0; i<32; i++) {
354-
int j;
355-
int c = 0;
356-
for (j=0; j<4; j++) {
357-
int limb = (8*i+2*j)/26;
358-
int shift = (8*i+2*j)%26;
359-
c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
360-
}
361-
r[31-i] = c;
362-
}
352+
r[0] = (a->n[9] >> 14) & 0xff;
353+
r[1] = (a->n[9] >> 6) & 0xff;
354+
r[2] = ((a->n[9] & 0x3F) << 2) | ((a->n[8] >> 24) & 0x3);
355+
r[3] = (a->n[8] >> 16) & 0xff;
356+
r[4] = (a->n[8] >> 8) & 0xff;
357+
r[5] = a->n[8] & 0xff;
358+
r[6] = (a->n[7] >> 18) & 0xff;
359+
r[7] = (a->n[7] >> 10) & 0xff;
360+
r[8] = (a->n[7] >> 2) & 0xff;
361+
r[9] = ((a->n[7] & 0x3) << 6) | ((a->n[6] >> 20) & 0x3f);
362+
r[10] = (a->n[6] >> 12) & 0xff;
363+
r[11] = (a->n[6] >> 4) & 0xff;
364+
r[12] = ((a->n[6] & 0xf) << 4) | ((a->n[5] >> 22) & 0xf);
365+
r[13] = (a->n[5] >> 14) & 0xff;
366+
r[14] = (a->n[5] >> 6) & 0xff;
367+
r[15] = ((a->n[5] & 0x3f) << 2) | ((a->n[4] >> 24) & 0x3);
368+
r[16] = (a->n[4] >> 16) & 0xff;
369+
r[17] = (a->n[4] >> 8) & 0xff;
370+
r[18] = a->n[4] & 0xff;
371+
r[19] = (a->n[3] >> 18) & 0xff;
372+
r[20] = (a->n[3] >> 10) & 0xff;
373+
r[21] = (a->n[3] >> 2) & 0xff;
374+
r[22] = ((a->n[3] & 0x3) << 6) | ((a->n[2] >> 20) & 0x3f);
375+
r[23] = (a->n[2] >> 12) & 0xff;
376+
r[24] = (a->n[2] >> 4) & 0xff;
377+
r[25] = ((a->n[2] & 0xf) << 4) | ((a->n[1] >> 22) & 0xf);
378+
r[26] = (a->n[1] >> 14) & 0xff;
379+
r[27] = (a->n[1] >> 6) & 0xff;
380+
r[28] = ((a->n[1] & 0x3f) << 2) | ((a->n[0] >> 24) & 0x3);
381+
r[29] = (a->n[0] >> 16) & 0xff;
382+
r[30] = (a->n[0] >> 8) & 0xff;
383+
r[31] = a->n[0] & 0xff;
363384
}
364385

365386
SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe *r, const secp256k1_fe *a, int m) {

src/field_5x52_impl.h

Lines changed: 66 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -284,16 +284,40 @@ static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
284284
}
285285

286286
static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
287-
int i;
288-
r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
289-
for (i=0; i<32; i++) {
290-
int j;
291-
for (j=0; j<2; j++) {
292-
int limb = (8*i+4*j)/52;
293-
int shift = (8*i+4*j)%52;
294-
r->n[limb] |= (uint64_t)((a[31-i] >> (4*j)) & 0xF) << shift;
295-
}
296-
}
287+
r->n[0] = (uint64_t)a[31]
288+
| ((uint64_t)a[30] << 8)
289+
| ((uint64_t)a[29] << 16)
290+
| ((uint64_t)a[28] << 24)
291+
| ((uint64_t)a[27] << 32)
292+
| ((uint64_t)a[26] << 40)
293+
| ((uint64_t)(a[25] & 0xF) << 48);
294+
r->n[1] = (uint64_t)((a[25] >> 4) & 0xF)
295+
| ((uint64_t)a[24] << 4)
296+
| ((uint64_t)a[23] << 12)
297+
| ((uint64_t)a[22] << 20)
298+
| ((uint64_t)a[21] << 28)
299+
| ((uint64_t)a[20] << 36)
300+
| ((uint64_t)a[19] << 44);
301+
r->n[2] = (uint64_t)a[18]
302+
| ((uint64_t)a[17] << 8)
303+
| ((uint64_t)a[16] << 16)
304+
| ((uint64_t)a[15] << 24)
305+
| ((uint64_t)a[14] << 32)
306+
| ((uint64_t)a[13] << 40)
307+
| ((uint64_t)(a[12] & 0xF) << 48);
308+
r->n[3] = (uint64_t)((a[12] >> 4) & 0xF)
309+
| ((uint64_t)a[11] << 4)
310+
| ((uint64_t)a[10] << 12)
311+
| ((uint64_t)a[9] << 20)
312+
| ((uint64_t)a[8] << 28)
313+
| ((uint64_t)a[7] << 36)
314+
| ((uint64_t)a[6] << 44);
315+
r->n[4] = (uint64_t)a[5]
316+
| ((uint64_t)a[4] << 8)
317+
| ((uint64_t)a[3] << 16)
318+
| ((uint64_t)a[2] << 24)
319+
| ((uint64_t)a[1] << 32)
320+
| ((uint64_t)a[0] << 40);
297321
if (r->n[4] == 0x0FFFFFFFFFFFFULL && (r->n[3] & r->n[2] & r->n[1]) == 0xFFFFFFFFFFFFFULL && r->n[0] >= 0xFFFFEFFFFFC2FULL) {
298322
return 0;
299323
}
@@ -307,21 +331,42 @@ static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
307331

308332
/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
309333
static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) {
310-
int i;
311334
#ifdef VERIFY
312335
VERIFY_CHECK(a->normalized);
313336
secp256k1_fe_verify(a);
314337
#endif
315-
for (i=0; i<32; i++) {
316-
int j;
317-
int c = 0;
318-
for (j=0; j<2; j++) {
319-
int limb = (8*i+4*j)/52;
320-
int shift = (8*i+4*j)%52;
321-
c |= ((a->n[limb] >> shift) & 0xF) << (4 * j);
322-
}
323-
r[31-i] = c;
324-
}
338+
r[0] = (a->n[4] >> 40) & 0xFF;
339+
r[1] = (a->n[4] >> 32) & 0xFF;
340+
r[2] = (a->n[4] >> 24) & 0xFF;
341+
r[3] = (a->n[4] >> 16) & 0xFF;
342+
r[4] = (a->n[4] >> 8) & 0xFF;
343+
r[5] = a->n[4] & 0xFF;
344+
r[6] = (a->n[3] >> 44) & 0xFF;
345+
r[7] = (a->n[3] >> 36) & 0xFF;
346+
r[8] = (a->n[3] >> 28) & 0xFF;
347+
r[9] = (a->n[3] >> 20) & 0xFF;
348+
r[10] = (a->n[3] >> 12) & 0xFF;
349+
r[11] = (a->n[3] >> 4) & 0xFF;
350+
r[12] = ((a->n[2] >> 48) & 0xF) | ((a->n[3] & 0xF) << 4);
351+
r[13] = (a->n[2] >> 40) & 0xFF;
352+
r[14] = (a->n[2] >> 32) & 0xFF;
353+
r[15] = (a->n[2] >> 24) & 0xFF;
354+
r[16] = (a->n[2] >> 16) & 0xFF;
355+
r[17] = (a->n[2] >> 8) & 0xFF;
356+
r[18] = a->n[2] & 0xFF;
357+
r[19] = (a->n[1] >> 44) & 0xFF;
358+
r[20] = (a->n[1] >> 36) & 0xFF;
359+
r[21] = (a->n[1] >> 28) & 0xFF;
360+
r[22] = (a->n[1] >> 20) & 0xFF;
361+
r[23] = (a->n[1] >> 12) & 0xFF;
362+
r[24] = (a->n[1] >> 4) & 0xFF;
363+
r[25] = ((a->n[0] >> 48) & 0xF) | ((a->n[1] & 0xF) << 4);
364+
r[26] = (a->n[0] >> 40) & 0xFF;
365+
r[27] = (a->n[0] >> 32) & 0xFF;
366+
r[28] = (a->n[0] >> 24) & 0xFF;
367+
r[29] = (a->n[0] >> 16) & 0xFF;
368+
r[30] = (a->n[0] >> 8) & 0xFF;
369+
r[31] = a->n[0] & 0xFF;
325370
}
326371

327372
SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe *r, const secp256k1_fe *a, int m) {

0 commit comments

Comments
 (0)