
Commit cf12fa1

Minor optimizations to _scalar_inverse to save 4M
- Precalculate x^5 and use for "01010" patterns during accumulation. (net -2M)
- Further use of x^5 to allow shorter addition chain. (net -2M)
1 parent 1199492 commit cf12fa1
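
For context, a minimal standalone sketch of the first idea (not part of the commit; the modulus P, the helpers mulmod/sqrmod, and the sample value are invented for illustration): appending the bits 0101 to an accumulator the naive way costs four squarings plus two multiplications by x, while four squarings followed by a single multiplication by a precomputed x^5 yields the same value.

/* Illustration only, not from the commit: why a precomputed x^5 saves one
 * multiplication per "0101" run in the exponent. A small toy modulus stands
 * in for the secp256k1 scalar order. */
#include <stdint.h>
#include <stdio.h>

#define P 1000003ULL  /* small prime chosen for the demo */

static uint64_t mulmod(uint64_t a, uint64_t b) { return (a * b) % P; }
static uint64_t sqrmod(uint64_t a) { return mulmod(a, a); }

int main(void) {
    uint64_t x = 12345;
    uint64_t u5 = mulmod(sqrmod(sqrmod(x)), x);  /* x^5, binary 101 */
    uint64_t t1 = x, t2 = x;
    int i;

    /* Naive route: append "0101" as ((((t^2)^2 * x)^2)^2) * x -> 4 sqr + 2 mul. */
    t1 = mulmod(sqrmod(sqrmod(t1)), x);
    t1 = mulmod(sqrmod(sqrmod(t1)), x);

    /* With x^5 precomputed: t^(2^4) * x^5 -> 4 sqr + 1 mul. */
    for (i = 0; i < 4; i++) {
        t2 = sqrmod(t2);
    }
    t2 = mulmod(t2, u5);

    printf("%llu %llu\n", (unsigned long long)t1, (unsigned long long)t2);  /* both x^21 mod P */
    return 0;
}

The committed change applies the same trick with secp256k1_scalar_sqr/secp256k1_scalar_mul and the precomputed scalar u5, as the diff below shows.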

1 file changed: +54 -68 lines changed


src/scalar_impl.h

Lines changed: 54 additions & 68 deletions
@@ -66,76 +66,70 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
 #else
     secp256k1_scalar *t;
     int i;
-    /* First compute x ^ (2^N - 1) for some values of N. */
-    secp256k1_scalar x2, x3, x4, x6, x7, x8, x15, x30, x60, x120, x127;
+    /* First compute xN as x ^ (2^N - 1) for some values of N,
+     * and uM as x ^ M for some values of M. */
+    secp256k1_scalar x2, x3, x6, x8, x14, x28, x56, x112, x126;
+    secp256k1_scalar u2, u5;
 
-    secp256k1_scalar_sqr(&x2, x);
-    secp256k1_scalar_mul(&x2, &x2, x);
+    secp256k1_scalar_sqr(&u2, x);
+    secp256k1_scalar_mul(&x2, &u2, x);
+    secp256k1_scalar_mul(&u5, &u2, &x2);
+    secp256k1_scalar_mul(&x3, &u5, &u2);
 
-    secp256k1_scalar_sqr(&x3, &x2);
-    secp256k1_scalar_mul(&x3, &x3, x);
-
-    secp256k1_scalar_sqr(&x4, &x3);
-    secp256k1_scalar_mul(&x4, &x4, x);
-
-    secp256k1_scalar_sqr(&x6, &x4);
-    secp256k1_scalar_sqr(&x6, &x6);
-    secp256k1_scalar_mul(&x6, &x6, &x2);
-
-    secp256k1_scalar_sqr(&x7, &x6);
-    secp256k1_scalar_mul(&x7, &x7, x);
+    secp256k1_scalar_sqr(&x6, &x3);
+    for (i = 0; i < 2; i++) {
+        secp256k1_scalar_sqr(&x6, &x6);
+    }
+    secp256k1_scalar_mul(&x6, &x6, &x3);
 
-    secp256k1_scalar_sqr(&x8, &x7);
-    secp256k1_scalar_mul(&x8, &x8, x);
+    secp256k1_scalar_sqr(&x8, &x6);
+    secp256k1_scalar_sqr(&x8, &x8);
+    secp256k1_scalar_mul(&x8, &x8, &x2);
 
-    secp256k1_scalar_sqr(&x15, &x8);
-    for (i = 0; i < 6; i++) {
-        secp256k1_scalar_sqr(&x15, &x15);
+    secp256k1_scalar_sqr(&x14, &x8);
+    for (i = 0; i < 5; i++) {
+        secp256k1_scalar_sqr(&x14, &x14);
     }
-    secp256k1_scalar_mul(&x15, &x15, &x7);
+    secp256k1_scalar_mul(&x14, &x14, &x6);
 
-    secp256k1_scalar_sqr(&x30, &x15);
-    for (i = 0; i < 14; i++) {
-        secp256k1_scalar_sqr(&x30, &x30);
+    secp256k1_scalar_sqr(&x28, &x14);
+    for (i = 0; i < 13; i++) {
+        secp256k1_scalar_sqr(&x28, &x28);
     }
-    secp256k1_scalar_mul(&x30, &x30, &x15);
+    secp256k1_scalar_mul(&x28, &x28, &x14);
 
-    secp256k1_scalar_sqr(&x60, &x30);
-    for (i = 0; i < 29; i++) {
-        secp256k1_scalar_sqr(&x60, &x60);
+    secp256k1_scalar_sqr(&x56, &x28);
+    for (i = 0; i < 27; i++) {
+        secp256k1_scalar_sqr(&x56, &x56);
    }
-    secp256k1_scalar_mul(&x60, &x60, &x30);
+    secp256k1_scalar_mul(&x56, &x56, &x28);
 
-    secp256k1_scalar_sqr(&x120, &x60);
-    for (i = 0; i < 59; i++) {
-        secp256k1_scalar_sqr(&x120, &x120);
+    secp256k1_scalar_sqr(&x112, &x56);
+    for (i = 0; i < 55; i++) {
+        secp256k1_scalar_sqr(&x112, &x112);
     }
-    secp256k1_scalar_mul(&x120, &x120, &x60);
+    secp256k1_scalar_mul(&x112, &x112, &x56);
 
-    secp256k1_scalar_sqr(&x127, &x120);
-    for (i = 0; i < 6; i++) {
-        secp256k1_scalar_sqr(&x127, &x127);
+    secp256k1_scalar_sqr(&x126, &x112);
+    for (i = 0; i < 13; i++) {
+        secp256k1_scalar_sqr(&x126, &x126);
     }
-    secp256k1_scalar_mul(&x127, &x127, &x7);
+    secp256k1_scalar_mul(&x126, &x126, &x14);
 
-    /* Then accumulate the final result (t starts at x127). */
-    t = &x127;
-    for (i = 0; i < 2; i++) { /* 0 */
+    /* Then accumulate the final result (t starts at x126). */
+    t = &x126;
+    for (i = 0; i < 3; i++) {
         secp256k1_scalar_sqr(t, t);
     }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
+    secp256k1_scalar_mul(t, t, &u5); /* 101 */
     for (i = 0; i < 4; i++) { /* 0 */
         secp256k1_scalar_sqr(t, t);
     }
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (i = 0; i < 2; i++) { /* 0 */
-        secp256k1_scalar_sqr(t, t);
-    }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (i = 0; i < 2; i++) { /* 0 */
+    for (i = 0; i < 4; i++) { /* 0 */
         secp256k1_scalar_sqr(t, t);
     }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
+    secp256k1_scalar_mul(t, t, &u5); /* 101 */
     for (i = 0; i < 2; i++) { /* 0 */
         secp256k1_scalar_sqr(t, t);
     }
@@ -160,34 +154,26 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
         secp256k1_scalar_sqr(t, t);
     }
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (i = 0; i < 2; i++) { /* 0 */
-        secp256k1_scalar_sqr(t, t);
-    }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (i = 0; i < 2; i++) { /* 0 */
+    for (i = 0; i < 4; i++) { /* 0 */
         secp256k1_scalar_sqr(t, t);
     }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (i = 0; i < 5; i++) { /* 0 */
+    secp256k1_scalar_mul(t, t, &u5); /* 101 */
+    for (i = 0; i < 4; i++) { /* 0 */
         secp256k1_scalar_sqr(t, t);
     }
-    secp256k1_scalar_mul(t, t, &x4); /* 1111 */
-    for (i = 0; i < 2; i++) { /* 0 */
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (i = 0; i < 3; i++) {
         secp256k1_scalar_sqr(t, t);
     }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
+    secp256k1_scalar_mul(t, t, &u5); /* 101 */
     for (i = 0; i < 3; i++) { /* 00 */
         secp256k1_scalar_sqr(t, t);
     }
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (i = 0; i < 4; i++) { /* 000 */
-        secp256k1_scalar_sqr(t, t);
-    }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (i = 0; i < 2; i++) { /* 0 */
+    for (i = 0; i < 6; i++) { /* 000 */
         secp256k1_scalar_sqr(t, t);
     }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
+    secp256k1_scalar_mul(t, t, &u5); /* 101 */
     for (i = 0; i < 10; i++) { /* 0000000 */
         secp256k1_scalar_sqr(t, t);
     }
@@ -212,14 +198,14 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
         secp256k1_scalar_sqr(t, t);
     }
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (i = 0; i < 5; i++) { /* 0 */
+    for (i = 0; i < 4; i++) { /* 0 */
         secp256k1_scalar_sqr(t, t);
     }
-    secp256k1_scalar_mul(t, t, &x4); /* 1111 */
-    for (i = 0; i < 2; i++) { /* 0 */
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (i = 0; i < 3; i++) {
         secp256k1_scalar_sqr(t, t);
     }
-    secp256k1_scalar_mul(t, t, x); /* 1 */
+    secp256k1_scalar_mul(t, t, &u5); /* 101 */
     for (i = 0; i < 5; i++) { /* 000 */
         secp256k1_scalar_sqr(t, t);
     }
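
As a rough cross-check of the 4M claimed in the commit title (my own tally of the diff above, not stated in the commit): the removed lines contain 22 calls to secp256k1_scalar_mul (11 in the precomputation, 11 in the accumulation), while the added lines contain 18 (10 and 8 respectively), for a net saving of 4 multiplications; by the same tally the number of squarings performed drops by one.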
