@@ -220,95 +220,43 @@ void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a);
} while(0)
/* Add a**2 to [c0,c1]. c0,c1 must both be 0 on input. */
- #define sqr2(c0,c1,a) do {\
- uint128_t t = (uint128_t)(a) * (a); \
- VERIFY_CHECK(c0 == 0); \
- VERIFY_CHECK(c1 == 0); \
- c0 = t; \
- c1 = t >> 64; \
- } while(0)
+ #define sqr2(c0,c1,a) mul2(c0,c1,a,a)
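/* Illustrative sketch (not part of this patch): the squaring helpers now
 * forward to the corresponding multiply macros.  Assuming mul2(c0,c1,a,b)
 * adds the full 128-bit product a*b into the initially-zero pair [c0,c1]
 * (which is what sqr2's contract above requires), the forwarding matches the
 * removed assignment-based version.  A self-contained check, with a local
 * stand-in for mul2 and uint128_t assumed to be unsigned __int128: */
#include <assert.h>
#include <stdint.h>
typedef unsigned __int128 uint128_t;
static void mul2_stub(uint64_t *c0, uint64_t *c1, uint64_t a, uint64_t b) {
    uint128_t t = (uint128_t)a * b;  /* assumed mul2 semantics */
    *c0 = (uint64_t)t;               /* [c0,c1] start at 0, so += and = agree */
    *c1 = (uint64_t)(t >> 64);
}
int main(void) {
    uint64_t a = 0xFFFFFFFFFFFFFFFFu, c0 = 0, c1 = 0;
    mul2_stub(&c0, &c1, a, a);       /* what sqr2(c0,c1,a) now expands to */
    /* old sqr2: c0 = low 64 bits of a*a, c1 = high 64 bits of a*a */
    assert(c0 == 1u && c1 == 0xFFFFFFFFFFFFFFFEu);
    return 0;
}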
/* Add a*b to [c0,c1,c2]. c2 must never overflow. */
#define muladd3(c0,c1,c2,a,b) do {\
- uint64_t tl, th; \
- { \
- uint128_t t = (uint128_t)(a) * (b); \
- th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
- tl = t; \
- } \
- c0 += tl; /* overflow is handled on the next line */ \
- th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
- c1 += th; /* overflow is handled on the next line */ \
- c2 += (c1 < th); /* never overflows by contract (verified in the next line) */ \
- VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
+ uint128_t t = (uint128_t)(a) * (b); \
+ uint128_t acc = (uint128_t)c0 + (uint64_t)t; \
+ c0 = acc; acc >>= 64; \
+ acc += c1; acc += (t >> 64); \
+ c1 = acc; c2 += (acc >> 64); \
+ VERIFY_CHECK(c2 >= (acc >> 64)); \
} while(0)
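/* Illustrative sketch (not part of this patch): the rewritten muladd3 trades
 * the hand-written carry propagation for a 128-bit accumulator.  The low half
 * of the product is folded into c0, the carry plus the high half into c1, and
 * whatever ends up above bit 64 of that sum goes into c2.  As a standalone
 * function, with uint128_t assumed to be unsigned __int128 and muladd3_sketch
 * a name used only for this illustration: */
#include <stdint.h>
typedef unsigned __int128 uint128_t;
static void muladd3_sketch(uint64_t *c0, uint64_t *c1, uint64_t *c2,
                           uint64_t a, uint64_t b) {
    uint128_t t = (uint128_t)a * b;               /* full 128-bit product  */
    uint128_t acc = (uint128_t)*c0 + (uint64_t)t; /* c0 + low half of t    */
    *c0 = (uint64_t)acc; acc >>= 64;              /* keep 64 bits, carry   */
    acc += *c1; acc += (t >> 64);                 /* carry + c1 + high(t)  */
    *c1 = (uint64_t)acc;                          /* low 64 bits into c1   */
    *c2 += (uint64_t)(acc >> 64);                 /* remaining carry to c2 */
}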
/* Add a**2 to [c0,c1,c2]. c2 must never overflow. */
- #define sqradd3(c0,c1,c2,a) do {\
- uint64_t tl, th; \
- { \
- uint128_t t = (uint128_t)(a) * (a); \
- th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
- tl = t; \
- } \
- c0 += tl; /* overflow is handled on the next line */ \
- th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
- c1 += th; /* overflow is handled on the next line */ \
- c2 += (c1 < th); /* never overflows by contract (verified in the next line) */ \
- VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
- } while(0)
+ #define sqradd3(c0,c1,c2,a) muladd3(c0,c1,c2,a,a)
/* Add 2*a*b to [c0,c1,c2]. c2 must never overflow. */
#define mul2add3(c0,c1,c2,a,b) do {\
- uint64_t tl, th, th2, tl2; \
- { \
- uint128_t t = (uint128_t)(a) * (b); \
- th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
- tl = t; \
- } \
- th2 = th + th; /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
- c2 += (th2 < th); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
- tl2 = tl + tl; /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
- th2 += (tl2 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
- c0 += tl2; /* overflow is handled on the next line */ \
- th2 += (c0 < tl2); /* second overflow is handled on the next line */ \
- c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
- c1 += th2; /* overflow is handled on the next line */ \
- c2 += (c1 < th2); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
+ uint128_t t = (uint128_t)(a) * (b); \
+ uint128_t acc = (uint128_t)c0 + (((uint128_t)((uint64_t)t)) << 1); \
+ c0 = acc; acc >>= 64; \
+ acc += c1; acc += ((t >> 64) << 1); \
+ c1 = acc; c2 += (acc >> 64); \
+ VERIFY_CHECK(c2 >= (acc >> 64)); \
} while(0)
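/* Illustrative sketch (not part of this patch): for 2*a*b the product cannot
 * simply be doubled inside the uint128_t, since a*b can use all 128 bits and
 * doubling it would drop the top bit.  The low and high 64-bit halves are
 * therefore doubled separately in the 128-bit accumulator, where each doubled
 * half still fits.  uint128_t is again assumed to be unsigned __int128, and
 * mul2add3_sketch is a name used only for this illustration: */
#include <stdint.h>
typedef unsigned __int128 uint128_t;
static void mul2add3_sketch(uint64_t *c0, uint64_t *c1, uint64_t *c2,
                            uint64_t a, uint64_t b) {
    uint128_t t = (uint128_t)a * b;
    uint128_t acc = (uint128_t)*c0 + (((uint128_t)(uint64_t)t) << 1); /* c0 + 2*low(t)    */
    *c0 = (uint64_t)acc; acc >>= 64;
    acc += *c1; acc += (t >> 64) << 1;                                /* + c1 + 2*high(t) */
    *c1 = (uint64_t)acc;
    *c2 += (uint64_t)(acc >> 64);                                     /* carry into c2    */
}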
/* Add a*b to [c0,c1]. c1 must never overflow. */
#define muladd2(c0,c1,a,b) do {\
- uint64_t tl, th; \
- ON_VERIFY(uint64_t old_c1 = c1;) \
- { \
- uint128_t t = (uint128_t)(a) * (b); \
- th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
- tl = t; \
- } \
- c0 += tl; /* overflow is handled on the next line */ \
- th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
- c1 += th; /* overflow is handled on the next line */ \
- ON_VERIFY(VERIFY_CHECK(c1 >= old_c1);) \
+ uint128_t t = (uint128_t)(a) * (b); \
+ uint128_t acc = (uint128_t)c0 + (uint64_t)t; \
+ c0 = acc; acc >>= 64; \
+ acc += c1; acc += (t >> 64); \
+ c1 = acc; \
+ VERIFY_CHECK((acc >> 64) == 0); \
} while(0)
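/* Illustrative sketch (not part of this patch): muladd2 has the stronger
 * contract that c1 itself must never overflow, so rather than propagating a
 * further carry, the new macro checks that nothing remains above bit 64 of
 * the accumulator after the final addition (taking over from the removed
 * old_c1-based ON_VERIFY check).  uint128_t is again assumed to be
 * unsigned __int128, assert() stands in for VERIFY_CHECK, and muladd2_sketch
 * is a name used only for this illustration: */
#include <assert.h>
#include <stdint.h>
typedef unsigned __int128 uint128_t;
static void muladd2_sketch(uint64_t *c0, uint64_t *c1, uint64_t a, uint64_t b) {
    uint128_t t = (uint128_t)a * b;
    uint128_t acc = (uint128_t)*c0 + (uint64_t)t; /* c0 + low half of t     */
    *c0 = (uint64_t)acc; acc >>= 64;              /* carry out of c0        */
    acc += *c1; acc += (t >> 64);                 /* carry + c1 + high half */
    *c1 = (uint64_t)acc;
    assert((acc >> 64) == 0);                     /* c1 must not overflow   */
}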
/* Add a**2 to [c0,c1]. c1 must never overflow. */
- #define sqradd2(c0,c1,a) do {\
- uint64_t tl, th; \
- ON_VERIFY(uint64_t old_c1 = c1;) \
- { \
- uint128_t t = (uint128_t)(a) * (a); \
- th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
- tl = t; \
- } \
- c0 += tl; /* overflow is handled on the next line */ \
- th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
- c1 += th; /* overflow is handled on the next line */ \
- ON_VERIFY(VERIFY_CHECK(c1 >= old_c1);) \
- } while(0)
+ #define sqradd2(c0,c1,a) muladd2(c0,c1,a,a)
/* Add [a0,a1,a2,a3,a4] to [c0,c1,c2,c3,c4]. c4 cannot overflow. */
#define add5x5(c0,c1,c2,c3,c4,a0,a1,a2,a3,a4) do {\
@@ -350,7 +298,6 @@ void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a);
c3 = tmp; \
} while(0)
-
/* Add a to [c0,c1,c2]. c2 must never overflow. */
#define add3(c0,c1,c2,a) do {\
uint128_t tmp = (uint128_t)c0 + (a); \