Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct secp256k1_gej_add_ge with 3% performance hit on signing #261

Merged
merged 4 commits into from
Jun 29, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 75 additions & 20 deletions src/group_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,9 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej_t *r, const secp256k1_gej_t
static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
/* Operations: 7 mul, 5 sqr, 5 normalize, 17 mul_int/add/negate/cmov */
static const secp256k1_fe_t fe_1 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
secp256k1_fe_t zz, u1, u2, s1, s2, z, t, m, n, q, rr;
int infinity;
secp256k1_fe_t zz, u1, u2, s1, s2, z, t, tt, m, n, q, rr;
secp256k1_fe_t m_alt, rr_alt;
int infinity, degenerate;
VERIFY_CHECK(!b->infinity);
VERIFY_CHECK(a->infinity == 0 || a->infinity == 1);

Expand All @@ -488,6 +489,34 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
* Y3 = 4*(R*(3*Q-2*R^2)-M^4)
* Z3 = 2*M*Z
* (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.)
*
* This formula has the benefit of being the same for both addition
* of distinct points and doubling. However, it breaks down in the
* case that either point is infinity, or that y1 = -y2. We handle
* these cases in the following ways:
*
* - If b is infinity we simply bail by means of a VERIFY_CHECK.
*
* - If a is infinity, we detect this, and at the end of the
* computation replace the result (which will be meaningless,
* but we compute to be constant-time) with b.x : b.y : 1.
*
* - If a = -b, we have y1 = -y2, which is a degenerate case.
* But here the answer is infinity, so we simply set the
* infinity flag of the result, overriding the computed values
* without even needing to cmov.
*
* - If y1 = -y2 but x1 != x2, which does occur thanks to certain
* properties of our curve (specifically, 1 has nontrivial cube
* roots in our field, and the curve equation has no x coefficient)
* then the answer is not infinity but also not given by the above
* equation. In this case, we cmov in place an alternate expression
* for lambda. Specifically (y1 - y2)/(x1 - x2). Where both these
* expressions for lambda are defined, they are equal, and can be
* obtained from each other by multiplication by (y1 + y2)/(y1 + y2)
* then substitution of x^3 + 7 for y^2 (using the curve equation).
* For all pairs of nonzero points (a, b) at least one is defined,
* so this covers everything.
*/

secp256k1_fe_sqr(&zz, &a->z); /* z = Z1^2 */
Expand All @@ -499,29 +528,55 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
z = a->z; /* z = Z = Z1*Z2 (8) */
t = u1; secp256k1_fe_add(&t, &u2); /* t = T = U1+U2 (2) */
m = s1; secp256k1_fe_add(&m, &s2); /* m = M = S1+S2 (2) */
secp256k1_fe_sqr(&n, &m); /* n = M^2 (1) */
secp256k1_fe_mul(&q, &n, &t); /* q = Q = T*M^2 (1) */
secp256k1_fe_sqr(&n, &n); /* n = M^4 (1) */
secp256k1_fe_sqr(&rr, &t); /* rr = T^2 (1) */
secp256k1_fe_mul(&t, &u1, &u2); secp256k1_fe_negate(&t, &t, 1); /* t = -U1*U2 (2) */
secp256k1_fe_add(&rr, &t); /* rr = R = T^2-U1*U2 (3) */
secp256k1_fe_sqr(&t, &rr); /* t = R^2 (1) */
secp256k1_fe_mul(&r->z, &m, &z); /* r->z = M*Z (1) */
secp256k1_fe_mul(&tt, &u1, &u2); secp256k1_fe_negate(&tt, &tt, 1); /* tt = -U1*U2 (2) */
secp256k1_fe_add(&rr, &tt); /* rr = R = T^2-U1*U2 (3) */
/** If lambda = R/M = 0/0 we have a problem (except in the "trivial"
* case that Z = z1z2 = 0, and this is special-cased later on). */
degenerate = secp256k1_fe_normalizes_to_zero(&m) &
secp256k1_fe_normalizes_to_zero(&rr);
/* This only occurs when y1 == -y2 and x1^3 == x2^3, but x1 != x2.
* This means either x1 == beta*x2 or beta*x1 == x2, where beta is
* a nontrivial cube root of one. In either case, an alternate
* non-indeterminate expression for lambda is (y1 - y2)/(x1 - x2),
* so we set R/M equal to this. */
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I had the expression for lambda right the first time :) it should be (y1 - y2)/(x1 - x2).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it (y1 - y2)/(x1 - x2) or the inverse? The comment still says the inverse.

secp256k1_fe_negate(&rr_alt, &s2, 1); /* rr = -Y2*Z1^3 */
secp256k1_fe_add(&rr_alt, &s1); /* rr = Y1*Z2^3 - Y2*Z1^3 */
secp256k1_fe_negate(&m_alt, &u2, 1); /* m = -X2*Z1^2 */
secp256k1_fe_add(&m_alt, &u1); /* m = X1*Z2^2 - X2*Z1^2 */

secp256k1_fe_cmov(&rr_alt, &rr, !degenerate);
secp256k1_fe_cmov(&m_alt, &m, !degenerate);
/* Now Ralt / Malt = lambda and is guaranteed not to be 0/0.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice if you could also update the algebra in the comments above to reflect this case.

* From here on out Ralt and Malt represent the numerator
* and denominator of lambda; R and M represent the explicit
* expressions x1^2 + x2^2 + x1x2 and y1 + y2. */
secp256k1_fe_sqr(&n, &m_alt); /* n = Malt^2 (1) */
secp256k1_fe_mul(&q, &n, &t); /* q = Q = T*Malt^2 (1) */
/* These two lines use the observation that either M == Malt or M == 0,
* so M^3 * Malt is either Malt^4 (which is computed by squaring), or
* zero (which is "computed" by cmov). So the cost is one squaring
* versus two multiplications. */
secp256k1_fe_sqr(&n, &n); /* n = M^3 * Malt (1) */
secp256k1_fe_cmov(&n, &m, degenerate);
secp256k1_fe_normalize_weak(&n);
secp256k1_fe_sqr(&t, &rr_alt); /* t = Ralt^2 (1) */
secp256k1_fe_mul(&r->z, &m_alt, &z); /* r->z = Malt*Z (1) */
infinity = secp256k1_fe_normalizes_to_zero(&r->z) * (1 - a->infinity);
secp256k1_fe_mul_int(&r->z, 2); /* r->z = Z3 = 2*M*Z (2) */
r->x = t; /* r->x = R^2 (1) */
secp256k1_fe_mul_int(&r->z, 2); /* r->z = Z3 = 2*Malt*Z (2) */
r->x = t; /* r->x = Ralt^2 (1) */
secp256k1_fe_negate(&q, &q, 1); /* q = -Q (2) */
secp256k1_fe_add(&r->x, &q); /* r->x = R^2-Q (3) */
secp256k1_fe_add(&r->x, &q); /* r->x = Ralt^2-Q (3) */
secp256k1_fe_normalize(&r->x);
secp256k1_fe_mul_int(&q, 3); /* q = -3*Q (6) */
secp256k1_fe_mul_int(&t, 2); /* t = 2*R^2 (2) */
secp256k1_fe_add(&t, &q); /* t = 2*R^2-3*Q (8) */
secp256k1_fe_mul(&t, &t, &rr); /* t = R*(2*R^2-3*Q) (1) */
secp256k1_fe_add(&t, &n); /* t = R*(2*R^2-3*Q)+M^4 (2) */
secp256k1_fe_negate(&r->y, &t, 2); /* r->y = R*(3*Q-2*R^2)-M^4 (3) */
t = r->x;
secp256k1_fe_mul_int(&t, 2); /* t = 2*x3 (2) */
secp256k1_fe_add(&t, &q); /* t = 2*x3 - Q: (8) */
secp256k1_fe_mul(&t, &t, &rr_alt); /* t = Ralt*(2*x3 - Q) (1) */
secp256k1_fe_add(&t, &n); /* t = Ralt*(2*x3 - Q) + M^3*Malt (2) */
secp256k1_fe_negate(&r->y, &t, 2); /* r->y = Ralt*(Q - 2x3) - M^3*Malt (3) */
secp256k1_fe_normalize_weak(&r->y);
secp256k1_fe_mul_int(&r->x, 4); /* r->x = X3 = 4*(R^2-Q) */
secp256k1_fe_mul_int(&r->y, 4); /* r->y = Y3 = 4*R*(3*Q-2*R^2)-4*M^4 (4) */
secp256k1_fe_mul_int(&r->x, 4); /* r->x = X3 = 4*(Ralt^2-Q) */
secp256k1_fe_mul_int(&r->y, 4); /* r->y = Y3 = 4*Ralt*(Q - 2x3) - 4*M^3*Malt (4) */

/** In case a->infinity == 1, replace r with (b->x, b->y, 1). */
secp256k1_fe_cmov(&r->x, &b->x, a->infinity);
Expand Down
82 changes: 82 additions & 0 deletions src/tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -958,11 +958,17 @@ void ge_equals_gej(const secp256k1_ge_t *a, const secp256k1_gej_t *b) {

void test_ge(void) {
int i, i1;
#ifdef USE_ENDOMORPHISM
int runs = 6;
#else
int runs = 4;
#endif
/* Points: (infinity, p1, p1, -p1, -p1, p2, p2, -p2, -p2, p3, p3, -p3, -p3, p4, p4, -p4, -p4).
* The second in each pair of identical points uses a random Z coordinate in the Jacobian form.
* All magnitudes are randomized.
* All 17*17 combinations of points are added to eachother, using all applicable methods.
*
* When the endomorphism code is compiled in, p5 = lambda*p1 and p6 = lambda^2*p1 are added as well.
*/
secp256k1_ge_t *ge = (secp256k1_ge_t *)malloc(sizeof(secp256k1_ge_t) * (1 + 4 * runs));
secp256k1_gej_t *gej = (secp256k1_gej_t *)malloc(sizeof(secp256k1_gej_t) * (1 + 4 * runs));
Expand All @@ -977,6 +983,14 @@ void test_ge(void) {
int j;
secp256k1_ge_t g;
random_group_element_test(&g);
#ifdef USE_ENDOMORPHISM
if (i >= runs - 2) {
secp256k1_ge_mul_lambda(&g, &ge[1]);
}
if (i >= runs - 1) {
secp256k1_ge_mul_lambda(&g, &g);
}
#endif
ge[1 + 4 * i] = g;
ge[2 + 4 * i] = g;
secp256k1_ge_neg(&ge[3 + 4 * i], &g);
Expand Down Expand Up @@ -1146,11 +1160,79 @@ void test_ge(void) {
free(zinv);
}

void test_add_neg_y_diff_x(void) {
/* The point of this test is to check that we can add two points
* whose y-coordinates are negatives of each other but whose x
* coordinates differ. If the x-coordinates were the same, these
* points would be negatives of each other and their sum is
* infinity. This is cool because it "covers up" any degeneracy
* in the addition algorithm that would cause the xy coordinates
* of the sum to be wrong (since infinity has no xy coordinates).
* HOWEVER, if the x-coordinates are different, infinity is the
* wrong answer, and such degeneracies are exposed. This is the
* root of https://github.com/bitcoin/secp256k1/issues/257 which
* this test is a regression test for.
*
* These points were generated in sage as
* # secp256k1 params
* F = FiniteField (0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F)
* C = EllipticCurve ([F (0), F (7)])
* G = C.lift_x(0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798)
* N = FiniteField(G.order())
*
* # endomorphism values (lambda is 1^{1/3} in N, beta is 1^{1/3} in F)
* x = polygen(N)
* lam = (1 - x^3).roots()[1][0]
*
* # random "bad pair"
* P = C.random_element()
* Q = -int(lam) * P
* print " P: %x %x" % P.xy()
* print " Q: %x %x" % Q.xy()
* print "P + Q: %x %x" % (P + Q).xy()
*/
secp256k1_gej_t aj = SECP256K1_GEJ_CONST(
0x8d24cd95, 0x0a355af1, 0x3c543505, 0x44238d30,
0x0643d79f, 0x05a59614, 0x2f8ec030, 0xd58977cb,
0x001e337a, 0x38093dcd, 0x6c0f386d, 0x0b1293a8,
0x4d72c879, 0xd7681924, 0x44e6d2f3, 0x9190117d
);
secp256k1_gej_t bj = SECP256K1_GEJ_CONST(
0xc7b74206, 0x1f788cd9, 0xabd0937d, 0x164a0d86,
0x95f6ff75, 0xf19a4ce9, 0xd013bd7b, 0xbf92d2a7,
0xffe1cc85, 0xc7f6c232, 0x93f0c792, 0xf4ed6c57,
0xb28d3786, 0x2897e6db, 0xbb192d0b, 0x6e6feab2
);
secp256k1_gej_t sumj = SECP256K1_GEJ_CONST(
0x671a63c0, 0x3efdad4c, 0x389a7798, 0x24356027,
0xb3d69010, 0x278625c3, 0x5c86d390, 0x184a8f7a,
0x5f6409c2, 0x2ce01f2b, 0x511fd375, 0x25071d08,
0xda651801, 0x70e95caf, 0x8f0d893c, 0xbed8fbbe
);
secp256k1_ge_t b;
secp256k1_gej_t resj;
secp256k1_ge_t res;
secp256k1_ge_set_gej(&b, &bj);

secp256k1_gej_add_var(&resj, &aj, &bj, NULL);
secp256k1_ge_set_gej(&res, &resj);
ge_equals_gej(&res, &sumj);

secp256k1_gej_add_ge(&resj, &aj, &b);
secp256k1_ge_set_gej(&res, &resj);
ge_equals_gej(&res, &sumj);

secp256k1_gej_add_ge_var(&resj, &aj, &b, NULL);
secp256k1_ge_set_gej(&res, &resj);
ge_equals_gej(&res, &sumj);
}

void run_ge(void) {
int i;
for (i = 0; i < count * 32; i++) {
test_ge();
}
test_add_neg_y_diff_x();
}

/***** ECMULT TESTS *****/
Expand Down