/*
 * Store in r the point p with its X and T coordinates passed through
 * fe_neg; Y and Z are copied over as they are, giving (-X, Y, Z, -T).
 */
void ge_neg(ge_p3* r, const ge_p3 *p)
{
  /* coordinates that are carried over unchanged */
  fe_copy(r->Y, p->Y);
  fe_copy(r->Z, p->Z);

  /* coordinates that change sign */
  fe_neg(r->X, p->X);
  fe_neg(r->T, p->T);
}
/*
 * Convert a Montgomery u-coordinate into an Edwards point written to p as
 * (X : Y : 1).  Of the two possible x values, the one whose sign (as
 * reported by fe_isnegative) equals ed_sign_bit is kept.
 *
 * When NDEBUG is not defined, the result is additionally checked against
 * the curve equation  -x^2 + y^2 = 1 + d*x^2*y^2.
 */
void ge_montx_to_p2(ge_p2* p, const fe u, const unsigned char ed_sign_bit)
{
  fe ed_x, ed_y, scale, mont_v, mont_v_sq, inv_v, neg_x;

  /* given u, recover edwards y:  y = (u - 1) / (u + 1) */
  fe_montx_to_edy(ed_y, u);

  /* given u, recover v:  v^2 = u(u^2 + Au + 1), then v = sqrt(v^2) */
  fe_mont_rhs(mont_v_sq, u);
  fe_sqrt(mont_v, mont_v_sq);

  /* given u and v, recover edwards x:  x = (u/v) * sqrt(-(A+2)).
   * The constant factor is the field element decoded from A_bytes. */
  fe_frombytes(scale, A_bytes);
  fe_mul(ed_x, u, scale);       /* u * sqrt(-(A+2)) */
  fe_invert(inv_v, mont_v);     /* 1/v */
  fe_mul(ed_x, ed_x, inv_v);    /* (u/v) * sqrt(-(A+2)) */

  /* swap in -x when the sign of x disagrees with the requested sign bit */
  fe_neg(neg_x, ed_x);
  fe_cmov(ed_x, neg_x, fe_isnegative(ed_x) ^ ed_sign_bit);

  fe_copy(p->X, ed_x);
  fe_copy(p->Y, ed_y);
  fe_1(p->Z);

  /* POSTCONDITION: check that p->X and p->Y satisfy the Ed curve equation */
  /* -x^2 + y^2 = 1 + dx^2y^2 */
#ifndef NDEBUG
  {
    fe one, d, x_sq, y_sq, lhs, rhs;
    unsigned char dbytes[32] = {
      0xa3, 0x78, 0x59, 0x13, 0xca, 0x4d, 0xeb, 0x75,
      0xab, 0xd8, 0x41, 0x41, 0x4d, 0x0a, 0x70, 0x00,
      0x98, 0xe8, 0x79, 0x77, 0x79, 0x40, 0xc7, 0x8c,
      0x73, 0xfe, 0x6f, 0x2b, 0xee, 0x6c, 0x03, 0x52
    };

    fe_frombytes(d, dbytes);
    fe_1(one);
    fe_sq(x_sq, p->X);          /* x^2 */
    fe_sq(y_sq, p->Y);          /* y^2 */

    fe_mul(rhs, x_sq, y_sq);    /* x^2y^2 */
    fe_mul(rhs, rhs, d);        /* dx^2y^2 */
    fe_add(rhs, rhs, one);      /* dx^2y^2 + 1 */

    fe_neg(lhs, x_sq);          /* -x^2 */
    fe_add(lhs, lhs, y_sq);     /* -x^2 + y^2 */

    assert(fe_isequal(lhs, rhs));
  }
#endif
}
/*
 * Fill the cached form r from the point p:
 *   YplusX = Y + X,  YminusX = Y - X,  Z = Z,  T2d = T * d2.
 */
extern void ge_p3_to_cached(ge_cached *r,const ge_p3 *p)
{
  /* the two fields that are a plain copy / single multiplication */
  fe_copy(r->Z, p->Z);
  fe_mul(r->T2d, p->T, d2);

  /* sum and difference of the Y and X coordinates */
  fe_sub(r->YminusX, p->Y, p->X);
  fe_add(r->YplusX, p->Y, p->X);
}
/* r = a^(2^n): one squaring of a into r followed by n - 1 in-place
 * squarings of r.  Requires n >= 1; r may be the same buffer as a. */
static void _inv_sqn(fe r, const fe a, long long n)
{
    long long k;

    _sq(r, a);
    for (k = 1; k < n; ++k)
        _sq(r, r);
}

/* out = (1 / in) % m, computed as a fixed exponentiation (Fermat's Little
 * Theorem).  Cost: 44 mul, 262 sq.
 *
 * The leading part of the exponent is built from the blocks
 * xN = in^(2^N - 1); the remaining 128 bits are consumed four at a time
 * through a 16-entry table indexed by the nibbles stored in m2[]. */
static void _inv(fe out, const fe in)
{
    fe acc, x2, x4, x8, x16, x32, t[16];
    long long i;

    /* xN = in^(2^N - 1), each obtained from the previous block */
    _inv_sqn(acc, in, 1);
    _mul(x2, acc, in);
    _inv_sqn(acc, x2, 2);
    _mul(x4, acc, x2);
    _inv_sqn(acc, x4, 4);
    _mul(x8, acc, x4);
    _inv_sqn(acc, x8, 8);
    _mul(x16, acc, x8);
    _inv_sqn(acc, x16, 16);
    _mul(x32, acc, x16);

    /* shift x32 up by 64 bits, append x32, shift by 32 more, append x32 */
    _inv_sqn(acc, x32, 64);
    _mul(acc, acc, x32);
    _inv_sqn(acc, acc, 32);
    _mul(acc, acc, x32);

    /* window table: t[k] = in^k for k = 1..15 (t[0] is set to zero) */
    fe_0(t[0]);
    fe_copy(t[1], in);
    _sq(t[2], t[1]);
    fe_copy(t[3], x2);
    for (i = 4; i < 15; i += 2) {
        _sq(t[i], t[i / 2]);
        if (i + 1 < 15)
            _mul(t[i + 1], t[i], in);
    }
    fe_copy(t[15], x4);

    /* remaining 32 nibbles of the exponent, taken from m2[] */
    for (i = 0; i < 32; ++i) {
        _inv_sqn(acc, acc, 4);
        _mul(acc, acc, t[m2[i]]);
    }

    fe_copy(out, acc);

    /* scrub the temporaries */
    cleanup(acc);
    cleanup(t);
    cleanup(x2);
    cleanup(x4);
    cleanup(x8);
    cleanup(x16);
    cleanup(x32);
}
/* Test if the public key can be uncompressed and negate it (-X,Y,Z,-T).
 *
 * s is the 32-byte encoding: y in the low 255 bits, the sign of x in the top
 * bit of s[31].  x is recovered from x^2 = (y^2 - 1) / (d*y^2 + 1).
 *
 * return 0 on success (the recovered x really squares to x^2), otherwise the
 * non-zero result of ConstantCompare.  p is written in either case. */
int ge_frombytes_negate_vartime(ge_p3 *p,const unsigned char *s)
{
    byte x[F25519_SIZE];
    byte y[F25519_SIZE];
    byte xx[F25519_SIZE];   /* y^2 first, later the candidate x^2 */
    byte t[F25519_SIZE];    /* scratch */
    byte w[F25519_SIZE];    /* scratch */
    byte signBit;
    int  ret;

    /* unpack the key s: split off the sign bit, keep y */
    signBit = s[31] >> 7;
    fe_copy(y, s);
    y[31] &= 127;

    fe_mul__distinct(xx, y, y);          /* xx = y^2                   */
    fe_mul__distinct(w, xx, ed25519_d);  /* w  = d*y^2                 */
    fe_add(t, w, f25519_one);            /* t  = d*y^2 + 1             */
    fe_inv__distinct(w, t);              /* w  = 1 / (d*y^2 + 1)       */
    fe_sub(t, xx, f25519_one);           /* t  = y^2 - 1               */
    fe_mul__distinct(xx, t, w);          /* xx = (y^2 - 1)/(d*y^2 + 1) */

    /* take a root and select it or its negation according to the sign bit */
    fe_sqrt(t, xx);
    fe_neg(w, t);
    fe_select(x, t, w, (t[0] ^ signBit) & 1);

    /* test that x^2 is equal to xx */
    fe_mul__distinct(t, x, x);
    fe_normalize(t);
    fe_normalize(xx);
    ret = ConstantCompare(t, xx, F25519_SIZE);

    /* project the key s onto p */
    fe_copy(p->X, x);
    fe_copy(p->Y, y);
    fe_load(p->Z, 1);
    fe_mul__distinct(p->T, x, y);

    /* negate, the point becomes (-X,Y,Z,-T) */
    fe_neg(p->X,p->X);
    fe_neg(p->T,p->T);

    return ret;
}
/* Compress the point (xIn, yIn) into 32 bytes at out.
 *
 * The encoding is y with the low bit of x[0] OR-ed into the top bit of byte
 * 31; the 32 bytes are then written to out in reverse order.
 * keySz is not used.  Always returns 0. */
int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz)
{
    byte x[F25519_SIZE];
    byte enc[32];
    byte signBit;
    int  k;

    (void)keySz;

    fe_copy(x, xIn);
    signBit = (x[0] & 1) << 7;

    fe_copy(enc, yIn);
    enc[31] |= signBit;

    /* out[31] receives enc[0], ..., out[0] receives enc[31] */
    for (k = 31; k >= 0; k--) {
        out[k] = enc[31 - k];
    }

    return 0;
}
// fe_inv calculates |out| = |in|^{-1} // // Based on Fermat's Little Theorem: // a^p = a (mod p) // a^{p-1} = 1 (mod p) // a^{p-2} = a^{-1} (mod p) static void fe_inv(fe out, const fe in) { fe ftmp, ftmp2; // each e_I will hold |in|^{2^I - 1} fe e2, e4, e8, e16, e32, e64; fe_sqr(ftmp, in); // 2^1 fe_mul(ftmp, in, ftmp); // 2^2 - 2^0 fe_copy(e2, ftmp); fe_sqr(ftmp, ftmp); // 2^3 - 2^1 fe_sqr(ftmp, ftmp); // 2^4 - 2^2 fe_mul(ftmp, ftmp, e2); // 2^4 - 2^0 fe_copy(e4, ftmp); fe_sqr(ftmp, ftmp); // 2^5 - 2^1 fe_sqr(ftmp, ftmp); // 2^6 - 2^2 fe_sqr(ftmp, ftmp); // 2^7 - 2^3 fe_sqr(ftmp, ftmp); // 2^8 - 2^4 fe_mul(ftmp, ftmp, e4); // 2^8 - 2^0 fe_copy(e8, ftmp); for (size_t i = 0; i < 8; i++) { fe_sqr(ftmp, ftmp); } // 2^16 - 2^8 fe_mul(ftmp, ftmp, e8); // 2^16 - 2^0 fe_copy(e16, ftmp); for (size_t i = 0; i < 16; i++) { fe_sqr(ftmp, ftmp); } // 2^32 - 2^16 fe_mul(ftmp, ftmp, e16); // 2^32 - 2^0 fe_copy(e32, ftmp); for (size_t i = 0; i < 32; i++) { fe_sqr(ftmp, ftmp); } // 2^64 - 2^32 fe_copy(e64, ftmp); fe_mul(ftmp, ftmp, in); // 2^64 - 2^32 + 2^0 for (size_t i = 0; i < 192; i++) { fe_sqr(ftmp, ftmp); } // 2^256 - 2^224 + 2^192 fe_mul(ftmp2, e64, e32); // 2^64 - 2^0 for (size_t i = 0; i < 16; i++) { fe_sqr(ftmp2, ftmp2); } // 2^80 - 2^16 fe_mul(ftmp2, ftmp2, e16); // 2^80 - 2^0 for (size_t i = 0; i < 8; i++) { fe_sqr(ftmp2, ftmp2); } // 2^88 - 2^8 fe_mul(ftmp2, ftmp2, e8); // 2^88 - 2^0 for (size_t i = 0; i < 4; i++) { fe_sqr(ftmp2, ftmp2); } // 2^92 - 2^4 fe_mul(ftmp2, ftmp2, e4); // 2^92 - 2^0 fe_sqr(ftmp2, ftmp2); // 2^93 - 2^1 fe_sqr(ftmp2, ftmp2); // 2^94 - 2^2 fe_mul(ftmp2, ftmp2, e2); // 2^94 - 2^0 fe_sqr(ftmp2, ftmp2); // 2^95 - 2^1 fe_sqr(ftmp2, ftmp2); // 2^96 - 2^2 fe_mul(ftmp2, ftmp2, in); // 2^96 - 3 fe_mul(out, ftmp2, ftmp); // 2^256 - 2^224 + 2^192 + 2^96 - 3 }
/* Compute sig*B + h*inA and store its X, Y and Z coordinates in R, where B is
 * ed25519_base.
 *
 * The original annotations describe the result as "SB + -H(R,A,M)A", i.e.
 * inA is presumably the already-negated public key -- confirm against the
 * caller.  Always returns 0. */
int ge_double_scalarmult_vartime(ge_p2* R, const unsigned char *h,
                                 const ge_p3 *inA,const unsigned char *sig)
{
    ge_p3 sB;
    ge_p3 hA;

    /* work on a private copy of the input point */
    XMEMCPY(&hA, inA, sizeof(ge_p3));

    /* sB = sig * B */
    ed25519_smult(&sB, &ed25519_base, sig);

    /* hA = h * inA */
    ed25519_smult(&hA, &hA, h);

    /* hA = sB + hA */
    ed25519_add(&hA, &sB, &hA);

    fe_copy(R->X, hA.X);
    fe_copy(R->Y, hA.Y);
    fe_copy(R->Z, hA.Z);

    return 0;
}
static int curve25519(unsigned char* q, unsigned char* n, unsigned char* p) { unsigned char e[32]; unsigned int i; fe x1; fe x2; fe z2; fe x3; fe z3; fe tmp0; fe tmp1; int pos; unsigned int swap; unsigned int b; for (i = 0;i < 32;++i) e[i] = n[i]; e[0] &= 248; e[31] &= 127; e[31] |= 64; fe_frombytes(x1,p); fe_1(x2); fe_0(z2); fe_copy(x3,x1); fe_1(z3); swap = 0; for (pos = 254;pos >= 0;--pos) { b = e[pos / 8] >> (pos & 7); b &= 1; swap ^= b; fe_cswap(x2,x3,swap); fe_cswap(z2,z3,swap); swap = b; #include <cyassl/ctaocrypt/ecc25519_montgomery.h> } fe_cswap(x2,x3,swap); fe_cswap(z2,z3,swap); fe_invert(z2,z2); fe_mul(x2,x2,z2); fe_tobytes(q,x2); return 0; }
/* pack the point h into array s */ void ge_tobytes(unsigned char *s,const ge_p2 *h) { byte x[F25519_SIZE]; byte y[F25519_SIZE]; byte z1[F25519_SIZE]; byte parity; fe_inv__distinct(z1, h->Z); fe_mul__distinct(x, h->X, z1); fe_mul__distinct(y, h->Y, z1); fe_normalize(x); fe_normalize(y); parity = (x[0] & 1) << 7; fe_copy(s, y); fe_normalize(s); s[31] |= parity; }
/*
 * out = a square root of a.
 *
 * The candidate root is b = a * a^((q-5)/8).  Since b^4 == a^2, b^2 is
 * either a or -a; in the second case b is multiplied by sqrt(-1), decoded
 * from i_bytes.
 *
 * When NDEBUG is not defined, two assertions bracket the computation: a must
 * be zero or a square on entry, and out^2 must equal a on exit.
 */
void fe_sqrt(fe out, const fe a)
{
  fe pw, root, root_sq, root_i, sqrt_m1;
#ifndef NDEBUG
  fe legendre, zero, one;
#endif

  fe_frombytes(sqrt_m1, i_bytes);
  fe_pow22523(pw, a);                  /* pw = a^((q-5)/8) */

  /* PRECONDITION: legendre symbol == 1 (square) or 0 (a == zero) */
#ifndef NDEBUG
  fe_sq(legendre, pw);                 /* a^((q-5)/4) */
  fe_sq(legendre, legendre);           /* a^((q-5)/2) */
  fe_mul(legendre, legendre, a);       /* a^((q-3)/2) */
  fe_mul(legendre, legendre, a);       /* a^((q-1)/2) */

  fe_0(zero);
  fe_1(one);
  assert(fe_isequal(legendre, zero) || fe_isequal(legendre, one));
#endif

  fe_mul(root, a, pw);                 /* root   = a * a^((q-5)/8) */
  fe_sq(root_sq, root);                /* root^2 = a * a^((q-1)/4) */

  /* root^2 is a or -a; when it is not a, replace root by root * sqrt(-1) */
  fe_mul(root_i, root, sqrt_m1);
  fe_cmov(root, root_i, 1 ^ fe_isequal(root_sq, a));
  fe_copy(out, root);

  /* POSTCONDITION: out^2 == a */
#ifndef NDEBUG
  fe_sq(root_sq, out);
  assert(fe_isequal(a, root_sq));
#endif
}
/*
 * Copy the X, Y and Z coordinates of p into r; the T coordinate of p is not
 * used.
 */
extern void ge_p3_to_p2(ge_p2 *r,const ge_p3 *p)
{
  fe_copy(r->Z, p->Z);
  fe_copy(r->Y, p->Y);
  fe_copy(r->X, p->X);
}
// point_add calcuates (x1, y1, z1) + (x2, y2, z2) // // The method is taken from: // http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl, // adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity). // // Coq transcription and correctness proof: // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135> // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205> // // This function includes a branch for checking whether the two input points // are equal, (while not equal to the point at infinity). This case never // happens during single point multiplication, so there is no timing leak for // ECDH or ECDSA signing. static void point_add(fe x3, fe y3, fe z3, const fe x1, const fe y1, const fe z1, const int mixed, const fe x2, const fe y2, const fe z2) { fe x_out, y_out, z_out; limb_t z1nz = fe_nz(z1); limb_t z2nz = fe_nz(z2); // z1z1 = z1z1 = z1**2 fe z1z1; fe_sqr(z1z1, z1); fe u1, s1, two_z1z2; if (!mixed) { // z2z2 = z2**2 fe z2z2; fe_sqr(z2z2, z2); // u1 = x1*z2z2 fe_mul(u1, x1, z2z2); // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 fe_add(two_z1z2, z1, z2); fe_sqr(two_z1z2, two_z1z2); fe_sub(two_z1z2, two_z1z2, z1z1); fe_sub(two_z1z2, two_z1z2, z2z2); // s1 = y1 * z2**3 fe_mul(s1, z2, z2z2); fe_mul(s1, s1, y1); } else { // We'll assume z2 = 1 (special case z2 = 0 is handled later). 
// u1 = x1*z2z2 fe_copy(u1, x1); // two_z1z2 = 2z1z2 fe_add(two_z1z2, z1, z1); // s1 = y1 * z2**3 fe_copy(s1, y1); } // u2 = x2*z1z1 fe u2; fe_mul(u2, x2, z1z1); // h = u2 - u1 fe h; fe_sub(h, u2, u1); limb_t xneq = fe_nz(h); // z_out = two_z1z2 * h fe_mul(z_out, h, two_z1z2); // z1z1z1 = z1 * z1z1 fe z1z1z1; fe_mul(z1z1z1, z1, z1z1); // s2 = y2 * z1**3 fe s2; fe_mul(s2, y2, z1z1z1); // r = (s2 - s1)*2 fe r; fe_sub(r, s2, s1); fe_add(r, r, r); limb_t yneq = fe_nz(r); if (!xneq && !yneq && z1nz && z2nz) { point_double(x3, y3, z3, x1, y1, z1); return; } // I = (2h)**2 fe i; fe_add(i, h, h); fe_sqr(i, i); // J = h * I fe j; fe_mul(j, h, i); // V = U1 * I fe v; fe_mul(v, u1, i); // x_out = r**2 - J - 2V fe_sqr(x_out, r); fe_sub(x_out, x_out, j); fe_sub(x_out, x_out, v); fe_sub(x_out, x_out, v); // y_out = r(V-x_out) - 2 * s1 * J fe_sub(y_out, v, x_out); fe_mul(y_out, y_out, r); fe s1j; fe_mul(s1j, s1, j); fe_sub(y_out, y_out, s1j); fe_sub(y_out, y_out, s1j); fe_cmovznz(x_out, z1nz, x2, x_out); fe_cmovznz(x3, z2nz, x1, x_out); fe_cmovznz(y_out, z1nz, y2, y_out); fe_cmovznz(y3, z2nz, y1, y_out); fe_cmovznz(z_out, z1nz, z2, z_out); fe_cmovznz(z3, z2nz, z1, z_out); }
// Interleaved point multiplication using precomputed point multiples: The // small point multiples 0*P, 1*P, ..., 17*P are in p_pre_comp, the scalar // in p_scalar, if non-NULL. If g_scalar is non-NULL, we also add this multiple // of the generator, using certain (large) precomputed multiples in g_pre_comp. // Output point (X, Y, Z) is stored in x_out, y_out, z_out. static void batch_mul(fe x_out, fe y_out, fe z_out, const uint8_t *p_scalar, const uint8_t *g_scalar, const fe p_pre_comp[17][3]) { // set nq to the point at infinity fe nq[3] = {{0},{0},{0}}, ftmp, tmp[3]; uint64_t bits; uint8_t sign, digit; // Loop over both scalars msb-to-lsb, interleaving additions of multiples // of the generator (two in each of the last 32 rounds) and additions of p // (every 5th round). int skip = 1; // save two point operations in the first round size_t i = p_scalar != NULL ? 255 : 31; for (;;) { // double if (!skip) { point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); } // add multiples of the generator if (g_scalar != NULL && i <= 31) { // first, look 32 bits upwards bits = get_bit(g_scalar, i + 224) << 3; bits |= get_bit(g_scalar, i + 160) << 2; bits |= get_bit(g_scalar, i + 96) << 1; bits |= get_bit(g_scalar, i + 32); // select the point to add, in constant time select_point(bits, 16, g_pre_comp[1], tmp); if (!skip) { point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0], tmp[1], tmp[2]); } else { fe_copy(nq[0], tmp[0]); fe_copy(nq[1], tmp[1]); fe_copy(nq[2], tmp[2]); skip = 0; } // second, look at the current position bits = get_bit(g_scalar, i + 192) << 3; bits |= get_bit(g_scalar, i + 128) << 2; bits |= get_bit(g_scalar, i + 64) << 1; bits |= get_bit(g_scalar, i); // select the point to add, in constant time select_point(bits, 16, g_pre_comp[0], tmp); point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0], tmp[1], tmp[2]); } // do other additions every 5 doublings if (p_scalar != NULL && i % 5 == 0) { bits = get_bit(p_scalar, i 
+ 4) << 5; bits |= get_bit(p_scalar, i + 3) << 4; bits |= get_bit(p_scalar, i + 2) << 3; bits |= get_bit(p_scalar, i + 1) << 2; bits |= get_bit(p_scalar, i) << 1; bits |= get_bit(p_scalar, i - 1); ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); // select the point to add or subtract, in constant time. select_point(digit, 17, p_pre_comp, tmp); fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point. fe_cmovznz(tmp[1], sign, tmp[1], ftmp); if (!skip) { point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */, tmp[0], tmp[1], tmp[2]); } else { fe_copy(nq[0], tmp[0]); fe_copy(nq[1], tmp[1]); fe_copy(nq[2], tmp[2]); skip = 0; } } if (i == 0) { break; } --i; } fe_copy(x_out, nq[0]); fe_copy(y_out, nq[1]); fe_copy(z_out, nq[2]); }
// ec_GFp_nistp256_point_mul_public accumulates |g_scalar| times the generator
// (via the |g_pre_comp| tables) plus |p_scalar| times |p| (via a wNAF
// expansion over a small table of odd multiples of |p|).
//
// The generator table is indexed directly by scalar bits and the wNAF digits
// drive data-dependent branches, so this routine is not constant-time with
// respect to either scalar.
//
// NOTE(review): the text of this definition visible here stops right after
// the main loop; the closing brace and any write-back of |ret| into |r| are
// not shown.
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
                                             EC_RAW_POINT *r,
                                             const EC_SCALAR *g_scalar,
                                             const EC_RAW_POINT *p,
                                             const EC_SCALAR *p_scalar) {
#define P256_WSIZE_PUBLIC 4
  // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|.
  // The table has 1 << (P256_WSIZE_PUBLIC-1) = 8 entries.
  fe p_pre_comp[1 << (P256_WSIZE_PUBLIC-1)][3];
  fe_from_generic(p_pre_comp[0][0], &p->X);
  fe_from_generic(p_pre_comp[0][1], &p->Y);
  fe_from_generic(p_pre_comp[0][2], &p->Z);
  // p2 = 2 * |p|; each further table entry is the previous one plus p2.
  fe p2[3];
  point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], p_pre_comp[0][1],
               p_pre_comp[0][2]);
  for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) {
    point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
              p_pre_comp[i - 1][0], p_pre_comp[i - 1][1],
              p_pre_comp[i - 1][2], 0 /* not mixed */, p2[0], p2[1], p2[2]);
  }

  // Set up the coefficients for |p_scalar|.
  // One digit per loop position below (indices 0..256).
  int8_t p_wNAF[257];
  ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);

  // Set |ret| to the point at infinity.
  int skip = 1;  // Save some point operations.
  fe ret[3] = {{0},{0},{0}};
  for (int i = 256; i >= 0; i--) {
    // No doubling is done until something has been added to |ret|.
    if (!skip) {
      point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]);
    }

    // For the |g_scalar|, we use the precomputed table without the
    // constant-time lookup.
    if (i <= 31) {
      // First, look 32 bits upwards.
      uint64_t bits = get_bit(g_scalar->bytes, i + 224) << 3;
      bits |= get_bit(g_scalar->bytes, i + 160) << 2;
      bits |= get_bit(g_scalar->bytes, i + 96) << 1;
      bits |= get_bit(g_scalar->bytes, i + 32);
      // This addition is performed even while |skip| is still set; in that
      // case |ret| still holds its all-zero initial value.
      point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], 1 /* mixed */,
                g_pre_comp[1][bits][0], g_pre_comp[1][bits][1],
                g_pre_comp[1][bits][2]);
      skip = 0;

      // Second, look at the current position.
      bits = get_bit(g_scalar->bytes, i + 192) << 3;
      bits |= get_bit(g_scalar->bytes, i + 128) << 2;
      bits |= get_bit(g_scalar->bytes, i + 64) << 1;
      bits |= get_bit(g_scalar->bytes, i);
      point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], 1 /* mixed */,
                g_pre_comp[0][bits][0], g_pre_comp[0][bits][1],
                g_pre_comp[0][bits][2]);
    }

    // Non-zero wNAF digits are odd; |digit| >> 1 indexes the odd-multiple
    // table, and a negative digit means the table point is subtracted.
    int digit = p_wNAF[i];
    if (digit != 0) {
      assert(digit & 1);
      int idx = digit < 0 ? (-digit) >> 1 : digit >> 1;
      // |y| points at the table's Y coordinate, or at its negation in |tmp|.
      fe *y = &p_pre_comp[idx][1], tmp;
      if (digit < 0) {
        fe_opp(tmp, p_pre_comp[idx][1]);
        y = &tmp;
      }
      if (!skip) {
        point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
                  0 /* not mixed */, p_pre_comp[idx][0], *y,
                  p_pre_comp[idx][2]);
      } else {
        // First contribution: copy the point instead of adding to infinity.
        fe_copy(ret[0], p_pre_comp[idx][0]);
        fe_copy(ret[1], *y);
        fe_copy(ret[2], p_pre_comp[idx][2]);
        skip = 0;
      }
    }
  }
/*
 * Derive a 32-byte shared secret from an Ed25519 public key and a private
 * key.
 *
 * The first 32 bytes of private_key are copied and clamped, the Edwards y
 * coordinate in public_key is mapped to a Montgomery x coordinate, and a
 * Montgomery ladder over bits 254..0 of the clamped scalar produces the
 * x coordinate written to shared_secret.
 */
void ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key)
{
    unsigned char scalar[32];
    unsigned int k;
    fe u;               /* Montgomery x of the peer's point */
    fe xa, za;          /* first ladder point  */
    fe xb, zb;          /* second ladder point */
    fe t0, t1;          /* scratch */
    int bit_index;
    unsigned int pending_swap;
    unsigned int bit;

    /* copy the private key and make sure it's valid */
    for (k = 0; k < 32; ++k) {
        scalar[k] = private_key[k];
    }
    scalar[0] &= 248;
    scalar[31] = (scalar[31] & 63) | 64;

    /* unpack the public key and convert edwards to montgomery */
    /* due to CodesInChaos: montgomeryX = (edwardsY + 1)*inverse(1 - edwardsY) mod p */
    fe_frombytes(u, public_key);    /* u  = edwardsY          */
    fe_1(t1);
    fe_add(t0, u, t1);              /* t0 = edwardsY + 1      */
    fe_sub(t1, t1, u);              /* t1 = 1 - edwardsY      */
    fe_invert(t1, t1);              /* t1 = 1/(1 - edwardsY)  */
    fe_mul(u, t0, t1);              /* u  = montgomeryX       */

    /* (xa:za) = (1:0), (xb:zb) = (u:1) */
    fe_1(xa);
    fe_0(za);
    fe_copy(xb, u);
    fe_1(zb);

    pending_swap = 0;
    bit_index = 255;
    while (bit_index-- > 0) {
        bit = (scalar[bit_index >> 3] >> (bit_index % 8)) & 1;

        /* swap the working points only when this bit differs from the last */
        pending_swap ^= bit;
        fe_cswap(xa, xb, pending_swap);
        fe_cswap(za, zb, pending_swap);
        pending_swap = bit;

        /* one ladder step (from montgomery.h) */
        fe_sub(t0, xb, zb);
        fe_sub(t1, xa, za);
        fe_add(xa, xa, za);
        fe_add(za, xb, zb);
        fe_mul(zb, t0, xa);
        fe_mul(za, za, t1);
        fe_sq(t0, t1);
        fe_sq(t1, xa);
        fe_add(xb, zb, za);
        fe_sub(za, zb, za);
        fe_mul(xa, t1, t0);
        fe_sub(t1, t1, t0);
        fe_sq(za, za);
        fe_mul121666(zb, t1);
        fe_sq(xb, xb);
        fe_add(t0, t0, zb);
        fe_mul(zb, u, za);
        fe_mul(za, t1, t0);
    }

    /* undo the swap left pending by the last iteration */
    fe_cswap(xa, xb, pending_swap);
    fe_cswap(za, zb, pending_swap);

    /* affine result: xa / za */
    fe_invert(za, za);
    fe_mul(xa, xa, za);
    fe_tobytes(shared_secret, xa);
}
/*
 * One Montgomery ladder step, updating the two working points (x2:z2) and
 * (x3:z3) in place; x1 is the x coordinate of the base point.
 */
static void crypto_scalarmult_curve25519_ref10_step(fe x2, fe z2, fe x3, fe z3, fe x1)
{
  fe tmp0;
  fe tmp1;

  fe_sub(tmp0, x3, z3);
  fe_sub(tmp1, x2, z2);
  fe_add(x2, x2, z2);
  fe_add(z2, x3, z3);
  fe_mul(z3, tmp0, x2);
  fe_mul(z2, z2, tmp1);
  fe_sq(tmp0, tmp1);
  fe_sq(tmp1, x2);
  fe_add(x3, z3, z2);
  fe_sub(z2, z3, z2);
  fe_mul(x2, tmp1, tmp0);
  fe_sub(tmp1, tmp1, tmp0);
  fe_sq(z2, z2);
  fe_mul121666(z3, tmp1);
  fe_sq(x3, x3);
  fe_add(tmp0, tmp0, z3);
  fe_mul(z3, x1, z2);
  fe_mul(z2, tmp1, tmp0);
}

/*
 * Montgomery-ladder scalar multiplication: q = n * p on x-coordinates.
 *
 * n is copied and clamped (low three bits cleared, bit 255 cleared, bit 254
 * set) before use; q, n and p are 32-byte buffers.  Always returns 0.
 */
static int crypto_scalarmult_curve25519_ref10(unsigned char *q, const unsigned char *n, const unsigned char *p)
{
  unsigned char e[32];
  unsigned int i;
  fe x1;
  fe x2;
  fe z2;
  fe x3;
  fe z3;
  int pos;
  unsigned int swap = 0;
  unsigned int b;

  /* clamped private copy of the scalar */
  for (i = 32; i-- > 0; ) {
    e[i] = n[i];
  }
  e[0] &= 248;
  e[31] = (e[31] & 127) | 64;

  /* (x2:z2) = (1:0), (x3:z3) = (x1:1) */
  fe_frombytes(x1, p);
  fe_1(x2);
  fe_0(z2);
  fe_copy(x3, x1);
  fe_1(z3);

  for (pos = 254; pos >= 0; --pos) {
    b = (e[pos / 8] >> (pos & 7)) & 1;

    /* swap the working points only when this bit differs from the last */
    swap ^= b;
    fe_cswap(x2, x3, swap);
    fe_cswap(z2, z3, swap);
    swap = b;

    crypto_scalarmult_curve25519_ref10_step(x2, z2, x3, z3, x1);
  }

  /* undo the swap left pending by the last iteration */
  fe_cswap(x2, x3, swap);
  fe_cswap(z2, z3, swap);

  /* affine result: x2 / z2 */
  fe_invert(z2, z2);
  fe_mul(x2, x2, z2);
  fe_tobytes(q, x2);

  return 0;
}