/*! c <- REDC( a * b ) mod N
    \param a < N  i.e. "reduced"
    \param b < N  i.e. "reduced"
    \param c receives the result; it is padded to 2 * USED(N) + 1 digits
    \param mmm modulus N and n0' of N
    \return MP_OKAY on success, otherwise the error produced by the
            argument check, s_mp_pad or s_mp_sub.

    The multiplication and the reduction are interleaved: for every digit
    position the partial product (a * b_i) is added into c and then a
    multiple m_i of N is added at the same offset.
*/
mp_err
s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
              mp_mont_modulus *mmm)
{
    mp_digit *b_cursor; /* walks over the digits of b */
    mp_digit m_i;
    mp_err res;
    mp_size ib; /* "index b": index of current digit of B */
    mp_size useda, usedb;

    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

    /* Make b the operand with fewer digits so the outer loop is shorter. */
    if (MP_USED(a) < MP_USED(b)) {
        const mp_int *shorter = a;
        a = b;
        b = shorter;
    }

    /* Reset c to a single zero digit, then grow it to its working size. */
    MP_USED(c) = 1;
    MP_DIGIT(c, 0) = 0;
    ib = (MP_USED(&mmm->N) << 1) + 1;
    res = s_mp_pad(c, ib);
    if (res != MP_OKAY) {
        goto CLEANUP;
    }

    /* First digit of b: plain multiply into c, zero the rest of c. */
    useda = MP_USED(a);
    b_cursor = MP_DIGITS(b);
    s_mpv_mul_d(MP_DIGITS(a), useda, *b_cursor++, MP_DIGITS(c));
    s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
    m_i = MP_DIGIT(c, 0) * mmm->n0prime;
    s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);

    /* Outer loop: remaining digits of b */
    usedb = MP_USED(b);
    for (ib = 1; ib < usedb; ib++) {
        mp_digit b_i = *b_cursor++;

        /* Inner product: digits of a (skipped when b_i is zero) */
        if (b_i != 0) {
            s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i,
                                 MP_DIGITS(c) + ib);
        }
        m_i = MP_DIGIT(c, ib) * mmm->n0prime;
        s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
    }

    /* b had fewer digits than N: keep reducing up to USED(N) positions. */
    if (usedb < MP_USED(&mmm->N)) {
        usedb = MP_USED(&mmm->N);
        while (ib < usedb) {
            m_i = MP_DIGIT(c, ib) * mmm->n0prime;
            s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
            ++ib;
        }
    }

    s_mp_clamp(c);
    s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */

    /* One conditional subtraction if c is still >= N. */
    if (s_mp_cmp(c, &mmm->N) >= 0) {
        MP_CHECKOK(s_mp_sub(c, &mmm->N));
    }
    res = MP_OKAY;

CLEANUP:
    return res;
}
/* computes T = REDC(T), 2^b == R
 *
 * T is padded to USED(T) + USED(N) + 2 digits, a multiple of N is added at
 * each of the low USED(N) digit positions, T is divided by 2^b, and N is
 * subtracted once if the result is still >= N.
 *
 * Returns MP_OKAY on success or the first error reported by a helper. In
 * DEBUG builds MP_UNDEF is returned if the value is still >= N after the
 * subtraction.
 */
mp_err
s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
{
    mp_err res;
    mp_size idx;
    mp_size padded_len;

    padded_len = MP_USED(T) + MP_USED(&mmm->N) + 2;
    MP_CHECKOK(s_mp_pad(T, padded_len));

    idx = 0;
    while (idx < MP_USED(&mmm->N)) {
        mp_digit m_i = MP_DIGIT(T, idx) * mmm->n0prime;

        /* T += N * m_i * (MP_RADIX ** idx); */
        MP_CHECKOK(s_mp_mul_d_add_offset(&mmm->N, m_i, T, idx));
        ++idx;
    }
    s_mp_clamp(T);

    /* T /= R */
    s_mp_div_2d(T, mmm->b);

    res = s_mp_cmp(T, &mmm->N);
    if (res >= 0) {
        /* T = T - N */
        MP_CHECKOK(s_mp_sub(T, &mmm->N));
#ifdef DEBUG
        res = mp_cmp(T, &mmm->N);
        if (res >= 0) {
            res = MP_UNDEF;
            goto CLEANUP;
        }
#endif
    }
    res = MP_OKAY;

CLEANUP:
    return res;
}
/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should * be an array of signed char's to output to, bitsize should be the number * of bits of out, in is the original scalar, and w is the window size. * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A. * Menezes, "Software implementation of elliptic curve cryptography over * binary fields", Proc. CHES 2000. */ mp_err ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, int w) { mp_int k; mp_err res = MP_OKAY; int i, twowm1, mask; twowm1 = ec_twoTo(w - 1); mask = 2 * twowm1 - 1; MP_DIGITS(&k) = 0; MP_CHECKOK(mp_init_copy(&k, in)); i = 0; /* Compute wNAF form */ while (mp_cmp_z(&k) > 0) { if (mp_isodd(&k)) { out[i] = MP_DIGIT(&k, 0) & mask; if (out[i] >= twowm1) out[i] -= 2 * twowm1; /* Subtract off out[i]. Note mp_sub_d only works with * unsigned digits */ if (out[i] >= 0) { mp_sub_d(&k, out[i], &k); } else { mp_add_d(&k, -(out[i]), &k); } } else { out[i] = 0; } mp_div_2(&k, &k); i++; } /* Zero out the remaining elements of the out array. */ for (; i < bitsize + 1; i++) { out[i] = 0; } CLEANUP: mp_clear(&k); return res; }
/* Construct a generic GFMethod for arithmetic over prime fields with * irreducible irr. */ GFMethod * GFMethod_consGFp_mont(const mp_int *irr) { mp_err res = MP_OKAY; int i; GFMethod *meth = NULL; mp_mont_modulus *mmm; meth = GFMethod_consGFp(irr); if (meth == NULL) return NULL; mmm = (mp_mont_modulus *) malloc(sizeof(mp_mont_modulus)); if (mmm == NULL) { res = MP_MEM; goto CLEANUP; } meth->field_mul = &ec_GFp_mul_mont; meth->field_sqr = &ec_GFp_sqr_mont; meth->field_div = &ec_GFp_div_mont; meth->field_enc = &ec_GFp_enc_mont; meth->field_dec = &ec_GFp_dec_mont; meth->extra1 = mmm; meth->extra2 = NULL; meth->extra_free = &ec_GFp_extra_free_mont; mmm->N = meth->irr; i = mpl_significant_bits(&meth->irr); i += MP_DIGIT_BIT - 1; mmm->b = i - i % MP_DIGIT_BIT; mmm->n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(&meth->irr, 0)); CLEANUP: if (res != MP_OKAY) { GFMethod_free(meth); return NULL; } return meth; }
/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses * Jacobian coordinates. * * Assumes input is already field-encoded using field_enc, and returns * output that is still field-encoded. * * This routine implements Point Doubling in the Jacobian Projective * space as described in the paper "Efficient elliptic curve exponentiation * using mixed coordinates", by H. Cohen, A Miyaji, T. Ono. */ mp_err ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz, mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group) { mp_err res = MP_OKAY; mp_int t0, t1, M, S; MP_DIGITS(&t0) = 0; MP_DIGITS(&t1) = 0; MP_DIGITS(&M) = 0; MP_DIGITS(&S) = 0; MP_CHECKOK(mp_init(&t0)); MP_CHECKOK(mp_init(&t1)); MP_CHECKOK(mp_init(&M)); MP_CHECKOK(mp_init(&S)); /* P == inf or P == -P */ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) { MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); goto CLEANUP; } if (mp_cmp_d(pz, 1) == 0) { /* M = 3 * px^2 + a */ MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth)); MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth)); MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth)); MP_CHECKOK(group->meth-> field_add(&t0, &group->curvea, &M, group->meth)); } else if (MP_SIGN(&group->curvea) == MP_NEG && MP_USED(&group->curvea) == 1 && MP_DIGIT(&group->curvea, 0) == 3) { /* M = 3 * (px + pz^2) * (px - pz^2) */ MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth)); MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth)); MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth)); MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth)); MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth)); MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth)); } else { /* M = 3 * (px^2) + a * (pz^4) */ MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth)); MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth)); MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth)); 
MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth)); MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth)); MP_CHECKOK(group->meth-> field_mul(&M, &group->curvea, &M, group->meth)); MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth)); } /* rz = 2 * py * pz */ /* t0 = 4 * py^2 */ if (mp_cmp_d(pz, 1) == 0) { MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth)); MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth)); } else { MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth)); MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth)); MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth)); } /* S = 4 * px * py^2 = px * (2 * py)^2 */ MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth)); /* rx = M^2 - 2 * S */ MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth)); MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth)); MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth)); /* ry = M * (S - rx) - 8 * py^4 */ MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth)); if (mp_isodd(&t1)) { MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1)); } MP_CHECKOK(mp_div_2(&t1, &t1)); MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth)); MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth)); MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth)); CLEANUP: mp_clear(&t0); mp_clear(&t1); mp_clear(&M); mp_clear(&S); return res; }
/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical. * Uses mixed Jacobian-affine coordinates. Assumes input is already * field-encoded using field_enc, and returns output that is still * field-encoded. Uses equation (2) from Brown, Hankerson, Lopez, and * Menezes. Software Implementation of the NIST Elliptic Curves Over Prime * Fields. */ mp_err ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, const mp_int *pz, const mp_int *qx, const mp_int *qy, mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group) { mp_err res = MP_OKAY; mp_int A, B, C, D, C2, C3; MP_DIGITS(&A) = 0; MP_DIGITS(&B) = 0; MP_DIGITS(&C) = 0; MP_DIGITS(&D) = 0; MP_DIGITS(&C2) = 0; MP_DIGITS(&C3) = 0; MP_CHECKOK(mp_init(&A)); MP_CHECKOK(mp_init(&B)); MP_CHECKOK(mp_init(&C)); MP_CHECKOK(mp_init(&D)); MP_CHECKOK(mp_init(&C2)); MP_CHECKOK(mp_init(&C3)); /* If either P or Q is the point at infinity, then return the other * point */ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group)); goto CLEANUP; } if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) { MP_CHECKOK(mp_copy(px, rx)); MP_CHECKOK(mp_copy(py, ry)); MP_CHECKOK(mp_copy(pz, rz)); goto CLEANUP; } /* A = qx * pz^2, B = qy * pz^3 */ MP_CHECKOK(group->meth->field_sqr(pz, &A, group->meth)); MP_CHECKOK(group->meth->field_mul(&A, pz, &B, group->meth)); MP_CHECKOK(group->meth->field_mul(&A, qx, &A, group->meth)); MP_CHECKOK(group->meth->field_mul(&B, qy, &B, group->meth)); /* C = A - px, D = B - py */ MP_CHECKOK(group->meth->field_sub(&A, px, &C, group->meth)); MP_CHECKOK(group->meth->field_sub(&B, py, &D, group->meth)); if (mp_cmp_z(&C) == 0) { /* P == Q or P == -Q */ if (mp_cmp_z(&D) == 0) { /* P == Q */ /* It is cheaper to double (qx, qy, 1) than (px, py, pz). */ MP_DIGIT(&D, 0) = 1; /* Set D to 1. 
*/ MP_CHECKOK(ec_GFp_pt_dbl_jac(qx, qy, &D, rx, ry, rz, group)); } else { /* P == -Q */ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); } goto CLEANUP; } /* C2 = C^2, C3 = C^3 */ MP_CHECKOK(group->meth->field_sqr(&C, &C2, group->meth)); MP_CHECKOK(group->meth->field_mul(&C, &C2, &C3, group->meth)); /* rz = pz * C */ MP_CHECKOK(group->meth->field_mul(pz, &C, rz, group->meth)); /* C = px * C^2 */ MP_CHECKOK(group->meth->field_mul(px, &C2, &C, group->meth)); /* A = D^2 */ MP_CHECKOK(group->meth->field_sqr(&D, &A, group->meth)); /* rx = D^2 - (C^3 + 2 * (px * C^2)) */ MP_CHECKOK(group->meth->field_add(&C, &C, rx, group->meth)); MP_CHECKOK(group->meth->field_add(&C3, rx, rx, group->meth)); MP_CHECKOK(group->meth->field_sub(&A, rx, rx, group->meth)); /* C3 = py * C^3 */ MP_CHECKOK(group->meth->field_mul(py, &C3, &C3, group->meth)); /* ry = D * (px * C^2 - rx) - py * C^3 */ MP_CHECKOK(group->meth->field_sub(&C, rx, ry, group->meth)); MP_CHECKOK(group->meth->field_mul(&D, ry, ry, group->meth)); MP_CHECKOK(group->meth->field_sub(ry, &C3, ry, group->meth)); CLEANUP: mp_clear(&A); mp_clear(&B); mp_clear(&C); mp_clear(&D); mp_clear(&C2); mp_clear(&C3); return res; }
/*
** Perform a raw public-key operation
** Length of input and output buffers are equal to key's modulus len.
**
** Computes output = input ** publicExponent mod modulus.
** Returns SECSuccess on success. On failure returns SECFailure after
** setting an error: SEC_ERROR_INVALID_ARGS for a NULL argument,
** SEC_ERROR_INVALID_KEY for a rejected key, SEC_ERROR_INPUT_LEN when the
** input does not compare below the modulus, or the translation of an MPI
** error.
*/
SECStatus
RSA_PublicKeyOp(RSAPublicKey *key, unsigned char *output,
                const unsigned char *input)
{
    unsigned int modLen, expLen, offset;
    mp_int n, e, m, c;
    mp_err err = MP_OKAY;
    SECStatus rv = SECSuccess;

    if (!key || !output || !input) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }

    /* Mark all integers unallocated so cleanup can always clear them. */
    MP_DIGITS(&n) = 0;
    MP_DIGITS(&e) = 0;
    MP_DIGITS(&m) = 0;
    MP_DIGITS(&c) = 0;
    CHECK_MPI_OK(mp_init(&n));
    CHECK_MPI_OK(mp_init(&e));
    CHECK_MPI_OK(mp_init(&m));
    CHECK_MPI_OK(mp_init(&c));

    modLen = rsa_modulusLen(&key->modulus);
    expLen = rsa_modulusLen(&key->publicExponent);

    /* 1. Obtain public key (n, e) */
    if (BAD_RSA_KEY_SIZE(modLen, expLen)) {
        PORT_SetError(SEC_ERROR_INVALID_KEY);
        rv = SECFailure;
        goto cleanup;
    }
    SECITEM_TO_MPINT(key->modulus, &n);
    SECITEM_TO_MPINT(key->publicExponent, &e);
    if (MP_USED(&e) > MP_USED(&n)) {
        /* exponent should not be greater than modulus */
        PORT_SetError(SEC_ERROR_INVALID_KEY);
        rv = SECFailure;
        goto cleanup;
    }

    /* 2. check input out of range (needs to be in range [0..n-1]) */
    offset = 0;
    if (key->modulus.data[0] == 0) {
        offset = 1; /* may be leading 0 */
    }
    if (memcmp(input, key->modulus.data + offset, modLen) >= 0) {
        PORT_SetError(SEC_ERROR_INPUT_LEN);
        rv = SECFailure;
        goto cleanup;
    }

    /* 2 bis.  Represent message as integer in range [0..n-1] */
    CHECK_MPI_OK(mp_read_unsigned_octets(&m, input, modLen));

    /* 3. Compute c = m**e mod n */
#ifdef USE_MPI_EXPT_D
    /* XXX see which is faster */
    if (MP_USED(&e) == 1) {
        CHECK_MPI_OK(mp_exptmod_d(&m, MP_DIGIT(&e, 0), &n, &c));
    } else
#endif
    CHECK_MPI_OK(mp_exptmod(&m, &e, &n, &c));

    /* 4. result c is ciphertext */
    err = mp_to_fixlen_octets(&c, output, modLen);
    if (err >= 0) {
        /* any non-negative return counts as success */
        err = MP_OKAY;
    }

cleanup:
    mp_clear(&n);
    mp_clear(&e);
    mp_clear(&m);
    mp_clear(&c);
    if (err) {
        MP_TO_SEC_ERROR(err);
        rv = SECFailure;
    }
    return rv;
}
/* Computes result = (inBase ** exponent) mod modulus.
 *
 * An even modulus is handed to s_mp_exptmod. Otherwise the base is reduced
 * below the modulus if necessary, converted with s_mp_to_mont, a window
 * size is chosen from the exponent's bit length, and the work is passed to
 * one of the windowed exponentiation routines selected by the build-time
 * and run-time switches below.
 *
 * Returns the result of the selected routine, or the first error reported
 * by a setup step.
 *
 * NOTE(review): only the N and n0prime members of mmm are assigned here;
 * confirm that no callee reads any other member of mp_mont_modulus.
 */
mp_err
mp_exptmod(const mp_int *inBase, const mp_int *exponent,
           const mp_int *modulus, mp_int *result)
{
    const mp_int *base;
    mp_size bits_in_exponent, partial, window_bits, odd_ints;
    mp_err res;
    int nLen;
    mp_int montBase, goodBase;
    mp_mont_modulus mmm;
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
    static unsigned int max_window_bits;
#endif

    /* function for computing n0prime only works if n0 is odd */
    if (!mp_isodd(modulus)) {
        return s_mp_exptmod(inBase, exponent, modulus, result);
    }

    /* Mark the temporaries unallocated so CLEANUP can always clear them. */
    MP_DIGITS(&montBase) = 0;
    MP_DIGITS(&goodBase) = 0;

    /* Use inBase directly when it is already below the modulus. */
    if (mp_cmp(inBase, modulus) >= 0) {
        MP_CHECKOK(mp_init(&goodBase));
        base = &goodBase;
        MP_CHECKOK(mp_mod(inBase, modulus, &goodBase));
    } else {
        base = inBase;
    }

    nLen = MP_USED(modulus);
    MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));

    mmm.N = *modulus; /* a copy of the mp_int struct */

    /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
    **        where n0 = least significant mp_digit of N, the modulus.
    */
    mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));

    MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));

    bits_in_exponent = mpl_significant_bits(exponent);

    /* Pick the window size; the cache-safe code uses other thresholds. */
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
    if (mp_using_cache_safe_exp) {
        if (bits_in_exponent > 780) {
            window_bits = 6;
        } else if (bits_in_exponent > 256) {
            window_bits = 5;
        } else if (bits_in_exponent > 20) {
            window_bits = 4;
        } else {
            /* RSA public key exponents are typically under 20 bits (common
             * values are: 3, 17, 65537) and a 4-bit window is inefficient */
            window_bits = 1;
        }
    } else
#endif
    if (bits_in_exponent > 480) {
        window_bits = 6;
    } else if (bits_in_exponent > 160) {
        window_bits = 5;
    } else if (bits_in_exponent > 20) {
        window_bits = 4;
    } else {
        /* RSA public key exponents are typically under 20 bits (common
         * values are: 3, 17, 65537) and a 4-bit window is inefficient */
        window_bits = 1;
    }

#ifdef MP_USING_CACHE_SAFE_MOD_EXP
    /*
     * clamp the window size based on
     * the cache line size.
     */
    if (!max_window_bits) {
        /* Computed while max_window_bits is still zero, then cached. */
        unsigned long cache_size = s_mpi_getProcessorLineSize();

        /* processor has no cache, use 'fast' code always */
        if (cache_size == 0) {
            mp_using_cache_safe_exp = 0;
        }
        if ((cache_size == 0) || (cache_size >= 64)) {
            max_window_bits = 6;
        } else if (cache_size >= 32) {
            max_window_bits = 5;
        } else if (cache_size >= 16) {
            max_window_bits = 4;
        } else {
            max_window_bits = 1; /* should this be an assert? */
        }
    }

    /* clamp the window size down before bits_in_exponent is rounded up */
    if (mp_using_cache_safe_exp && window_bits > max_window_bits) {
        window_bits = max_window_bits;
    }
#endif

    odd_ints = 1 << (window_bits - 1);

    /* Round bits_in_exponent up to a whole number of windows. */
    partial = bits_in_exponent % window_bits;
    if (partial != 0) {
        bits_in_exponent += window_bits - partial;
    }

#ifdef MP_USING_MONT_MULF
    if (mp_using_mont_mulf) {
        MP_CHECKOK(s_mp_pad(&montBase, nLen));
        res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
                           bits_in_exponent, window_bits, odd_ints);
    } else
#endif
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
    if (mp_using_cache_safe_exp) {
        res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm,
                                nLen, bits_in_exponent, window_bits,
                                1 << window_bits);
    } else
#endif
    res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
                       bits_in_exponent, window_bits, odd_ints);

CLEANUP:
    mp_clear(&montBase);
    mp_clear(&goodBase);
    /* Don't mp_clear mmm.N because it is merely a copy of modulus.
    ** Just zap it.
    */
    memset(&mmm, 0, sizeof mmm);
    return res;
}
/* 6 words */
/* Subtracts b from a over six digits and stores the result in r. If the
 * subtraction borrows out of the top digit, the six low digits of
 * meth->irr are added back (the final carry is discarded).
 *
 * Operands with 1 to 6 used digits are zero-extended to six digits; for
 * any other digit count no digits are read and the operand counts as zero.
 * r is padded to six digits, made non-negative and clamped.
 *
 * Returns MP_OKAY, or the error from s_mp_pad.
 */
mp_err
ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit diff[6] = { 0, 0, 0, 0, 0, 0 }; /* a, then the result */
    mp_digit subt[6] = { 0, 0, 0, 0, 0, 0 }; /* b, then the field prime */
    mp_digit borrow;
    mp_size na, nb, k;

    na = MP_USED(a);
    if (na >= 1 && na <= 6) {
        for (k = 0; k < na; k++) {
            diff[k] = MP_DIGIT(a, k);
        }
    }

    nb = MP_USED(b);
    if (nb >= 1 && nb <= 6) {
        for (k = 0; k < nb; k++) {
            subt[k] = MP_DIGIT(b, k);
        }
    }

    /* diff = diff - subt, least significant digit first */
    borrow = 0;
    for (k = 0; k < 6; k++) {
        MP_SUB_BORROW(diff[k], subt[k], diff[k], borrow);
    }

    /* Do quick 'add' if we've gone under 0
     * (subtract the 2's complement of the curve field) */
    if (borrow) {
        for (k = 0; k < 6; k++) {
            subt[k] = MP_DIGIT(&meth->irr, k);
        }
        borrow = 0;
        for (k = 0; k < 6; k++) {
            MP_ADD_CARRY(subt[k], diff[k], diff[k], borrow);
        }
    }

    MP_CHECKOK(s_mp_pad(r, 6));
    for (k = 0; k < 6; k++) {
        MP_DIGIT(r, k) = diff[k];
    }
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 6;
    s_mp_clamp(r);

CLEANUP:
    return res;
}
/* 4 words */ mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r, const GFMethod *meth) { mp_err res = MP_OKAY; mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0; mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0; mp_digit borrow; switch (MP_USED(a)) { case 4: r3 = MP_DIGIT(a, 3); case 3: r2 = MP_DIGIT(a, 2); case 2: r1 = MP_DIGIT(a, 1); case 1: r0 = MP_DIGIT(a, 0); } switch (MP_USED(b)) { case 4: b3 = MP_DIGIT(b, 3); case 3: b2 = MP_DIGIT(b, 2); case 2: b1 = MP_DIGIT(b, 1); case 1: b0 = MP_DIGIT(b, 0); } #ifndef MPI_AMD64_ADD borrow = 0; MP_SUB_BORROW(r0, b0, r0, borrow); MP_SUB_BORROW(r1, b1, r1, borrow); MP_SUB_BORROW(r2, b2, r2, borrow); MP_SUB_BORROW(r3, b3, r3, borrow); #else __asm__( "xorq %4,%4 \n\t" "subq %5,%0 \n\t" "sbbq %6,%1 \n\t" "sbbq %7,%2 \n\t" "sbbq %8,%3 \n\t" "adcq $0,%4 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(borrow) : "r"(b0), "r"(b1), "r"(b2), "r"(b3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif /* Do quick 'add' if we've gone under 0 * (subtract the 2's complement of the curve field) */ if (borrow) { b3 = MP_DIGIT(&meth->irr, 3); b2 = MP_DIGIT(&meth->irr, 2); b1 = MP_DIGIT(&meth->irr, 1); b0 = MP_DIGIT(&meth->irr, 0); #ifndef MPI_AMD64_ADD borrow = 0; MP_ADD_CARRY(b0, r0, r0, borrow); MP_ADD_CARRY(b1, r1, r1, borrow); MP_ADD_CARRY(b2, r2, r2, borrow); MP_ADD_CARRY(b3, r3, r3, borrow); #else __asm__( "addq %4,%0 \n\t" "adcq %5,%1 \n\t" "adcq %6,%2 \n\t" "adcq %7,%3 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3) : "r"(b0), "r"(b1), "r"(b2), "r"(b3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif } #ifdef MPI_AMD64_ADD /* compiler fakeout? */ if ((r3 == b0) && (r1 == b0) && (r0 == b0)) { MP_CHECKOK(s_mp_pad(r, 4)); } #endif MP_CHECKOK(s_mp_pad(r, 4)); MP_DIGIT(r, 3) = r3; MP_DIGIT(r, 2) = r2; MP_DIGIT(r, 1) = r1; MP_DIGIT(r, 0) = r0; MP_SIGN(r) = MP_ZPOS; MP_USED(r) = 4; s_mp_clamp(r); CLEANUP: return res; }
/* 6 words */
/* Adds a and b over six digits and stores the result in r. If the addition
 * carries out of the top digit, or the six-digit sum is not less than
 * meth->irr, the six low digits of meth->irr are subtracted once.
 *
 * Operands with 1 to 6 used digits are zero-extended to six digits; for
 * any other digit count no digits are read and the operand counts as zero.
 * r is padded to six digits, made non-negative and clamped.
 *
 * Returns MP_OKAY, or the error from s_mp_pad.
 */
mp_err
ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit opnd[6] = { 0, 0, 0, 0, 0, 0 }; /* a, then the field prime */
    mp_digit acc[6] = { 0, 0, 0, 0, 0, 0 };  /* b, then the result */
    mp_digit carry;
    mp_size na, nb, k;

    na = MP_USED(a);
    if (na >= 1 && na <= 6) {
        for (k = 0; k < na; k++) {
            opnd[k] = MP_DIGIT(a, k);
        }
    }

    nb = MP_USED(b);
    if (nb >= 1 && nb <= 6) {
        for (k = 0; k < nb; k++) {
            acc[k] = MP_DIGIT(b, k);
        }
    }

    /* acc = opnd + acc, least significant digit first */
    carry = 0;
    for (k = 0; k < 6; k++) {
        MP_ADD_CARRY(opnd[k], acc[k], acc[k], carry);
    }

    /* Store the raw sum first: the comparison below reads it through r. */
    MP_CHECKOK(s_mp_pad(r, 6));
    for (k = 0; k < 6; k++) {
        MP_DIGIT(r, k) = acc[k];
    }
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 6;

    /* Do quick 'subract' if we've gone over
     * (add the 2's complement of the curve field) */
    opnd[5] = MP_DIGIT(&meth->irr, 5);
    /* The top digits are compared first; mp_cmp runs only if they tie. */
    if (carry || acc[5] > opnd[5] ||
        ((acc[5] == opnd[5]) && mp_cmp(r, &meth->irr) != MP_LT)) {
        for (k = 0; k < 5; k++) {
            opnd[k] = MP_DIGIT(&meth->irr, k);
        }

        carry = 0;
        for (k = 0; k < 6; k++) {
            MP_SUB_BORROW(acc[k], opnd[k], acc[k], carry);
        }
        for (k = 0; k < 6; k++) {
            MP_DIGIT(r, k) = acc[k];
        }
    }

    s_mp_clamp(r);

CLEANUP:
    return res;
}
/* 4 words */ mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r, const GFMethod *meth) { mp_err res = MP_OKAY; mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0; mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0; mp_digit carry; switch (MP_USED(a)) { case 4: a3 = MP_DIGIT(a, 3); case 3: a2 = MP_DIGIT(a, 2); case 2: a1 = MP_DIGIT(a, 1); case 1: a0 = MP_DIGIT(a, 0); } switch (MP_USED(b)) { case 4: r3 = MP_DIGIT(b, 3); case 3: r2 = MP_DIGIT(b, 2); case 2: r1 = MP_DIGIT(b, 1); case 1: r0 = MP_DIGIT(b, 0); } #ifndef MPI_AMD64_ADD carry = 0; MP_ADD_CARRY(a0, r0, r0, carry); MP_ADD_CARRY(a1, r1, r1, carry); MP_ADD_CARRY(a2, r2, r2, carry); MP_ADD_CARRY(a3, r3, r3, carry); #else __asm__( "xorq %4,%4 \n\t" "addq %5,%0 \n\t" "adcq %6,%1 \n\t" "adcq %7,%2 \n\t" "adcq %8,%3 \n\t" "adcq $0,%4 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry) : "r"(a0), "r"(a1), "r"(a2), "r"(a3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif MP_CHECKOK(s_mp_pad(r, 4)); MP_DIGIT(r, 3) = r3; MP_DIGIT(r, 2) = r2; MP_DIGIT(r, 1) = r1; MP_DIGIT(r, 0) = r0; MP_SIGN(r) = MP_ZPOS; MP_USED(r) = 4; /* Do quick 'subract' if we've gone over * (add the 2's complement of the curve field) */ a3 = MP_DIGIT(&meth->irr, 3); if (carry || r3 > a3 || ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) { a2 = MP_DIGIT(&meth->irr, 2); a1 = MP_DIGIT(&meth->irr, 1); a0 = MP_DIGIT(&meth->irr, 0); #ifndef MPI_AMD64_ADD carry = 0; MP_SUB_BORROW(r0, a0, r0, carry); MP_SUB_BORROW(r1, a1, r1, carry); MP_SUB_BORROW(r2, a2, r2, carry); MP_SUB_BORROW(r3, a3, r3, carry); #else __asm__( "subq %4,%0 \n\t" "sbbq %5,%1 \n\t" "sbbq %6,%2 \n\t" "sbbq %7,%3 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3) : "r"(a0), "r"(a1), "r"(a2), "r"(a3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif MP_DIGIT(r, 3) = r3; MP_DIGIT(r, 2) = r2; MP_DIGIT(r, 1) = r1; MP_DIGIT(r, 0) = r0; }