/* computes T = REDC(T), 2^b == R */
/* Montgomery reduction (REDC).  On entry T holds a double-width product of
 * Montgomery residues; on exit T holds the reduced residue T/R mod N with
 * 0 <= T < N.  mmm->n0prime is -N^-1 mod RADIX; mmm->b is the bit count of R.
 * Returns MP_OKAY or an mp error code from the helpers. */
mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
{
    mp_err res;
    mp_size i;

    /* make sure T has room for the full intermediate sum before shifting */
    i = MP_USED(T) + MP_USED(&mmm->N) + 2;
    MP_CHECKOK( s_mp_pad(T, i) );

    /* For each of the low USED(N) digits, add the multiple of N that zeroes
     * that digit, so the subsequent shift by b bits is exact. */
    for (i = 0; i < MP_USED(&mmm->N); ++i ) {
        mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;
        /* T += N * m_i * (MP_RADIX ** i); */
        MP_CHECKOK( s_mp_mul_d_add_offset(&mmm->N, m_i, T, i) );
    }
    s_mp_clamp(T);

    /* T /= R */
    s_mp_div_2d(T, mmm->b);

    /* result may still be >= N; one conditional subtraction suffices */
    if ((res = s_mp_cmp(T, &mmm->N)) >= 0) {
        /* T = T - N */
        MP_CHECKOK( s_mp_sub(T, &mmm->N) );
#ifdef DEBUG
        /* sanity check: after one subtraction T must be < N */
        if ((res = mp_cmp(T, &mmm->N)) >= 0) {
            res = MP_UNDEF;
            goto CLEANUP;
        }
#endif
    }
    res = MP_OKAY;
CLEANUP:
    return res;
}
/* Montgomery multiplication: c = a * b * R^-1 mod N, with the REDC folding
 * interleaved into the schoolbook multiply (one reduction digit per digit
 * of b).  a, b must be Montgomery residues; c receives a Montgomery residue.
 * Returns MP_OKAY or an mp error code. */
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
                     mp_mont_modulus *mmm)
{
    mp_digit *pb;
    mp_digit m_i;
    mp_err res;
    mp_size ib;
    mp_size useda, usedb;

    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

    if (MP_USED(a) < MP_USED(b)) {
        const mp_int *xch = b;	/* switch a and b, to do fewer outer loops */
        b = a;
        a = xch;
    }

    /* reset c and size it for the full product plus reduction headroom */
    MP_USED(c) = 1;
    MP_DIGIT(c, 0) = 0;
    ib = MP_USED(a) + MP_MAX(MP_USED(b), MP_USED(&mmm->N)) + 2;
    if((res = s_mp_pad(c, ib)) != MP_OKAY)
        goto CLEANUP;

    /* first partial product (b's digit 0), then zero the rest of c */
    useda = MP_USED(a);
    pb = MP_DIGITS(b);
    s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
    s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
    /* fold in the multiple of N that zeroes c's digit 0 (REDC step) */
    m_i = MP_DIGIT(c, 0) * mmm->n0prime;
    s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);

    /* Outer loop:  Digits of b */
    usedb = MP_USED(b);
    for (ib = 1; ib < usedb; ib++) {
        mp_digit b_i    = *pb++;

        /* Inner product:  Digits of a */
        if (b_i)
            s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
        /* REDC step for this column */
        m_i = MP_DIGIT(c, ib) * mmm->n0prime;
        s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
    }
    /* if b was shorter than N, finish the remaining reduction digits */
    if (usedb < MP_USED(&mmm->N)) {
        for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib ) {
            m_i = MP_DIGIT(c, ib) * mmm->n0prime;
            s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
        }
    }
    s_mp_clamp(c);
    /* divide by R (exact, since the low digits were zeroed above) */
    s_mp_div_2d(c, mmm->b);
    /* conditional final subtraction to bring result below N */
    if (s_mp_cmp(c, &mmm->N) >= 0) {
        MP_CHECKOK( s_mp_sub(c, &mmm->N) );
    }
    res = MP_OKAY;

CLEANUP:
    return res;
}
/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k * P(x,
 * y).  If x, y = NULL, then P is assumed to be the generator (base point)
 * of the group of points on the elliptic curve. Input and output values
 * are assumed to be NOT field-encoded. */
mp_err
ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
            const mp_int *py, mp_int *rx, mp_int *ry)
{
    mp_err res = MP_OKAY;
    mp_int kt;          /* the scalar actually used: k itself or k mod order */

    ARGCHK((k != NULL) && (group != NULL), MP_BADARG);
    MP_DIGITS(&kt) = 0;

    /* want scalar to be less than or equal to group order */
    if (mp_cmp(k, &group->order) > 0) {
        MP_CHECKOK(mp_init(&kt, FLAG(k)));
        MP_CHECKOK(mp_mod(k, &group->order, &kt));
    } else {
        /* alias k's storage directly (no copy).  CLEANUP recognizes this
         * case by comparing digit pointers and skips mp_clear then. */
        MP_SIGN(&kt) = MP_ZPOS;
        MP_USED(&kt) = MP_USED(k);
        MP_ALLOC(&kt) = MP_ALLOC(k);
        MP_DIGITS(&kt) = MP_DIGITS(k);
    }

    if ((px == NULL) || (py == NULL)) {
        /* multiply the group's generator point */
        if (group->base_point_mul) {
            MP_CHECKOK(group->base_point_mul(&kt, rx, ry, group));
        } else {
            MP_CHECKOK(group->
                       point_mul(&kt, &group->genx, &group->geny, rx, ry,
                                 group));
        }
    } else {
        /* field-encode the input point first when the field method needs it;
         * rx/ry double as scratch for the encoded coordinates */
        if (group->meth->field_enc) {
            MP_CHECKOK(group->meth->field_enc(px, rx, group->meth));
            MP_CHECKOK(group->meth->field_enc(py, ry, group->meth));
            MP_CHECKOK(group->point_mul(&kt, rx, ry, rx, ry, group));
        } else {
            MP_CHECKOK(group->point_mul(&kt, px, py, rx, ry, group));
        }
    }
    /* decode the result back to plain (non-field-encoded) form */
    if (group->meth->field_dec) {
        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
    }

CLEANUP:
    /* only clear kt if it owns its digits (i.e. it is not aliasing k) */
    if (MP_DIGITS(&kt) != MP_DIGITS(k)) {
        mp_clear(&kt);
    }
    return res;
}
/* Reverse the operation above for one mp_int.
 * Reconstruct one mp_int from its column in the weaved array.
 * Every read accesses every element of the weaved array, in order to
 * avoid timing attacks based on patterns of memory accesses. */
mp_err weave_to_mpi(mp_int *a,                /* out, result */
                    const mp_digit *weaved,   /* in, byte matrix */
                    mp_size index,            /* which column to read */
                    mp_size nDigits,          /* number of mp_digits in each bignum */
                    mp_size nBignums)         /* width of the matrix */
{
    /* these are indices, but need to be the same size as mp_digit
     * because of the CONST_TIME operations */
    mp_digit i, j;
    mp_digit d;
    mp_digit *pDest = MP_DIGITS(a);

    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = nDigits;

    assert(weaved != NULL);

    /* Fetch the proper column in constant time, indexing over the whole array.
     * CONST_TIME_EQ yields an all-ones mask only for the selected column, so
     * the OR accumulates exactly that column's value while every entry of the
     * row is still read.  Do not "optimize" this loop: the uniform access
     * pattern is the point. */
    for (i = 0; i < nDigits; ++i) {
        d = 0;
        for (j = 0; j < nBignums; ++j) {
            d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index);
        }
        pDest[i] = d;
    }

    s_mp_clamp(a);
    return MP_OKAY;
}
/*
 * mpi_to_weave takes an array of bignums, a matrix in which each bignum
 * occupies all the columns of a row, and transposes it into a matrix in
 * which each bignum occupies a column of every row. The first row of the
 * input matrix becomes the first column of the output matrix. The n'th
 * row of input becomes the n'th column of output. The input data is said
 * to be "interleaved" or "woven" into the output matrix.
 *
 * The array of bignums is left in this woven form. Each time a single
 * bignum value is needed, it is recreated by fetching the n'th column,
 * forming a single row which is the new bignum.
 *
 * The purpose of this interleaving is to make it impossible to determine
 * which of the bignums is being used in any one operation by examining the
 * pattern of cache misses.
 *
 * The weaving function does not transpose the entire input matrix in one call.
 * It transposes 4 rows of mp_ints into their respective columns of output.
 *
 * There are two different implementations of the weaving and unweaving code
 * in this file. One uses byte loads and stores. The second uses loads and
 * stores of mp_weave_word size values. The weaved forms of these two
 * implementations differ. Consequently, each one has its own explanation.
 *
 * Here is the explanation for the byte-at-a-time implementation.
 *
 * This implementation treats each mp_int bignum as an array of bytes,
 * rather than as an array of mp_digits. It stores those bytes as a
 * column of bytes in the output matrix. It doesn't care if the machine
 * uses big-endian or little-endian byte ordering within mp_digits.
 * The first byte of the mp_digit array becomes the first byte in the output
 * column, regardless of whether that byte is the MSB or LSB of the mp_digit.
 *
 * "bignums" is an array of mp_ints.
 * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
 *
 * "weaved" is the weaved output matrix.
 * The first byte of bignums[0] is stored in weaved[0].
 *
 * "nBignums" is the total number of bignums in the array of which "bignums"
 * is a part.
 *
 * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array.
 * mp_ints that use less than nDigits digits are logically padded with zeros
 * while being stored in the weaved array.
 */
mp_err mpi_to_weave(const mp_int *bignums, unsigned char *weaved,
                    mp_size nDigits,   /* in each mp_int of input */
                    mp_size nBignums)  /* in the entire source array */
{
    mp_size i;
    /* one past the last byte of the whole weaved matrix */
    unsigned char * endDest = weaved + (nDigits * nBignums * sizeof(mp_digit));

    /* Transpose WEAVE_WORD_SIZE mp_ints: mp_int i's bytes land in column i,
     * strided nBignums bytes apart. */
    for (i=0; i < WEAVE_WORD_SIZE; i++) {
        mp_size used = MP_USED(&bignums[i]);
        unsigned char *pSrc   = (unsigned char *)MP_DIGITS(&bignums[i]);
        unsigned char *endSrc = pSrc + (used * sizeof(mp_digit));
        unsigned char *pDest  = weaved + i;

        ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG);
        ARGCHK(used <= nDigits, MP_BADARG);

        /* copy this bignum's bytes into its column */
        for (; pSrc < endSrc; pSrc++) {
            *pDest = *pSrc;
            pDest += nBignums;
        }
        /* zero-pad the rest of the column up to nDigits digits */
        while (pDest < endDest) {
            *pDest = 0;
            pDest += nBignums;
        }
    }
    return MP_OKAY;
}
/* reverse the operation above for one entry.
 * b points to the offset into the weave array of the power we are
 * calculating */
mp_err weave_to_mpi(mp_int *a, const unsigned char *b, mp_size b_size,
                    mp_size count)
{
    mp_digit *pb = MP_DIGITS(a);
    mp_digit *end = &pb[b_size];

    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = b_size;

    /* Rebuild each mp_digit from one byte per weave row; consecutive bytes
     * of a digit are 'count' bytes apart in the weave array. */
    for (; pb < end; pb++) {
        register mp_digit digit;

        /* first byte */
        digit = *b << 8; b += count;
#define MPI_UNWEAVE_ONE_STEP digit |= *b; b += count; digit = digit << 8;
        /* The switch is on sizeof(mp_digit) in BYTES and the cases fall
         * through deliberately, so exactly sizeof(mp_digit)-2 middle bytes
         * are gathered here (first and last bytes are handled outside). */
        switch (sizeof(mp_digit)) {
        case 32:
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            /* fallthrough */
        case 16:
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            /* fallthrough */
        case 8:
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            /* fallthrough */
        case 4:
            MPI_UNWEAVE_ONE_STEP
            MPI_UNWEAVE_ONE_STEP
            /* fallthrough */
        case 2:
            break;
        }
        /* last byte (no trailing shift) */
        digit |= *b; b += count;
        *pb = digit;
    }
    s_mp_clamp(a);
    return MP_OKAY;
}
/* Construct a generic GFMethod for arithmetic over prime fields with * irreducible irr. */ GFMethod * GFMethod_consGFp(const mp_int *irr) { mp_err res = MP_OKAY; GFMethod *meth = NULL; meth = GFMethod_new(); if (meth == NULL) return NULL; MP_CHECKOK(mp_copy(irr, &meth->irr)); meth->irr_arr[0] = mpl_significant_bits(irr); meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] = meth->irr_arr[4] = 0; switch (MP_USED(&meth->irr)) { /* maybe we need 1 and 2 words here as well?*/ case 3: meth->field_add = &ec_GFp_add_3; meth->field_sub = &ec_GFp_sub_3; break; case 4: meth->field_add = &ec_GFp_add_4; meth->field_sub = &ec_GFp_sub_4; break; case 5: meth->field_add = &ec_GFp_add_5; meth->field_sub = &ec_GFp_sub_5; break; case 6: meth->field_add = &ec_GFp_add_6; meth->field_sub = &ec_GFp_sub_6; break; default: meth->field_add = &ec_GFp_add; meth->field_sub = &ec_GFp_sub; } meth->field_neg = &ec_GFp_neg; meth->field_mod = &ec_GFp_mod; meth->field_mul = &ec_GFp_mul; meth->field_sqr = &ec_GFp_sqr; meth->field_div = &ec_GFp_div; meth->field_enc = NULL; meth->field_dec = NULL; meth->extra1 = NULL; meth->extra2 = NULL; meth->extra_free = NULL; CLEANUP: if (res != MP_OKAY) { GFMethod_free(meth); return NULL; } return meth; }
/* Convert x into its Montgomery residue: xMont = x * R mod N, where
 * R = RADIX^USED(N) and N is mmm->N.  Implemented as a digit-shift
 * followed by one modular reduction. */
STATIC
mp_err s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
{
    mp_err res;

    /* xMont = x * R mod N   where  N is modulus */
    MP_CHECKOK( mp_copy( x, xMont ) );
    /* multiply by R: shift left by USED(N) whole digits (this is the
     * "<< b" the original comment referred to, expressed in digits) */
    MP_CHECKOK( s_mp_lshd( xMont, MP_USED(&mmm->N) ) );  /* xMont = x << b */
    MP_CHECKOK( mp_div(xMont, &mmm->N, 0, xMont) );      /*  mod N */
CLEANUP:
    return res;
}
/* Reverse the operation above for one mp_int.
 * Reconstruct one mp_int from its column in the weaved array.
 * "pSrc" points to the offset into the weave array of the bignum we
 * are going to reconstruct.
 *
 * NOTE(review): unlike the masked variant, this reads only the selected
 * column (strided by nBignums) rather than the whole matrix. */
mp_err weave_to_mpi(mp_int *a,                 /* output, result */
                    const unsigned char *pSrc, /* input, byte matrix */
                    mp_size nDigits,           /* per mp_int output */
                    mp_size nBignums)          /* bignums in weaved matrix */
{
    unsigned char *pDest = (unsigned char *)MP_DIGITS(a);
    unsigned char *endDest = pDest + (nDigits * sizeof(mp_digit));

    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = nDigits;

    /* gather this bignum's column, one byte per weave row */
    for (; pDest < endDest; pSrc += nBignums, pDest++) {
        *pDest = *pSrc;
    }

    s_mp_clamp(a);
    return MP_OKAY;
}
/* Fill the low `prec` digits of z with pseudo-random values built from
 * rand(), clamp prec to z's allocated capacity, and record the new
 * used-digit count.  Uses whatever seed state rand() currently has. */
void mp_int_random(mp_int z, int prec)
{
    int pos;

    if (prec > MP_ALLOC(z))
        prec = MP_ALLOC(z);

    for (pos = 0; pos < prec; ++pos) {
        mp_digit value = 0;
        unsigned int byte;

        /* assemble one digit a byte at a time from successive rand() calls */
        for (byte = 0; byte < sizeof(value); ++byte)
            value = (value << CHAR_BIT) | (rand() & UCHAR_MAX);

        z->digits[pos] = value;
    }

    MP_USED(z) = prec;
}
/* Computes R = 2P.  Elliptic curve points P and R can be identical.  Uses
 * Jacobian coordinates.
 *
 * Assumes input is already field-encoded using field_enc, and returns
 * output that is still field-encoded.
 *
 * This routine implements Point Doubling in the Jacobian Projective
 * space as described in the paper "Efficient elliptic curve exponentiation
 * using mixed coordinates", by H. Cohen, A Miyaji, T. Ono.
 */
mp_err
ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz,
                  mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group)
{
    mp_err res = MP_OKAY;
    mp_int t0, t1, M, S;

    MP_DIGITS(&t0) = 0;
    MP_DIGITS(&t1) = 0;
    MP_DIGITS(&M) = 0;
    MP_DIGITS(&S) = 0;
    MP_CHECKOK(mp_init(&t0));
    MP_CHECKOK(mp_init(&t1));
    MP_CHECKOK(mp_init(&M));
    MP_CHECKOK(mp_init(&S));

    /* P == inf or P == -P */
    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) {
        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
        goto CLEANUP;
    }

    if (mp_cmp_d(pz, 1) == 0) {
        /* pz == 1: affine input, so pz^2 and pz^4 drop out.
         * M = 3 * px^2 + a */
        MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
        MP_CHECKOK(group->meth->
                   field_add(&t0, &group->curvea, &M, group->meth));
    } else if (MP_SIGN(&group->curvea) == MP_NEG &&
               MP_USED(&group->curvea) == 1 &&
               MP_DIGIT(&group->curvea, 0) == 3) {
        /* special case for a == -3: factor instead of computing pz^4.
         * M = 3 * (px + pz^2) * (px - pz^2) */
        MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth));
        MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth));
        MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth));
    } else {
        /* general case: M = 3 * (px^2) + a * (pz^4) */
        MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
        MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
        MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth));
        MP_CHECKOK(group->meth->
                   field_mul(&M, &group->curvea, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth));
    }

    /* rz = 2 * py * pz */
    /* t0 = 4 * py^2 */
    if (mp_cmp_d(pz, 1) == 0) {
        MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth));
        MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth));
    } else {
        MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth));
        MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth));
        MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth));
    }

    /* S = 4 * px * py^2 = px * (2 * py)^2 */
    MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth));

    /* rx = M^2 - 2 * S */
    MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth));
    MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth));
    MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth));

    /* ry = M * (S - rx) - 8 * py^4 */
    /* t1 = t0^2 = 16 * py^4; halve it mod p (add p first if odd so the
     * division by 2 is exact) to obtain 8 * py^4 */
    MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth));
    if (mp_isodd(&t1)) {
        MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1));
    }
    MP_CHECKOK(mp_div_2(&t1, &t1));
    MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth));
    MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth));
    MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth));

CLEANUP:
    mp_clear(&t0);
    mp_clear(&t1);
    mp_clear(&M);
    mp_clear(&S);
    return res;
}
/*
** Perform a raw public-key operation
** Length of input and output buffers are equal to key's modulus len.
*/
SECStatus
RSA_PublicKeyOp(RSAPublicKey *key, unsigned char *output,
                const unsigned char *input)
{
    unsigned int modLen, expLen, offset;
    mp_int n, e, m, c;
    mp_err err = MP_OKAY;
    SECStatus rv = SECSuccess;
    if (!key || !output || !input) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }
    /* zero digit pointers first so the cleanup path is always safe */
    MP_DIGITS(&n) = 0;
    MP_DIGITS(&e) = 0;
    MP_DIGITS(&m) = 0;
    MP_DIGITS(&c) = 0;
    CHECK_MPI_OK( mp_init(&n) );
    CHECK_MPI_OK( mp_init(&e) );
    CHECK_MPI_OK( mp_init(&m) );
    CHECK_MPI_OK( mp_init(&c) );
    modLen = rsa_modulusLen(&key->modulus);
    expLen = rsa_modulusLen(&key->publicExponent);
    /* 1.  Obtain public key (n, e) */
    if (BAD_RSA_KEY_SIZE(modLen, expLen)) {
        PORT_SetError(SEC_ERROR_INVALID_KEY);
        rv = SECFailure;
        goto cleanup;
    }
    SECITEM_TO_MPINT(key->modulus, &n);
    SECITEM_TO_MPINT(key->publicExponent, &e);
    if (e.used > n.used) {
        /* exponent should not be greater than modulus */
        PORT_SetError(SEC_ERROR_INVALID_KEY);
        rv = SECFailure;
        goto cleanup;
    }
    /* 2. check input out of range (needs to be in range [0..n-1]) */
    offset = (key->modulus.data[0] == 0) ? 1 : 0; /* may be leading 0 */
    /* big-endian byte compare of input against the modulus bytes */
    if (memcmp(input, key->modulus.data + offset, modLen) >= 0) {
        PORT_SetError(SEC_ERROR_INPUT_LEN);
        rv = SECFailure;
        goto cleanup;
    }
    /* 2 bis.  Represent message as integer in range [0..n-1] */
    CHECK_MPI_OK( mp_read_unsigned_octets(&m, input, modLen) );
    /* 3.  Compute c = m**e mod n */
#ifdef USE_MPI_EXPT_D
    /* XXX see which is faster */
    if (MP_USED(&e) == 1) {
        CHECK_MPI_OK( mp_exptmod_d(&m, MP_DIGIT(&e, 0), &n, &c) );
    } else
#endif
    CHECK_MPI_OK( mp_exptmod(&m, &e, &n, &c) );
    /* 4.  result c is ciphertext */
    err = mp_to_fixlen_octets(&c, output, modLen);
    /* mp_to_fixlen_octets may return a positive byte count on success;
     * normalize any non-negative result to MP_OKAY */
    if (err >= 0) err = MP_OKAY;
cleanup:
    mp_clear(&n);
    mp_clear(&e);
    mp_clear(&m);
    mp_clear(&c);
    if (err) {
        MP_TO_SEC_ERROR(err);
        rv = SECFailure;
    }
    return rv;
}
/* Computes R = nP based on algorithm 2P of Lopez, J. and Dahab, R.  "Fast
 * multiplication on elliptic curves over GF(2^m) without
 * precomputation". Elliptic curve points P and R can be identical. Uses
 * Montgomery projective coordinates. */
mp_err
ec_GF2m_pt_mul_mont(const mp_int *n, const mp_int *px, const mp_int *py,
                    mp_int *rx, mp_int *ry, const ECGroup *group)
{
    mp_err res = MP_OKAY;
    mp_int x1, x2, z1, z2;      /* the two ladder points (x1:z1), (x2:z2) */
    int i, j;                   /* digit index / bit index into n */
    mp_digit top_bit, mask;

    MP_DIGITS(&x1) = 0;
    MP_DIGITS(&x2) = 0;
    MP_DIGITS(&z1) = 0;
    MP_DIGITS(&z2) = 0;
    MP_CHECKOK(mp_init(&x1, FLAG(n)));
    MP_CHECKOK(mp_init(&x2, FLAG(n)));
    MP_CHECKOK(mp_init(&z1, FLAG(n)));
    MP_CHECKOK(mp_init(&z2, FLAG(n)));

    /* if result should be point at infinity */
    if ((mp_cmp_z(n) == 0) || (ec_GF2m_pt_is_inf_aff(px, py) == MP_YES)) {
        MP_CHECKOK(ec_GF2m_pt_set_inf_aff(rx, ry));
        goto CLEANUP;
    }

    /* ladder start: (x1:z1) = P, (x2:z2) = 2P */
    MP_CHECKOK(mp_copy(px, &x1));                               /* x1 = px */
    MP_CHECKOK(mp_set_int(&z1, 1));                             /* z1 = 1 */
    MP_CHECKOK(group->meth->field_sqr(&x1, &z2, group->meth));  /* z2 = x1^2 = px^2 */
    MP_CHECKOK(group->meth->field_sqr(&z2, &x2, group->meth));
    MP_CHECKOK(group->meth->field_add(&x2, &group->curveb, &x2,
                                      group->meth));            /* x2 = px^4 + b */

    /* find top-most bit and go one past it */
    i = MP_USED(n) - 1;
    j = MP_DIGIT_BIT - 1;
    top_bit = 1;
    top_bit <<= MP_DIGIT_BIT - 1;
    mask = top_bit;
    while (!(MP_DIGITS(n)[i] & mask)) {
        mask >>= 1;
        j--;
    }
    mask >>= 1;
    j--;

    /* if top most bit was at word break, go to next word */
    if (!mask) {
        i--;
        j = MP_DIGIT_BIT - 1;
        mask = top_bit;
    }

    /* Montgomery ladder over the remaining bits of n, most significant
     * first: each step does one Madd and one Mdouble, with the roles of
     * (x1:z1)/(x2:z2) chosen by the current bit */
    for (; i >= 0; i--) {
        for (; j >= 0; j--) {
            if (MP_DIGITS(n)[i] & mask) {
                MP_CHECKOK(gf2m_Madd(px, &x1, &z1, &x2, &z2, group, FLAG(n)));
                MP_CHECKOK(gf2m_Mdouble(&x2, &z2, group, FLAG(n)));
            } else {
                MP_CHECKOK(gf2m_Madd(px, &x2, &z2, &x1, &z1, group, FLAG(n)));
                MP_CHECKOK(gf2m_Mdouble(&x1, &z1, group, FLAG(n)));
            }
            mask >>= 1;
        }
        j = MP_DIGIT_BIT - 1;
        mask = top_bit;
    }

    /* convert out of "projective" coordinates */
    i = gf2m_Mxy(px, py, &x1, &z1, &x2, &z2, group);
    if (i == 0) {
        res = MP_BADARG;
        goto CLEANUP;
    } else if (i == 1) {
        MP_CHECKOK(ec_GF2m_pt_set_inf_aff(rx, ry));
    } else {
        /* gf2m_Mxy leaves the affine result in (x2, z2) */
        MP_CHECKOK(mp_copy(&x2, rx));
        MP_CHECKOK(mp_copy(&z2, ry));
    }

CLEANUP:
    mp_clear(&x1);
    mp_clear(&x2);
    mp_clear(&z1);
    mp_clear(&z2);
    return res;
}
/* Probabilistic primality test on a, performing nt independent rounds.
 * Writes a-1 = m * 2^b with m odd, then for random x < a checks the
 * Miller-Rabin conditions x^m == 1 or x^(m*2^k) == a-1 (mod a).
 * Returns MP_YES (probably prime), MP_NO (composite), or an error code. */
mp_err mpp_pprime(mp_int *a, int nt)
{
    mp_err res;
    mp_int x, amo, m, z; /* "amo" = "a minus one" */
    int iter;
    unsigned int jx;
    mp_size b;

    ARGCHK(a != NULL, MP_BADARG);

    MP_DIGITS(&x) = 0;
    MP_DIGITS(&amo) = 0;
    MP_DIGITS(&m) = 0;
    MP_DIGITS(&z) = 0;

    /* Initialize temporaries... */
    MP_CHECKOK( mp_init(&amo));
    /* Compute amo = a - 1 for what follows... */
    MP_CHECKOK( mp_sub_d(a, 1, &amo) );

    b = mp_trailing_zeros(&amo);
    if (!b) { /* a was even ? */
        res = MP_NO;
        goto CLEANUP;
    }

    MP_CHECKOK( mp_init_size(&x, MP_USED(a)) );
    MP_CHECKOK( mp_init(&z) );
    MP_CHECKOK( mp_init(&m) );
    /* m = (a - 1) / 2^b, the odd part of a-1 */
    MP_CHECKOK( mp_div_2d(&amo, b, &m, 0) );

    /* Do the test nt times... */
    for(iter = 0; iter < nt; iter++) {

        /* Choose a random value for x < a */
        s_mp_pad(&x, USED(a));
        mpp_random(&x);
        MP_CHECKOK( mp_mod(&x, a, &x) );

        /* Compute z = (x ** m) mod a */
        MP_CHECKOK( mp_exptmod(&x, &m, a, &z) );

        if(mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
            res = MP_YES;
            continue;
        }

        res = MP_NO;  /* just in case the following for loop never executes. */
        for (jx = 1; jx < b; jx++) {
            /* z = z^2 (mod a) */
            MP_CHECKOK( mp_sqrmod(&z, a, &z) );
            res = MP_NO;  /* previous line set res to MP_YES */

            if(mp_cmp_d(&z, 1) == 0) {
                /* hit 1 without passing through a-1: composite */
                break;
            }
            if(mp_cmp(&z, &amo) == 0) {
                res = MP_YES;
                break;
            }
        } /* end testing loop */

        /* If the test passes, we will continue iterating, but a failed
           test means the candidate is definitely NOT prime, so we will
           immediately break out of this loop */
        if(res == MP_NO)
            break;
    } /* end iterations loop */

CLEANUP:
    mp_clear(&m);
    mp_clear(&z);
    mp_clear(&x);
    mp_clear(&amo);
    return res;
} /* end mpp_pprime() */
/* Converts from an mpint into a floating point representation. */ void ecfp_i2fp(double *out, const mp_int *x, const ECGroup *ecgroup) { int i; int j = 0; int size; double shift = 1; mp_digit *in; EC_group_fp *group = (EC_group_fp *) ecgroup->extra1; #ifdef ECL_DEBUG /* if debug mode, convert result back using ecfp_fp2i into cmp, then * compare to x. */ mp_int cmp; MP_DIGITS(&cmp) = NULL; mp_init(&cmp); #endif ECFP_ASSERT(group != NULL); /* init output to 0 (since we skip over some terms) */ for (i = 0; i < group->numDoubles; i++) out[i] = 0; i = 0; size = MP_USED(x); in = MP_DIGITS(x); /* Copy from int into doubles */ #ifdef ECL_THIRTY_TWO_BIT while (j < size) { while (group->doubleBitSize * (i + 1) <= 32 * j) { i++; } ECFP_ASSERT(group->doubleBitSize * i <= 32 * j); out[i] = in[j]; out[i] *= shift; shift *= ecfp_two32; j++; } #else while (j < size) { while (group->doubleBitSize * (i + 1) <= 64 * j) { i++; } ECFP_ASSERT(group->doubleBitSize * i <= 64 * j); out[i] = (in[j] & 0x00000000FFFFFFFF) * shift; while (group->doubleBitSize * (i + 1) <= 64 * j + 32) { i++; } ECFP_ASSERT(24 * i <= 64 * j + 32); out[i] = (in[j] & 0xFFFFFFFF00000000) * shift; shift *= ecfp_two64; j++; } #endif /* Realign bits to match double boundaries */ ecfp_tidyShort(out, group); #ifdef ECL_DEBUG /* Convert result back to mp_int, compare to original */ ecfp_fp2i(&cmp, out, ecgroup); ECFP_ASSERT(mp_cmp(&cmp, x) == 0); mp_clear(&cmp); #endif }
/* Converts from a floating point representation into an mp_int. Expects
 * that d is already reduced. */
void
ecfp_fp2i(mp_int *mpout, double *d, const ECGroup *ecgroup)
{
    EC_group_fp *group = (EC_group_fp *) ecgroup->extra1;
    /* VLA staging buffer of 16-bit chunks covering primeBitSize bits */
    unsigned short i16[(group->primeBitSize + 15) / 16];
    double q = 1;           /* running scale 2^(-16*j) applied to d's terms */

#ifdef ECL_THIRTY_TWO_BIT
    /* TEST uint32_t z = 0; */
    unsigned int z = 0;
#else
    uint64_t z = 0;
#endif
    int zBits = 0;          /* number of valid bits currently held in z */
    int copiedBits = 0;
    int i = 0;
    int j = 0;
    mp_digit *out;

    /* Result should always be >= 0, so set sign accordingly */
    MP_SIGN(mpout) = MP_ZPOS;

    /* Tidy up so we're just dealing with positive numbers */
    ecfp_positiveTidy(d, group);

    /* We might need to do this reduction step more than once if the
     * reduction adds smaller terms which carry-over to cause another
     * reduction. However, this should happen very rarely, if ever,
     * depending on the elliptic curve. */
    do {
        /* Init loop data */
        z = 0;
        zBits = 0;
        q = 1;
        i = 0;
        j = 0;
        copiedBits = 0;

        /* Might have to do a bit more reduction */
        group->ecfp_singleReduce(d, group);

        /* Grow the size of the mpint if it's too small */
        s_mp_grow(mpout, group->numInts);
        MP_USED(mpout) = group->numInts;
        out = MP_DIGITS(mpout);

        /* Convert double to 16 bit integers: accumulate doubles into z,
         * peeling off 16 bits at a time into i16[] */
        while (copiedBits < group->primeBitSize) {
            if (zBits < 16) {
                z += d[i] * q;
                i++;
                ECFP_ASSERT(i < (group->primeBitSize + 15) / 16);
                zBits += group->doubleBitSize;
            }
            i16[j] = z;
            j++;
            z >>= 16;
            zBits -= 16;
            q *= ecfp_twom16;
            copiedBits += 16;
        }
    } while (z != 0); /* leftover bits mean another reduction pass is needed */

    /* Convert 16 bit integers to mp_digit */
#ifdef ECL_THIRTY_TWO_BIT
    for (i = 0; i < (group->primeBitSize + 15) / 16; i += 2) {
        *out = 0;
        if (i + 1 < (group->primeBitSize + 15) / 16) {
            *out = i16[i + 1];
            *out <<= 16;
        }
        *out++ += i16[i];
    }
#else /* 64 bit */
    for (i = 0; i < (group->primeBitSize + 15) / 16; i += 4) {
        *out = 0;
        if (i + 3 < (group->primeBitSize + 15) / 16) {
            *out = i16[i + 3];
            *out <<= 16;
        }
        if (i + 2 < (group->primeBitSize + 15) / 16) {
            *out += i16[i + 2];
            *out <<= 16;
        }
        if (i + 1 < (group->primeBitSize + 15) / 16) {
            *out += i16[i + 1];
            *out <<= 16;
        }
        *out++ += i16[i];
    }
#endif

    /* Perform final reduction.  mpout should already be the same number
     * of bits as p, but might not be less than p.  Make it so. Since
     * mpout has the same number of bits as p, and 2p has a larger bit
     * size, then mpout < 2p, so a single subtraction of p will suffice. */
    if (mp_cmp(mpout, &ecgroup->meth->irr) >= 0) {
        mp_sub(mpout, &ecgroup->meth->irr, mpout);
    }

    /* Shrink the size of the mp_int to the actual used size (required for
     * mp_cmp_z == 0) */
    out = MP_DIGITS(mpout);
    for (i = group->numInts - 1; i > 0; i--) {
        if (out[i] != 0)
            break;
    }
    MP_USED(mpout) = i + 1;

    /* Should be between 0 and p-1 */
    ECFP_ASSERT(mp_cmp(mpout, &ecgroup->meth->irr) < 0);
    ECFP_ASSERT(mp_cmp_z(mpout) >= 0);
}
/* Modular exponentiation: result = inBase ** exponent mod modulus.
 * For odd moduli, uses windowed Montgomery exponentiation, dispatching to
 * a floating-point, cache-safe, or plain integer inner routine depending
 * on build options and runtime flags.  Even moduli fall back to
 * s_mp_exptmod.  Returns MP_OKAY or an mp error code. */
mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
                  const mp_int *modulus, mp_int *result)
{
    const mp_int *base;
    mp_size bits_in_exponent, i, window_bits, odd_ints;
    mp_err res;
    int nLen;
    mp_int montBase, goodBase;
    mp_mont_modulus mmm;
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
    static unsigned int max_window_bits;
#endif

    /* function for computing n0prime only works if n0 is odd */
    if (!mp_isodd(modulus))
        return s_mp_exptmod(inBase, exponent, modulus, result);

    MP_DIGITS(&montBase) = 0;
    MP_DIGITS(&goodBase) = 0;

    /* reduce the base below the modulus if necessary */
    if (mp_cmp(inBase, modulus) < 0) {
        base = inBase;
    } else {
        MP_CHECKOK( mp_init(&goodBase) );
        base = &goodBase;
        MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
    }

    nLen = MP_USED(modulus);
    MP_CHECKOK( mp_init_size(&montBase, 2 * nLen + 2) );

    mmm.N = *modulus;  /* a copy of the mp_int struct */

    /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
    **		where n0 = least significant mp_digit of N, the modulus.
    */
    mmm.n0prime = 0 - s_mp_invmod_radix( MP_DIGIT(modulus, 0) );

    MP_CHECKOK( s_mp_to_mont(base, &mmm, &montBase) );

    bits_in_exponent = mpl_significant_bits(exponent);
    /* pick window size from the exponent length (larger exponents amortize
     * a bigger precomputed table) */
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
    if (mp_using_cache_safe_exp) {
        if (bits_in_exponent > 780)
            window_bits = 6;
        else if (bits_in_exponent > 256)
            window_bits = 5;
        else if (bits_in_exponent > 20)
            window_bits = 4;
        /* RSA public key exponents are typically under 20 bits (common values
         * are: 3, 17, 65537) and a 4-bit window is inefficient
         */
        else
            window_bits = 1;
    } else
#endif
    if (bits_in_exponent > 480)
        window_bits = 6;
    else if (bits_in_exponent > 160)
        window_bits = 5;
    else if (bits_in_exponent > 20)
        window_bits = 4;
    /* RSA public key exponents are typically under 20 bits (common values
     * are: 3, 17, 65537) and a 4-bit window is inefficient
     */
    else
        window_bits = 1;

#ifdef MP_USING_CACHE_SAFE_MOD_EXP
    /*
     * clamp the window size based on
     * the cache line size.
     */
    if (!max_window_bits) {
        unsigned long cache_size = s_mpi_getProcessorLineSize();
        /* processor has no cache, use 'fast' code always */
        if (cache_size == 0) {
            mp_using_cache_safe_exp = 0;
        }
        if ((cache_size == 0) || (cache_size >= 64)) {
            max_window_bits = 6;
        } else if (cache_size >= 32) {
            max_window_bits = 5;
        } else if (cache_size >= 16) {
            max_window_bits = 4;
        } else
            max_window_bits = 1; /* should this be an assert? */
    }

    /* clamp the window size down before we calculate bits_in_exponent */
    if (mp_using_cache_safe_exp) {
        if (window_bits > max_window_bits) {
            window_bits = max_window_bits;
        }
    }
#endif

    /* round the exponent bit count up to a multiple of the window size */
    odd_ints = 1 << (window_bits - 1);
    i = bits_in_exponent % window_bits;
    if (i != 0) {
        bits_in_exponent += window_bits - i;
    }

#ifdef MP_USING_MONT_MULF
    if (mp_using_mont_mulf) {
        MP_CHECKOK( s_mp_pad(&montBase, nLen) );
        res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
                           bits_in_exponent, window_bits, odd_ints);
    } else
#endif
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
    if (mp_using_cache_safe_exp) {
        res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm,
                                nLen, bits_in_exponent, window_bits,
                                1 << window_bits);
    } else
#endif
    res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
                       bits_in_exponent, window_bits, odd_ints);

CLEANUP:
    mp_clear(&montBase);
    mp_clear(&goodBase);
    /* Don't mp_clear mmm.N because it is merely a copy of modulus.
    ** Just zap it.
    */
    memset(&mmm, 0, sizeof mmm);
    return res;
}
/* Do modular exponentiation using integer multiply code.
 *
 * Cache-attack-resistant variant: the table of precomputed powers is kept
 * in byte-"weaved" form (see mpi_to_weave/weave_to_mpi) so that table
 * lookups touch memory uniformly.  montBase must already be a Montgomery
 * residue; result receives the plain (non-Montgomery) value. */
mp_err mp_exptmod_safe_i(const mp_int *   montBase,
                    const mp_int *   exponent,
		    const mp_int *   modulus,
		    mp_int *         result,
		    mp_mont_modulus *mmm,
		    int              nLen,
		    mp_size          bits_in_exponent,
		    mp_size          window_bits,
		    mp_size          num_powers)
{
    mp_int *pa1, *pa2, *ptmp;           /* ping-pong accumulators */
    mp_size i;
    mp_size first_window;
    mp_err  res;
    int     expOff;
    mp_int  accum1, accum2, accum[WEAVE_WORD_SIZE];
    mp_int  tmp;
    unsigned char *powersArray;
    unsigned char *powers;

    MP_DIGITS(&accum1) = 0;
    MP_DIGITS(&accum2) = 0;
    MP_DIGITS(&accum[0]) = 0;
    MP_DIGITS(&accum[1]) = 0;
    MP_DIGITS(&accum[2]) = 0;
    MP_DIGITS(&accum[3]) = 0;
    MP_DIGITS(&tmp) = 0;

    /* allocate the weaved power table (plus alignment slack) */
    powersArray = (unsigned char *)malloc(num_powers*(nLen*sizeof(mp_digit)+1));
    if (powersArray == NULL) {
        res = MP_MEM;
        goto CLEANUP;
    }

    /* powers[i] = base ** (i); */
    powers = (unsigned char *)MP_ALIGN(powersArray,num_powers);

    /* grab the first window value. This allows us to preload accumulator1
     * and save a conversion, some squares and a multiple.
     * (MP_CHECKOK stores mpl_get_bits' non-negative return value in res) */
    MP_CHECKOK( mpl_get_bits(exponent,
                             bits_in_exponent-window_bits, window_bits) );
    first_window = (mp_size)res;

    MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&tmp, 3 * nLen + 2) );

    /* build the first WEAVE_WORD powers inline */
    /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
    if (num_powers > 2) {
        MP_CHECKOK( mp_init_size(&accum[0], 3 * nLen + 2) );
        MP_CHECKOK( mp_init_size(&accum[1], 3 * nLen + 2) );
        MP_CHECKOK( mp_init_size(&accum[2], 3 * nLen + 2) );
        MP_CHECKOK( mp_init_size(&accum[3], 3 * nLen + 2) );
        /* accum[0..3] = base^0 .. base^3 (Montgomery residues) */
        mp_set(&accum[0], 1);
        MP_CHECKOK( s_mp_to_mont(&accum[0], mmm, &accum[0]) );
        MP_CHECKOK( mp_copy(montBase, &accum[1]) );
        SQR(montBase, &accum[2]);
        MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
        MP_CHECKOK( mpi_to_weave(accum, powers, nLen, num_powers) );
        if (first_window < 4) {
            MP_CHECKOK( mp_copy(&accum[first_window], &accum1) );
            first_window = num_powers; /* mark the preload as done */
        }
    } else {
        if (first_window == 0) {
            mp_set(&accum1, 1);
            MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
        } else {
            /* assert first_window == 1? */
            MP_CHECKOK( mp_copy(montBase, &accum1) );
        }
    }

    /*
     * calculate all the powers in the powers array.
     * this adds 2**(k-1)-2 square operations over just calculating the
     * odd powers where k is the window size in the two other mp_modexpt
     * implementations in this file. We will get some of that
     * back by not needing the first 'k' squares and one multiply for the
     * first window
     */
    for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
        int acc_index = i & (WEAVE_WORD_SIZE-1); /* i % WEAVE_WORD_SIZE */
        if ( i & 1 ) {
            /* odd power: multiply the previous (even) power by the base */
            MUL_NOWEAVE(montBase, &accum[acc_index-1] , &accum[acc_index]);
            /* we've filled the array do our 'per array' processing */
            if (acc_index == (WEAVE_WORD_SIZE-1)) {
                MP_CHECKOK( mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE-1),
                                         nLen, num_powers) );
                if (first_window <= i) {
                    MP_CHECKOK( mp_copy(&accum[first_window & (WEAVE_WORD_SIZE-1)],
                                        &accum1) );
                    first_window = num_powers;
                }
            }
        } else {
            /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source
             * and target are the same so we need to copy.. After that, the
             * value is overwritten, so we need to fetch it from the stored
             * weave array */
            if (i > 2* WEAVE_WORD_SIZE) {
                MP_CHECKOK(weave_to_mpi(&accum2, powers+i/2, nLen, num_powers));
                SQR(&accum2, &accum[acc_index]);
            } else {
                int half_power_index = (i/2) & (WEAVE_WORD_SIZE-1);
                if (half_power_index == acc_index) {
                    /* copy is cheaper than weave_to_mpi */
                    MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
                    SQR(&accum2,&accum[acc_index]);
                } else {
                    SQR(&accum[half_power_index],&accum[acc_index]);
                }
            }
        }
    }

    /* if the accum1 isn't set, then there is something wrong with our logic
     * above and is an internal programming error.
     */
#if MP_ARGCHK == 2
    assert(MP_USED(&accum1) != 0);
#endif

    /* accum1 already holds the first window's power; run the windowed
     * square-and-multiply ladder over the remaining exponent bits */
    pa1 = &accum1;
    pa2 = &accum2;
    for (expOff = bits_in_exponent - window_bits*2; expOff >= 0;
         expOff -= window_bits) {
        mp_size smallExp;
        MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
        smallExp = (mp_size)res;

        /* handle unroll the loops */
        switch (window_bits) {
        case 1:
            if (!smallExp) {
                SQR(pa1,pa2); SWAPPA;
            } else if (smallExp & 1) {
                SQR(pa1,pa2); MUL_NOWEAVE(montBase,pa2,pa1);
            } else {
                abort();
            }
            break;
        case 6:
            SQR(pa1,pa2); SQR(pa2,pa1);
            /* fall through */
        case 4:
            SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
            MUL(smallExp, pa1,pa2); SWAPPA;
            break;
        case 5:
            SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
            SQR(pa1,pa2); MUL(smallExp,pa2,pa1);
            break;
        default:
            abort(); /* could do a loop? */
        }
    }

    /* convert back out of Montgomery form and hand the value to result */
    res = s_mp_redc(pa1, mmm);
    mp_exch(pa1, result);

CLEANUP:
    mp_clear(&accum1);
    mp_clear(&accum2);
    mp_clear(&accum[0]);
    mp_clear(&accum[1]);
    mp_clear(&accum[2]);
    mp_clear(&accum[3]);
    mp_clear(&tmp);
    /* PORT_Memset(powers,0,num_powers*nLen*sizeof(mp_digit)); */
    free(powersArray);
    return res;
}
/* 6 words */
/* Computes r = (a + b) mod meth->irr for operands that fit in six
 * mp_digits. The digits are pulled into locals, added with an unrolled
 * ripple-carry chain, and the result is conditionally reduced by one
 * subtraction of the modulus. */
mp_err
ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r, const GFMethod *meth)
{
	mp_err res = MP_OKAY;
	mp_digit x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;
	mp_digit s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0;
	mp_digit cy;

	/* Fetch up to six digits of a into x*; absent digits stay zero. */
	switch (MP_USED(a)) {
	case 6:
		x5 = MP_DIGIT(a, 5);
		/* FALLTHROUGH */
	case 5:
		x4 = MP_DIGIT(a, 4);
		/* FALLTHROUGH */
	case 4:
		x3 = MP_DIGIT(a, 3);
		/* FALLTHROUGH */
	case 3:
		x2 = MP_DIGIT(a, 2);
		/* FALLTHROUGH */
	case 2:
		x1 = MP_DIGIT(a, 1);
		/* FALLTHROUGH */
	case 1:
		x0 = MP_DIGIT(a, 0);
	}
	/* Fetch up to six digits of b into s*; the sum accumulates here. */
	switch (MP_USED(b)) {
	case 6:
		s5 = MP_DIGIT(b, 5);
		/* FALLTHROUGH */
	case 5:
		s4 = MP_DIGIT(b, 4);
		/* FALLTHROUGH */
	case 4:
		s3 = MP_DIGIT(b, 3);
		/* FALLTHROUGH */
	case 3:
		s2 = MP_DIGIT(b, 2);
		/* FALLTHROUGH */
	case 2:
		s1 = MP_DIGIT(b, 1);
		/* FALLTHROUGH */
	case 1:
		s0 = MP_DIGIT(b, 0);
	}

	/* Unrolled six-digit add: s = x + s, carry-out left in cy. */
	cy = 0;
	MP_ADD_CARRY(x0, s0, s0, cy);
	MP_ADD_CARRY(x1, s1, s1, cy);
	MP_ADD_CARRY(x2, s2, s2, cy);
	MP_ADD_CARRY(x3, s3, s3, cy);
	MP_ADD_CARRY(x4, s4, s4, cy);
	MP_ADD_CARRY(x5, s5, s5, cy);

	MP_CHECKOK(s_mp_pad(r, 6));
	MP_DIGIT(r, 5) = s5;
	MP_DIGIT(r, 4) = s4;
	MP_DIGIT(r, 3) = s3;
	MP_DIGIT(r, 2) = s2;
	MP_DIGIT(r, 1) = s1;
	MP_DIGIT(r, 0) = s0;
	MP_SIGN(r) = MP_ZPOS;
	MP_USED(r) = 6;

	/* Do quick 'subtract' if we've gone over
	 * (add the 2's complement of the curve field). Comparing the top
	 * digit first avoids the full mp_cmp in the common case. */
	x5 = MP_DIGIT(&meth->irr, 5);
	if (cy || s5 > x5 || ((s5 == x5) && mp_cmp(r, &meth->irr) != MP_LT)) {
		x4 = MP_DIGIT(&meth->irr, 4);
		x3 = MP_DIGIT(&meth->irr, 3);
		x2 = MP_DIGIT(&meth->irr, 2);
		x1 = MP_DIGIT(&meth->irr, 1);
		x0 = MP_DIGIT(&meth->irr, 0);
		cy = 0;
		MP_SUB_BORROW(s0, x0, s0, cy);
		MP_SUB_BORROW(s1, x1, s1, cy);
		MP_SUB_BORROW(s2, x2, s2, cy);
		MP_SUB_BORROW(s3, x3, s3, cy);
		MP_SUB_BORROW(s4, x4, s4, cy);
		MP_SUB_BORROW(s5, x5, s5, cy);
		MP_DIGIT(r, 5) = s5;
		MP_DIGIT(r, 4) = s4;
		MP_DIGIT(r, 3) = s3;
		MP_DIGIT(r, 2) = s2;
		MP_DIGIT(r, 1) = s1;
		MP_DIGIT(r, 0) = s0;
	}

	s_mp_clamp(r);

  CLEANUP:
	return res;
}
/* 4 words */
/* Computes r = (a - b) mod meth->irr for operands that fit in four
 * mp_digits. The subtraction is done with an unrolled borrow chain
 * (or one inline-asm block on AMD64); if it underflows, one addition
 * of the modulus brings the result back into [0, irr). */
mp_err
ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r, const GFMethod *meth)
{
	mp_err res = MP_OKAY;
	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
	mp_digit borrow;

	/* Load the minuend a into r0..r3; digits beyond MP_USED(a) stay
	 * zero (deliberate switch fallthrough). */
	switch (MP_USED(a)) {
	case 4:
		r3 = MP_DIGIT(a, 3);
		/* FALLTHROUGH */
	case 3:
		r2 = MP_DIGIT(a, 2);
		/* FALLTHROUGH */
	case 2:
		r1 = MP_DIGIT(a, 1);
		/* FALLTHROUGH */
	case 1:
		r0 = MP_DIGIT(a, 0);
	}
	/* Load the subtrahend b into b0..b3 the same way. */
	switch (MP_USED(b)) {
	case 4:
		b3 = MP_DIGIT(b, 3);
		/* FALLTHROUGH */
	case 3:
		b2 = MP_DIGIT(b, 2);
		/* FALLTHROUGH */
	case 2:
		b1 = MP_DIGIT(b, 1);
		/* FALLTHROUGH */
	case 1:
		b0 = MP_DIGIT(b, 0);
	}

#ifndef MPI_AMD64_ADD
	/* Portable path: unrolled four-digit subtract, borrow-out in
	 * 'borrow'. */
	borrow = 0;
	MP_SUB_BORROW(r0, b0, r0, borrow);
	MP_SUB_BORROW(r1, b1, r1, borrow);
	MP_SUB_BORROW(r2, b2, r2, borrow);
	MP_SUB_BORROW(r3, b3, r3, borrow);
#else
	/* AMD64 path: subq/sbbq chain; the trailing adcq $0 captures the
	 * final carry flag (the borrow) into 'borrow'. */
	__asm__(
		"xorq %4,%4 \n\t"
		"subq %5,%0 \n\t"
		"sbbq %6,%1 \n\t"
		"sbbq %7,%2 \n\t"
		"sbbq %8,%3 \n\t"
		"adcq $0,%4 \n\t"
		: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(borrow)
		: "r"(b0), "r"(b1), "r"(b2), "r"(b3),
		  "0"(r0), "1"(r1), "2"(r2), "3"(r3)
		: "%cc");
#endif

	/* Do quick 'add' if we've gone under 0
	 * (subtract the 2's complement of the curve field) */
	if (borrow) {
		b3 = MP_DIGIT(&meth->irr, 3);
		b2 = MP_DIGIT(&meth->irr, 2);
		b1 = MP_DIGIT(&meth->irr, 1);
		b0 = MP_DIGIT(&meth->irr, 0);
#ifndef MPI_AMD64_ADD
		borrow = 0;
		MP_ADD_CARRY(b0, r0, r0, borrow);
		MP_ADD_CARRY(b1, r1, r1, borrow);
		MP_ADD_CARRY(b2, r2, r2, borrow);
		MP_ADD_CARRY(b3, r3, r3, borrow);
#else
		__asm__(
			"addq %4,%0 \n\t"
			"adcq %5,%1 \n\t"
			"adcq %6,%2 \n\t"
			"adcq %7,%3 \n\t"
			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
			: "r"(b0), "r"(b1), "r"(b2), "r"(b3),
			  "0"(r0), "1"(r1), "2"(r2), "3"(r3)
			: "%cc");
#endif
	}

#ifdef MPI_AMD64_ADD
	/* compiler fakeout? */
	/* NOTE(review): presumably this unreachable-in-practice branch
	 * exists to keep the compiler from caching r0..r3 across the asm
	 * blocks; note it tests r3/r1/r0 but not r2 — confirm intent. */
	if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
		MP_CHECKOK(s_mp_pad(r, 4));
	}
#endif
	/* Make sure r has room for four digits, then store the result. */
	MP_CHECKOK(s_mp_pad(r, 4));
	MP_DIGIT(r, 3) = r3;
	MP_DIGIT(r, 2) = r2;
	MP_DIGIT(r, 1) = r1;
	MP_DIGIT(r, 0) = r0;
	MP_SIGN(r) = MP_ZPOS;
	MP_USED(r) = 4;
	s_mp_clamp(r);

  CLEANUP:
	return res;
}
/* 4 words */ mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r, const GFMethod *meth) { mp_err res = MP_OKAY; mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0; mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0; mp_digit carry; switch (MP_USED(a)) { case 4: a3 = MP_DIGIT(a, 3); case 3: a2 = MP_DIGIT(a, 2); case 2: a1 = MP_DIGIT(a, 1); case 1: a0 = MP_DIGIT(a, 0); } switch (MP_USED(b)) { case 4: r3 = MP_DIGIT(b, 3); case 3: r2 = MP_DIGIT(b, 2); case 2: r1 = MP_DIGIT(b, 1); case 1: r0 = MP_DIGIT(b, 0); } #ifndef MPI_AMD64_ADD carry = 0; MP_ADD_CARRY(a0, r0, r0, carry); MP_ADD_CARRY(a1, r1, r1, carry); MP_ADD_CARRY(a2, r2, r2, carry); MP_ADD_CARRY(a3, r3, r3, carry); #else __asm__( "xorq %4,%4 \n\t" "addq %5,%0 \n\t" "adcq %6,%1 \n\t" "adcq %7,%2 \n\t" "adcq %8,%3 \n\t" "adcq $0,%4 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry) : "r"(a0), "r"(a1), "r"(a2), "r"(a3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif MP_CHECKOK(s_mp_pad(r, 4)); MP_DIGIT(r, 3) = r3; MP_DIGIT(r, 2) = r2; MP_DIGIT(r, 1) = r1; MP_DIGIT(r, 0) = r0; MP_SIGN(r) = MP_ZPOS; MP_USED(r) = 4; /* Do quick 'subract' if we've gone over * (add the 2's complement of the curve field) */ a3 = MP_DIGIT(&meth->irr, 3); if (carry || r3 > a3 || ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) { a2 = MP_DIGIT(&meth->irr, 2); a1 = MP_DIGIT(&meth->irr, 1); a0 = MP_DIGIT(&meth->irr, 0); #ifndef MPI_AMD64_ADD carry = 0; MP_SUB_BORROW(r0, a0, r0, carry); MP_SUB_BORROW(r1, a1, r1, carry); MP_SUB_BORROW(r2, a2, r2, carry); MP_SUB_BORROW(r3, a3, r3, carry); #else __asm__( "subq %4,%0 \n\t" "sbbq %5,%1 \n\t" "sbbq %6,%2 \n\t" "sbbq %7,%3 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3) : "r"(a0), "r"(a1), "r"(a2), "r"(a3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif MP_DIGIT(r, 3) = r3; MP_DIGIT(r, 2) = r2; MP_DIGIT(r, 1) = r1; MP_DIGIT(r, 0) = r0; }
/* Do modular exponentiation using integer multiply code. */ mp_err mp_exptmod_i(const mp_int * montBase, const mp_int * exponent, const mp_int * modulus, mp_int * result, mp_mont_modulus *mmm, int nLen, mp_size bits_in_exponent, mp_size window_bits, mp_size odd_ints) { mp_int *pa1, *pa2, *ptmp; mp_size i; mp_err res; int expOff; mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS]; /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */ /* oddPowers[i] = base ** (2*i + 1); */ MP_DIGITS(&accum1) = 0; MP_DIGITS(&accum2) = 0; MP_DIGITS(&power2) = 0; for (i = 0; i < MAX_ODD_INTS; ++i) { MP_DIGITS(oddPowers + i) = 0; } MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) ); MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) ); MP_CHECKOK( mp_init_copy(&oddPowers[0], montBase) ); mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2); MP_CHECKOK( mp_sqr(montBase, &power2) ); /* power2 = montBase ** 2 */ MP_CHECKOK( s_mp_redc(&power2, mmm) ); for (i = 1; i < odd_ints; ++i) { mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2); MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) ); MP_CHECKOK( s_mp_redc(oddPowers + i, mmm) ); } /* set accumulator to montgomery residue of 1 */ mp_set(&accum1, 1); MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) ); pa1 = &accum1; pa2 = &accum2; for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { mp_size smallExp; MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) ); smallExp = (mp_size)res; if (window_bits == 1) { if (!smallExp) { SQR(pa1,pa2); SWAPPA; } else if (smallExp & 1) { SQR(pa1,pa2); MUL(0,pa2,pa1); } else { ABORT; } } else if (window_bits == 4) { if (!smallExp) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 1) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2, pa1,pa2); SWAPPA; } else if (smallExp & 2) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 4) { 
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 8) { SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else { ABORT; } } else if (window_bits == 5) { if (!smallExp) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 1) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1); } else if (smallExp & 2) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 4) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 8) { SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 0x10) { SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else { ABORT; } } else if (window_bits == 6) { if (!smallExp) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 1) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA; } else if (smallExp & 2) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 4) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 8) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 0x10) { SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 0x20) { SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else { 
ABORT; } } else { ABORT; } } res = s_mp_redc(pa1, mmm); mp_exch(pa1, result); CLEANUP: mp_clear(&accum1); mp_clear(&accum2); mp_clear(&power2); for (i = 0; i < odd_ints; ++i) { mp_clear(oddPowers + i); } return res; }
/*
 * on some platforms character stores into memory are very expensive since they
 * generate a read/modify/write operation on the bus.  On those platforms
 * we need to do integer writes to the bus. Because of some unrolled code,
 * in this current code the size of mp_weave_word must be four. The code that
 * makes this assumption explicitly is called out. (on some platforms a write
 * of 4 bytes still requires a single read-modify-write operation.)
 *
 * This function takes the identical parameters as the function above,
 * however it lays out the final array differently. Where the previous function
 * treats the mpi_int as a byte array, this function treats it as an array of
 * mp_digits where each digit is stored in big endian order.
 *
 * since we need to interleave on a byte by byte basis, we need to collect
 * several mpi structures together into a single uint32 before we write. We
 * also need to make sure the uint32 is arranged so that the first value of
 * the first array winds up in b[0]. This means construction of that uint32
 * is endian specific (even though the layout of the mp_digits in the array
 * is always big endian).
 *
 * The final data is stored as follows :
 *
 *  Our same logical array p array, m is sizeof(mp_digit),
 * N is still count and n is now b_size. If we define p[i].digit[j]0 as the
 * most significant byte of the word p[i].digit[j], p[i].digit[j]1 as
 * the next most significant byte of p[i].digit[j], ...  and p[i].digit[j]m-1
 * is the least significant byte.
 * Our array would look like:
 *    p[0].digit[0]0     p[1].digit[0]0    ...      p[N-2].digit[0]0      p[N-1].digit[0]0
 *    p[0].digit[0]1     p[1].digit[0]1    ...      p[N-2].digit[0]1      p[N-1].digit[0]1
 *                .                            .
 *    p[0].digit[0]m-1   p[1].digit[0]m-1  ...      p[N-2].digit[0]m-1    p[N-1].digit[0]m-1
 *    p[0].digit[1]0     p[1].digit[1]0    ...      p[N-2].digit[1]0      p[N-1].digit[1]0
 *                .                            .
 *                .                            .
 *    p[0].digit[n-1]m-2 p[1].digit[n-1]m-2 ...     p[N-2].digit[n-1]m-2  p[N-1].digit[n-1]m-2
 *    p[0].digit[n-1]m-1 p[1].digit[n-1]m-1 ...
p[N-2].digit[n-1]m-1 p[N-1].digit[n-1]m-1
 *
 */
/* Interleave ("weave") the four positive mp_ints a[0..3] into b, one byte
 * from each integer per weave word, in the layout documented above.
 *   a      - array of WEAVE_WORD_SIZE (4) mp_ints, all non-negative
 *   b      - output buffer; must hold b_size * count bytes
 *   b_size - number of mp_digits stored per integer (zero-padded)
 *   count  - total number of integers woven into the table (row width
 *            in bytes; converted below into a stride in weave words)
 * Returns MP_OKAY, or MP_BADARG on a negative or oversized input. */
mp_err mpi_to_weave(const mp_int *a, unsigned char *b,
		    mp_size b_size, mp_size count)
{
	mp_size i;
	mp_digit *digitsa0;
	mp_digit *digitsa1;
	mp_digit *digitsa2;
	mp_digit *digitsa3;
	mp_size useda0;
	mp_size useda1;
	mp_size useda2;
	mp_size useda3;
	mp_weave_word *weaved = (mp_weave_word *)b;

	/* From here on, count is the stride (in weave words) between two
	 * consecutive bytes of the same logical integer. */
	count = count/sizeof(mp_weave_word);

	/* this code pretty much depends on this ! */
#if MP_ARGCHK == 2
	assert(WEAVE_WORD_SIZE == 4);
	assert(sizeof(mp_weave_word) == 4);
#endif

	digitsa0 = MP_DIGITS(&a[0]);
	digitsa1 = MP_DIGITS(&a[1]);
	digitsa2 = MP_DIGITS(&a[2]);
	digitsa3 = MP_DIGITS(&a[3]);
	useda0 = MP_USED(&a[0]);
	useda1 = MP_USED(&a[1]);
	useda2 = MP_USED(&a[2]);
	useda3 = MP_USED(&a[3]);

	/* The weave layout carries no sign, so only non-negative values
	 * may be stored, and none may exceed b_size digits. */
	ARGCHK(MP_SIGN(&a[0]) == MP_ZPOS, MP_BADARG);
	ARGCHK(MP_SIGN(&a[1]) == MP_ZPOS, MP_BADARG);
	ARGCHK(MP_SIGN(&a[2]) == MP_ZPOS, MP_BADARG);
	ARGCHK(MP_SIGN(&a[3]) == MP_ZPOS, MP_BADARG);
	ARGCHK(useda0 <= b_size, MP_BADARG);
	ARGCHK(useda1 <= b_size, MP_BADARG);
	ARGCHK(useda2 <= b_size, MP_BADARG);
	ARGCHK(useda3 <= b_size, MP_BADARG);

/* Reads digit[word] when in range, else yields 0 — the four integers may
 * have different used-lengths and are zero-extended to b_size digits. */
#define SAFE_FETCH(digit, used, word) ((word) < (used) ? (digit[word]) : 0)

	for (i=0; i < b_size; i++) {
		mp_digit d0 = SAFE_FETCH(digitsa0,useda0,i);
		mp_digit d1 = SAFE_FETCH(digitsa1,useda1,i);
		mp_digit d2 = SAFE_FETCH(digitsa2,useda2,i);
		mp_digit d3 = SAFE_FETCH(digitsa3,useda3,i);
		register mp_weave_word acc;

/*
 * ONE_STEP takes the MSB of each of our current digits and places that
 * byte in the appropriate position for writing to the weaved array.
 *  On little endian:
 *   b3 b2 b1 b0
 *  On big endian:
 *   b0 b1 b2 b3
 * When the data is written it would always wind up:
 *   b[0] = b0
 *   b[1] = b1
 *   b[2] = b2
 *   b[3] = b3
 *
 * Once we've written the MSB, we shift the whole digit up left one
 * byte, putting the Next Most Significant Byte in the MSB position,
 * so when we repeat the next one step that byte will be written.
 * NOTE: This code assumes sizeof(mp_weave_word) and MP_WEAVE_WORD_SIZE
 * is 4.
 */
#ifdef MP_IS_LITTLE_ENDIAN
#define MPI_WEAVE_ONE_STEP \
	acc = (d0 >> (MP_DIGIT_BIT-8)) & 0x000000ff; d0 <<= 8; /*b0*/ \
	acc |= (d1 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d1 <<= 8; /*b1*/ \
	acc |= (d2 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d2 <<= 8; /*b2*/ \
	acc |= (d3 >> (MP_DIGIT_BIT-32)) & 0xff000000; d3 <<= 8; /*b3*/ \
	*weaved = acc; weaved += count;
#else
#define MPI_WEAVE_ONE_STEP \
	acc = (d0 >> (MP_DIGIT_BIT-32)) & 0xff000000; d0 <<= 8; /*b0*/ \
	acc |= (d1 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d1 <<= 8; /*b1*/ \
	acc |= (d2 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d2 <<= 8; /*b2*/ \
	acc |= (d3 >> (MP_DIGIT_BIT-8)) & 0x000000ff; d3 <<= 8; /*b3*/ \
	*weaved = acc; weaved += count;
#endif
		/* Duff's-device-style dispatch: sizeof(mp_digit) is the
		 * byte count per digit, and the fallthrough cases emit
		 * exactly that many ONE_STEP writes (e.g. for 8-byte
		 * digits: 4 + 2 + 1 + 1 = 8 steps). */
		switch (sizeof(mp_digit)) {
		case 32:
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			/* FALLTHROUGH */
		case 16:
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			/* FALLTHROUGH */
		case 8:
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			/* FALLTHROUGH */
		case 4:
			MPI_WEAVE_ONE_STEP
			MPI_WEAVE_ONE_STEP
			/* FALLTHROUGH */
		case 2:
			MPI_WEAVE_ONE_STEP
			/* FALLTHROUGH */
		case 1:
			MPI_WEAVE_ONE_STEP
			break;
		}
	}

	return MP_OKAY;
}
/* 6 words */
/* Computes r = (a - b) mod meth->irr for operands that fit in six
 * mp_digits: an unrolled borrow-chain subtraction, followed by one
 * addition of the modulus whenever the difference went negative. */
mp_err
ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r, const GFMethod *meth)
{
	mp_err res = MP_OKAY;
	mp_digit m0 = 0, m1 = 0, m2 = 0, m3 = 0, m4 = 0, m5 = 0;
	mp_digit d0 = 0, d1 = 0, d2 = 0, d3 = 0, d4 = 0, d5 = 0;
	mp_digit br;

	/* Minuend a goes into d* (the difference accumulates there). */
	switch (MP_USED(a)) {
	case 6:
		d5 = MP_DIGIT(a, 5);
		/* FALLTHROUGH */
	case 5:
		d4 = MP_DIGIT(a, 4);
		/* FALLTHROUGH */
	case 4:
		d3 = MP_DIGIT(a, 3);
		/* FALLTHROUGH */
	case 3:
		d2 = MP_DIGIT(a, 2);
		/* FALLTHROUGH */
	case 2:
		d1 = MP_DIGIT(a, 1);
		/* FALLTHROUGH */
	case 1:
		d0 = MP_DIGIT(a, 0);
	}
	/* Subtrahend b goes into m*. */
	switch (MP_USED(b)) {
	case 6:
		m5 = MP_DIGIT(b, 5);
		/* FALLTHROUGH */
	case 5:
		m4 = MP_DIGIT(b, 4);
		/* FALLTHROUGH */
	case 4:
		m3 = MP_DIGIT(b, 3);
		/* FALLTHROUGH */
	case 3:
		m2 = MP_DIGIT(b, 2);
		/* FALLTHROUGH */
	case 2:
		m1 = MP_DIGIT(b, 1);
		/* FALLTHROUGH */
	case 1:
		m0 = MP_DIGIT(b, 0);
	}

	/* Unrolled six-digit subtract: d = d - m, borrow-out in br. */
	br = 0;
	MP_SUB_BORROW(d0, m0, d0, br);
	MP_SUB_BORROW(d1, m1, d1, br);
	MP_SUB_BORROW(d2, m2, d2, br);
	MP_SUB_BORROW(d3, m3, d3, br);
	MP_SUB_BORROW(d4, m4, d4, br);
	MP_SUB_BORROW(d5, m5, d5, br);

	/* A final borrow means a < b; wrap back into the field by adding
	 * the modulus meth->irr once. */
	if (br) {
		m5 = MP_DIGIT(&meth->irr, 5);
		m4 = MP_DIGIT(&meth->irr, 4);
		m3 = MP_DIGIT(&meth->irr, 3);
		m2 = MP_DIGIT(&meth->irr, 2);
		m1 = MP_DIGIT(&meth->irr, 1);
		m0 = MP_DIGIT(&meth->irr, 0);
		br = 0;
		MP_ADD_CARRY(m0, d0, d0, br);
		MP_ADD_CARRY(m1, d1, d1, br);
		MP_ADD_CARRY(m2, d2, d2, br);
		MP_ADD_CARRY(m3, d3, d3, br);
		MP_ADD_CARRY(m4, d4, d4, br);
		MP_ADD_CARRY(m5, d5, d5, br);
	}

	MP_CHECKOK(s_mp_pad(r, 6));
	MP_DIGIT(r, 5) = d5;
	MP_DIGIT(r, 4) = d4;
	MP_DIGIT(r, 3) = d3;
	MP_DIGIT(r, 2) = d2;
	MP_DIGIT(r, 1) = d1;
	MP_DIGIT(r, 0) = d0;
	MP_SIGN(r) = MP_ZPOS;
	MP_USED(r) = 6;
	s_mp_clamp(r);

  CLEANUP:
	return res;
}