/* Reverse the operation above for one mp_int. * Reconstruct one mp_int from its column in the weaved array. * Every read accesses every element of the weaved array, in order to * avoid timing attacks based on patterns of memory accesses. */
mp_err
weave_to_mpi(mp_int *a,              /* out, result */
             const mp_digit *weaved, /* in, byte matrix */
             mp_size index,          /* which column to read */
             mp_size nDigits,        /* number of mp_digits in each bignum */
             mp_size nBignums)       /* width of the matrix */
{
    /* these are indices, but need to be the same size as mp_digit
     * because of the CONST_TIME operations */
    mp_digit i, j;
    mp_digit d;
    mp_digit *pDest = MP_DIGITS(a);

    /* Result is always non-negative and at most nDigits long;
     * s_mp_clamp() below trims high zero digits. */
    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = nDigits;

    assert(weaved != NULL);

    /* Fetch the proper column in constant time, indexing over the whole array */
    for (i = 0; i < nDigits; ++i) {
        d = 0;
        for (j = 0; j < nBignums; ++j) {
            /* CONST_TIME_EQ yields a mask that selects only the wanted
             * column, yet every element of the row is still read, so the
             * memory-access pattern is independent of 'index'. */
            d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index);
        }
        pDest[i] = d;
    }

    s_mp_clamp(a);
    return MP_OKAY;
}
/*
 * mpi_to_weave takes an array of bignums, a matrix in which each bignum
 * occupies all the columns of a row, and transposes it into a matrix in
 * which each bignum occupies a column of every row. The first row of the
 * input matrix becomes the first column of the output matrix. The n'th
 * row of input becomes the n'th column of output. The input data is said
 * to be "interleaved" or "woven" into the output matrix.
 *
 * The array of bignums is left in this woven form. Each time a single
 * bignum value is needed, it is recreated by fetching the n'th column,
 * forming a single row which is the new bignum.
 *
 * The purpose of this interleaving is to make it impossible to determine
 * which of the bignums is being used in any one operation by examining
 * the pattern of cache misses.
 *
 * The weaving function does not transpose the entire input matrix in one call.
 * It transposes 4 rows of mp_ints into their respective columns of output.
 *
 * There are two different implementations of the weaving and unweaving code
 * in this file. One uses byte loads and stores. The second uses loads and
 * stores of mp_weave_word size values. The weaved forms of these two
 * implementations differ. Consequently, each one has its own explanation.
 *
 * Here is the explanation for the byte-at-a-time implementation.
 *
 * This implementation treats each mp_int bignum as an array of bytes,
 * rather than as an array of mp_digits. It stores those bytes as a
 * column of bytes in the output matrix. It doesn't care if the machine
 * uses big-endian or little-endian byte ordering within mp_digits.
 * The first byte of the mp_digit array becomes the first byte in the output
 * column, regardless of whether that byte is the MSB or LSB of the mp_digit.
 *
 * "bignums" is an array of mp_ints.
 * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
 *
 * "weaved" is the weaved output matrix.
 * The first byte of bignums[0] is stored in weaved[0].
 *
 * "nBignums" is the total number of bignums in the array of which "bignums"
 * is a part.
 *
 * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array.
 * mp_ints that use less than nDigits digits are logically padded with zeros
 * while being stored in the weaved array.
 */
mp_err mpi_to_weave(const mp_int *bignums,
                    unsigned char *weaved,
                    mp_size nDigits,  /* in each mp_int of input */
                    mp_size nBignums) /* in the entire source array */
{
    mp_size i;
    /* one past the last byte of the weaved output matrix */
    unsigned char * endDest = weaved + (nDigits * nBignums * sizeof(mp_digit));

    /* Transpose WEAVE_WORD_SIZE bignums (rows) into their columns. */
    for (i=0; i < WEAVE_WORD_SIZE; i++) {
        mp_size used = MP_USED(&bignums[i]);
        unsigned char *pSrc = (unsigned char *)MP_DIGITS(&bignums[i]);
        unsigned char *endSrc = pSrc + (used * sizeof(mp_digit));
        unsigned char *pDest = weaved + i; /* top of column i */

        ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG);
        ARGCHK(used <= nDigits, MP_BADARG);

        /* Copy each byte of this bignum down its column; consecutive
         * bytes of one bignum are nBignums bytes apart in the output. */
        for (; pSrc < endSrc; pSrc++) {
            *pDest = *pSrc;
            pDest += nBignums;
        }
        /* Zero-pad the rest of the column out to nDigits digits. */
        while (pDest < endDest) {
            *pDest = 0;
            pDest += nBignums;
        }
    }

    return MP_OKAY;
}
/* reverse the operation above for one entry. * b points to the offset into the weave array of the power we are * calculating */
mp_err weave_to_mpi(mp_int *a, const unsigned char *b,
                    mp_size b_size, mp_size count)
{
    mp_digit *pb = MP_DIGITS(a);
    mp_digit *end = &pb[b_size];

    /* Result is non-negative; s_mp_clamp() below trims high zeros. */
    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = b_size;

    for (; pb < end; pb++) {
        register mp_digit digit;

        /* most significant byte of this digit */
        digit = *b << 8;
        b += count;
/* Fold in the next byte of the digit; consecutive bytes of the same
 * bignum are 'count' bytes apart in the weaved array. */
#define MPI_UNWEAVE_ONE_STEP digit |= *b; b += count; digit = digit << 8;
        /* sizeof(mp_digit) is a compile-time constant, so the compiler
         * keeps only the live cases. Each case deliberately falls through
         * to the smaller ones; together with the initial byte above and
         * the final byte below, exactly sizeof(mp_digit) bytes are
         * consumed per digit. */
        switch (sizeof(mp_digit)) {
            case 32:
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 16:
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 8:
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 4:
                MPI_UNWEAVE_ONE_STEP
                MPI_UNWEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 2:
                break;
        }
        /* least significant byte */
        digit |= *b;
        b += count;

        *pb = digit;
    }
    s_mp_clamp(a);
    return MP_OKAY;
}
/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k * P(x, * y). If x, y = NULL, then P is assumed to be the generator (base point) * of the group of points on the elliptic curve. Input and output values * are assumed to be NOT field-encoded. */
mp_err
ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
            const mp_int *py, mp_int *rx, mp_int *ry)
{
    mp_err res = MP_OKAY;
    mp_int kt;

    ARGCHK((k != NULL) && (group != NULL), MP_BADARG);
    MP_DIGITS(&kt) = 0;

    /* want scalar to be less than or equal to group order */
    if (mp_cmp(k, &group->order) > 0) {
        MP_CHECKOK(mp_init(&kt, FLAG(k)));
        MP_CHECKOK(mp_mod(k, &group->order, &kt));
    } else {
        /* No reduction needed: make kt a shallow alias of k (shares k's
         * digit array, no copy). CLEANUP below only frees kt when it owns
         * its own digits. */
        MP_SIGN(&kt) = MP_ZPOS;
        MP_USED(&kt) = MP_USED(k);
        MP_ALLOC(&kt) = MP_ALLOC(k);
        MP_DIGITS(&kt) = MP_DIGITS(k);
    }

    if ((px == NULL) || (py == NULL)) {
        /* multiply the group generator, using a specialized base-point
         * routine when the group provides one */
        if (group->base_point_mul) {
            MP_CHECKOK(group->base_point_mul(&kt, rx, ry, group));
        } else {
            MP_CHECKOK(group->
                       point_mul(&kt, &group->genx, &group->geny, rx, ry,
                                 group));
        }
    } else {
        /* multiply the caller's point, field-encoding it first when the
         * field method works on encoded values */
        if (group->meth->field_enc) {
            MP_CHECKOK(group->meth->field_enc(px, rx, group->meth));
            MP_CHECKOK(group->meth->field_enc(py, ry, group->meth));
            MP_CHECKOK(group->point_mul(&kt, rx, ry, rx, ry, group));
        } else {
            MP_CHECKOK(group->point_mul(&kt, px, py, rx, ry, group));
        }
    }
    /* decode the result back to plain (non-field-encoded) form */
    if (group->meth->field_dec) {
        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
    }

CLEANUP:
    if (MP_DIGITS(&kt) != MP_DIGITS(k)) {
        mp_clear(&kt);
    }
    return res;
}
/* Reverse of the byte-at-a-time weave for one mp_int.
 * Rebuilds bignum "a" from its column in the weaved byte matrix.
 * "pSrc" points at the first byte of that column; successive bytes of
 * the bignum sit nBignums bytes apart in the matrix. */
mp_err
weave_to_mpi(mp_int *a,                 /* output, result */
             const unsigned char *pSrc, /* input, byte matrix */
             mp_size nDigits,           /* per mp_int output */
             mp_size nBignums)          /* bignums in weaved matrix */
{
    unsigned char *out = (unsigned char *)MP_DIGITS(a);
    mp_size nBytes = nDigits * sizeof(mp_digit);
    mp_size k;

    /* Result is non-negative; s_mp_clamp() trims high zero digits. */
    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = nDigits;

    /* Gather the column: byte k of the output lives at row k of the
     * matrix, i.e. at offset k * nBignums from pSrc. */
    for (k = 0; k < nBytes; k++) {
        out[k] = pSrc[k * nBignums];
    }

    s_mp_clamp(a);
    return MP_OKAY;
}
/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses * Jacobian coordinates. * * Assumes input is already field-encoded using field_enc, and returns * output that is still field-encoded. * * This routine implements Point Doubling in the Jacobian Projective * space as described in the paper "Efficient elliptic curve exponentiation * using mixed coordinates", by H. Cohen, A Miyaji, T. Ono. */
mp_err
ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz,
                  mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group)
{
    mp_err res = MP_OKAY;
    mp_int t0, t1, M, S;

    /* zero the digit pointers first so CLEANUP may safely mp_clear()
     * temporaries even if an early mp_init() fails */
    MP_DIGITS(&t0) = 0;
    MP_DIGITS(&t1) = 0;
    MP_DIGITS(&M) = 0;
    MP_DIGITS(&S) = 0;
    MP_CHECKOK(mp_init(&t0));
    MP_CHECKOK(mp_init(&t1));
    MP_CHECKOK(mp_init(&M));
    MP_CHECKOK(mp_init(&S));

    /* P == inf or P == -P */
    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) {
        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
        goto CLEANUP;
    }

    if (mp_cmp_d(pz, 1) == 0) {
        /* pz == 1 (affine input): pz^4 == 1, so
         * M = 3 * px^2 + a */
        MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
        MP_CHECKOK(group->meth->
                   field_add(&t0, &group->curvea, &M, group->meth));
    } else if (MP_SIGN(&group->curvea) == MP_NEG &&
               MP_USED(&group->curvea) == 1 &&
               MP_DIGIT(&group->curvea, 0) == 3) {
        /* curve a == -3: use the factored form, saving a squaring:
         * M = 3 * (px + pz^2) * (px - pz^2) */
        MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth));
        MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth));
        MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth));
    } else {
        /* general case:
         * M = 3 * (px^2) + a * (pz^4) */
        MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
        MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
        MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth));
        MP_CHECKOK(group->meth->
                   field_mul(&M, &group->curvea, &M, group->meth));
        MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth));
    }

    /* rz = 2 * py * pz */
    /* t0 = 4 * py^2 */
    if (mp_cmp_d(pz, 1) == 0) {
        /* pz == 1: rz = 2 * py, t0 = (2*py)^2 */
        MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth));
        MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth));
    } else {
        MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth));
        MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth));
        MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth));
    }

    /* S = 4 * px * py^2 = px * (2 * py)^2 */
    MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth));

    /* rx = M^2 - 2 * S */
    MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth));
    MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth));
    MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth));

    /* ry = M * (S - rx) - 8 * py^4 */
    /* t1 = t0^2 = 16 * py^4; halve it in the field (add the odd modulus
     * first when t1 is odd so the shift is exact) to get 8 * py^4 */
    MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth));
    if (mp_isodd(&t1)) {
        MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1));
    }
    MP_CHECKOK(mp_div_2(&t1, &t1));
    MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth));
    MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth));
    MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth));

CLEANUP:
    mp_clear(&t0);
    mp_clear(&t1);
    mp_clear(&M);
    mp_clear(&S);
    return res;
}
/* Validates a point on a GFp curve. */ mp_err ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group) { mp_err res = MP_NO; mp_int accl, accr, tmp, pxt, pyt; MP_DIGITS(&accl) = 0; MP_DIGITS(&accr) = 0; MP_DIGITS(&tmp) = 0; MP_DIGITS(&pxt) = 0; MP_DIGITS(&pyt) = 0; MP_CHECKOK(mp_init(&accl)); MP_CHECKOK(mp_init(&accr)); MP_CHECKOK(mp_init(&tmp)); MP_CHECKOK(mp_init(&pxt)); MP_CHECKOK(mp_init(&pyt)); /* 1: Verify that publicValue is not the point at infinity */ if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) { res = MP_NO; goto CLEANUP; } /* 2: Verify that the coordinates of publicValue are elements * of the field. */ if ((MP_SIGN(px) == MP_NEG) || (mp_cmp(px, &group->meth->irr) >= 0) || (MP_SIGN(py) == MP_NEG) || (mp_cmp(py, &group->meth->irr) >= 0)) { res = MP_NO; goto CLEANUP; } /* 3: Verify that publicValue is on the curve. */ if (group->meth->field_enc) { group->meth->field_enc(px, &pxt, group->meth); group->meth->field_enc(py, &pyt, group->meth); } else { MP_CHECKOK(mp_copy(px, &pxt)); MP_CHECKOK(mp_copy(py, &pyt)); } /* left-hand side: y^2 */ MP_CHECKOK(group->meth->field_sqr(&pyt, &accl, group->meth)); /* right-hand side: x^3 + a*x + b = (x^2 + a)*x + b by Horner's rule */ MP_CHECKOK(group->meth->field_sqr(&pxt, &tmp, group->meth)); MP_CHECKOK(group->meth->field_add(&tmp, &group->curvea, &tmp, group->meth)); MP_CHECKOK(group->meth->field_mul(&tmp, &pxt, &accr, group->meth)); MP_CHECKOK(group->meth->field_add(&accr, &group->curveb, &accr, group->meth)); /* check LHS - RHS == 0 */ MP_CHECKOK(group->meth->field_sub(&accl, &accr, &accr, group->meth)); if (mp_cmp_z(&accr) != 0) { res = MP_NO; goto CLEANUP; } /* 4: Verify that the order of the curve times the publicValue * is the point at infinity. 
*/ MP_CHECKOK(ECPoint_mul(group, &group->order, px, py, &pxt, &pyt)); if (ec_GFp_pt_is_inf_aff(&pxt, &pyt) != MP_YES) { res = MP_NO; goto CLEANUP; } res = MP_YES; CLEANUP: mp_clear(&accl); mp_clear(&accr); mp_clear(&tmp); mp_clear(&pxt); mp_clear(&pyt); return res; }
/* Converts from a floating point representation into an mp_int. Expects * that d is already reduced. */
void ecfp_fp2i(mp_int *mpout, double *d, const ECGroup *ecgroup)
{
    EC_group_fp *group = (EC_group_fp *) ecgroup->extra1;
    /* staging buffer: the prime, as 16-bit limbs (VLA sized by the group) */
    unsigned short i16[(group->primeBitSize + 15) / 16];
    double q = 1;
#ifdef ECL_THIRTY_TWO_BIT
    /* TEST uint32_t z = 0; */
    unsigned int z = 0;
#else
    uint64_t z = 0;
#endif
    int zBits = 0;
    int copiedBits = 0;
    int i = 0;
    int j = 0;
    mp_digit *out;

    /* Result should always be >= 0, so set sign accordingly */
    MP_SIGN(mpout) = MP_ZPOS;

    /* Tidy up so we're just dealing with positive numbers */
    ecfp_positiveTidy(d, group);

    /* We might need to do this reduction step more than once if the
     * reduction adds smaller terms which carry-over to cause another
     * reduction. However, this should happen very rarely, if ever,
     * depending on the elliptic curve. */
    do {
        /* Init loop data */
        z = 0;
        zBits = 0;
        q = 1;
        i = 0;
        j = 0;
        copiedBits = 0;

        /* Might have to do a bit more reduction */
        group->ecfp_singleReduce(d, group);

        /* Grow the size of the mpint if it's too small */
        /* NOTE(review): s_mp_grow()'s return value is ignored here — on
         * allocation failure the stores below would overrun; confirm
         * callers guarantee capacity. */
        s_mp_grow(mpout, group->numInts);
        MP_USED(mpout) = group->numInts;
        out = MP_DIGITS(mpout);

        /* Convert double to 16 bit integers */
        while (copiedBits < group->primeBitSize) {
            if (zBits < 16) {
                /* accumulate the next FP limb, scaled into position */
                z += d[i] * q;
                i++;
                ECFP_ASSERT(i < (group->primeBitSize + 15) / 16);
                zBits += group->doubleBitSize;
            }
            /* emit the low 16 bits of the accumulator */
            i16[j] = z;
            j++;
            z >>= 16;
            zBits -= 16;
            q *= ecfp_twom16; /* shift the scale down by 16 bits */
            copiedBits += 16;
        }
    } while (z != 0); /* leftover carry forces another reduction pass */

    /* Convert 16 bit integers to mp_digit */
#ifdef ECL_THIRTY_TWO_BIT
    /* pack two 16-bit limbs per 32-bit digit */
    for (i = 0; i < (group->primeBitSize + 15) / 16; i += 2) {
        *out = 0;
        if (i + 1 < (group->primeBitSize + 15) / 16) {
            *out = i16[i + 1];
            *out <<= 16;
        }
        *out++ += i16[i];
    }
#else /* 64 bit */
    /* pack four 16-bit limbs per 64-bit digit */
    for (i = 0; i < (group->primeBitSize + 15) / 16; i += 4) {
        *out = 0;
        if (i + 3 < (group->primeBitSize + 15) / 16) {
            *out = i16[i + 3];
            *out <<= 16;
        }
        if (i + 2 < (group->primeBitSize + 15) / 16) {
            *out += i16[i + 2];
            *out <<= 16;
        }
        if (i + 1 < (group->primeBitSize + 15) / 16) {
            *out += i16[i + 1];
            *out <<= 16;
        }
        *out++ += i16[i];
    }
#endif

    /* Perform final reduction. mpout should already be the same number
     * of bits as p, but might not be less than p. Make it so. Since
     * mpout has the same number of bits as p, and 2p has a larger bit
     * size, then mpout < 2p, so a single subtraction of p will suffice. */
    if (mp_cmp(mpout, &ecgroup->meth->irr) >= 0) {
        mp_sub(mpout, &ecgroup->meth->irr, mpout);
    }

    /* Shrink the size of the mp_int to the actual used size (required for
     * mp_cmp_z == 0) */
    out = MP_DIGITS(mpout);
    for (i = group->numInts - 1; i > 0; i--) {
        if (out[i] != 0)
            break;
    }
    MP_USED(mpout) = i + 1;

    /* Should be between 0 and p-1 */
    ECFP_ASSERT(mp_cmp(mpout, &ecgroup->meth->irr) < 0);
    ECFP_ASSERT(mp_cmp_z(mpout) >= 0);
}
/*
 * On some platforms character stores into memory are very expensive, since
 * they generate a read/modify/write operation on the bus. On those platforms
 * we need to do integer writes to the bus. Because of some unrolled code,
 * in this current code the size of mp_weave_word must be four. The code that
 * makes this assumption explicitly is called out. (On some platforms a write
 * of 4 bytes still requires a single read-modify-write operation.)
 *
 * This function takes the identical parameters as the function above,
 * however it lays out the final array differently. Where the previous function
 * treats the mpi_int as a byte array, this function treats it as an array of
 * mp_digits where each digit is stored in big endian order.
 *
 * Since we need to interleave on a byte by byte basis, we need to collect
 * several mpi structures together into a single uint32 before we write. We
 * also need to make sure the uint32 is arranged so that the first value of
 * the first array winds up in b[0]. This means construction of that uint32
 * is endian specific (even though the layout of the mp_digits in the array
 * is always big endian).
 *
 * The final data is stored as follows:
 *
 * Our same logical array p array, m is sizeof(mp_digit),
 * N is still count and n is now b_size. If we define p[i].digit[j]0 as the
 * most significant byte of the word p[i].digit[j], p[i].digit[j]1 as
 * the next most significant byte of p[i].digit[j], ... and p[i].digit[j]m-1
 * is the least significant byte.
 * Our array would look like:
 * p[0].digit[0]0   p[1].digit[0]0   ... p[N-2].digit[0]0   p[N-1].digit[0]0
 * p[0].digit[0]1   p[1].digit[0]1   ... p[N-2].digit[0]1   p[N-1].digit[0]1
 *                .                        .
 * p[0].digit[0]m-1 p[1].digit[0]m-1 ... p[N-2].digit[0]m-1 p[N-1].digit[0]m-1
 * p[0].digit[1]0   p[1].digit[1]0   ... p[N-2].digit[1]0   p[N-1].digit[1]0
 *                .                        .
 *                .                        .
 * p[0].digit[n-1]m-2 p[1].digit[n-1]m-2 ... p[N-2].digit[n-1]m-2 p[N-1].digit[n-1]m-2
 * p[0].digit[n-1]m-1 p[1].digit[n-1]m-1 ...
p[N-2].digit[n-1]m-1 p[N-1].digit[n-1]m-1
 *
 */
mp_err mpi_to_weave(const mp_int *a, unsigned char *b,
                    mp_size b_size, mp_size count)
{
    mp_size i;
    mp_digit *digitsa0;
    mp_digit *digitsa1;
    mp_digit *digitsa2;
    mp_digit *digitsa3;
    mp_size useda0;
    mp_size useda1;
    mp_size useda2;
    mp_size useda3;
    mp_weave_word *weaved = (mp_weave_word *)b;

    /* from here on 'count' is in mp_weave_words, not bytes */
    count = count/sizeof(mp_weave_word);

    /* this code pretty much depends on this ! */
#if MP_ARGCHK == 2
    assert(WEAVE_WORD_SIZE == 4);
    assert(sizeof(mp_weave_word) == 4);
#endif

    digitsa0 = MP_DIGITS(&a[0]);
    digitsa1 = MP_DIGITS(&a[1]);
    digitsa2 = MP_DIGITS(&a[2]);
    digitsa3 = MP_DIGITS(&a[3]);
    useda0 = MP_USED(&a[0]);
    useda1 = MP_USED(&a[1]);
    useda2 = MP_USED(&a[2]);
    useda3 = MP_USED(&a[3]);

    ARGCHK(MP_SIGN(&a[0]) == MP_ZPOS, MP_BADARG);
    ARGCHK(MP_SIGN(&a[1]) == MP_ZPOS, MP_BADARG);
    ARGCHK(MP_SIGN(&a[2]) == MP_ZPOS, MP_BADARG);
    ARGCHK(MP_SIGN(&a[3]) == MP_ZPOS, MP_BADARG);
    ARGCHK(useda0 <= b_size, MP_BADARG);
    ARGCHK(useda1 <= b_size, MP_BADARG);
    ARGCHK(useda2 <= b_size, MP_BADARG);
    ARGCHK(useda3 <= b_size, MP_BADARG);

/* reads digit 'word' of a bignum, or 0 when past its used length
 * (implements the logical zero-padding out to b_size digits) */
#define SAFE_FETCH(digit, used, word) ((word) < (used) ? (digit[word]) : 0)

    for (i=0; i < b_size; i++) {
        mp_digit d0 = SAFE_FETCH(digitsa0,useda0,i);
        mp_digit d1 = SAFE_FETCH(digitsa1,useda1,i);
        mp_digit d2 = SAFE_FETCH(digitsa2,useda2,i);
        mp_digit d3 = SAFE_FETCH(digitsa3,useda3,i);
        register mp_weave_word acc;

/*
 * ONE_STEP takes the MSB of each of our current digits and places that
 * byte in the appropriate position for writing to the weaved array.
 *  On little endian:
 *   b3 b2 b1 b0
 *  On big endian:
 *   b0 b1 b2 b3
 * When the data is written it would always wind up:
 *   b[0] = b0
 *   b[1] = b1
 *   b[2] = b2
 *   b[3] = b3
 *
 * Once we've written the MSB, we shift the whole digit up left one
 * byte, putting the Next Most Significant Byte in the MSB position,
 * so we we repeat the next one step that byte will be written.
 * NOTE: This code assumes sizeof(mp_weave_word) and MP_WEAVE_WORD_SIZE
 * is 4.
 */
#ifdef MP_IS_LITTLE_ENDIAN
#define MPI_WEAVE_ONE_STEP \
    acc  = (d0 >> (MP_DIGIT_BIT-8))  & 0x000000ff; d0 <<= 8; /*b0*/ \
    acc |= (d1 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d1 <<= 8; /*b1*/ \
    acc |= (d2 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d2 <<= 8; /*b2*/ \
    acc |= (d3 >> (MP_DIGIT_BIT-32)) & 0xff000000; d3 <<= 8; /*b3*/ \
    *weaved = acc; weaved += count;
#else
#define MPI_WEAVE_ONE_STEP \
    acc  = (d0 >> (MP_DIGIT_BIT-32)) & 0xff000000; d0 <<= 8; /*b0*/ \
    acc |= (d1 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d1 <<= 8; /*b1*/ \
    acc |= (d2 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d2 <<= 8; /*b2*/ \
    acc |= (d3 >> (MP_DIGIT_BIT-8))  & 0x000000ff; d3 <<= 8; /*b3*/ \
    *weaved = acc; weaved += count;
#endif
        /* sizeof(mp_digit) is a compile-time constant; each case falls
         * through so exactly sizeof(mp_digit) steps run — one weave word
         * written per byte of the digit */
        switch (sizeof(mp_digit)) {
            case 32:
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 16:
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 8:
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 4:
                MPI_WEAVE_ONE_STEP
                MPI_WEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 2:
                MPI_WEAVE_ONE_STEP
                /* FALLTHROUGH */
            case 1:
                MPI_WEAVE_ONE_STEP
                break;
        }
    }

    return MP_OKAY;
}
/* 6 words */
/* Computes r = (a - b) mod meth->irr for operands of at most 6 mp_digits.
 * On borrow, adds the modulus back instead of taking the absolute value. */
mp_err
ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
    mp_digit borrow;

    /* load a into r0..r5; deliberate fallthrough zero-extends shorter
     * operands (NOTE(review): assumes MP_USED(a) <= 6 — confirm callers) */
    switch (MP_USED(a)) {
        case 6:
            r5 = MP_DIGIT(a, 5);
        case 5:
            r4 = MP_DIGIT(a, 4);
        case 4:
            r3 = MP_DIGIT(a, 3);
        case 3:
            r2 = MP_DIGIT(a, 2);
        case 2:
            r1 = MP_DIGIT(a, 1);
        case 1:
            r0 = MP_DIGIT(a, 0);
    }
    /* load b likewise, with fallthrough zero-extension */
    switch (MP_USED(b)) {
        case 6:
            b5 = MP_DIGIT(b, 5);
        case 5:
            b4 = MP_DIGIT(b, 4);
        case 4:
            b3 = MP_DIGIT(b, 3);
        case 3:
            b2 = MP_DIGIT(b, 2);
        case 2:
            b1 = MP_DIGIT(b, 1);
        case 1:
            b0 = MP_DIGIT(b, 0);
    }

    /* 6-digit borrow-propagating subtraction: r = a - b */
    borrow = 0;
    MP_SUB_BORROW(r0, b0, r0, borrow);
    MP_SUB_BORROW(r1, b1, r1, borrow);
    MP_SUB_BORROW(r2, b2, r2, borrow);
    MP_SUB_BORROW(r3, b3, r3, borrow);
    MP_SUB_BORROW(r4, b4, r4, borrow);
    MP_SUB_BORROW(r5, b5, r5, borrow);

    /* Do quick 'add' if we've gone under 0
     * (subtract the 2's complement of the curve field) */
    if (borrow) {
        b5 = MP_DIGIT(&meth->irr, 5);
        b4 = MP_DIGIT(&meth->irr, 4);
        b3 = MP_DIGIT(&meth->irr, 3);
        b2 = MP_DIGIT(&meth->irr, 2);
        b1 = MP_DIGIT(&meth->irr, 1);
        b0 = MP_DIGIT(&meth->irr, 0);
        borrow = 0;
        MP_ADD_CARRY(b0, r0, r0, borrow);
        MP_ADD_CARRY(b1, r1, r1, borrow);
        MP_ADD_CARRY(b2, r2, r2, borrow);
        MP_ADD_CARRY(b3, r3, r3, borrow);
        MP_ADD_CARRY(b4, r4, r4, borrow);
        MP_ADD_CARRY(b5, r5, r5, borrow);
    }

    /* ensure r can hold 6 digits, then store the result */
    MP_CHECKOK(s_mp_pad(r, 6));
    MP_DIGIT(r, 5) = r5;
    MP_DIGIT(r, 4) = r4;
    MP_DIGIT(r, 3) = r3;
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 6;
    s_mp_clamp(r);

CLEANUP:
    return res;
}
/* 4 words */
/* Computes r = (a - b) mod meth->irr for operands of at most 4 mp_digits.
 * Uses AMD64 inline assembly for the carry chains when MPI_AMD64_ADD is
 * defined; otherwise falls back to the portable MP_SUB_BORROW/MP_ADD_CARRY
 * macros. */
mp_err
ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
    mp_digit borrow;

    /* load a into r0..r3; deliberate fallthrough zero-extends shorter
     * operands (NOTE(review): assumes MP_USED(a) <= 4 — confirm callers) */
    switch (MP_USED(a)) {
        case 4:
            r3 = MP_DIGIT(a, 3);
        case 3:
            r2 = MP_DIGIT(a, 2);
        case 2:
            r1 = MP_DIGIT(a, 1);
        case 1:
            r0 = MP_DIGIT(a, 0);
    }
    /* load b likewise, with fallthrough zero-extension */
    switch (MP_USED(b)) {
        case 4:
            b3 = MP_DIGIT(b, 3);
        case 3:
            b2 = MP_DIGIT(b, 2);
        case 2:
            b1 = MP_DIGIT(b, 1);
        case 1:
            b0 = MP_DIGIT(b, 0);
    }

    /* r = a - b, 4-digit borrow chain */
#ifndef MPI_AMD64_ADD
    borrow = 0;
    MP_SUB_BORROW(r0, b0, r0, borrow);
    MP_SUB_BORROW(r1, b1, r1, borrow);
    MP_SUB_BORROW(r2, b2, r2, borrow);
    MP_SUB_BORROW(r3, b3, r3, borrow);
#else
    /* sub/sbb chain; 'borrow' captures the final carry flag via adc */
    __asm__(
        "xorq %4,%4 \n\t"
        "subq %5,%0 \n\t"
        "sbbq %6,%1 \n\t"
        "sbbq %7,%2 \n\t"
        "sbbq %8,%3 \n\t"
        "adcq $0,%4 \n\t"
        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(borrow)
        : "r"(b0), "r"(b1), "r"(b2), "r"(b3), "0"(r0), "1"(r1), "2"(r2),
          "3"(r3)
        : "%cc");
#endif

    /* Do quick 'add' if we've gone under 0
     * (subtract the 2's complement of the curve field) */
    if (borrow) {
        b3 = MP_DIGIT(&meth->irr, 3);
        b2 = MP_DIGIT(&meth->irr, 2);
        b1 = MP_DIGIT(&meth->irr, 1);
        b0 = MP_DIGIT(&meth->irr, 0);
#ifndef MPI_AMD64_ADD
        borrow = 0;
        MP_ADD_CARRY(b0, r0, r0, borrow);
        MP_ADD_CARRY(b1, r1, r1, borrow);
        MP_ADD_CARRY(b2, r2, r2, borrow);
        MP_ADD_CARRY(b3, r3, r3, borrow);
#else
        __asm__(
            "addq %4,%0 \n\t"
            "adcq %5,%1 \n\t"
            "adcq %6,%2 \n\t"
            "adcq %7,%3 \n\t"
            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
            : "r"(b0), "r"(b1), "r"(b2), "r"(b3), "0"(r0), "1"(r1),
              "2"(r2), "3"(r3)
            : "%cc");
#endif
    }

#ifdef MPI_AMD64_ADD
    /* compiler fakeout? */
    /* NOTE(review): this comparison appears to exist only to keep the
     * optimizer from reordering around the asm — confirm before removing */
    if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
        MP_CHECKOK(s_mp_pad(r, 4));
    }
#endif
    /* ensure r can hold 4 digits, then store the result */
    MP_CHECKOK(s_mp_pad(r, 4));
    MP_DIGIT(r, 3) = r3;
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 4;
    s_mp_clamp(r);

CLEANUP:
    return res;
}
/* 6 words */
/* Computes r = (a + b) mod meth->irr for operands of at most 6 mp_digits.
 * On overflow past the modulus, subtracts the modulus once. */
mp_err
ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
    mp_digit carry;

    /* load a into a0..a5; deliberate fallthrough zero-extends shorter
     * operands (NOTE(review): assumes MP_USED(a) <= 6 — confirm callers) */
    switch (MP_USED(a)) {
        case 6:
            a5 = MP_DIGIT(a, 5);
        case 5:
            a4 = MP_DIGIT(a, 4);
        case 4:
            a3 = MP_DIGIT(a, 3);
        case 3:
            a2 = MP_DIGIT(a, 2);
        case 2:
            a1 = MP_DIGIT(a, 1);
        case 1:
            a0 = MP_DIGIT(a, 0);
    }
    /* load b directly into the result digits, with fallthrough */
    switch (MP_USED(b)) {
        case 6:
            r5 = MP_DIGIT(b, 5);
        case 5:
            r4 = MP_DIGIT(b, 4);
        case 4:
            r3 = MP_DIGIT(b, 3);
        case 3:
            r2 = MP_DIGIT(b, 2);
        case 2:
            r1 = MP_DIGIT(b, 1);
        case 1:
            r0 = MP_DIGIT(b, 0);
    }

    /* r = a + b, 6-digit carry chain */
    carry = 0;
    MP_ADD_CARRY(a0, r0, r0, carry);
    MP_ADD_CARRY(a1, r1, r1, carry);
    MP_ADD_CARRY(a2, r2, r2, carry);
    MP_ADD_CARRY(a3, r3, r3, carry);
    MP_ADD_CARRY(a4, r4, r4, carry);
    MP_ADD_CARRY(a5, r5, r5, carry);

    MP_CHECKOK(s_mp_pad(r, 6));
    MP_DIGIT(r, 5) = r5;
    MP_DIGIT(r, 4) = r4;
    MP_DIGIT(r, 3) = r3;
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 6;

    /* Do quick 'subract' if we've gone over
     * (add the 2's complement of the curve field)
     * The top-digit comparison is a cheap pre-filter; the full mp_cmp
     * runs only when the top digits tie. */
    a5 = MP_DIGIT(&meth->irr, 5);
    if (carry || r5 > a5 ||
        ((r5 == a5) && mp_cmp(r, &meth->irr) != MP_LT)) {
        a4 = MP_DIGIT(&meth->irr, 4);
        a3 = MP_DIGIT(&meth->irr, 3);
        a2 = MP_DIGIT(&meth->irr, 2);
        a1 = MP_DIGIT(&meth->irr, 1);
        a0 = MP_DIGIT(&meth->irr, 0);
        carry = 0;
        MP_SUB_BORROW(r0, a0, r0, carry);
        MP_SUB_BORROW(r1, a1, r1, carry);
        MP_SUB_BORROW(r2, a2, r2, carry);
        MP_SUB_BORROW(r3, a3, r3, carry);
        MP_SUB_BORROW(r4, a4, r4, carry);
        MP_SUB_BORROW(r5, a5, r5, carry);
        MP_DIGIT(r, 5) = r5;
        MP_DIGIT(r, 4) = r4;
        MP_DIGIT(r, 3) = r3;
        MP_DIGIT(r, 2) = r2;
        MP_DIGIT(r, 1) = r1;
        MP_DIGIT(r, 0) = r0;
    }
    s_mp_clamp(r);

CLEANUP:
    return res;
}
/* 4 words */ mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r, const GFMethod *meth) { mp_err res = MP_OKAY; mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0; mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0; mp_digit carry; switch (MP_USED(a)) { case 4: a3 = MP_DIGIT(a, 3); case 3: a2 = MP_DIGIT(a, 2); case 2: a1 = MP_DIGIT(a, 1); case 1: a0 = MP_DIGIT(a, 0); } switch (MP_USED(b)) { case 4: r3 = MP_DIGIT(b, 3); case 3: r2 = MP_DIGIT(b, 2); case 2: r1 = MP_DIGIT(b, 1); case 1: r0 = MP_DIGIT(b, 0); } #ifndef MPI_AMD64_ADD carry = 0; MP_ADD_CARRY(a0, r0, r0, carry); MP_ADD_CARRY(a1, r1, r1, carry); MP_ADD_CARRY(a2, r2, r2, carry); MP_ADD_CARRY(a3, r3, r3, carry); #else __asm__( "xorq %4,%4 \n\t" "addq %5,%0 \n\t" "adcq %6,%1 \n\t" "adcq %7,%2 \n\t" "adcq %8,%3 \n\t" "adcq $0,%4 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry) : "r"(a0), "r"(a1), "r"(a2), "r"(a3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif MP_CHECKOK(s_mp_pad(r, 4)); MP_DIGIT(r, 3) = r3; MP_DIGIT(r, 2) = r2; MP_DIGIT(r, 1) = r1; MP_DIGIT(r, 0) = r0; MP_SIGN(r) = MP_ZPOS; MP_USED(r) = 4; /* Do quick 'subract' if we've gone over * (add the 2's complement of the curve field) */ a3 = MP_DIGIT(&meth->irr, 3); if (carry || r3 > a3 || ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) { a2 = MP_DIGIT(&meth->irr, 2); a1 = MP_DIGIT(&meth->irr, 1); a0 = MP_DIGIT(&meth->irr, 0); #ifndef MPI_AMD64_ADD carry = 0; MP_SUB_BORROW(r0, a0, r0, carry); MP_SUB_BORROW(r1, a1, r1, carry); MP_SUB_BORROW(r2, a2, r2, carry); MP_SUB_BORROW(r3, a3, r3, carry); #else __asm__( "subq %4,%0 \n\t" "sbbq %5,%1 \n\t" "sbbq %6,%2 \n\t" "sbbq %7,%3 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3) : "r"(a0), "r"(a1), "r"(a2), "r"(a3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif MP_DIGIT(r, 3) = r3; MP_DIGIT(r, 2) = r2; MP_DIGIT(r, 1) = r1; MP_DIGIT(r, 0) = r0; }