/*
 * Montgomery reduction in place: computes T = REDC(T), where 2^b == R.
 *
 * T    value to reduce; overwritten with the result
 * mmm  Montgomery modulus descriptor; N, n0prime and b are read from it
 *
 * Returns MP_OKAY on success, otherwise the error reported by the first
 * helper that fails.  In DEBUG builds MP_UNDEF is returned when the value
 * is still not below N after the single conditional subtraction.
 */
mp_err
s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
{
    mp_int *modulus = &mmm->N;
    mp_err res;
    mp_size pos;

    /* Grow T to MP_USED(T) + MP_USED(N) + 2 digits before accumulating. */
    MP_CHECKOK(s_mp_pad(T, MP_USED(T) + MP_USED(modulus) + 2));

    pos = 0;
    while (pos < MP_USED(modulus)) {
        mp_digit q = MP_DIGIT(T, pos) * mmm->n0prime;

        /* T += N * q * (MP_RADIX ** pos); */
        MP_CHECKOK(s_mp_mul_d_add_offset(modulus, q, T, pos));
        ++pos;
    }
    s_mp_clamp(T);

    /* T /= R */
    s_mp_div_2d(T, mmm->b);

    if (s_mp_cmp(T, modulus) >= 0) {
        /* T = T - N */
        MP_CHECKOK(s_mp_sub(T, modulus));
#ifdef DEBUG
        /* One subtraction is expected to be enough; flag it if not. */
        if (mp_cmp(T, modulus) >= 0) {
            res = MP_UNDEF;
            goto CLEANUP;
        }
#endif
    }
    res = MP_OKAY;

CLEANUP:
    return res;
}
/*
 * Reverse of the weaving step for one mp_int: rebuild a single bignum from
 * its column in the weaved array.
 *
 * Every row is scanned across all nBignums columns and the wanted digit is
 * selected with a mask instead of an index, so that the memory access
 * pattern does not depend on which column is requested (this avoids timing
 * attacks based on patterns of memory accesses).
 *
 * Always returns MP_OKAY.
 */
mp_err
weave_to_mpi(mp_int *a,              /* out, result */
             const mp_digit *weaved, /* in, digit matrix */
             mp_size index,          /* which column to read */
             mp_size nDigits,        /* number of mp_digits in each bignum */
             mp_size nBignums)       /* width of the matrix */
{
    /* The counters are indices, but they are kept mp_digit-sized because
     * they are fed to the CONST_TIME operations. */
    mp_digit row, col;
    mp_digit *out = MP_DIGITS(a);

    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = nDigits;

    assert(weaved != NULL);

    for (row = 0; row < nDigits; ++row) {
        const mp_digit *line = weaved + row * nBignums;
        mp_digit picked = 0;

        /* Touch every column; only the one equal to index survives the mask. */
        for (col = 0; col < nBignums; ++col) {
            picked |= line[col] & CONST_TIME_EQ(col, index);
        }
        out[row] = picked;
    }

    s_mp_clamp(a);
    return MP_OKAY;
}
/*
 * Reverse the weaving operation for one entry.
 *
 * a       out: receives b_size digits; sign is set to MP_ZPOS and the
 *         result is clamped
 * b       in: points to the offset into the weave array of the power we
 *         are calculating; successive bytes of the entry are `count`
 *         bytes apart
 * b_size  number of mp_digits to reconstruct
 * count   stride, in bytes, between consecutive bytes of this entry
 *
 * Each digit is assembled from sizeof(mp_digit) bytes, most significant
 * byte first.  A plain loop over sizeof(mp_digit) replaces the former
 * hand-unrolled switch: it yields the same result for digit sizes of
 * 2, 4, 8, 16 and 32 bytes, stays correct for any other size, and no
 * longer leaves a helper macro defined for the rest of the file.
 *
 * Always returns MP_OKAY.
 */
mp_err
weave_to_mpi(mp_int *a, const unsigned char *b, mp_size b_size, mp_size count)
{
    mp_digit *pb = MP_DIGITS(a);
    mp_digit *end = &pb[b_size];

    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = b_size;

    for (; pb < end; pb++) {
        mp_digit digit = 0;
        unsigned int k;

        /* Shift in one byte per step; the trip count is a compile-time
         * constant, so the compiler is free to unroll it. */
        for (k = 0; k < sizeof(mp_digit); ++k) {
            digit = (digit << 8) | *b;
            b += count;
        }
        *pb = digit;
    }

    s_mp_clamp(a);
    return MP_OKAY;
}
/*! c <- REDC( a * b ) mod N
    \param a < N  i.e. "reduced"
    \param b < N  i.e. "reduced"
    \param c receives the result; it is reset to zero before a and b are
             read (NOTE(review): so c presumably must not alias a or b)
    \param mmm modulus N and n0' of N
    \return MP_OKAY on success, MP_BADARG (via ARGCHK) when a, b or c is
            NULL, or the error reported by s_mp_pad / s_mp_sub
*/
mp_err
s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
              mp_mont_modulus *mmm)
{
    mp_int *modulus = &mmm->N;
    mp_digit *b_digits;
    mp_digit q;
    mp_err res;
    mp_size pos; /* index of the digit of b being processed */
    mp_size len_a, len_b, len_n;

    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

    /* Let b be the operand with fewer digits: fewer outer iterations. */
    if (MP_USED(a) < MP_USED(b)) {
        const mp_int *shorter = a;

        a = b;
        b = shorter;
    }

    /* Start from c == 0, sized to 2 * MP_USED(N) + 1 digits. */
    MP_USED(c) = 1;
    MP_DIGIT(c, 0) = 0;
    pos = (MP_USED(modulus) << 1) + 1;
    res = s_mp_pad(c, pos);
    if (res != MP_OKAY) {
        goto CLEANUP;
    }

    len_a = MP_USED(a);
    b_digits = MP_DIGITS(b);

    /* First digit of b: c = a * b[0], zero the remaining digits, then
     * perform one reduction step. */
    s_mpv_mul_d(MP_DIGITS(a), len_a, b_digits[0], MP_DIGITS(c));
    s_mp_setz(MP_DIGITS(c) + len_a + 1, pos - (len_a + 1));
    q = MP_DIGIT(c, 0) * mmm->n0prime;
    s_mp_mul_d_add_offset(modulus, q, c, 0);

    /* Outer loop: remaining digits of b */
    len_b = MP_USED(b);
    for (pos = 1; pos < len_b; pos++) {
        mp_digit b_i = b_digits[pos];

        /* Inner product: digits of a (skipped for a zero digit of b) */
        if (b_i != 0) {
            s_mpv_mul_d_add_prop(MP_DIGITS(a), len_a, b_i,
                                 MP_DIGITS(c) + pos);
        }
        q = MP_DIGIT(c, pos) * mmm->n0prime;
        s_mp_mul_d_add_offset(modulus, q, c, pos);
    }

    /* When b has fewer digits than N, finish the reduction steps for the
     * digit positions that b did not cover. */
    len_n = MP_USED(modulus);
    if (len_b < len_n) {
        while (pos < len_n) {
            q = MP_DIGIT(c, pos) * mmm->n0prime;
            s_mp_mul_d_add_offset(modulus, q, c, pos);
            ++pos;
        }
    }

    s_mp_clamp(c);
    s_mp_rshd(c, MP_USED(modulus)); /* c /= R */
    if (s_mp_cmp(c, modulus) >= 0) {
        MP_CHECKOK(s_mp_sub(c, modulus));
    }
    res = MP_OKAY;

CLEANUP:
    return res;
}
/*
 * Reverse of the weaving step for one mp_int: rebuild a single bignum from
 * its column in the weaved byte matrix.
 *
 * "pSrc" points to the offset into the weave array of the bignum being
 * reconstructed; consecutive bytes of that bignum lie nBignums bytes
 * apart.  The bytes are copied one by one into the digit storage of a,
 * in the order in which they appear in memory.
 *
 * Always returns MP_OKAY.
 */
mp_err
weave_to_mpi(mp_int *a,                 /* output, result */
             const unsigned char *pSrc, /* input, byte matrix */
             mp_size nDigits,           /* per mp_int output */
             mp_size nBignums)          /* bignums in weaved matrix */
{
    unsigned char *out = (unsigned char *)MP_DIGITS(a);
    mp_size digit;
    unsigned int byte;

    MP_SIGN(a) = MP_ZPOS;
    MP_USED(a) = nDigits;

    /* nDigits * sizeof(mp_digit) bytes in total, one stride per byte. */
    for (digit = 0; digit < nDigits; ++digit) {
        for (byte = 0; byte < sizeof(mp_digit); ++byte) {
            *out++ = *pSrc;
            pSrc += nBignums;
        }
    }

    s_mp_clamp(a);
    return MP_OKAY;
}
/* 6 words */
/*
 * Six-digit field subtraction: r = a - b, with meth->irr added back once
 * when the raw subtraction borrows out of the top digit.
 *
 * a, b  operands; only MP_USED() digits are read, missing high digits count
 *       as zero.  An operand whose MP_USED() is 0 or greater than 6 is
 *       read as zero.
 * r     result; padded to 6 digits, made non-negative and clamped
 * meth  field method; its irr member supplies the six digits added back
 *
 * Returns MP_OKAY, or the error from s_mp_pad.
 */
mp_err
ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit bd[6] = { 0, 0, 0, 0, 0, 0 }; /* digits of b, later of irr */
    mp_digit rd[6] = { 0, 0, 0, 0, 0, 0 }; /* digits of a, then the result */
    mp_digit borrow;
    mp_size used, k;

    /* Copy the operands into locals first, so r may be written freely. */
    used = MP_USED(a);
    if (used <= 6) {
        for (k = 0; k < used; ++k) {
            rd[k] = MP_DIGIT(a, k);
        }
    }
    used = MP_USED(b);
    if (used <= 6) {
        for (k = 0; k < used; ++k) {
            bd[k] = MP_DIGIT(b, k);
        }
    }

    borrow = 0;
    for (k = 0; k < 6; ++k) {
        MP_SUB_BORROW(rd[k], bd[k], rd[k], borrow);
    }

    /* Do quick 'add' if we've gone under 0
     * (subtract the 2's complement of the curve field) */
    if (borrow) {
        for (k = 0; k < 6; ++k) {
            bd[k] = MP_DIGIT(&meth->irr, k);
        }
        borrow = 0;
        for (k = 0; k < 6; ++k) {
            MP_ADD_CARRY(bd[k], rd[k], rd[k], borrow);
        }
    }

    MP_CHECKOK(s_mp_pad(r, 6));
    for (k = 0; k < 6; ++k) {
        MP_DIGIT(r, k) = rd[k];
    }
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 6;
    s_mp_clamp(r);

CLEANUP:
    return res;
}
/* 4 words */ mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r, const GFMethod *meth) { mp_err res = MP_OKAY; mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0; mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0; mp_digit borrow; switch (MP_USED(a)) { case 4: r3 = MP_DIGIT(a, 3); case 3: r2 = MP_DIGIT(a, 2); case 2: r1 = MP_DIGIT(a, 1); case 1: r0 = MP_DIGIT(a, 0); } switch (MP_USED(b)) { case 4: b3 = MP_DIGIT(b, 3); case 3: b2 = MP_DIGIT(b, 2); case 2: b1 = MP_DIGIT(b, 1); case 1: b0 = MP_DIGIT(b, 0); } #ifndef MPI_AMD64_ADD borrow = 0; MP_SUB_BORROW(r0, b0, r0, borrow); MP_SUB_BORROW(r1, b1, r1, borrow); MP_SUB_BORROW(r2, b2, r2, borrow); MP_SUB_BORROW(r3, b3, r3, borrow); #else __asm__( "xorq %4,%4 \n\t" "subq %5,%0 \n\t" "sbbq %6,%1 \n\t" "sbbq %7,%2 \n\t" "sbbq %8,%3 \n\t" "adcq $0,%4 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(borrow) : "r"(b0), "r"(b1), "r"(b2), "r"(b3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif /* Do quick 'add' if we've gone under 0 * (subtract the 2's complement of the curve field) */ if (borrow) { b3 = MP_DIGIT(&meth->irr, 3); b2 = MP_DIGIT(&meth->irr, 2); b1 = MP_DIGIT(&meth->irr, 1); b0 = MP_DIGIT(&meth->irr, 0); #ifndef MPI_AMD64_ADD borrow = 0; MP_ADD_CARRY(b0, r0, r0, borrow); MP_ADD_CARRY(b1, r1, r1, borrow); MP_ADD_CARRY(b2, r2, r2, borrow); MP_ADD_CARRY(b3, r3, r3, borrow); #else __asm__( "addq %4,%0 \n\t" "adcq %5,%1 \n\t" "adcq %6,%2 \n\t" "adcq %7,%3 \n\t" : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3) : "r"(b0), "r"(b1), "r"(b2), "r"(b3), "0"(r0), "1"(r1), "2"(r2), "3"(r3) : "%cc"); #endif } #ifdef MPI_AMD64_ADD /* compiler fakeout? */ if ((r3 == b0) && (r1 == b0) && (r0 == b0)) { MP_CHECKOK(s_mp_pad(r, 4)); } #endif MP_CHECKOK(s_mp_pad(r, 4)); MP_DIGIT(r, 3) = r3; MP_DIGIT(r, 2) = r2; MP_DIGIT(r, 1) = r1; MP_DIGIT(r, 0) = r0; MP_SIGN(r) = MP_ZPOS; MP_USED(r) = 4; s_mp_clamp(r); CLEANUP: return res; }
/* 6 words */
/*
 * Six-digit field addition: r = a + b, with meth->irr subtracted once when
 * the raw sum carried out of the top digit or is not below meth->irr.
 *
 * a, b  operands; only MP_USED() digits are read, missing high digits count
 *       as zero.  An operand whose MP_USED() is 0 or greater than 6 is
 *       read as zero.
 * r     result; padded to 6 digits, made non-negative and clamped
 * meth  field method; its irr member is the value compared against and
 *       subtracted
 *
 * Returns MP_OKAY, or the error from s_mp_pad.
 */
mp_err
ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit ad[6] = { 0, 0, 0, 0, 0, 0 }; /* digits of a, later of irr */
    mp_digit rd[6] = { 0, 0, 0, 0, 0, 0 }; /* digits of b, then the result */
    mp_digit carry;
    mp_size used, k;

    /* Copy the operands into locals first, so r may be written freely. */
    used = MP_USED(a);
    if (used <= 6) {
        for (k = 0; k < used; ++k) {
            ad[k] = MP_DIGIT(a, k);
        }
    }
    used = MP_USED(b);
    if (used <= 6) {
        for (k = 0; k < used; ++k) {
            rd[k] = MP_DIGIT(b, k);
        }
    }

    carry = 0;
    for (k = 0; k < 6; ++k) {
        MP_ADD_CARRY(ad[k], rd[k], rd[k], carry);
    }

    /* Store the raw sum; r must hold it for the mp_cmp below. */
    MP_CHECKOK(s_mp_pad(r, 6));
    for (k = 0; k < 6; ++k) {
        MP_DIGIT(r, k) = rd[k];
    }
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 6;

    /* Do quick 'subtract' if we've gone over
     * (add the 2's complement of the curve field).
     * The top digits are compared first; the full mp_cmp only runs when
     * they are equal. */
    ad[5] = MP_DIGIT(&meth->irr, 5);
    if (carry || rd[5] > ad[5] ||
        ((rd[5] == ad[5]) && mp_cmp(r, &meth->irr) != MP_LT)) {
        for (k = 0; k < 5; ++k) {
            ad[k] = MP_DIGIT(&meth->irr, k);
        }
        carry = 0;
        for (k = 0; k < 6; ++k) {
            MP_SUB_BORROW(rd[k], ad[k], rd[k], carry);
        }
        for (k = 0; k < 6; ++k) {
            MP_DIGIT(r, k) = rd[k];
        }
    }

    s_mp_clamp(r);

CLEANUP:
    return res;
}