/*! c <- REDC( a * b ) mod N
 *  \param a    < N i.e. "reduced"
 *  \param b    < N i.e. "reduced"
 *  \param mmm  modulus N and n0' of N
 *
 * Montgomery multiplication with interleaved reduction: each outer
 * iteration multiplies by one digit of b and immediately performs one
 * REDC step, so the accumulator never grows beyond 2n+1 digits.
 * n0prime is presumably the standard Montgomery constant
 * -(N^-1) mod RADIX (per the "n0' of N" note above) — each reduction
 * step adds m_i * N so that the current low digit of c becomes zero.
 */
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, mp_mont_modulus *mmm)
{
    mp_digit *pb;
    mp_digit m_i;
    mp_err res;
    mp_size ib; /* "index b": index of current digit of B */
    mp_size useda, usedb;

    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

    if (MP_USED(a) < MP_USED(b)) {
        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
        b = a;
        a = xch;
    }

    /* Reserve 2n+1 digits in c: a full double-width product plus one
     * carry digit.  ib temporarily holds this size. */
    MP_USED(c) = 1;
    MP_DIGIT(c, 0) = 0;
    ib = (MP_USED(&mmm->N) << 1) + 1;
    if ((res = s_mp_pad(c, ib)) != MP_OKAY)
        goto CLEANUP;

    /* First outer iteration unrolled: c starts empty, so a plain
     * multiply (not multiply-accumulate) by b[0] suffices; then zero
     * the rest of c and do the first reduction step at offset 0. */
    useda = MP_USED(a);
    pb = MP_DIGITS(b);
    s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
    s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
    m_i = MP_DIGIT(c, 0) * mmm->n0prime;
    s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);

    /* Outer loop: Digits of b */
    usedb = MP_USED(b);
    for (ib = 1; ib < usedb; ib++) {
        mp_digit b_i = *pb++;

        /* Inner product: Digits of a */
        if (b_i)
            s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
        /* REDC step: add m_i * N at offset ib, clearing digit ib of c. */
        m_i = MP_DIGIT(c, ib) * mmm->n0prime;
        s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
    }
    /* If b has fewer digits than N, continue the remaining reduction
     * steps (no more multiply work) until all MP_USED(N) digits of c
     * have been cleared. */
    if (usedb < MP_USED(&mmm->N)) {
        for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) {
            m_i = MP_DIGIT(c, ib) * mmm->n0prime;
            s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
        }
    }
    s_mp_clamp(c);
    s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */
    /* Conditional final subtraction brings the result into [0, N).
     * NOTE(review): this branch is data-dependent and therefore a
     * potential timing side-channel — confirm against the project's
     * constant-time requirements. */
    if (s_mp_cmp(c, &mmm->N) >= 0) {
        MP_CHECKOK(s_mp_sub(c, &mmm->N));
    }
    res = MP_OKAY;

CLEANUP:
    return res;
}
/* c = a * b
 *
 * Multiply the a_len-digit array a by the single digit b, writing the
 * (a_len + 1)-digit result to c via HP_MPY_ADD_FN.  The helper
 * accumulates into c, so c is zeroed first.
 *
 * The helper cannot operate in place, so when the caller aliases the
 * input and output (a == c) the input is first copied to scratch
 * storage: a stack buffer for operands up to MAX_STACK_DIGITS, a heap
 * buffer otherwise.
 *
 * NOTE: the void interface cannot report a failed heap allocation.
 * On malloc failure the output is zeroed (a defined, if wrong, value)
 * rather than left holding indeterminate stale data as before.
 */
void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    mp_digit x[MAX_STACK_DIGITS];
    mp_digit *px = x;
    size_t xSize = 0;

    if (a == c) {
        if (a_len > MAX_STACK_DIGITS) {
            xSize = sizeof(mp_digit) * (a_len + 2);
            px = malloc(xSize);
            if (!px) {
                /* Can't signal failure through the void interface;
                 * leave a defined (zero) result instead of garbage. */
                s_mp_setz(c, a_len + 1);
                return;
            }
        }
        memcpy(px, a, a_len * sizeof(*a));
        a = px;
    }
    s_mp_setz(c, a_len + 1); /* helper accumulates, so clear first */
    HP_MPY_ADD_FN(a_len, &b, a, c);
    if (px != x) {
        /* Scratch may hold secret operand digits; zeroize before
         * freeing.  (px is never NULL here — the failure path above
         * returned early, so no extra NULL check is needed.)
         * NOTE(review): a plain memset before free can be elided by
         * dead-store elimination; consider explicit_bzero/memset_s
         * if the platform provides one. */
        memset(px, 0, xSize);
        free(px);
    }
}