/* high level subtraction (handles signs) */ int mp_sub (mp_int * a, mp_int * b, mp_int * c) { int sa, sb, res; sa = a->sign; sb = b->sign; if (sa != sb) { /* subtract a negative from a positive, OR */ /* subtract a positive from a negative. */ /* In either case, ADD their magnitudes, */ /* and use the sign of the first number. */ c->sign = sa; res = s_mp_add (a, b, c); } else { /* subtract a positive from a positive, OR */ /* subtract a negative from a negative. */ /* First, take the difference between their */ /* magnitudes, then... */ if (mp_cmp_mag (a, b) != MP_LT) { /* Copy the sign from the first */ c->sign = sa; /* The first has a larger or equal magnitude */ res = s_mp_sub (a, b, c); } else { /* The result has the *opposite* sign from */ /* the first number. */ c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; /* The second has a larger magnitude */ res = s_mp_sub (b, a, c); } } return res; }
/* high level addition (handles signs) */ int mp_add(const mp_int *a, const mp_int *b, mp_int *c) { int sa, sb, res; /* get sign of both inputs */ sa = a->sign; sb = b->sign; /* handle two cases, not four */ if (sa == sb) { /* both positive or both negative */ /* add their magnitudes, copy the sign */ c->sign = sa; res = s_mp_add(a, b, c); } else { /* one positive, the other negative */ /* subtract the one with the greater magnitude from */ /* the one of the lesser magnitude. The result gets */ /* the sign of the one with the greater magnitude. */ if (mp_cmp_mag(a, b) == MP_LT) { c->sign = sb; res = s_mp_sub(b, a, c); } else { c->sign = sa; res = s_mp_sub(a, b, c); } } return res; }
/* determines the setup value */ int mp_reduce_2k_setup(mp_int *a, mp_digit *d) { int res, p; mp_int tmp; if ((res = mp_init(&tmp)) != MP_OKAY) { return res; } p = mp_count_bits(a); if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { mp_clear(&tmp); return res; } if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { mp_clear(&tmp); return res; } *d = tmp.dp[0]; mp_clear(&tmp); return MP_OKAY; }
/* computes T = REDC(T), 2^b == R */ mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm) { mp_err res; mp_size i; i = MP_USED(T) + MP_USED(&mmm->N) + 2; MP_CHECKOK( s_mp_pad(T, i) ); for (i = 0; i < MP_USED(&mmm->N); ++i ) { mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime; /* T += N * m_i * (MP_RADIX ** i); */ MP_CHECKOK( s_mp_mul_d_add_offset(&mmm->N, m_i, T, i) ); } s_mp_clamp(T); /* T /= R */ s_mp_div_2d(T, mmm->b); if ((res = s_mp_cmp(T, &mmm->N)) >= 0) { /* T = T - N */ MP_CHECKOK( s_mp_sub(T, &mmm->N) ); #ifdef DEBUG if ((res = mp_cmp(T, &mmm->N)) >= 0) { res = MP_UNDEF; goto CLEANUP; } #endif } res = MP_OKAY; CLEANUP: return res; }
/* reduces a modulo n where n is of the form 2**p - d This differs from reduce_2k since "d" can be larger than a single digit. */ int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) { mp_int q; int p, res; if ((res = mp_init(&q)) != MP_OKAY) { return res; } p = mp_count_bits(n); top: /* q = a/2**p, a = a mod 2**p */ if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { goto LBL_ERR; } /* q = q * d */ if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { goto LBL_ERR; } /* a = a + q */ if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { goto LBL_ERR; } if (mp_cmp_mag(a, n) != MP_LT) { s_mp_sub(a, n, a); goto top; } LBL_ERR: mp_clear(&q); return res; }
/* * shifts with subtractions when the result is greater than b. * * The method is slightly modified to shift B unconditionally upto just under * the leading bit of b. This saves alot of multiple precision shifting. */ int mp_montgomery_calc_normalization (mp_int * a, mp_int * b) { int x, bits, res; /* how many bits of last digit does b use */ bits = mp_count_bits (b) % DIGIT_BIT; if (b->used > 1) { if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { return res; } } else { mp_set(a, 1); bits = 1; } /* now compute C = A * B mod b */ for (x = bits - 1; x < (int)DIGIT_BIT; x++) { if ((res = mp_mul_2 (a, a)) != MP_OKAY) { return res; } if (mp_cmp_mag (a, b) != MP_LT) { if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { return res; } } } return MP_OKAY; }
/* reduce "x" in place modulo "n" using the Diminished Radix algorithm. * * Based on algorithm from the paper * * "Generating Efficient Primes for Discrete Log Cryptosystems" * Chae Hoon Lim, Pil Joong Lee, * POSTECH Information Research Laboratories * * The modulus must be of a special format [see manual] * * Has been modified to use algorithm 7.10 from the LTM book instead * * Input x must be in the range 0 <= x <= (n-1)**2 */ int mp_dr_reduce(mp_int *x, mp_int *n, mp_digit k) { int err, i, m; mp_word r; mp_digit mu, *tmpx1, *tmpx2; /* m = digits in modulus */ m = n->used; /* ensure that "x" has at least 2m digits */ if (x->alloc < (m + m)) { if ((err = mp_grow(x, m + m)) != MP_OKAY) { return err; } } /* top of loop, this is where the code resumes if * another reduction pass is required. */ top: /* aliases for digits */ /* alias for lower half of x */ tmpx1 = x->dp; /* alias for upper half of x, or x/B**m */ tmpx2 = x->dp + m; /* set carry to zero */ mu = 0; /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ for (i = 0; i < m; i++) { r = (((mp_word) * tmpx2++) * (mp_word)k) + *tmpx1 + mu; *tmpx1++ = (mp_digit)(r & MP_MASK); mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); } /* set final carry */ *tmpx1++ = mu; /* zero words above m */ for (i = m + 1; i < x->used; i++) { *tmpx1++ = 0; } /* clamp, sub and return */ mp_clamp(x); /* if x >= n then subtract and reduce again * Each successive "recursion" makes the input smaller and smaller. */ if (mp_cmp_mag(x, n) != MP_LT) { if ((err = s_mp_sub(x, n, x)) != MP_OKAY) { return err; } goto top; } return MP_OKAY; }
/*! c <- REDC( a * b ) mod N \param a < N i.e. "reduced" \param b < N i.e. "reduced" \param mmm modulus N and n0' of N */ mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, mp_mont_modulus *mmm) { mp_digit *pb; mp_digit m_i; mp_err res; mp_size ib; /* "index b": index of current digit of B */ mp_size useda, usedb; ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); if (MP_USED(a) < MP_USED(b)) { const mp_int *xch = b; /* switch a and b, to do fewer outer loops */ b = a; a = xch; } MP_USED(c) = 1; MP_DIGIT(c, 0) = 0; ib = (MP_USED(&mmm->N) << 1) + 1; if ((res = s_mp_pad(c, ib)) != MP_OKAY) goto CLEANUP; useda = MP_USED(a); pb = MP_DIGITS(b); s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c)); s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1)); m_i = MP_DIGIT(c, 0) * mmm->n0prime; s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0); /* Outer loop: Digits of b */ usedb = MP_USED(b); for (ib = 1; ib < usedb; ib++) { mp_digit b_i = *pb++; /* Inner product: Digits of a */ if (b_i) s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib); m_i = MP_DIGIT(c, ib) * mmm->n0prime; s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib); } if (usedb < MP_USED(&mmm->N)) { for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) { m_i = MP_DIGIT(c, ib) * mmm->n0prime; s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib); } } s_mp_clamp(c); s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */ if (s_mp_cmp(c, &mmm->N) >= 0) { MP_CHECKOK(s_mp_sub(c, &mmm->N)); } res = MP_OKAY; CLEANUP: return res; }
/* determines the setup value */ int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) { int res; mp_int tmp; if ((res = mp_init(&tmp)) != MP_OKAY) { return res; } if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { goto ERR; } if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { goto ERR; } ERR: mp_clear(&tmp); return res; }
/* reduces a modulo n where n is of the form 2**p - k */ int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k) { mp_int q; int p, res; if ((res = mp_init(&q)) != MP_OKAY) { return res; } p = mp_count_bits(n); top: /* q = a/2**p, a = a mod 2**p */ if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { goto ERR; } if (k != 1) { /* q = q * k */ if ((res = mp_mul_d(&q, k, &q)) != MP_OKAY) { goto ERR; } } /* a = a + q */ if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { goto ERR; } if (mp_cmp_mag(a, n) != MP_LT) { s_mp_sub(a, n, a); goto top; } ERR: mp_clear(&q); return res; }
/* Karatsuba squaring, computes b = a*a using three * half size squarings * * See comments of karatsuba_mul for details. It * is essentially the same algorithm but merely * tuned to perform recursive squarings. */ int mp_karatsuba_sqr(const mp_int *a, mp_int *b) { mp_int x0, x1, t1, t2, x0x0, x1x1; int B, err; err = MP_MEM; /* min # of digits */ B = a->used; /* now divide in two */ B = B >> 1; /* init copy all the temps */ if (mp_init_size(&x0, B) != MP_OKAY) goto LBL_ERR; if (mp_init_size(&x1, a->used - B) != MP_OKAY) goto X0; /* init temps */ if (mp_init_size(&t1, a->used * 2) != MP_OKAY) goto X1; if (mp_init_size(&t2, a->used * 2) != MP_OKAY) goto T1; if (mp_init_size(&x0x0, B * 2) != MP_OKAY) goto T2; if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) goto X0X0; { int x; mp_digit *dst, *src; src = a->dp; /* now shift the digits */ dst = x0.dp; for (x = 0; x < B; x++) { *dst++ = *src++; } dst = x1.dp; for (x = B; x < a->used; x++) { *dst++ = *src++; } } x0.used = B; x1.used = a->used - B; mp_clamp(&x0); /* now calc the products x0*x0 and x1*x1 */ if (mp_sqr(&x0, &x0x0) != MP_OKAY) goto X1X1; /* x0x0 = x0*x0 */ if (mp_sqr(&x1, &x1x1) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */ /* now calc (x1+x0)**2 */ if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */ if (mp_sqr(&t1, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ /* add x0y0 */ if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1; /* t2 = x0x0 + x1x1 */ if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ /* shift by B */ if (mp_lshd(&t1, B) != MP_OKAY) goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ if (mp_lshd(&x1x1, B * 2) != MP_OKAY) goto X1X1; /* x1x1 = x1x1 << 2*B */ if (mp_add(&x0x0, &t1, &t1) != MP_OKAY) goto X1X1; /* t1 = x0x0 + t1 */ if (mp_add(&t1, &x1x1, b) != MP_OKAY) goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ err = MP_OKAY; X1X1: mp_clear(&x1x1); X0X0: mp_clear(&x0x0); T2: mp_clear(&t2); T1: mp_clear(&t1); X1: mp_clear(&x1); X0: mp_clear(&x0); LBL_ERR: return err; }
/* computes xR**-1 == x (mod N) via Montgomery Reduction * * This is an optimized implementation of montgomery_reduce * which uses the comba method to quickly calculate the columns of the * reduction. * * Based on Algorithm 14.32 on pp.601 of HAC. */ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) { int ix, res, olduse; mp_word W[MP_WARRAY] = { 0 }; /* get old used count */ olduse = x->used; /* grow a as required */ if (x->alloc < n->used + 1) { if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { return res; } } /* first we have to get the digits of the input into * an array of double precision words W[...] */ { register mp_word *_W; register mp_digit *tmpx; /* alias for the W[] array */ _W = W; /* alias for the digits of x*/ tmpx = x->dp; /* copy the digits of a into W[0..a->used-1] */ for (ix = 0; ix < x->used; ix++) { *_W++ = *tmpx++; } /* zero the high words of W[a->used..m->used*2] */ for (; ix < n->used * 2 + 1; ix++) { *_W++ = 0; } } /* now we proceed to zero successive digits * from the least significant upwards */ for (ix = 0; ix < n->used; ix++) { /* mu = ai * m' mod b * * We avoid a double precision multiplication (which isn't required) * by casting the value down to a mp_digit. Note this requires * that W[ix-1] have the carry cleared (see after the inner loop) */ register mp_digit mu; mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); /* a = a + mu * m * b**i * * This is computed in place and on the fly. The multiplication * by b**i is handled by offseting which columns the results * are added to. * * Note the comba method normally doesn't handle carries in the * inner loop In this case we fix the carry from the previous * column since the Montgomery reduction requires digits of the * result (so far) [see above] to work. This is * handled by fixing up one carry after the inner loop. The * carry fixups are done in order so after these loops the * first m->used words of W[] have the carries fixed */ { register int iy; register mp_digit *tmpn; register mp_word *_W; /* alias for the digits of the modulus */ tmpn = n->dp; /* Alias for the columns set by an offset of ix */ _W = W + ix; /* inner loop */ for (iy = 0; iy < n->used; iy++) { *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); } } /* now fix carry for next digit, W[ix+1] */ W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); } /* now we have to propagate the carries and * shift the words downward [all those least * significant digits we zeroed]. */ { register mp_digit *tmpx; register mp_word *_W, *_W1; /* nox fix rest of carries */ /* alias for current word */ _W1 = W + ix; /* alias for next word, where the carry goes */ _W = W + ++ix; for (; ix <= n->used * 2 + 1; ix++) { *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); } /* copy out, A = A/b**n * * The result is A/b**n but instead of converting from an * array of mp_word to mp_digit than calling mp_rshd * we just copy them in the right order */ /* alias for destination word */ tmpx = x->dp; /* alias for shifted double precision result */ _W = W + n->used; for (ix = 0; ix < n->used + 1; ix++) { *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); } /* zero oldused digits, if the input a was larger than * m->used+1 we'll have to clear the digits */ for (; ix < olduse; ix++) { *tmpx++ = 0; } } /* set the max used and clamp */ x->used = n->used + 1; mp_clamp (x); /* if A >= m then A = A - m */ if (mp_cmp_mag (x, n) != MP_LT) { return s_mp_sub (x, n, x); } return MP_OKAY; }
/* c = |a| * |b| using Karatsuba Multiplication using * three half size multiplications * * Let B represent the radix [e.g. 2**DIGIT_BIT] and * let n represent half of the number of digits in * the min(a,b) * * a = a1 * B**n + a0 * b = b1 * B**n + b0 * * Then, a * b => a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 * * Note that a1b1 and a0b0 are used twice and only need to be * computed once. So in total three half size (half # of * digit) multiplications are performed, a0b0, a1b1 and * (a1+b1)(a0+b0) * * Note that a multiplication of half the digits requires * 1/4th the number of single precision multiplications so in * total after one call 25% of the single precision multiplications * are saved. Note also that the call to mp_mul can end up back * in this function if the a0, a1, b0, or b1 are above the threshold. * This is known as divide-and-conquer and leads to the famous * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than * the standard O(N**2) that the baseline/comba methods use. * Generally though the overhead of this method doesn't pay off * until a certain size (N ~ 80) is reached. */ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) { mp_int x0, x1, y0, y1, t1, x0y0, x1y1; int B, err; /* default the return code to an error */ err = MP_MEM; /* min # of digits */ B = MIN (a->used, b->used); /* now divide in two */ B = B >> 1; /* init copy all the temps */ if (mp_init_size (&x0, B) != MP_OKAY) goto ERR; if (mp_init_size (&x1, a->used - B) != MP_OKAY) goto X0; if (mp_init_size (&y0, B) != MP_OKAY) goto X1; if (mp_init_size (&y1, b->used - B) != MP_OKAY) goto Y0; /* init temps */ if (mp_init_size (&t1, B * 2) != MP_OKAY) goto Y1; if (mp_init_size (&x0y0, B * 2) != MP_OKAY) goto T1; if (mp_init_size (&x1y1, B * 2) != MP_OKAY) goto X0Y0; /* now shift the digits */ x0.used = y0.used = B; x1.used = a->used - B; y1.used = b->used - B; { register int x; register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; /* we copy the digits directly instead of using higher level functions * since we also need to shift the digits */ tmpa = a->dp; tmpb = b->dp; tmpx = x0.dp; tmpy = y0.dp; for (x = 0; x < B; x++) { *tmpx++ = *tmpa++; *tmpy++ = *tmpb++; } tmpx = x1.dp; for (x = B; x < a->used; x++) { *tmpx++ = *tmpa++; } tmpy = y1.dp; for (x = B; x < b->used; x++) { *tmpy++ = *tmpb++; } } /* only need to clamp the lower words since by definition the * upper words x1/y1 must have a known number of digits */ mp_clamp (&x0); mp_clamp (&y0); /* now calc the products x0y0 and x1y1 */ /* after this x0 is no longer required, free temp [x0==t2]! */ if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) goto X1Y1; /* x0y0 = x0*y0 */ if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) goto X1Y1; /* x1y1 = x1*y1 */ /* now calc x1+x0 and y1+y0 */ if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = x1 - x0 */ if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) goto X1Y1; /* t2 = y1 - y0 */ if (mp_mul (&t1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ /* add x0y0 */ if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) goto X1Y1; /* t2 = x0y0 + x1y1 */ if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ /* shift by B */ if (mp_lshd (&t1, B) != MP_OKAY) goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ if (mp_lshd (&x1y1, B * 2) != MP_OKAY) goto X1Y1; /* x1y1 = x1y1 << 2*B */ if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 */ if (mp_add (&t1, &x1y1, c) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ /* Algorithm succeeded set the return code to MP_OKAY */ err = MP_OKAY; X1Y1: mp_clear (&x1y1); X0Y0: mp_clear (&x0y0); T1: mp_clear (&t1); Y1: mp_clear (&y1); Y0: mp_clear (&y0); X1: mp_clear (&x1); X0: mp_clear (&x0); ERR: return err; }
/* Karatsuba squaring, computes b = a*a using three * half size squarings * * See comments of karatsuba_mul for details. It * is essentially the same algorithm but merely * tuned to perform recursive squarings. */ int mp_karatsuba_sqr (mp_int * a, mp_int * b) { mp_int x0, x1, t1, t2, x0x0, x1x1; int B, err; err = MP_MEM; /* min # of digits */ B = USED(a); /* now divide in two */ B = B >> 1; /* init copy all the temps */ if (mp_init_size (&x0, B) != MP_OKAY) goto ERR; if (mp_init_size (&x1, USED(a) - B) != MP_OKAY) goto X0; /* init temps */ if (mp_init_size (&t1, USED(a) * 2) != MP_OKAY) goto X1; if (mp_init_size (&t2, USED(a) * 2) != MP_OKAY) goto T1; if (mp_init_size (&x0x0, B * 2) != MP_OKAY) goto T2; if (mp_init_size (&x1x1, (USED(a) - B) * 2) != MP_OKAY) goto X0X0; { register int x; register mp_digit *dst, *src; src = DIGITS(a); /* now shift the digits */ dst = DIGITS(&x0); for (x = 0; x < B; x++) { *dst++ = *src++; } dst = DIGITS(&x1); for (x = B; x < USED(a); x++) { *dst++ = *src++; } } SET_USED(&x0,B); SET_USED(&x1,USED(a) - B); mp_clamp (&x0); /* now calc the products x0*x0 and x1*x1 */ if (mp_sqr (&x0, &x0x0) != MP_OKAY) goto X1X1; /* x0x0 = x0*x0 */ if (mp_sqr (&x1, &x1x1) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */ /* now calc (x1+x0)**2 */ if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */ if (mp_sqr (&t1, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ /* add x0y0 */ if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1; /* t2 = x0x0 + x1x1 */ if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ /* shift by B */ if (mp_lshd (&t1, B) != MP_OKAY) goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ if (mp_lshd (&x1x1, B * 2) != MP_OKAY) goto X1X1; /* x1x1 = x1x1 << 2*B */ if (mp_add (&x0x0, &t1, &t1) != MP_OKAY) goto X1X1; /* t1 = x0x0 + t1 */ if (mp_add (&t1, &x1x1, b) != MP_OKAY) goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ err = MP_OKAY; X1X1:mp_clear (&x1x1); X0X0:mp_clear (&x0x0); T2:mp_clear (&t2); T1:mp_clear (&t1); X1:mp_clear (&x1); X0:mp_clear (&x0); ERR: return err; }
/* reduces x mod m, assumes 0 < x < m**2, mu is * precomputed via mp_reduce_setup. * From HAC pp.604 Algorithm 14.42 */ int mp_reduce (mp_int * x, mp_int * m, mp_int * mu) { mp_int q; int res, um = USED(m); /* q = x */ if ((res = mp_init_copy (&q, x)) != MP_OKAY) { return res; } /* q1 = x / b**(k-1) */ mp_rshd (&q, um - 1); /* according to HAC this optimization is ok */ if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { goto CLEANUP; } } else { #ifdef BN_S_MP_MUL_HIGH_DIGS_C if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { goto CLEANUP; } #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C) if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { goto CLEANUP; } #else { res = MP_VAL; goto CLEANUP; } #endif } /* q3 = q2 / b**(k+1) */ mp_rshd (&q, um + 1); /* x = x mod b**(k+1), quick (no division) */ if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { goto CLEANUP; } /* q = q * m mod b**(k+1), quick (no division) */ if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { goto CLEANUP; } /* x = x - q */ if ((res = mp_sub (x, &q, x)) != MP_OKAY) { goto CLEANUP; } /* If x < 0, add b**(k+1) to it */ if (mp_cmp_d (x, 0) == MP_LT) { mp_set (&q, 1); if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) goto CLEANUP; if ((res = mp_add (x, &q, x)) != MP_OKAY) goto CLEANUP; } /* Back off if it's too big */ while (mp_cmp (x, m) != MP_LT) { if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { goto CLEANUP; } } CLEANUP: mp_clear (&q); return res; }
/* computes xR**-1 == x (mod N) via Montgomery Reduction */ int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) { int ix, res, digs; mp_digit mu; /* can the fast reduction [comba] method be used? * * Note that unlike in mul you're safely allowed *less* * than the available columns [255 per default] since carries * are fixed up in the inner loop. */ digs = n->used * 2 + 1; if ((digs < MP_WARRAY) && n->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { return fast_mp_montgomery_reduce (x, n, rho); } /* grow the input as required */ if (x->alloc < digs) { if ((res = mp_grow (x, digs)) != MP_OKAY) { return res; } } x->used = digs; for (ix = 0; ix < n->used; ix++) { /* mu = ai * rho mod b * * The value of rho must be precalculated via * montgomery_setup() such that * it equals -1/n0 mod b this allows the * following inner loop to reduce the * input one digit at a time */ mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); /* a = a + mu * m * b**i */ { register int iy; register mp_digit *tmpn, *tmpx, u; register mp_word r; /* alias for digits of the modulus */ tmpn = n->dp; /* alias for the digits of x [the input] */ tmpx = x->dp + ix; /* set the carry to zero */ u = 0; /* Multiply and add in place */ for (iy = 0; iy < n->used; iy++) { /* compute product and sum */ r = ((mp_word)mu) * ((mp_word)*tmpn++) + ((mp_word) u) + ((mp_word) * tmpx); /* get carry */ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); /* fix digit */ *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); } /* At this point the ix'th digit of x should be zero */ /* propagate carries upwards as required*/ while (u) { *tmpx += u; u = *tmpx >> DIGIT_BIT; *tmpx++ &= MP_MASK; } } } /* at this point the n.used'th least * significant digits of x are all zero * which means we can shift x to the * right by n.used digits and the * residue is unchanged. */ /* x = x/b**n.used */ mp_clamp(x); mp_rshd (x, n->used); /* if x >= n then x = x - n */ if (mp_cmp_mag (x, n) != MP_LT) { return s_mp_sub (x, n, x); } return MP_OKAY; }
/* Greatest Common Divisor using the binary method */ int mp_gcd (mp_int * a, mp_int * b, mp_int * c) { mp_int u, v; int k, u_lsb, v_lsb, res; /* either zero than gcd is the largest */ if (mp_iszero (a) == MP_YES) { return mp_abs (b, c); } if (mp_iszero (b) == MP_YES) { return mp_abs (a, c); } /* get copies of a and b we can modify */ if ((res = mp_init_copy (&u, a)) != MP_OKAY) { return res; } if ((res = mp_init_copy (&v, b)) != MP_OKAY) { goto LBL_U; } /* must be positive for the remainder of the algorithm */ u.sign = v.sign = MP_ZPOS; /* B1. Find the common power of two for u and v */ u_lsb = mp_cnt_lsb(&u); v_lsb = mp_cnt_lsb(&v); k = MIN(u_lsb, v_lsb); if (k > 0) { /* divide the power of two out */ if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) { goto LBL_V; } if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) { goto LBL_V; } } /* divide any remaining factors of two out */ if (u_lsb != k) { if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) { goto LBL_V; } } if (v_lsb != k) { if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) { goto LBL_V; } } while (mp_iszero(&v) == MP_NO) { /* make sure v is the largest */ if (mp_cmp_mag(&u, &v) == MP_GT) { /* swap u and v to make sure v is >= u */ mp_exch(&u, &v); } /* subtract smallest from largest */ if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) { goto LBL_V; } /* Divide out all factors of two */ if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) { goto LBL_V; } } /* multiply by 2**k which we divided out at the beginning */ if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) { goto LBL_V; } c->sign = MP_ZPOS; res = MP_OKAY; LBL_V:mp_clear (&u); LBL_U:mp_clear (&v); return res; }