/* makes a pseudo-random int of a given size */ int mp_rand (mp_int * a, int digits) { int res; mp_digit d; mp_zero (a); if (digits <= 0) { return MP_OKAY; } /* first place a random non-zero digit */ do { d = ((mp_digit) abs (rand ())) & MP_MASK; } while (d == 0); if ((res = mp_add_d (a, d, a)) != MP_OKAY) { return res; } while (--digits > 0) { if ((res = mp_lshd (a, 1)) != MP_OKAY) { return res; } if ((res = mp_add_d (a, ((mp_digit) abs (rand ())), a)) != MP_OKAY) { return res; } } return MP_OKAY; }
int mp_rand(mp_int* a, int digits, WC_RNG* rng) { int ret; mp_digit d; if (rng == NULL) return MISSING_RNG_E; if (a == NULL) return BAD_FUNC_ARG; mp_zero(a); if (digits <= 0) { return MP_OKAY; } /* first place a random non-zero digit */ do { ret = get_rand_digit(rng, &d); if (ret != 0) { return ret; } } while (d == 0); if ((ret = mp_add_d(a, d, a)) != MP_OKAY) { return ret; } while (--digits > 0) { if ((ret = mp_lshd(a, 1)) != MP_OKAY) { return ret; } if ((ret = get_rand_digit(rng, &d)) != 0) { return ret; } if ((ret = mp_add_d(a, d, a)) != MP_OKAY) { return ret; } } return ret; }
int mp_rand(mp_int *a, int digits) { int res; mp_digit d; mp_zero(a); if (digits <= 0) { return MP_OKAY; } /* first place a random non-zero digit */ do { if (mp_rand_digit(&d) != MP_OKAY) { return MP_VAL; } } while (d == 0u); if ((res = mp_add_d(a, d, a)) != MP_OKAY) { return res; } while (--digits > 0) { if ((res = mp_lshd(a, 1)) != MP_OKAY) { return res; } if (mp_rand_digit(&d) != MP_OKAY) { return MP_VAL; } if ((res = mp_add_d(a, d, a)) != MP_OKAY) { return res; } } return MP_OKAY; }
/* Karatsuba squaring, computes b = a*a using three * half size squarings * * See comments of karatsuba_mul for details. It * is essentially the same algorithm but merely * tuned to perform recursive squarings. */ int mp_karatsuba_sqr(const mp_int *a, mp_int *b) { mp_int x0, x1, t1, t2, x0x0, x1x1; int B, err; err = MP_MEM; /* min # of digits */ B = a->used; /* now divide in two */ B = B >> 1; /* init copy all the temps */ if (mp_init_size(&x0, B) != MP_OKAY) goto LBL_ERR; if (mp_init_size(&x1, a->used - B) != MP_OKAY) goto X0; /* init temps */ if (mp_init_size(&t1, a->used * 2) != MP_OKAY) goto X1; if (mp_init_size(&t2, a->used * 2) != MP_OKAY) goto T1; if (mp_init_size(&x0x0, B * 2) != MP_OKAY) goto T2; if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) goto X0X0; { int x; mp_digit *dst, *src; src = a->dp; /* now shift the digits */ dst = x0.dp; for (x = 0; x < B; x++) { *dst++ = *src++; } dst = x1.dp; for (x = B; x < a->used; x++) { *dst++ = *src++; } } x0.used = B; x1.used = a->used - B; mp_clamp(&x0); /* now calc the products x0*x0 and x1*x1 */ if (mp_sqr(&x0, &x0x0) != MP_OKAY) goto X1X1; /* x0x0 = x0*x0 */ if (mp_sqr(&x1, &x1x1) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */ /* now calc (x1+x0)**2 */ if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */ if (mp_sqr(&t1, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ /* add x0y0 */ if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1; /* t2 = x0x0 + x1x1 */ if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ /* shift by B */ if (mp_lshd(&t1, B) != MP_OKAY) goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ if (mp_lshd(&x1x1, B * 2) != MP_OKAY) goto X1X1; /* x1x1 = x1x1 << 2*B */ if (mp_add(&x0x0, &t1, &t1) != MP_OKAY) goto X1X1; /* t1 = x0x0 + t1 */ if (mp_add(&t1, &x1x1, b) != MP_OKAY) goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ err = MP_OKAY; X1X1: mp_clear(&x1x1); X0X0: mp_clear(&x0x0); T2: mp_clear(&t2); T1: mp_clear(&t1); X1: mp_clear(&x1); X0: mp_clear(&x0); LBL_ERR: return err; }
/* integer signed division. * c*b + d == a [e.g. a/b, c=quotient, d=remainder] * HAC pp.598 Algorithm 14.20 * * Note that the description in HAC is horribly * incomplete. For example, it doesn't consider * the case where digits are removed from 'x' in * the inner loop. It also doesn't consider the * case that y has fewer than three digits, etc.. * * The overall algorithm is as described as * 14.20 from HAC but fixed to treat these cases. */ int mp_div MPA(mp_int * a, mp_int * b, mp_int * c, mp_int * d) { mp_int q, x, y, t1, t2; int res, n, t, i, norm, neg; /* is divisor zero ? */ if (mp_iszero (b) == 1) { return MP_VAL; } /* if a < b then q=0, r = a */ if (mp_cmp_mag (a, b) == MP_LT) { if (d != NULL) { res = mp_copy (MPST, a, d); } else { res = MP_OKAY; } if (c != NULL) { mp_zero (c); } return res; } if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) { return res; } q.used = a->used + 2; if ((res = mp_init (&t1)) != MP_OKAY) { goto LBL_Q; } if ((res = mp_init (&t2)) != MP_OKAY) { goto LBL_T1; } if ((res = mp_init_copy (MPST, &x, a)) != MP_OKAY) { goto LBL_T2; } if ((res = mp_init_copy (MPST, &y, b)) != MP_OKAY) { goto LBL_X; } /* fix the sign */ neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; x.sign = y.sign = MP_ZPOS; /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ norm = mp_count_bits(&y) % DIGIT_BIT; if (norm < (int)(DIGIT_BIT-1)) { norm = (DIGIT_BIT-1) - norm; if ((res = mp_mul_2d (MPST, &x, norm, &x)) != MP_OKAY) { goto LBL_Y; } if ((res = mp_mul_2d (MPST, &y, norm, &y)) != MP_OKAY) { goto LBL_Y; } } else { norm = 0; } /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ n = x.used - 1; t = y.used - 1; /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ if ((res = mp_lshd (MPST, &y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ goto LBL_Y; } while (mp_cmp (&x, &y) != MP_LT) { ++(q.dp[n - t]); if ((res = mp_sub (MPST, &x, &y, &x)) != MP_OKAY) { goto LBL_Y; } } /* reset y by shifting it back down */ mp_rshd (&y, n - t); /* step 3. for i from n down to (t + 1) */ for (i = n; i >= (t + 1); i--) { if (i > x.used) { continue; } /* step 3.1 if xi == yt then set q{i-t-1} to b-1, * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ if (x.dp[i] == y.dp[t]) { q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); } else { mp_word tmp; tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); tmp |= ((mp_word) x.dp[i - 1]); tmp /= ((mp_word) y.dp[t]); if (tmp > (mp_word) MP_MASK) tmp = MP_MASK; q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); } /* while (q{i-t-1} * (yt * b + y{t-1})) > xi * b**2 + xi-1 * b + xi-2 do q{i-t-1} -= 1; */ q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; do { q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; /* find left hand */ mp_zero (&t1); t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; t1.dp[1] = y.dp[t]; t1.used = 2; if ((res = mp_mul_d (MPST, &t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { goto LBL_Y; } /* find right hand */ t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; t2.dp[2] = x.dp[i]; t2.used = 3; } while (mp_cmp_mag(&t1, &t2) == MP_GT); /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ if ((res = mp_mul_d (MPST, &y, q.dp[i - t - 1], &t1)) != MP_OKAY) { goto LBL_Y; } if ((res = mp_lshd (MPST, &t1, i - t - 1)) != MP_OKAY) { goto LBL_Y; } if ((res = mp_sub (MPST, &x, &t1, &x)) != MP_OKAY) { goto LBL_Y; } /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ if (x.sign == MP_NEG) { if ((res = mp_copy (MPST, &y, &t1)) != MP_OKAY) { goto LBL_Y; } if ((res = mp_lshd (MPST, &t1, i - t - 1)) != MP_OKAY) { goto LBL_Y; } if ((res = mp_add (MPST, &x, &t1, &x)) != MP_OKAY) { goto LBL_Y; } q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; } } /* now q is the quotient and x is the remainder * [which we have to normalize] */ /* get sign before writing to c */ x.sign = x.used == 0 ? MP_ZPOS : a->sign; if (c != NULL) { mp_clamp (&q); mp_managed_copy (MPST, &q, c); c->sign = neg; } if (d != NULL) { mp_div_2d (MPST, &x, norm, &x, NULL); mp_managed_copy (MPST, &x, d); } res = MP_OKAY; LBL_Y:mp_clear (&y); LBL_X:mp_clear (&x); LBL_T2:mp_clear (&t2); LBL_T1:mp_clear (&t1); LBL_Q:mp_clear (&q); return res; }
/* multiplication using the Toom-Cook 3-way algorithm * * Much more complicated than Karatsuba but has a lower * asymptotic running time of O(N**1.464). This algorithm is * only particularly useful on VERY large inputs * (we're talking 1000s of digits here...). */ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) { mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; int res, B; /* init temps */ if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &b0, &b1, &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { return res; } /* B */ B = MIN(a->used, b->used) / 3; /* a = a2 * B**2 + a1 * B + a0 */ if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { goto ERR; } if ((res = mp_copy(a, &a1)) != MP_OKAY) { goto ERR; } mp_rshd(&a1, B); mp_mod_2d(&a1, DIGIT_BIT * B, &a1); if ((res = mp_copy(a, &a2)) != MP_OKAY) { goto ERR; } mp_rshd(&a2, B*2); /* b = b2 * B**2 + b1 * B + b0 */ if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { goto ERR; } if ((res = mp_copy(b, &b1)) != MP_OKAY) { goto ERR; } mp_rshd(&b1, B); mp_mod_2d(&b1, DIGIT_BIT * B, &b1); if ((res = mp_copy(b, &b2)) != MP_OKAY) { goto ERR; } mp_rshd(&b2, B*2); /* w0 = a0*b0 */ if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { goto ERR; } /* w4 = a2 * b2 */ if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { goto ERR; } /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { goto ERR; } /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { goto ERR; } /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { goto ERR; } if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { goto ERR; } /* now solve the matrix 0 0 0 0 1 1 2 4 8 16 1 1 1 1 1 16 8 4 2 1 1 0 0 0 0 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication */ /* r1 - r4 */ if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { goto ERR; } /* r3 - r0 */ if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { goto ERR; } /* r1/2 */ if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { goto ERR; } /* r3/2 */ if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { goto ERR; } /* r2 - r0 - r4 */ if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { goto ERR; } if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { goto ERR; } /* r1 - r2 */ if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { goto ERR; } /* r3 - r2 */ if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { goto ERR; } /* r1 - 8r0 */ if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { goto ERR; } /* r3 - 8r4 */ if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { goto ERR; } /* 3r2 - r1 - r3 */ if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { goto ERR; } if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { goto ERR; } if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { goto ERR; } /* r1 - r2 */ if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { goto ERR; } /* r3 - r2 */ if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { goto ERR; } /* r1/3 */ if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { goto ERR; } /* r3/3 */ if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { goto ERR; } /* at this point shift W[n] by B*n */ if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { goto ERR; } if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { goto ERR; } if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { goto ERR; } if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { goto ERR; } if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { goto ERR; } ERR: mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &b0, &b1, &b2, &tmp1, &tmp2, NULL); return res; }
/* c = |a| * |b| using Karatsuba Multiplication using * three half size multiplications * * Let B represent the radix [e.g. 2**DIGIT_BIT] and * let n represent half of the number of digits in * the min(a,b) * * a = a1 * B**n + a0 * b = b1 * B**n + b0 * * Then, a * b => a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 * * Note that a1b1 and a0b0 are used twice and only need to be * computed once. So in total three half size (half # of * digit) multiplications are performed, a0b0, a1b1 and * (a1+b1)(a0+b0) * * Note that a multiplication of half the digits requires * 1/4th the number of single precision multiplications so in * total after one call 25% of the single precision multiplications * are saved. Note also that the call to mp_mul can end up back * in this function if the a0, a1, b0, or b1 are above the threshold. * This is known as divide-and-conquer and leads to the famous * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than * the standard O(N**2) that the baseline/comba methods use. * Generally though the overhead of this method doesn't pay off * until a certain size (N ~ 80) is reached. */ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) { mp_int x0, x1, y0, y1, t1, x0y0, x1y1; int B, err; /* default the return code to an error */ err = MP_MEM; /* min # of digits */ B = MIN (a->used, b->used); /* now divide in two */ B = B >> 1; /* init copy all the temps */ if (mp_init_size (&x0, B) != MP_OKAY) goto ERR; if (mp_init_size (&x1, a->used - B) != MP_OKAY) goto X0; if (mp_init_size (&y0, B) != MP_OKAY) goto X1; if (mp_init_size (&y1, b->used - B) != MP_OKAY) goto Y0; /* init temps */ if (mp_init_size (&t1, B * 2) != MP_OKAY) goto Y1; if (mp_init_size (&x0y0, B * 2) != MP_OKAY) goto T1; if (mp_init_size (&x1y1, B * 2) != MP_OKAY) goto X0Y0; /* now shift the digits */ x0.used = y0.used = B; x1.used = a->used - B; y1.used = b->used - B; { register int x; register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; /* we copy the digits directly instead of using higher level functions * since we also need to shift the digits */ tmpa = a->dp; tmpb = b->dp; tmpx = x0.dp; tmpy = y0.dp; for (x = 0; x < B; x++) { *tmpx++ = *tmpa++; *tmpy++ = *tmpb++; } tmpx = x1.dp; for (x = B; x < a->used; x++) { *tmpx++ = *tmpa++; } tmpy = y1.dp; for (x = B; x < b->used; x++) { *tmpy++ = *tmpb++; } } /* only need to clamp the lower words since by definition the * upper words x1/y1 must have a known number of digits */ mp_clamp (&x0); mp_clamp (&y0); /* now calc the products x0y0 and x1y1 */ /* after this x0 is no longer required, free temp [x0==t2]! */ if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) goto X1Y1; /* x0y0 = x0*y0 */ if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) goto X1Y1; /* x1y1 = x1*y1 */ /* now calc x1+x0 and y1+y0 */ if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = x1 - x0 */ if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) goto X1Y1; /* t2 = y1 - y0 */ if (mp_mul (&t1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ /* add x0y0 */ if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) goto X1Y1; /* t2 = x0y0 + x1y1 */ if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ /* shift by B */ if (mp_lshd (&t1, B) != MP_OKAY) goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ if (mp_lshd (&x1y1, B * 2) != MP_OKAY) goto X1Y1; /* x1y1 = x1y1 << 2*B */ if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 */ if (mp_add (&t1, &x1y1, c) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ /* Algorithm succeeded set the return code to MP_OKAY */ err = MP_OKAY; X1Y1: mp_clear (&x1y1); X0Y0: mp_clear (&x0y0); T1: mp_clear (&t1); Y1: mp_clear (&y1); Y0: mp_clear (&y0); X1: mp_clear (&x1); X0: mp_clear (&x0); ERR: return err; }
/* reduces x mod m, assumes 0 < x < m**2, mu is * precomputed via mp_reduce_setup. * From HAC pp.604 Algorithm 14.42 */ int mp_reduce (mp_int * x, mp_int * m, mp_int * mu) { mp_int q; int res, um = USED(m); /* q = x */ if ((res = mp_init_copy (&q, x)) != MP_OKAY) { return res; } /* q1 = x / b**(k-1) */ mp_rshd (&q, um - 1); /* according to HAC this optimization is ok */ if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { goto CLEANUP; } } else { #ifdef BN_S_MP_MUL_HIGH_DIGS_C if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { goto CLEANUP; } #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C) if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { goto CLEANUP; } #else { res = MP_VAL; goto CLEANUP; } #endif } /* q3 = q2 / b**(k+1) */ mp_rshd (&q, um + 1); /* x = x mod b**(k+1), quick (no division) */ if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { goto CLEANUP; } /* q = q * m mod b**(k+1), quick (no division) */ if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { goto CLEANUP; } /* x = x - q */ if ((res = mp_sub (x, &q, x)) != MP_OKAY) { goto CLEANUP; } /* If x < 0, add b**(k+1) to it */ if (mp_cmp_d (x, 0) == MP_LT) { mp_set (&q, 1); if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) goto CLEANUP; if ((res = mp_add (x, &q, x)) != MP_OKAY) goto CLEANUP; } /* Back off if it's too big */ while (mp_cmp (x, m) != MP_LT) { if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { goto CLEANUP; } } CLEANUP: mp_clear (&q); return res; }
/* Karatsuba squaring, computes b = a*a using three * half size squarings * * See comments of karatsuba_mul for details. It * is essentially the same algorithm but merely * tuned to perform recursive squarings. */ int mp_karatsuba_sqr (mp_int * a, mp_int * b) { mp_int x0, x1, t1, t2, x0x0, x1x1; int B, err; err = MP_MEM; /* min # of digits */ B = USED(a); /* now divide in two */ B = B >> 1; /* init copy all the temps */ if (mp_init_size (&x0, B) != MP_OKAY) goto ERR; if (mp_init_size (&x1, USED(a) - B) != MP_OKAY) goto X0; /* init temps */ if (mp_init_size (&t1, USED(a) * 2) != MP_OKAY) goto X1; if (mp_init_size (&t2, USED(a) * 2) != MP_OKAY) goto T1; if (mp_init_size (&x0x0, B * 2) != MP_OKAY) goto T2; if (mp_init_size (&x1x1, (USED(a) - B) * 2) != MP_OKAY) goto X0X0; { register int x; register mp_digit *dst, *src; src = DIGITS(a); /* now shift the digits */ dst = DIGITS(&x0); for (x = 0; x < B; x++) { *dst++ = *src++; } dst = DIGITS(&x1); for (x = B; x < USED(a); x++) { *dst++ = *src++; } } SET_USED(&x0,B); SET_USED(&x1,USED(a) - B); mp_clamp (&x0); /* now calc the products x0*x0 and x1*x1 */ if (mp_sqr (&x0, &x0x0) != MP_OKAY) goto X1X1; /* x0x0 = x0*x0 */ if (mp_sqr (&x1, &x1x1) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */ /* now calc (x1+x0)**2 */ if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */ if (mp_sqr (&t1, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ /* add x0y0 */ if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1; /* t2 = x0x0 + x1x1 */ if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ /* shift by B */ if (mp_lshd (&t1, B) != MP_OKAY) goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ if (mp_lshd (&x1x1, B * 2) != MP_OKAY) goto X1X1; /* x1x1 = x1x1 << 2*B */ if (mp_add (&x0x0, &t1, &t1) != MP_OKAY) goto X1X1; /* t1 = x0x0 + t1 */ if (mp_add (&t1, &x1x1, b) != MP_OKAY) goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ err = MP_OKAY; X1X1:mp_clear (&x1x1); X0X0:mp_clear (&x0x0); T2:mp_clear (&t2); T1:mp_clear (&t1); X1:mp_clear (&x1); X0:mp_clear (&x0); ERR: return err; }
/* squaring using Toom-Cook 3-way algorithm */ int mp_toom_sqr(const mp_int *a, mp_int *b) { mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; int res, B; /* init temps */ if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { return res; } /* B */ B = a->used / 3; /* a = a2 * B**2 + a1 * B + a0 */ if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_copy(a, &a1)) != MP_OKAY) { goto LBL_ERR; } mp_rshd(&a1, B); if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_copy(a, &a2)) != MP_OKAY) { goto LBL_ERR; } mp_rshd(&a2, B*2); /* w0 = a0*a0 */ if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) { goto LBL_ERR; } /* w4 = a2 * a2 */ if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) { goto LBL_ERR; } /* w1 = (a2 + 2(a1 + 2a0))**2 */ if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) { goto LBL_ERR; } /* w3 = (a0 + 2(a1 + 2a2))**2 */ if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) { goto LBL_ERR; } /* w2 = (a2 + a1 + a0)**2 */ if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) { goto LBL_ERR; } /* now solve the matrix 0 0 0 0 1 1 2 4 8 16 1 1 1 1 1 16 8 4 2 1 1 0 0 0 0 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. */ /* r1 - r4 */ if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { goto LBL_ERR; } /* r3 - r0 */ if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { goto LBL_ERR; } /* r1/2 */ if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { goto LBL_ERR; } /* r3/2 */ if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { goto LBL_ERR; } /* r2 - r0 - r4 */ if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { goto LBL_ERR; } /* r1 - r2 */ if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { goto LBL_ERR; } /* r3 - r2 */ if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { goto LBL_ERR; } /* r1 - 8r0 */ if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { goto LBL_ERR; } /* r3 - 8r4 */ if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { goto LBL_ERR; } /* 3r2 - r1 - r3 */ if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { goto LBL_ERR; } /* r1 - r2 */ if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { goto LBL_ERR; } /* r3 - r2 */ if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { goto LBL_ERR; } /* r1/3 */ if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { goto LBL_ERR; } /* r3/3 */ if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { goto LBL_ERR; } /* at this point shift W[n] by B*n */ if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { goto LBL_ERR; } if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) { goto LBL_ERR; } LBL_ERR: mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); return res; }
int mp_toom_cook_5_mul(mp_int *a, mp_int *b, mp_int *c) { mp_int w1, w2, w3, w4, w5, w6, w7, w8, w9; mp_int tmp1, tmp2; mp_int a0, a1, a2, a3, a4; mp_int b0, b1, b2, b3, b4; int e = MP_OKAY; int B, count, sign; B = (MAX(a->used, b->used)) / 5; sign = (a->sign != b->sign) ? MP_NEG : MP_ZPOS; if (MIN(a->used, b->used) < TOOM_COOK_5_MUL_CO) { if ((e = mp_mul(a, b, c)) != MP_OKAY) { return e; } c->sign = sign; return MP_OKAY; } if ((e = mp_init_multi(&w1, &w2, &w3, &w4, &w5, &w6, &w7, &w8, &w9, &tmp1, &tmp2, //&a0, &a1, &a2, &a3, &a4, &b0, &b1, &b2, &b3, &b4, NULL)) != MP_OKAY) { goto ERR0; //goto ERR; } if ((e = mp_init_size(&a0, B)) != MP_OKAY) { goto ERRa0; } if ((e = mp_init_size(&a1, B)) != MP_OKAY) { goto ERRa1; } if ((e = mp_init_size(&a2, B)) != MP_OKAY) { goto ERRa2; } if ((e = mp_init_size(&a3, B)) != MP_OKAY) { goto ERRa3; } if ((e = mp_init_size(&a4, B)) != MP_OKAY) { goto ERRa4; } if ((e = mp_init_size(&b0, B)) != MP_OKAY) { goto ERRb0; } if ((e = mp_init_size(&b1, B)) != MP_OKAY) { goto ERRb1; } if ((e = mp_init_size(&b2, B)) != MP_OKAY) { goto ERRb2; } if ((e = mp_init_size(&b3, B)) != MP_OKAY) { goto ERRb3; } if ((e = mp_init_size(&b4, B)) != MP_OKAY) { goto ERRb4; } // A = a4*x^4 + a3*x^3 + a2*x^2 + a1*x + a0 for (count = 0; count < a->used; count++) { switch (count / B) { case 0: a0.dp[count] = a->dp[count]; a0.used++; break; case 1: a1.dp[count - B] = a->dp[count]; a1.used++; break; case 2: a2.dp[count - 2 * B] = a->dp[count]; a2.used++; break; case 3: a3.dp[count - 3 * B] = a->dp[count]; a3.used++; break; case 4: a4.dp[count - 4 * B] = a->dp[count]; a4.used++; break; default: a4.dp[count - 4 * B] = a->dp[count]; a4.used++; break; } } mp_clamp(&a0); mp_clamp(&a1); mp_clamp(&a2); mp_clamp(&a3); mp_clamp(&a4); // B = b4*x^4 + b3*x^3 + b2*x^2 + b1*x + b0 for (count = 0; count < b->used; count++) { switch (count / B) { case 0: b0.dp[count] = b->dp[count]; b0.used++; break; case 1: b1.dp[count - B] = b->dp[count]; b1.used++; break; case 2: b2.dp[count - 2 * B] = b->dp[count]; b2.used++; break; case 3: b3.dp[count - 3 * B] = b->dp[count]; b3.used++; break; case 4: b4.dp[count - 4 * B] = b->dp[count]; b4.used++; break; default: b4.dp[count - 4 * B] = b->dp[count]; b4.used++; break; } } mp_clamp(&b0); mp_clamp(&b1); mp_clamp(&b2); mp_clamp(&b3); mp_clamp(&b4); /* if ((e = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { goto ERR; } if ((e = mp_copy(a, &a1)) != MP_OKAY) { goto ERR; } mp_rshd(&a1, B); mp_mod_2d(&a1, DIGIT_BIT * B, &a1); if ((e = mp_copy(a, &a2)) != MP_OKAY) { goto ERR; } mp_rshd(&a2, B * 2); mp_mod_2d(&a2, DIGIT_BIT * B, &a2); if ((e = mp_copy(a, &a3)) != MP_OKAY) { goto ERR; } mp_rshd(&a3, B * 3); mp_mod_2d(&a3, DIGIT_BIT * B, &a3); if ((e = mp_copy(a, &a4)) != MP_OKAY) { goto ERR; } mp_rshd(&a4, B * 4); if ((e = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { goto ERR; } if ((e = mp_copy(a, &b1)) != MP_OKAY) { goto ERR; } mp_rshd(&b1, B); mp_mod_2d(&b1, DIGIT_BIT * B, &b1); if ((e = mp_copy(b, &b2)) != MP_OKAY) { goto ERR; } mp_rshd(&b2, B * 2); mp_mod_2d(&b2, DIGIT_BIT * B, &b2); if ((e = mp_copy(b, &b3)) != MP_OKAY) { goto ERR; } mp_rshd(&b3, B * 3); mp_mod_2d(&b3, DIGIT_BIT * B, &b3); if ((e = mp_copy(b, &b4)) != MP_OKAY) { goto ERR; } mp_rshd(&b4, B * 4); */ // S1 = a4*b4 if ((e = mp_mul(&a4, &b4, &w1)) != MP_OKAY) { goto ERR; } // S9 = a0*b0 if ((e = mp_mul(&a0, &b0, &w9)) != MP_OKAY) { goto ERR; } // S2 = (a0- 2*a1 +4*a2 -8*a3 +16*a4) if ((e = mp_mul_2d(&a1, 1, &tmp1)) != MP_OKAY) { goto ERR; } // 2*a1 = tmp1 if ((e = mp_sub(&a0, &tmp1, &w2)) != MP_OKAY) { goto ERR; } // a0- 2*a1 = a0 - tmp1 = w2 if ((e = mp_mul_2d(&a2, 2, &tmp1)) != MP_OKAY) { goto ERR; } // 4*a2 = tmp1 if ((e = mp_add(&w2, &tmp1, &w2)) != MP_OKAY) { goto ERR; } // a0- 2*a1 +4*a2 = w2 + tmp1 = w2 if ((e = mp_mul_2d(&a3, 3, &tmp1)) != MP_OKAY) { goto ERR; } // 8*a3 = tmp1 if ((e = mp_sub(&w2, &tmp1, &w2)) != MP_OKAY) { goto ERR; } // a0- 2*a1 +4*a2 -8*a3 = w2 - tmp1 = w2 if ((e = mp_mul_2d(&a4, 4, &tmp1)) != MP_OKAY) { goto ERR; } // 16*a4 = tmp1 if ((e = mp_add(&w2, &tmp1, &w2)) != MP_OKAY) { goto ERR; } // a0- 2*a1 +4*a2 -8*a3 +16*a4 = w2 + tmp1 = w2 // * (b0- 2*b1 +4*b2 -8*b3 +16*b4) if ((e = mp_mul_2d(&b1, 1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&b0, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b2, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b3, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b4, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul(&tmp2, &w2, &w2)) != MP_OKAY) { goto ERR; } // S5 = (a0+ 2*a1+ 4*a2+ 8*a3+ 16*a4) if ((e = mp_mul_2d(&a1, 1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&a0, &tmp1, &w5)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a2, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w5, &tmp1, &w5)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a3, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w5, &tmp1, &w5)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a4, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w5, &tmp1, &w5)) != MP_OKAY) { goto ERR; } // *(b0+ 2*b1+ 4*b2+ 8*b3+ 16*b4) if ((e = mp_mul_2d(&b1, 1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&b0, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b2, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b3, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b4, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul(&tmp2, &w5, &w5)) != MP_OKAY) { goto ERR; } // S3 = (a4+ 2*a3+ 4*a2+ 8*a1+ 16*a0) if ((e = mp_mul_2d(&a3, 1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&a4, &tmp1, &w3)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a2, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w3, &tmp1, &w3)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a1, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w3, &tmp1, &w3)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a0, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w3, &tmp1, &w3)) != MP_OKAY) { goto ERR; } // * (b4+ 2*b3+ 4*b2+ 8*b1+ 16*b0) if ((e = mp_mul_2d(&b3, 1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&b4, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b2, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b1, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b0, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul(&tmp2, &w3, &w3)) != MP_OKAY) { goto ERR; } // S8 = (a4- 2*a3+ 4*a2- 8*a1+ 16*a0) if ((e = mp_mul_2d(&a3, 1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&a4, &tmp1, &w8)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a2, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w8, &tmp1, &w8)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a1, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w8, &tmp1, &w8)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a0, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w8, &tmp1, &w8)) != MP_OKAY) { goto ERR; } //* (b4- 2*b3+ 4*b2- 8*b1+ 16*b0) if ((e = mp_mul_2d(&b3, 1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&b4, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b2, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b1, 3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b0, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul(&tmp2, &w8, &w8)) != MP_OKAY) { goto ERR; } // S4 = (a0+ 4*a1+ 16*a2+ 64*a3+ 256*a4) if ((e = mp_mul_2d(&a1, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&a0, &tmp1, &w4)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a2, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a3, 6, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&a4, 8, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } //* (b0+ 4*b1+ 16*b2+ 64*b3+ 256*b4) if ((e = mp_mul_2d(&b1, 2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&b0, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b2, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b3, 6, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul_2d(&b4, 8, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) { goto ERR; } if ((e = mp_mul(&tmp2, &w4, &w4)) != MP_OKAY) { goto ERR; } // S6 = (a0- a1+ a2- a3 +a4) if ((e = mp_sub(&a0, &a1, &w6)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w6, &a2, &w6)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w6, &a3, &w6)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w6, &a4, &w6)) != MP_OKAY) { goto ERR; } // * (b0- b1+ b2- b3+ b4) if ((e = mp_sub(&b0, &b1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &b2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&tmp1, &b3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &b4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_mul(&tmp1, &w6, &w6)) != MP_OKAY) { goto ERR; } // S7 = (a0+ a1+ a2+ a3+ a4) if ((e = mp_add(&a0, &a1, &w7)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w7, &a2, &w7)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w7, &a3, &w7)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w7, &a4, &w7)) != MP_OKAY) { goto ERR; } // * (b0+ b1+ b2+ b3+ b4) if ((e = mp_add(&b0, &b1, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &b2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &b3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &b4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_mul(&tmp1, &w7, &w7)) != MP_OKAY) { goto ERR; } // S6 -= S7 if ((e = mp_sub(&w6, &w7, &w6)) != MP_OKAY) { goto ERR; } // S2 -= S5 if ((e = mp_sub(&w2, &w5, &w2)) != MP_OKAY) { goto ERR; } // S4 -= S9 if ((e = mp_sub(&w4, &w9, &w4)) != MP_OKAY) { goto ERR; } // S4 -= (2^16*S1) if ((e = mp_mul_2d(&w1, 16, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } // S8 -= S3 if ((e = mp_sub(&w8, &w3, &w8)) != MP_OKAY) { goto ERR; } // S6 /= 2 if ((e = mp_div_2d(&w6, 1, &w6, NULL)) != MP_OKAY) { goto ERR; } // S5 *= 2 if ((e = mp_mul_2d(&w5, 1, &w5)) != MP_OKAY) { goto ERR; } // S5 += S2 if ((e = mp_add(&w5, &w2, &w5)) != MP_OKAY) { goto ERR; } // S2 = -S2 if ((e = mp_neg(&w2, &w2)) != MP_OKAY) { goto ERR; } // S8 = -S8 if ((e = mp_neg(&w8, &w8)) != MP_OKAY) { goto ERR; } // S7 += S6 if ((e = mp_add(&w7, &w6, &w7)) != MP_OKAY) { goto ERR; } // S6 = -S6 if ((e = mp_neg(&w6, &w6)) != MP_OKAY) { goto ERR; } // S3 -= S7 if ((e = mp_sub(&w3, &w7, &w3)) != MP_OKAY) { goto ERR; } // S5 -= (512*S7) if ((e = mp_mul_2d(&w7, 9, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w5, &tmp1, &w5)) != MP_OKAY) { goto ERR; } // S3 *= 2 if ((e = mp_mul_2d(&w3, 1, &w3)) != MP_OKAY) { goto ERR; } // S3 -= S8 if ((e = mp_sub(&w3, &w8, &w3)) != MP_OKAY) { goto ERR; } // S7 -= S1 if ((e = mp_sub(&w7, &w1, &w7)) != MP_OKAY) { goto ERR; } // S7 -= S9 if ((e = mp_sub(&w7, &w9, &w7)) != MP_OKAY) { goto ERR; } // S8 += S2 if ((e = mp_add(&w8, &w2, &w8)) != MP_OKAY) { goto ERR; } // S5 += S3 if ((e = mp_add(&w5, &w3, &w5)) != MP_OKAY) { goto ERR; } // S8 -= (80*S6) if ((e = mp_mul_d(&w6, 80, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w8, &tmp1, &w8)) != MP_OKAY) { goto ERR; } // S3 -= (510*S9) if ((e = mp_mul_d(&w9, 510, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { goto ERR; } // S4 -= S2 if ((e = mp_sub(&w4, &w2, &w4)) != MP_OKAY) { goto ERR; } // S3 *= 3 if ((e = mp_mul_d(&w3, 3, &w3)) != MP_OKAY) { goto ERR; } // S3 += S5 if ((e = mp_add(&w3, &w5, &w3)) != MP_OKAY) { goto ERR; } // S8 /= 180 \\ division by 180 if ((e = mp_div_d(&w8, 180, &w8, NULL)) != MP_OKAY) { goto ERR; } // S5 += (378*S7) if ((e = mp_mul_d(&w7, 378, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w5, &tmp1, &w5)) != MP_OKAY) { goto ERR; } // S2 /= 4 if ((e = mp_div_2d(&w2, 2, &w2, NULL)) != MP_OKAY) { goto ERR; } // S6 -= S2 if ((e = mp_sub(&w6, &w2, &w6)) != MP_OKAY) { goto ERR; } // S5 /= (-72) \\ division by -72 if ((e = mp_div_d(&w5, 72, &w5, NULL)) != MP_OKAY) { goto ERR; } if (&w5.sign == MP_ZPOS) (&w5)->sign = MP_NEG; (&w5)->sign = MP_ZPOS; // S3 /= (-360) \\ division by -360 if ((e = mp_div_d(&w3, 360, &w3, NULL)) != MP_OKAY) { goto ERR; } if (&w3.sign == MP_ZPOS) (&w3)->sign = MP_NEG; (&w3)->sign = MP_ZPOS; // S2 -= S8 if ((e = mp_sub(&w2, &w8, &w2)) != MP_OKAY) { goto ERR; } // S7 -= S3 if ((e = mp_sub(&w7, &w3, &w7)) != MP_OKAY) { goto ERR; } // S4 -= (256*S5) if ((e = mp_mul_2d(&w5, 8, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } // S3 -= S5 if ((e = mp_sub(&w3, &w5, &w3)) != MP_OKAY) { goto ERR; } // S4 -= (4096*S3) if ((e = mp_mul_2d(&w3, 12, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } // S4 -= (16*S7) if ((e = mp_mul_2d(&w7, 4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_sub(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } // S4 += (256*S6) if ((e = mp_mul_2d(&w6, 8, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } // S6 += S2 if ((e = mp_add(&w6, &w2, &w6)) != MP_OKAY) { goto ERR; } // S2 *= 180 if ((e = mp_mul_d(&w2, 180, &w2)) != MP_OKAY) { goto ERR; } // S2 += S4 if ((e = mp_add(&w2, &w4, &w2)) != MP_OKAY) { goto ERR; } // S2 /= 11340 \\ division by 11340 if ((e = mp_div_d(&w2, 11340, &w2, NULL)) != MP_OKAY) { goto ERR; } // S4 += (720*S6) if ((e = mp_mul_d(&w6, 720, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) { goto ERR; } // S4 /= (-2160) \\ division by -2160 if ((e = mp_div_d(&w4, 2160, &w4, NULL)) != MP_OKAY) { goto ERR; } if (&w4.sign == MP_ZPOS) (&w4)->sign = MP_NEG; (&w4)->sign = MP_ZPOS; // S6 -= S4 if ((e = mp_sub(&w6, &w4, &w6)) != MP_OKAY) { goto ERR; } // S8 -= S2 if ((e = mp_sub(&w8, &w2, &w8)) != MP_OKAY) { goto ERR; } // P = S1*x^8 + S2*x^7 + S3*x^6 + S4*x^5 + S5*x^4 + S6*x^3 + S7*x^2 + S8*x + S9 if ((e = mp_copy(&w9, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_lshd(&w8, B)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &w8, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_lshd(&w7, 2 * B)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &w7, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_lshd(&w6, 3 * B)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &w6, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_lshd(&w5, 4 * B)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &w5, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_lshd(&w4, 5 * B)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &w4, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_lshd(&w3, 6 * B)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &w3, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_lshd(&w2, 7 * B)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &w2, &tmp1)) != MP_OKAY) { goto ERR; } if ((e = mp_lshd(&w1, 8 * B)) != MP_OKAY) { goto ERR; } if ((e = mp_add(&tmp1, &w1, c)) != MP_OKAY) { goto ERR; } // P - A*B \\ == zero c->sign = sign; ERR: ERRb4: mp_clear(&b4); ERRb3: mp_clear(&b3); ERRb2: mp_clear(&b2); ERRb1: mp_clear(&b1); ERRb0: mp_clear(&b0); ERRa4: mp_clear(&a4); ERRa3: mp_clear(&a3); ERRa2: mp_clear(&a2); ERRa1: mp_clear(&a1); ERRa0: mp_clear(&a0); ERR0: mp_clear_multi(&w1, &w2, &w3, &w4, &w5, &w6, &w7, &w8, &w9, &tmp1, &tmp2, // &a0, &a1, &a2, &a3, &a4, &b0, &b1, &b2, &b3, &b4, NULL); return e; }