/*
 * Multiplication timing driver.
 *
 * Usage: prog <ntests> <nbits>
 *
 * Generates <ntests> pairs of random values of the requested precision,
 * multiplies each pair, and prints the total and per-operation CPU time
 * as measured by clock().  The random seed is taken from the SEED
 * environment variable when it is set (so a run can be repeated), and
 * from the current time otherwise.
 *
 * Returns 0 on success, 1 on a usage error.
 */
int main(int argc, char *argv[])
{
  int          ntests, prec, ix;
  unsigned int seed;
  char        *senv;
  clock_t      start, stop;
  double       multime;
  mp_int       a, b, c;

  if((senv = getenv("SEED")) != NULL)
    seed = atoi(senv);
  else
    seed = (unsigned int)time(NULL);

  if(argc < 3) {
    fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
    return 1;
  }

  if((ntests = abs(atoi(argv[1]))) == 0) {
    fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
    return 1;
  }
  if((prec = abs(atoi(argv[2]))) < CHAR_BIT) {
    fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
            CHAR_BIT);
    return 1;
  }

  /* convert the requested bit count into a digit count, rounding up */
  prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;

  mp_init_size(&a, prec);
  mp_init_size(&b, prec);
  mp_init_size(&c, 2 * prec);   /* a product needs up to twice the digits */

  srand(seed);

  start = clock();
  for(ix = 0; ix < ntests; ix++) {
    mpp_random_size(&a, prec);
    mpp_random_size(&b, prec);
    /* Multiply the two independent operands.  The previous code passed
     * &a twice, which left b unused and timed a squaring instead of a
     * general multiplication. */
    mp_mul(&a, &b, &c);
  }
  stop = clock();

  multime = (double)(stop - start) / CLOCKS_PER_SEC;

  printf("Total: %.4f\n", multime);
  printf("Individual: %.4f\n", multime / ntests);

  mp_clear(&a);
  mp_clear(&b);
  mp_clear(&c);

  return 0;
}
/* divide by three (based on routine from MPI and the GMP manual) */ int mp_div_3 (mp_int * a, mp_int *c, mp_digit * d) { mp_int q; mp_word w, t; mp_digit b; int res, ix; /* b = 2**DIGIT_BIT / 3 */ b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { return res; } q.used = a->used; q.sign = a->sign; w = 0; for (ix = a->used - 1; ix >= 0; ix--) { w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); if (w >= 3) { t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); w -= (t << ((mp_word)1)) + t; while (w >= 3) { t += 1; w -= 3; } } else {
/*
 * TclBNInitBignumFromWideUInt --
 *
 *	Initializes the bignum 'a' and stores the unsigned wide value 'v'
 *	in it.  Panics through Tcl_Panic if the bignum cannot be
 *	initialized.
 */
extern void
TclBNInitBignumFromWideUInt(
    mp_int *a,			/* Bignum to initialize */
    Tcl_WideUInt v)		/* Initial value */
{
    mp_digit *next;

    /*
     * Reserve as many digits as the widest Tcl_WideUInt can occupy.
     */

    if (mp_init_size(a, (CHAR_BIT * sizeof(Tcl_WideUInt) + DIGIT_BIT - 1)
	    / DIGIT_BIT) != MP_OKAY) {
	Tcl_Panic("initialization failure in TclBNInitBignumFromWideUInt");
    }

    a->sign = MP_ZPOS;

    /*
     * Peel off one digit at a time, least significant first, until
     * nothing is left of the value.
     */

    for (next = a->dp; v; v >>= MP_DIGIT_BIT) {
	*next++ = (mp_digit) (v & MP_MASK);
    }
    a->used = next - a->dp;
}
/* divide by three (based on routine from MPI and the GMP manual) */ int mp_div_3(const mp_int *a, mp_int *c, mp_digit *d) { mp_int q; mp_word w, t; mp_digit b; int res, ix; /* b = 2**MP_DIGIT_BIT / 3 */ b = ((mp_word)1 << (mp_word)MP_DIGIT_BIT) / (mp_word)3; if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { return res; } q.used = a->used; q.sign = a->sign; w = 0; for (ix = a->used - 1; ix >= 0; ix--) { w = (w << (mp_word)MP_DIGIT_BIT) | (mp_word)a->dp[ix]; if (w >= 3u) { /* multiply w by [1/3] */ t = (w * (mp_word)b) >> (mp_word)MP_DIGIT_BIT; /* now subtract 3 * [w/3] from w, to get the remainder */ w -= t+t+t; /* fixup the remainder as required since * the optimization is not exact. */ while (w >= 3u) { t += 1u; w -= 3u; } } else {
/* Initializes "a" with room for b's used digits, then copies b into it.
 * Returns the error from mp_init_size if initialization fails, otherwise
 * whatever mp_copy returns.
 */
int mp_init_copy (mp_int * a, mp_int * b)
{
  int status;

  status = mp_init_size (a, b->used);
  if (status == MP_OKAY) {
    status = mp_copy (b, a);
  }
  return status;
}
/* Computes |a| * |b| while skipping the column sums below digit
 * position "digs"; only the upper part of the product is accumulated.
 * The result is exchanged into c.  Returns MP_OKAY, or the error from
 * mp_init_size (or from the fast multiplier when that path is taken).
 */
int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
{
   mp_int   prod;
   int      status, na, nb, i, j, out;
   mp_digit carry, lhs;
   mp_word  acc;

   /* hand off to the comba multiplier when the operands qualify */
#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
   if (((a->used + b->used + 1) < MP_WARRAY)
       && (MIN(a->used, b->used) < (1 << ((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))))) {
      return fast_s_mp_mul_high_digs(a, b, c, digs);
   }
#endif

   status = mp_init_size(&prod, a->used + b->used + 1);
   if (status != MP_OKAY) {
      return status;
   }
   prod.used = a->used + b->used + 1;

   na = a->used;
   nb = b->used;
   for (i = 0; i < na; i++) {
      /* each row starts with no carry and writes from column "digs" */
      carry = 0;
      lhs   = a->dp[i];
      out   = digs;

      /* pair A[i] with B[digs - i], B[digs - i + 1], ... */
      for (j = digs - i; j < nb; j++) {
         /* double precision column sum */
         acc = (mp_word)prod.dp[out] +
               ((mp_word)lhs * (mp_word)b->dp[j]) +
               (mp_word)carry;

         /* low digit stays in place, the rest moves to the next column */
         prod.dp[out++] = (mp_digit)(acc & ((mp_word)MP_MASK));
         carry          = (mp_digit)(acc >> ((mp_word)DIGIT_BIT));
      }
      prod.dp[out] = carry;
   }

   mp_clamp(&prod);
   mp_exch(&prod, c);
   mp_clear(&prod);
   return MP_OKAY;
}
int main(int argc, char *argv[]) { instant_t start, finish; mp_int prime, gen, expt, res; unsigned int ix, diff; int num; srand(time(NULL)); if(argc < 2) { fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]); return 1; } if((num = atoi(argv[1])) < 0) num = -num; if(num == 0) ++num; mp_init(&prime); mp_init(&gen); mp_init(&res); mp_read_radix(&prime, g_prime, 16); mp_read_radix(&gen, g_gen, 16); mp_init_size(&expt, USED(&prime) - 1); s_mp_pad(&expt, USED(&prime) - 1); printf("Testing %d modular exponentations ... \n", num); start = now(); for(ix = 0; ix < num; ix++) { mpp_random(&expt); mp_exptmod(&gen, &expt, &prime, &res); } finish = now(); diff = (finish.sec - start.sec) * 1000000; diff += finish.usec; diff -= start.usec; printf("%d operations took %u usec (%.3f sec)\n", num, diff, (double)diff / 1000000.0); printf("That is %.3f sec per operation.\n", ((double)diff / 1000000.0) / num); mp_clear(&expt); mp_clear(&res); mp_clear(&gen); mp_clear(&prime); return 0; }
/* Single digit division (based on routine from MPI).
 *
 * Divides a by the digit b.  The quotient is stored in c when c is not
 * NULL and the remainder in *d when d is not NULL.  Returns MP_VAL for a
 * zero divisor, defers to mp_div_3 when b is 3, and otherwise returns
 * the status of the temporary's initialization.
 */
int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{
  mp_int   quot;
  mp_word  rem;
  mp_digit qdigit;
  int      status, pos;

  if (b == 0) {
     return MP_VAL;
  }

  if (b == 3) {
     return mp_div_3(a, c, d);
  }

  status = mp_init_size(&quot, a->used);
  if (status != MP_OKAY) {
     return status;
  }

  quot.used = a->used;
  quot.sign = a->sign;

  /* schoolbook long division, most significant digit first */
  rem = 0;
  pos = a->used - 1;
  while (pos >= 0) {
     rem = (rem << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[pos]);

     qdigit = 0;
     if (rem >= b) {
        qdigit = (mp_digit)(rem / b);
        rem    = rem % b;
     }
     quot.dp[pos] = (mp_digit)qdigit;
     pos--;
  }

  if (d != NULL) {
     *d = (mp_digit)rem;
  }

  if (c != NULL) {
     mp_clamp(&quot);
     mp_exch(&quot, c);
  }
  mp_clear(&quot);

  return status;
}
/* d = a * b (mod c)
 *
 * The product is formed in a temporary that is released on every path.
 * Returns the first error from mp_init_size, mp_mul or mp_mod.
 */
int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
  mp_int  prod;
  int     status;

  status = mp_init_size (&prod, c->used);
  if (status != MP_OKAY) {
    return status;
  }

  status = mp_mul (a, b, &prod);
  if (status == MP_OKAY) {
    status = mp_mod (&prod, c, d);
  }

  mp_clear (&prod);
  return status;
}
/*
 * TclBNInitBignumFromLong --
 *
 *	Initializes the bignum 'a' and stores the value of 'initVal' in it
 *	as a sign and a magnitude.  Panics through Tcl_Panic if the bignum
 *	cannot be initialized.
 */
extern void
TclBNInitBignumFromLong(
    mp_int *a,			/* Bignum to initialize */
    long initVal)		/* Initial value */
{
    int status;
    unsigned long v;
    mp_digit* p;

    /*
     * Allocate enough memory to hold the largest possible long
     */

    status = mp_init_size(a,
	    (CHAR_BIT * sizeof(long) + DIGIT_BIT - 1) / DIGIT_BIT);
    if (status != MP_OKAY) {
	Tcl_Panic("initialization failure in TclBNInitBignumFromLong");
    }

    /*
     * Convert arg to sign and magnitude.
     *
     * The negation is done in unsigned arithmetic: evaluating -initVal
     * as a signed long overflows (undefined behaviour) when initVal is
     * LONG_MIN, whereas the unsigned form yields the magnitude for every
     * negative input.
     */

    if (initVal < 0) {
	a->sign = MP_NEG;
	v = 0UL - (unsigned long) initVal;
    } else {
	a->sign = MP_ZPOS;
	v = (unsigned long) initVal;
    }

    /*
     * Store the magnitude in the bignum, least significant digit first.
     */

    p = a->dp;
    while (v) {
	*p++ = (mp_digit) (v & MP_MASK);
	v >>= MP_DIGIT_BIT;
    }
    a->used = p - a->dp;
}
/* c = |a| * |b| using Karatsuba Multiplication using
 * three half size multiplications
 *
 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
 * let n represent half of the number of digits in
 * the min(a,b)
 *
 * a = a1 * B**n + a0
 * b = b1 * B**n + b0
 *
 * Then, a * b =>
 *   a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B**n + a0b0
 *
 * Note that a1b1 and a0b0 are used twice and only need to be
 * computed once.  So in total three half size (half # of
 * digit) multiplications are performed, a0b0, a1b1 and
 * (a1+a0)(b1+b0)
 *
 * Note that a multiplication of half the digits requires
 * 1/4th the number of single precision multiplications so in
 * total after one call 25% of the single precision multiplications
 * are saved.  Note also that the call to mp_mul can end up back
 * in this function if the a0, a1, b0, or b1 are above the threshold.
 * This is known as divide-and-conquer and leads to the famous
 * O(N**lg(3)) or O(N**1.584) work which is asymptotically lower than
 * the standard O(N**2) that the baseline/comba methods use.
 * Generally though the overhead of this method doesn't pay off
 * until a certain size (N ~ 80) is reached.
 *
 * In the code below the variable B holds the split point n (a digit
 * count), not the radix.
 *
 * Returns MP_OKAY on success.  Every failure path, whatever the failing
 * call reported, returns MP_MEM because err is preset to it and is only
 * overwritten on success.
 */
int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
{
  mp_int  x0, x1, y0, y1, t1, x0y0, x1y1;
  int     B, err;

  /* default the return code to an error */
  err = MP_MEM;

  /* min # of digits */
  B = MIN (a->used, b->used);

  /* now divide in two */
  B = B >> 1;

  /* init copy all the temps; each failure jumps to the label that
   * clears only what has been initialized so far */
  if (mp_init_size (&x0, B) != MP_OKAY)
    goto ERR;
  if (mp_init_size (&x1, a->used - B) != MP_OKAY)
    goto X0;
  if (mp_init_size (&y0, B) != MP_OKAY)
    goto X1;
  if (mp_init_size (&y1, b->used - B) != MP_OKAY)
    goto Y0;

  /* init temps */
  if (mp_init_size (&t1, B * 2) != MP_OKAY)
    goto Y1;
  if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
    goto T1;
  if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
    goto X0Y0;

  /* now shift the digits */
  x0.used = y0.used = B;
  x1.used = a->used - B;
  y1.used = b->used - B;

  {
    register int x;
    register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;

    /* we copy the digits directly instead of using higher level functions
     * since we also need to shift the digits
     */
    tmpa = a->dp;
    tmpb = b->dp;

    /* low halves: the first B digits of each input */
    tmpx = x0.dp;
    tmpy = y0.dp;
    for (x = 0; x < B; x++) {
      *tmpx++ = *tmpa++;
      *tmpy++ = *tmpb++;
    }

    /* high halves: tmpa/tmpb continue from where the low copy stopped */
    tmpx = x1.dp;
    for (x = B; x < a->used; x++) {
      *tmpx++ = *tmpa++;
    }

    tmpy = y1.dp;
    for (x = B; x < b->used; x++) {
      *tmpy++ = *tmpb++;
    }
  }

  /* only need to clamp the lower words since by definition the
   * upper words x1/y1 must have a known number of digits
   */
  mp_clamp (&x0);
  mp_clamp (&y0);

  /* now calc the products x0y0 and x1y1 */
  /* after this x0 is no longer required, so it is reused below as a
   * second scratch value [x0==t2]!
   */
  if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
    goto X1Y1;          /* x0y0 = x0*y0 */
  if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
    goto X1Y1;          /* x1y1 = x1*y1 */

  /* now calc x1+x0 and y1+y0 */
  if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
    goto X1Y1;          /* t1 = x1 + x0 */
  if (s_mp_add (&y1, &y0, &x0) != MP_OKAY)
    goto X1Y1;          /* t2 = y1 + y0 */
  if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
    goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */

  /* add x0y0 */
  if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
    goto X1Y1;          /* t2 = x0y0 + x1y1 */
  if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY)
    goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */

  /* shift by B */
  if (mp_lshd (&t1, B) != MP_OKAY)
    goto X1Y1;          /* t1 = ((x1+x0)*(y1+y0) - (x1y1 + x0y0))<<B */
  if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
    goto X1Y1;          /* x1y1 = x1y1 << 2*B */

  if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
    goto X1Y1;          /* t1 = x0y0 + t1 */
  if (mp_add (&t1, &x1y1, c) != MP_OKAY)
    goto X1Y1;          /* c = x0y0 + t1 + x1y1 */

  /* Algorithm succeeded set the return code to MP_OKAY */
  err = MP_OKAY;

  /* the labels fall through, releasing temporaries in reverse order of
   * initialization */
X1Y1:mp_clear (&x1y1);
X0Y0:mp_clear (&x0y0);
T1:mp_clear (&t1);
Y1:mp_clear (&y1);
Y0:mp_clear (&y0);
X1:mp_clear (&x1);
X0:mp_clear (&x0);
ERR:
  return err;
}
int main(int argc, char *argv[]) { int ix, num, prec = PRECISION; mp_int a, b, c, d; instant_t start, finish; time_t seed; unsigned int d1, d2; seed = time(NULL); if(argc < 2) { fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]); return 1; } if((num = atoi(argv[1])) < 0) num = -num; printf("Test 5a: Euclid vs. Binary, a GCD speed test\n\n" "Number of tests: %d\n" "Precision: %d digits\n\n", num, prec); mp_init_size(&a, prec); mp_init_size(&b, prec); mp_init(&c); mp_init(&d); printf("Verifying accuracy ... \n"); srand((unsigned int)seed); for(ix = 0; ix < num; ix++) { mpp_random_size(&a, prec); mpp_random_size(&b, prec); mp_gcd(&a, &b, &c); mp_bgcd(&a, &b, &d); if(mp_cmp(&c, &d) != 0) { printf("Error! Results not accurate:\n"); printf("a = "); mp_print(&a, stdout); fputc('\n', stdout); printf("b = "); mp_print(&b, stdout); fputc('\n', stdout); printf("c = "); mp_print(&c, stdout); fputc('\n', stdout); printf("d = "); mp_print(&d, stdout); fputc('\n', stdout); mp_clear(&a); mp_clear(&b); mp_clear(&c); mp_clear(&d); return 1; } } mp_clear(&d); printf("Accuracy confirmed for the %d test samples\n", num); printf("Testing Euclid ... \n"); srand((unsigned int)seed); start = now(); for(ix = 0; ix < num; ix++) { mpp_random_size(&a, prec); mpp_random_size(&b, prec); mp_gcd(&a, &b, &c); } finish = now(); d1 = (finish.sec - start.sec) * 1000000; d1 -= start.usec; d1 += finish.usec; printf("Testing binary ... \n"); srand((unsigned int)seed); start = now(); for(ix = 0; ix < num; ix++) { mpp_random_size(&a, prec); mpp_random_size(&b, prec); mp_bgcd(&a, &b, &c); } finish = now(); d2 = (finish.sec - start.sec) * 1000000; d2 -= start.usec; d2 += finish.usec; printf("Euclidean algorithm time: %u usec\n", d1); printf("Binary algorithm time: %u usec\n", d2); printf("Improvement: %.2f%%\n", (1.0 - ((double)d2 / (double)d1)) * 100.0); mp_clear(&c); mp_clear(&b); mp_clear(&a); return 0; }
/* Computes Y = G**X (mod P) with a sliding-window exponentiation.
 *
 * redmode selects the reduction routine stored in "redux":
 *   0      -> Montgomery reduction (comba variant when the size test passes)
 *   1      -> mp_dr_reduce
 *   other  -> mp_reduce_2k
 * When the code for the requested mode is compiled out the function
 * returns MP_VAL.
 *
 * Only M[1] and the upper half of the table, M[2**(winsize-1)] through
 * M[2**winsize - 1], are initialized and used.
 *
 * Returns MP_OKAY on success or the first error reported by a callee.
 * All temporaries are released on every path past their initialization.
 */
int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
{
  mp_int  M[TAB_SIZE], res;
  mp_digit buf, mp;
  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;

  /* use a pointer to the reduction algorithm.  This allows us to use
   * one of many reduction algorithms without modding the guts of
   * the code with if statements everywhere.
   */
  int     (*redux)(mp_int*,mp_int*,mp_digit);

  /* find window size from the bit length of the exponent */
  x = mp_count_bits (X);
  if (x <= 7) {
    winsize = 2;
  } else if (x <= 36) {
    winsize = 3;
  } else if (x <= 140) {
    winsize = 4;
  } else if (x <= 450) {
    winsize = 5;
  } else if (x <= 1303) {
    winsize = 6;
  } else if (x <= 3529) {
    winsize = 7;
  } else {
    winsize = 8;
  }

#ifdef MP_LOW_MEM
  /* cap the table size in low-memory builds */
  if (winsize > 5) {
     winsize = 5;
  }
#endif

  /* init M array */
  /* init first cell */
  if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
     return err;
  }

  /* now init the second half of the array; on failure undo exactly the
   * cells that were initialized so far */
  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
    if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
      for (y = 1<<(winsize-1); y < x; y++) {
        mp_clear (&M[y]);
      }
      mp_clear(&M[1]);
      return err;
    }
  }

  /* determine and setup reduction code */
  if (redmode == 0) {
#ifdef BN_MP_MONTGOMERY_SETUP_C
     /* now setup montgomery */
     if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
        goto LBL_M;
     }
#else
     err = MP_VAL;
     goto LBL_M;
#endif

     /* automatically pick the comba one if available (saves quite a few calls/ifs) */
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
     if ((((P->used * 2) + 1) < MP_WARRAY) &&
          (P->used < (1 << ((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))))) {
        redux = fast_mp_montgomery_reduce;
     } else
#endif
     {
#ifdef BN_MP_MONTGOMERY_REDUCE_C
        /* use slower baseline Montgomery method */
        redux = mp_montgomery_reduce;
#else
        err = MP_VAL;
        goto LBL_M;
#endif
     }
  } else if (redmode == 1) {
#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
     /* setup DR reduction for moduli of the form B**k - b */
     mp_dr_setup(P, &mp);
     redux = mp_dr_reduce;
#else
     err = MP_VAL;
     goto LBL_M;
#endif
  } else {
#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
     /* setup DR reduction for moduli of the form 2**k - b */
     if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
        goto LBL_M;
     }
     redux = mp_reduce_2k;
#else
     err = MP_VAL;
     goto LBL_M;
#endif
  }

  /* setup result */
  if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
    goto LBL_M;
  }

  /* create M table
   *
   * The first half of the table is not computed, except for M[1]
   * (M[0] is never touched by this function).
   */

  if (redmode == 0) {
#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
     /* now we need R mod m */
     if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
       goto LBL_RES;
     }

     /* now set M[1] to G * R mod m */
     if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
       goto LBL_RES;
     }
#else
     err = MP_VAL;
     goto LBL_RES;
#endif
  } else {
     /* non-Montgomery modes start from res = 1 and M[1] = G mod P */
     mp_set(&res, 1);
     if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
        goto LBL_RES;
     }
  }

  /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
    goto LBL_RES;
  }

  for (x = 0; x < (winsize - 1); x++) {
    if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
      goto LBL_RES;
    }
    if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
      goto LBL_RES;
    }
  }

  /* create upper table: each entry is the previous one times M[1] */
  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
      goto LBL_RES;
    }
    if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
      goto LBL_RES;
    }
  }

  /* set initial mode and bit cnt
   *
   * mode 0: still skipping leading zero bits
   * mode 1: between windows
   * mode 2: currently filling a window
   */
  mode   = 0;
  bitcnt = 1;
  buf    = 0;
  digidx = X->used - 1;
  bitcpy = 0;
  bitbuf = 0;

  for (;;) {
    /* grab next digit as required */
    if (--bitcnt == 0) {
      /* if digidx == -1 we are out of digits so break */
      if (digidx == -1) {
        break;
      }
      /* read next digit and reset bitcnt */
      buf    = X->dp[digidx--];
      bitcnt = (int)DIGIT_BIT;
    }

    /* grab the next msb from the exponent */
    y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
    buf <<= (mp_digit)1;

    /* if the bit is zero and mode == 0 then we ignore it
     * These represent the leading zero bits before the first 1 bit
     * in the exponent.  Technically this opt is not required but it
     * does lower the # of trivial squaring/reductions used
     */
    if ((mode == 0) && (y == 0)) {
      continue;
    }

    /* if the bit is zero and mode == 1 then we square */
    if ((mode == 1) && (y == 0)) {
      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
        goto LBL_RES;
      }
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
        goto LBL_RES;
      }
      continue;
    }

    /* else we add it to the window */
    bitbuf |= (y << (winsize - ++bitcpy));
    mode    = 2;

    if (bitcpy == winsize) {
      /* ok window is filled so square as required and multiply  */
      /* square first */
      for (x = 0; x < winsize; x++) {
        if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
          goto LBL_RES;
        }
        if ((err = redux (&res, P, mp)) != MP_OKAY) {
          goto LBL_RES;
        }
      }

      /* then multiply */
      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
        goto LBL_RES;
      }
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
        goto LBL_RES;
      }

      /* empty window and reset */
      bitcpy = 0;
      bitbuf = 0;
      mode   = 1;
    }
  }

  /* if bits remain then square/multiply */
  if ((mode == 2) && (bitcpy > 0)) {
    /* square then multiply if the bit is set */
    for (x = 0; x < bitcpy; x++) {
      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
        goto LBL_RES;
      }
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
        goto LBL_RES;
      }

      /* get next bit of the window */
      bitbuf <<= 1;
      if ((bitbuf & (1 << winsize)) != 0) {
        /* then multiply */
        if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
          goto LBL_RES;
        }
        if ((err = redux (&res, P, mp)) != MP_OKAY) {
          goto LBL_RES;
        }
      }
    }
  }

  if (redmode == 0) {
     /* fixup result if Montgomery reduction is used
      * recall that any value in a Montgomery system is
      * actually multiplied by R mod n.  So we have
      * to reduce one more time to cancel out the factor
      * of R.
      */
     if ((err = redux(&res, P, mp)) != MP_OKAY) {
       goto LBL_RES;
     }
  }

  /* swap res with Y */
  mp_exch (&res, Y);
  err = MP_OKAY;
LBL_RES:mp_clear (&res);
LBL_M:
  /* release exactly the table cells that were initialized above */
  mp_clear(&M[1]);
  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
    mp_clear (&M[x]);
  }
  return err;
}
/* this function is less generic than mp_n_root, simpler and faster
 *
 * Stores the integer square root of arg in ret.  Returns MP_VAL for a
 * negative argument, MP_OKAY on success, or the error reported by a
 * callee.
 */
int mp_sqrt(const mp_int *arg, mp_int *ret)
{
  int res;
  mp_int t1, t2;
  int i, j, k;
#ifndef NO_FLOATING_POINT
  volatile double d;
  mp_digit dig;
#endif

  /* must be positive */
  if (arg->sign == MP_NEG) {
    return MP_VAL;
  }

  /* easy out */
  if (mp_iszero(arg) == MP_YES) {
    mp_zero(ret);
    return MP_OKAY;
  }

  /*
   * Single-digit argument.  The estimate code below computes
   * i = used/2 - 1, which is -1 here, and would then index arg->dp and
   * t1.dp with negative subscripts.  Handle this case with a plain
   * Newton iteration on the digit itself: starting from n, the sequence
   * x -> (x + n/x)/2 decreases until it reaches floor(sqrt(n)).
   */
  if (arg->used == 1) {
    mp_digit n = arg->dp[0];
    mp_digit x = n;
    mp_digit y = (x / 2) + (x & 1);

    while (y < x) {
      x = y;
      y = (x + (n / x)) / 2;
    }

    if ((res = mp_init_size(&t1, 1)) != MP_OKAY) {
      return res;
    }
    t1.dp[0] = x;
    t1.used  = (x != 0) ? 1 : 0;
    mp_exch(&t1, ret);
    mp_clear(&t1);
    return MP_OKAY;
  }

  /* from here on arg->used >= 2, so i >= 0 and j >= 0 */
  i = (arg->used / 2) - 1;
  j = 2 * i;
  if ((res = mp_init_size(&t1, i+2)) != MP_OKAY) {
    return res;
  }

  if ((res = mp_init(&t2)) != MP_OKAY) {
    goto E2;
  }

  /* the low i digits of the estimate are zero */
  for (k = 0; k < i; ++k) {
    t1.dp[k] = (mp_digit) 0;
  }

#ifndef NO_FLOATING_POINT

  /* Estimate the square root using the hardware floating point unit. */

  d = 0.0;
  for (k = arg->used-1; k >= j; --k) {
    d = ldexp(d, DIGIT_BIT) + (double)(arg->dp[k]);
  }

  /*
   * At this point, d is the nearest floating point number to the most
   * significant digits of arg (those at index j and above). Extract its
   * square root.
   */

  d = sqrt(d);

  /* dig is the most significant mp_digit of the square root */

  dig = (mp_digit) ldexp(d, -DIGIT_BIT);

  /*
   * If the most significant digit is nonzero, find the next digit down
   * by subtracting 2**DIGIT_BIT times the most significant digit.
   * Subtract one from the result so that our initial estimate is always
   * low.
   */

  if (dig) {
    t1.used = i+2;
    d -= ldexp((double) dig, DIGIT_BIT);
    if (d >= 1.0) {
      t1.dp[i+1] = dig;
      t1.dp[i] = ((mp_digit) d) - 1;
    } else {
      t1.dp[i+1] = dig-1;
      t1.dp[i] = MP_DIGIT_MAX;
    }
  } else {
    t1.used = i+1;
    t1.dp[i] = ((mp_digit) d) - 1;
  }

#else

  /* Estimate the square root as having 1 in the most significant place. */

  t1.used = i + 2;
  t1.dp[i+1] = (mp_digit) 1;
  t1.dp[i] = (mp_digit) 0;

#endif

  /* t1 > 0  */
  if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {
    goto E1;
  }
  if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {
    goto E1;
  }
  if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {
    goto E1;
  }
  /* And now t1 > sqrt(arg) */
  do {
    if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {
      goto E1;
    }
    if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {
      goto E1;
    }
    if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {
      goto E1;
    }
    /* t1 >= sqrt(arg) >= t2 at this point */
  } while (mp_cmp_mag(&t1, &t2) == MP_GT);

  mp_exch(&t1, ret);

E1: mp_clear(&t2);
E2: mp_clear(&t1);
  return res;
}
/* Do modular exponentiation using floating point multiply code. */ mp_err mp_exptmod_f(const mp_int * montBase, const mp_int * exponent, const mp_int * modulus, mp_int * result, mp_mont_modulus *mmm, int nLen, mp_size bits_in_exponent, mp_size window_bits, mp_size odd_ints) { mp_digit *mResult; double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp; double dn0; mp_size i; mp_err res; int expOff; int dSize = 0, oddPowSize, dTmpSize; mp_int accum1; double *oddPowers[MAX_ODD_INTS]; /* function for computing n0prime only works if n0 is odd */ MP_DIGITS(&accum1) = 0; for (i = 0; i < MAX_ODD_INTS; ++i) oddPowers[i] = 0; MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) ); mp_set(&accum1, 1); MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) ); MP_CHECKOK( s_mp_pad(&accum1, nLen) ); oddPowSize = 2 * nLen + 1; dTmpSize = 2 * oddPowSize; dSize = sizeof(double) * (nLen * 4 + 1 + ((odd_ints + 1) * oddPowSize) + dTmpSize); dBuf = (double *)malloc(dSize); dm1 = dBuf; /* array of d32 */ dn = dBuf + nLen; /* array of d32 */ dSqr = dn + nLen; /* array of d32 */ d16Tmp = dSqr + nLen; /* array of d16 */ dTmp = d16Tmp + oddPowSize; for (i = 0; i < odd_ints; ++i) { oddPowers[i] = dTmp; dTmp += oddPowSize; } mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */ /* Make dn and dn0 */ conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen); dn0 = (double)(mmm->n0prime & 0xffff); /* Make dSqr */ conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen); mont_mulf_noconv(mResult, dm1, oddPowers[0], dTmp, dn, MP_DIGITS(modulus), nLen, dn0); conv_i32_to_d32(dSqr, mResult, nLen); for (i = 1; i < odd_ints; ++i) { mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1], dTmp, dn, MP_DIGITS(modulus), nLen, dn0); conv_i32_to_d16(oddPowers[i], mResult, nLen); } s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */ for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { mp_size smallExp; MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) ); smallExp = 
(mp_size)res; if (window_bits == 1) { if (!smallExp) { SQR; } else if (smallExp & 1) { SQR; MUL(0); } else { ABORT; } } else if (window_bits == 4) { if (!smallExp) { SQR; SQR; SQR; SQR; } else if (smallExp & 1) { SQR; SQR; SQR; SQR; MUL(smallExp/2); } else if (smallExp & 2) { SQR; SQR; SQR; MUL(smallExp/4); SQR; } else if (smallExp & 4) { SQR; SQR; MUL(smallExp/8); SQR; SQR; } else if (smallExp & 8) { SQR; MUL(smallExp/16); SQR; SQR; SQR; } else { ABORT; } } else if (window_bits == 5) { if (!smallExp) { SQR; SQR; SQR; SQR; SQR; } else if (smallExp & 1) { SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2); } else if (smallExp & 2) { SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR; } else if (smallExp & 4) { SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR; } else if (smallExp & 8) { SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR; } else if (smallExp & 0x10) { SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR; } else { ABORT; } } else if (window_bits == 6) { if (!smallExp) { SQR; SQR; SQR; SQR; SQR; SQR; } else if (smallExp & 1) { SQR; SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2); } else if (smallExp & 2) { SQR; SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR; } else if (smallExp & 4) { SQR; SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR; } else if (smallExp & 8) { SQR; SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR; } else if (smallExp & 0x10) { SQR; SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR; } else if (smallExp & 0x20) { SQR; MUL(smallExp/64); SQR; SQR; SQR; SQR; SQR; } else { ABORT; } } else { ABORT; } } s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */ res = s_mp_redc(&accum1, mmm); mp_exch(&accum1, result); CLEANUP: mp_clear(&accum1); if (dBuf) { if (dSize) memset(dBuf, 0, dSize); free(dBuf); } return res; }
/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
 * k2 * P(x, y), where G is the generator (base point) of the group of
 * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
 * Input and output values are assumed to be NOT field-encoded. Uses
 * algorithm 15 (simultaneous multiple point multiplication) from Brown,
 * Hankerson, Lopez, Menezes. Software Implementation of the NIST
 * Elliptic Curves over Prime Fields.
 *
 * precomp[i][j] holds the point i*A + j*B for i, j in 0..3 (index 0 is
 * the x coordinate, index 1 the y coordinate), where A is the point
 * paired with the scalar "a" and B the point paired with "b".  The
 * scalars are then consumed two bits at a time from the top.
 *
 * Returns MP_BADARG for invalid argument combinations, the result of
 * ECPoint_mul when only one scalar/point pair is supplied, and otherwise
 * MP_OKAY or the first error raised through MP_CHECKOK. */
mp_err
ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, const mp_int *px,
					const mp_int *py, mp_int *rx, mp_int *ry,
					const ECGroup *group)
{
	mp_err res = MP_OKAY;
	mp_int precomp[4][4][2];
	const mp_int *a, *b;
	int i, j;
	int ai, bi, d;

	ARGCHK(group != NULL, MP_BADARG);
	ARGCHK(!((k1 == NULL)
			 && ((k2 == NULL) || (px == NULL)
				 || (py == NULL))), MP_BADARG);

	/* if some arguments are not defined used ECPoint_mul */
	if (k1 == NULL) {
		return ECPoint_mul(group, k2, px, py, rx, ry);
	} else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
		return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
	}

	/* initialize precomputation table */
	/* First pass only nulls the digit pointers, so that CLEANUP can call
	 * mp_clear on every entry even if a later mp_init_size fails. */
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			MP_DIGITS(&precomp[i][j][0]) = 0;
			MP_DIGITS(&precomp[i][j][1]) = 0;
		}
	}
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			 MP_CHECKOK( mp_init_size(&precomp[i][j][0],
					 ECL_MAX_FIELD_SIZE_DIGITS, FLAG(k1)) );
			 MP_CHECKOK( mp_init_size(&precomp[i][j][1],
					 ECL_MAX_FIELD_SIZE_DIGITS, FLAG(k1)) );
		}
	}

	/* fill precomputation table */
	/* assign {k1, k2} = {a, b} such that len(a) >= len(b) */
	if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
		/* k2 is longer: a = k2 pairs with P, b = k1 pairs with G */
		a = k2;
		b = k1;
		if (group->meth->field_enc) {
			MP_CHECKOK(group->meth->
					   field_enc(px, &precomp[1][0][0], group->meth));
			MP_CHECKOK(group->meth->
					   field_enc(py, &precomp[1][0][1], group->meth));
		} else {
			MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
			MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
		}
		MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0]));
		MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
	} else {
		/* k1 is at least as long: a = k1 pairs with G, b = k2 with P */
		a = k1;
		b = k2;
		MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
		MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
		if (group->meth->field_enc) {
			MP_CHECKOK(group->meth->
					   field_enc(px, &precomp[0][1][0], group->meth));
			MP_CHECKOK(group->meth->
					   field_enc(py, &precomp[0][1][1], group->meth));
		} else {
			MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
			MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
		}
	}
	/* precompute [*][0][*] */
	/* [0][0] is the all-zero point, [2][0] = 2A, [3][0] = A + 2A */
	mp_zero(&precomp[0][0][0]);
	mp_zero(&precomp[0][0][1]);
	MP_CHECKOK(group->
			   point_dbl(&precomp[1][0][0], &precomp[1][0][1],
						 &precomp[2][0][0], &precomp[2][0][1], group));
	MP_CHECKOK(group->
			   point_add(&precomp[1][0][0], &precomp[1][0][1],
						 &precomp[2][0][0], &precomp[2][0][1],
						 &precomp[3][0][0], &precomp[3][0][1], group));
	/* precompute [*][1][*] */
	for (i = 1; i < 4; i++) {
		MP_CHECKOK(group->
				   point_add(&precomp[0][1][0], &precomp[0][1][1],
							 &precomp[i][0][0], &precomp[i][0][1],
							 &precomp[i][1][0], &precomp[i][1][1], group));
	}
	/* precompute [*][2][*] */
	MP_CHECKOK(group->
			   point_dbl(&precomp[0][1][0], &precomp[0][1][1],
						 &precomp[0][2][0], &precomp[0][2][1], group));
	for (i = 1; i < 4; i++) {
		MP_CHECKOK(group->
				   point_add(&precomp[0][2][0], &precomp[0][2][1],
							 &precomp[i][0][0], &precomp[i][0][1],
							 &precomp[i][2][0], &precomp[i][2][1], group));
	}
	/* precompute [*][3][*] */
	MP_CHECKOK(group->
			   point_add(&precomp[0][1][0], &precomp[0][1][1],
						 &precomp[0][2][0], &precomp[0][2][1],
						 &precomp[0][3][0], &precomp[0][3][1], group));
	for (i = 1; i < 4; i++) {
		MP_CHECKOK(group->
				   point_add(&precomp[0][3][0], &precomp[0][3][1],
							 &precomp[i][0][0], &precomp[i][0][1],
							 &precomp[i][3][0], &precomp[i][3][1], group));
	}

	/* number of 2-bit windows needed to cover the longer scalar */
	d = (mpl_significant_bits(a) + 1) / 2;

	/* R = inf */
	mp_zero(rx);
	mp_zero(ry);

	for (i = d - 1; i >= 0; i--) {
		/* ai, bi = the i-th 2-bit window of a and of b */
		ai = MP_GET_BIT(a, 2 * i + 1);
		ai <<= 1;
		ai |= MP_GET_BIT(a, 2 * i);
		bi = MP_GET_BIT(b, 2 * i + 1);
		bi <<= 1;
		bi |= MP_GET_BIT(b, 2 * i);
		/* R = 2^2 * R */
		MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
		MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
		/* R = R + (ai * A + bi * B) */
		MP_CHECKOK(group->
				   point_add(rx, ry, &precomp[ai][bi][0],
							 &precomp[ai][bi][1], rx, ry, group));
	}

	/* convert the result back out of the field encoding, if any */
	if (group->meth->field_dec) {
		MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
		MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
	}

  CLEANUP:
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			mp_clear(&precomp[i][j][0]);
			mp_clear(&precomp[i][j][1]);
		}
	}
	return res;
}
/* Computes result = inBase ** exponent (mod modulus).
 *
 * An even modulus is handed to s_mp_exptmod.  Otherwise the base is
 * reduced below the modulus if necessary, converted with s_mp_to_mont,
 * a window size is chosen from the exponent's bit length, the bit count
 * is rounded up to a multiple of that window, and the work is passed to
 * one of mp_exptmod_f, mp_exptmod_safe_i or mp_exptmod_i depending on
 * build options and run-time flags.
 *
 * Returns the status of whichever routine did the work, or the first
 * error raised through MP_CHECKOK.
 */
mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
		  const mp_int *modulus, mp_int *result)
{
  const mp_int *base;
  mp_size bits_in_exponent, i, window_bits, odd_ints;
  mp_err  res;
  int     nLen;
  mp_int  montBase, goodBase;
  mp_mont_modulus mmm;
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  /* computed on first use and then kept for later calls */
  static unsigned int max_window_bits;
#endif

  /* function for computing n0prime only works if n0 is odd */
  if (!mp_isodd(modulus))
    return s_mp_exptmod(inBase, exponent, modulus, result);

  /* null the digit pointers so CLEANUP may clear both unconditionally */
  MP_DIGITS(&montBase) = 0;
  MP_DIGITS(&goodBase) = 0;

  if (mp_cmp(inBase, modulus) < 0) {
    base = inBase;
  } else {
    /* base is not below the modulus: work on inBase mod modulus */
    MP_CHECKOK( mp_init(&goodBase) );
    base = &goodBase;
    MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
  }

  nLen  = MP_USED(modulus);
  MP_CHECKOK( mp_init_size(&montBase, 2 * nLen + 2) );

  mmm.N = *modulus;			/* a copy of the mp_int struct */

  /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
  **		where n0 = least significant mp_digit of N, the modulus.
  */
  mmm.n0prime = 0 - s_mp_invmod_radix( MP_DIGIT(modulus, 0) );

  MP_CHECKOK( s_mp_to_mont(base, &mmm, &montBase) );

  bits_in_exponent = mpl_significant_bits(exponent);
  /* NOTE: the "else" that closes the block below binds to the plain
   * if/else chain after the #endif */
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  if (mp_using_cache_safe_exp) {
    if (bits_in_exponent > 780)
	window_bits = 6;
    else if (bits_in_exponent > 256)
	window_bits = 5;
    else if (bits_in_exponent > 20)
	window_bits = 4;
	/* RSA public key exponents are typically under 20 bits (common values
	 * are: 3, 17, 65537) and a 4-bit window is inefficient
	 */
    else
	window_bits = 1;
  } else
#endif
  if (bits_in_exponent > 480)
    window_bits = 6;
  else if (bits_in_exponent > 160)
    window_bits = 5;
  else if (bits_in_exponent > 20)
    window_bits = 4;
	/* RSA public key exponents are typically under 20 bits (common values
	 * are: 3, 17, 65537) and a 4-bit window is inefficient
	 */
  else
    window_bits = 1;

#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  /*
   * clamp the window size based on
   * the cache line size.
   */
  if (!max_window_bits) {
    unsigned long cache_size = s_mpi_getProcessorLineSize();
    /* processor has no cache, use 'fast' code always */
    if (cache_size == 0) {
      mp_using_cache_safe_exp = 0;
    }
    if ((cache_size == 0) || (cache_size >= 64)) {
      max_window_bits = 6;
    } else if (cache_size >= 32) {
      max_window_bits = 5;
    } else if (cache_size >= 16) {
      max_window_bits = 4;
    } else max_window_bits = 1; /* should this be an assert? */
  }

  /* clamp the window size down before we round bits_in_exponent up */
  if (mp_using_cache_safe_exp) {
    if (window_bits > max_window_bits) {
      window_bits = max_window_bits;
    }
  }
#endif

  odd_ints = 1 << (window_bits - 1);
  /* round the bit count up to a whole number of windows */
  i = bits_in_exponent % window_bits;
  if (i != 0) {
    bits_in_exponent += window_bits - i;
  }

#ifdef MP_USING_MONT_MULF
  if (mp_using_mont_mulf) {
    MP_CHECKOK( s_mp_pad(&montBase, nLen) );
    res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
		     bits_in_exponent, window_bits, odd_ints);
  } else
#endif
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  if (mp_using_cache_safe_exp) {
    res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,
		     bits_in_exponent, window_bits, 1 << window_bits);
  } else
#endif
  res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
		     bits_in_exponent, window_bits, odd_ints);

CLEANUP:
  mp_clear(&montBase);
  mp_clear(&goodBase);
  /* Don't mp_clear mmm.N because it is merely a copy of modulus.
  ** Just zap it.
  */
  memset(&mmm, 0, sizeof mmm);
  return res;
}
/*
 * Low level squaring, b = a*a (HAC pp.596-597, Algorithm 14.16).
 *
 * The square is accumulated in a temporary of 2*a->used + 1 digits: for
 * every digit the diagonal term a[i]*a[i] is added at position 2*i, then
 * each cross product a[i]*a[j] (j > i) is added twice, and any remaining
 * carry is rippled upwards.  The temporary is clamped and exchanged with b.
 *
 * Returns the error from mp_init_size() if the temporary cannot be
 * allocated, MP_OKAY otherwise.
 */
int s_mp_sqr (mp_int * a, mp_int * b)
{
  mp_int   acc;
  int      err, i, j, ndigits;
  mp_word  wide;
  mp_digit carry, lhs, *out;

  ndigits = a->used;
  err = mp_init_size (&acc, 2*ndigits + 1);
  if (err != MP_OKAY) {
    return err;
  }

  /* assume the largest possible result; mp_clamp trims it afterwards */
  acc.used = 2*ndigits + 1;

  for (i = 0; i < ndigits; i++) {
    lhs = a->dp[i];

    /* diagonal term: acc[2i] += a[i] * a[i] */
    wide = ((mp_word) acc.dp[2*i]) + ((mp_word) lhs) * ((mp_word) lhs);
    acc.dp[2*i] = (mp_digit) (wide & ((mp_word) MP_MASK));
    carry = (mp_digit) (wide >> ((mp_word) DIGIT_BIT));

    /* cross terms land from position 2i+1 onwards */
    out = acc.dp + (2*i + 1);
    for (j = i + 1; j < ndigits; j++) {
      wide = ((mp_word) lhs) * ((mp_word) a->dp[j]);

      /* the product is added twice (rather than multiplied by two) */
      wide = ((mp_word) *out) + wide + wide + ((mp_word) carry);

      *out++ = (mp_digit) (wide & ((mp_word) MP_MASK));
      carry = (mp_digit) (wide >> ((mp_word) DIGIT_BIT));
    }

    /* ripple whatever carry is left into the higher digits */
    for (; carry != ((mp_digit) 0); ) {
      wide = ((mp_word) *out) + ((mp_word) carry);
      *out++ = (mp_digit) (wide & ((mp_word) MP_MASK));
      carry = (mp_digit) (wide >> ((mp_word) DIGIT_BIT));
    }
  }

  mp_clamp (&acc);
  mp_exch (&acc, b);
  mp_clear (&acc);
  return MP_OKAY;
}
/*
 * Single digit division (based on routine from MPI).
 *
 * Divides a by the digit b.  When c is non-NULL it receives the quotient;
 * when d is non-NULL it receives the remainder digit.
 *
 * Shortcuts are taken for b == 1 or a == 0, for b a power of two, and
 * (when BN_MP_DIV_3_C is defined) for b == 3.
 *
 * Returns MP_VAL for b == 0, otherwise the status of the helper that did
 * the work (MP_OKAY on success).
 */
int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{
  mp_int   quot;
  mp_word  rem;
  mp_digit qdig;
  int      err, pos;

  /* cannot divide by zero */
  if (b == 0) {
    return MP_VAL;
  }

  /* trivial cases: the quotient is a itself and the remainder is zero */
  if (b == 1 || mp_iszero(a) == 1) {
    if (d != NULL) {
      *d = 0;
    }
    return (c != NULL) ? mp_copy(a, c) : MP_OKAY;
  }

  /* power of two: mask for the remainder, shift for the quotient */
  if (s_is_power_of_two(b, &pos) == 1) {
    if (d != NULL) {
      *d = a->dp[0] & ((((mp_digit)1)<<pos) - 1);
    }
    return (c != NULL) ? mp_div_2d(a, pos, c, NULL) : MP_OKAY;
  }

#ifdef BN_MP_DIV_3_C
  /* dedicated routine for three */
  if (b == 3) {
    return mp_div_3(a, c, d);
  }
#endif

  /* general case: schoolbook division, most significant digit first */
  err = mp_init_size(&quot, a->used);
  if (err != MP_OKAY) {
    return err;
  }

  quot.used = a->used;
  quot.sign = a->sign;

  rem = 0;
  for (pos = a->used; pos-- > 0; ) {
    rem = (rem << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[pos]);

    qdig = 0;
    if (rem >= b) {
      qdig = (mp_digit)(rem / b);
      rem -= ((mp_word)qdig) * ((mp_word)b);
    }
    quot.dp[pos] = (mp_digit)qdig;
  }

  if (d != NULL) {
    *d = (mp_digit)rem;
  }

  if (c != NULL) {
    mp_clamp(&quot);
    mp_exch(&quot, c);
  }
  mp_clear(&quot);

  return err;
}
/*
 * Integer square root: ret = floor(sqrt(arg)).
 * This function is less generic than mp_n_root, simpler and faster.
 *
 * An initial estimate is built (from the hardware floating point unit
 * unless NO_FLOATING_POINT is defined) and refined by Newton steps
 * t1 = (t1 + arg / t1) / 2 until t1 <= arg / t1.
 *
 * Returns MP_VAL for a negative argument, MP_OKAY on success, or the error
 * reported by the first failing helper.
 *
 * Fixes relative to the previous revision:
 *   - a one-digit argument made i == -1, which indexed arg->dp[] and
 *     t1.dp[] with negative subscripts; i is now clamped to zero.
 *   - a fractional remainder in (0, 1) made ((mp_digit) d) - 1 wrap around
 *     to an all-ones digit; the test is now d >= 1.0.
 *   - the estimate could be zero, or carry a zero top digit, when handed to
 *     mp_div(); it is now always a positive, clamped value.
 */
int mp_sqrt(mp_int *arg, mp_int *ret)
{
  int res;
  mp_int t1,t2;
  int i, j, k;
#ifndef NO_FLOATING_POINT
  double d;
  mp_digit dig;
#endif

  /* must be positive */
  if (arg->sign == MP_NEG) {
    return MP_VAL;
  }

  /* easy out */
  if (mp_iszero(arg) == MP_YES) {
    mp_zero(ret);
    return MP_OKAY;
  }

  /* i is the index of the estimate's low digit; j is the lowest digit of
   * arg that feeds the estimate.  Neither may be negative. */
  i = (arg->used / 2) - 1;
  if (i < 0) {
    i = 0;
  }
  j = 2 * i;
  if ((res = mp_init_size(&t1, i+2)) != MP_OKAY) {
    return res;
  }

  if ((res = mp_init(&t2)) != MP_OKAY) {
    goto E2;
  }

  for (k = 0; k < i; ++k) {
    t1.dp[k] = (mp_digit) 0;
  }

#ifndef NO_FLOATING_POINT

  /* Estimate the square root using the hardware floating point unit. */
  d = 0.0;
  for (k = arg->used-1; k >= j; --k) {
    d = ldexp(d, DIGIT_BIT) + (double) (arg->dp[k]);
  }
  d = sqrt(d);

  /* dig is the most significant digit of the estimated root */
  dig = (mp_digit) ldexp(d, -DIGIT_BIT);

  if (dig) {
    d -= ldexp((double) dig, DIGIT_BIT);
    if (d >= 1.0) {
      /* next digit down, minus one so that the estimate stays low */
      t1.used = i+2;
      t1.dp[i+1] = dig;
      t1.dp[i] = ((mp_digit) d) - 1;
    } else {
      /* borrow from the top digit instead of wrapping the low one */
      t1.dp[i+1] = dig-1;
      t1.dp[i] = MP_DIGIT_MAX;
      /* keep t1 clamped when the borrow emptied the top digit */
      t1.used = (dig > 1) ? i+2 : i+1;
    }
  } else {
    t1.used = i+1;
    t1.dp[i] = ((mp_digit) d) - 1;
    /* tiny arguments (1..3) would give a zero estimate; use 1 instead so
     * the division below never sees a zero divisor */
    if (t1.dp[i] == (mp_digit) 0) {
      t1.dp[i] = (mp_digit) 1;
    }
  }

#else

  /* Estimate the square root as having 1 in the most significant place. */
  t1.used = i + 2;
  t1.dp[i+1] = (mp_digit) 1;
  t1.dp[i] = (mp_digit) 0;

#endif

  /* t1 > 0 */
  if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
    goto E1;
  }
  if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) {
    goto E1;
  }
  if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) {
    goto E1;
  }
  /* And now t1 > sqrt(arg) */
  do {
    if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
      goto E1;
    }
    if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) {
      goto E1;
    }
    if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) {
      goto E1;
    }
    /* t1 >= sqrt(arg) >= t2 at this point */
  } while (mp_cmp_mag(&t1,&t2) == MP_GT);

  mp_exch(&t1,ret);

E1: mp_clear(&t2);
E2: mp_clear(&t1);
  return res;
}
int main(int argc, char *argv[]) { int ix, num, prec = 8; unsigned int seed; clock_t start, stop; double sec; mp_int a, m, c; if(getenv("SEED") != NULL) seed = abs(atoi(getenv("SEED"))); else seed = (unsigned int)time(NULL); if(argc < 2) { fprintf(stderr, "Usage: %s <num-tests> [<nbits>]\n", argv[0]); return 1; } if((num = atoi(argv[1])) < 0) num = -num; if(!num) { fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]); return 1; } if(argc > 2) { if((prec = atoi(argv[2])) <= 0) prec = 8; else prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT; } printf("Modular exponentiation timing test\n" "Precision: %d digits (%d bits)\n" "# of tests: %d\n\n", prec, prec * DIGIT_BIT, num); mp_init_size(&a, prec); mp_init_size(&m, prec); mp_init_size(&c, prec); srand(seed); start = clock(); for(ix = 0; ix < num; ix++) { mpp_random_size(&a, prec); mpp_random_size(&c, prec); mpp_random_size(&m, prec); /* set msb and lsb of m */ DIGIT(&m,0) |= 1; DIGIT(&m, USED(&m)-1) |= (mp_digit)1 << (DIGIT_BIT - 1); if (mp_cmp(&a, &m) > 0) mp_sub(&a, &m, &a); mp_exptmod(&a, &c, &m, &c); } stop = clock(); sec = clk_to_sec(start, stop); printf("Total: %.3f seconds\n", sec); printf("Individual: %.3f seconds\n", sec / num); mp_clear(&c); mp_clear(&a); mp_clear(&m); return 0; }
/* Do modular exponentiation using integer multiply code. */ mp_err mp_exptmod_i(const mp_int * montBase, const mp_int * exponent, const mp_int * modulus, mp_int * result, mp_mont_modulus *mmm, int nLen, mp_size bits_in_exponent, mp_size window_bits, mp_size odd_ints) { mp_int *pa1, *pa2, *ptmp; mp_size i; mp_err res; int expOff; mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS]; /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */ /* oddPowers[i] = base ** (2*i + 1); */ MP_DIGITS(&accum1) = 0; MP_DIGITS(&accum2) = 0; MP_DIGITS(&power2) = 0; for (i = 0; i < MAX_ODD_INTS; ++i) { MP_DIGITS(oddPowers + i) = 0; } MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) ); MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) ); MP_CHECKOK( mp_init_copy(&oddPowers[0], montBase) ); mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2); MP_CHECKOK( mp_sqr(montBase, &power2) ); /* power2 = montBase ** 2 */ MP_CHECKOK( s_mp_redc(&power2, mmm) ); for (i = 1; i < odd_ints; ++i) { mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2); MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) ); MP_CHECKOK( s_mp_redc(oddPowers + i, mmm) ); } /* set accumulator to montgomery residue of 1 */ mp_set(&accum1, 1); MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) ); pa1 = &accum1; pa2 = &accum2; for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { mp_size smallExp; MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) ); smallExp = (mp_size)res; if (window_bits == 1) { if (!smallExp) { SQR(pa1,pa2); SWAPPA; } else if (smallExp & 1) { SQR(pa1,pa2); MUL(0,pa2,pa1); } else { ABORT; } } else if (window_bits == 4) { if (!smallExp) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 1) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2, pa1,pa2); SWAPPA; } else if (smallExp & 2) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 4) { 
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 8) { SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else { ABORT; } } else if (window_bits == 5) { if (!smallExp) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 1) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1); } else if (smallExp & 2) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 4) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 8) { SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 0x10) { SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else { ABORT; } } else if (window_bits == 6) { if (!smallExp) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); } else if (smallExp & 1) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA; } else if (smallExp & 2) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 4) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 8) { SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 0x10) { SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else if (smallExp & 0x20) { SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA; } else { 
ABORT; } } else { ABORT; } } res = s_mp_redc(pa1, mmm); mp_exch(pa1, result); CLEANUP: mp_clear(&accum1); mp_clear(&accum2); mp_clear(&power2); for (i = 0; i < odd_ints; ++i) { mp_clear(oddPowers + i); } return res; }
int main(int argc, char *argv[]) { int ix, num, prec = 8; unsigned int d; instant_t start, finish; time_t seed; mp_int a, m, c; seed = time(NULL); if(argc < 2) { fprintf(stderr, "Usage: %s <num-tests> [<precision>]\n", argv[0]); return 1; } if((num = atoi(argv[1])) < 0) num = -num; if(!num) { fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]); return 1; } if(argc > 2) { if((prec = atoi(argv[2])) <= 0) prec = 8; } printf("Test 3a: Modular exponentiation timing test\n" "Precision: %d digits (%d bits)\n" "# of tests: %d\n\n", prec, prec * DIGIT_BIT, num); mp_init_size(&a, prec); mp_init_size(&m, prec); mp_init_size(&c, prec); s_mp_pad(&a, prec); s_mp_pad(&m, prec); s_mp_pad(&c, prec); printf("Testing modular exponentiation ... \n"); srand((unsigned int)seed); start = now(); for(ix = 0; ix < num; ix++) { mpp_random(&a); mpp_random(&c); mpp_random(&m); mp_exptmod(&a, &c, &m, &c); } finish = now(); d = (finish.sec - start.sec) * 1000000; d -= start.usec; d += finish.usec; printf("Total time elapsed: %u usec\n", d); printf("Time per exponentiation: %u usec (%.3f sec)\n", (d / num), (double)(d / num) / 1000000); mp_clear(&c); mp_clear(&a); mp_clear(&m); return 0; }
/*
 * Timing driver comparing mp_mul(a, a) against mp_sqr(a).
 *
 * Usage: <prog> <ntests> <nbits>
 *
 * Both passes reseed rand() with the same seed before drawing their random
 * operands.  The two timings are printed together with the relative
 * speedup of the faster routine.
 */
int main(int argc, char *argv[])
{
  int           ntests, prec, round;
  unsigned int  seed;
  clock_t       start, stop;
  double        multime, sqrtime;
  mp_int        a, c;

  seed = (unsigned int)time(NULL);

  if (argc < 3) {
    fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
    return 1;
  }

  ntests = abs(atoi(argv[1]));
  if (ntests == 0) {
    fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
    return 1;
  }

  prec = abs(atoi(argv[2]));
  if (prec < CHAR_BIT) {
    fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
	    CHAR_BIT);
    return 1;
  }

  /* convert the bit count into whole digits */
  prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;

  mp_init_size(&a, prec);
  mp_init_size(&c, 2 * prec);

  /* Test multiplication by self */
  srand(seed);
  start = clock();
  round = 0;
  while (round < ntests) {
    mpp_random_size(&a, prec);
    mp_mul(&a, &a, &c);
    round++;
  }
  stop = clock();
  multime = (double)(stop - start) / CLOCKS_PER_SEC;

  /* Test squaring */
  srand(seed);
  start = clock();
  round = 0;
  while (round < ntests) {
    mpp_random_size(&a, prec);
    mp_sqr(&a, &c);
    round++;
  }
  stop = clock();
  sqrtime = (double)(stop - start) / CLOCKS_PER_SEC;

  printf("Multiply: %.4f\n", multime);
  printf("Square: %.4f\n", sqrtime);

  if (multime < sqrtime) {
    printf("Speedup: %.1f%%\n", 100.0 * (1.0 - multime / sqrtime));
    printf("Prefer: multiply\n");
  } else {
    printf("Speedup: %.1f%%\n", 100.0 * (1.0 - sqrtime / multime));
    printf("Prefer: square\n");
  }

  mp_clear(&a);
  mp_clear(&c);

  return 0;
}
/*
 * Reads four bytes from *data as a big-endian value and advances *data.
 * The caller must already have reserved the four bytes.  The value is
 * assembled in unsigned arithmetic so that a leading byte >= 128 does not
 * shift into the sign bit of an int.
 */
static int bin2term_be32(apr_byte_t **data)
{
	unsigned int v;

	v  = (unsigned int)(*(*data)++) << 24;
	v |= (unsigned int)(*(*data)++) << 16;
	v |= (unsigned int)(*(*data)++) << 8;
	v |= (unsigned int)(*(*data)++);
	return (int)v;
}

/*
 * Decodes one term from a tagged binary encoding.
 *
 * Parameters:
 *   data       - in/out cursor into the input; advanced past what was read.
 *   bytes_left - in/out count of bytes still available at *data.
 *   atoms      - atom table used for tag 100.
 *   heap       - heap on which the resulting term is built.
 *
 * Returns the decoded term, or noval when the input is truncated, carries
 * an unsupported tag, or is otherwise malformed.
 *
 * Supported tags: 97 (one-byte integer), 98 (32-bit integer), 99 (31-byte
 * textual float), 100 (atom, at most 255 bytes), 104/105 (tuple with 8-bit /
 * 32-bit arity), 106 (nil), 107 (byte string turned into a list of
 * integers), 108 (list with explicit tail), 109 (binary), 110/111 (big
 * integer with 8-bit / 32-bit length).
 *
 * Fixes relative to the previous revision:
 *   - tag 110 consumed two bytes after reserving only one;
 *   - 32-bit lengths were built with a signed left shift and a negative
 *     length slipped past require(), which then increased *bytes_left;
 *   - tag 99 ran sscanf() on input that need not be NUL-terminated and
 *     used the value even when nothing was parsed;
 *   - tuple and list element counts larger than the remaining input are
 *     rejected before anything is allocated (every element needs at least
 *     one byte, so such input could never decode successfully).
 */
term_t bin2term(apr_byte_t **data, int *bytes_left, atoms_t *atoms, heap_t *heap)
{
#define require(n_) \
	do { \
		if (*bytes_left < (n_)) \
			return noval; \
		(*bytes_left) -= (n_); \
	} while (0)
#define get_byte() (*(*data)++)

	require(1);
	switch (get_byte())
	{
	case 97:
	{
		require(1);
		return tag_int(get_byte());
	}
	case 98:
	{
		require(4);
		return int_to_term(bin2term_be32(data), heap);
	}
	case 99:
	{
		char text[32];
		double value;

		require(31);
		/* work on a NUL-terminated copy so sscanf cannot run past the field */
		memcpy(text, *data, 31);
		text[31] = '\0';
		(*data) += 31;
		if (sscanf(text, "%lf", &value) != 1)
			return noval;
		return heap_float(heap, value);
	}
	case 100:
	{
		int a, b;
		int len;
		cstr_t *s;
		int index;

		require(2);
		a = get_byte();
		b = get_byte();
		len = ((a << 8) | b);
		if (len > 255)
			return noval;
		require(len);
		s = (cstr_t *)heap_alloc(heap, sizeof(cstr_t) + len);
		s->size = len;
		memcpy(s->data, *data, len);
		index = atoms_set(atoms, s);
		(*data) += len;
		return tag_atom(index);
	}
	case 104:
	{
		int arity, i;
		term_t tuple;
		term_box_t *tbox;

		require(1);
		arity = get_byte();
		tuple = heap_tuple(heap, arity);
		tbox = peel(tuple);
		for (i = 0; i < arity; i++)
		{
			term_t e = bin2term(data, bytes_left, atoms, heap);
			if (e == noval)
				return noval;
			tbox->tuple.elts[i] = e;
		}
		return tuple;
	}
	case 105:
	{
		int arity, i;
		term_t tuple;
		term_box_t *tbox;

		require(4);
		arity = bin2term_be32(data);
		/* each element takes at least one byte of input */
		if (arity < 0 || arity > *bytes_left)
			return noval;
		tuple = heap_tuple(heap, arity);
		tbox = peel(tuple);
		for (i = 0; i < arity; i++)
		{
			term_t e = bin2term(data, bytes_left, atoms, heap);
			if (e == noval)
				return noval;
			tbox->tuple.elts[i] = e;
		}
		return tuple;
	}
	case 106:
	{
		return nil;
	}
	case 107:
	{
		int a, b;
		int len, i;
		term_t cons = nil;

		require(2);
		a = get_byte();
		b = get_byte();
		len = ((a << 8) | b);
		require(len);
		/* build the list back to front so it ends up in input order */
		i = len-1;
		while (i >= 0)
			cons = heap_cons2(heap, tag_int((*data)[i--]), cons);
		(*data) += len;
		return cons;
	}
	case 108:
	{
		int len, i;
		term_t *es;
		term_t tail;

		require(4);
		len = bin2term_be32(data);
		/* len elements plus the tail each take at least one byte */
		if (len < 0 || len >= *bytes_left)
			return noval;
		es = (term_t *)heap_alloc(heap, len*sizeof(term_t));
		for (i = 0; i < len; i++)
		{
			term_t e = bin2term(data, bytes_left, atoms, heap);
			if (e == noval)
				return noval;
			es[i] = e;
		}
		tail = bin2term(data, bytes_left, atoms, heap);
		if (tail == noval)
			return noval;
		i = len-1;
		while (i >= 0)
			tail = heap_cons2(heap, es[i--], tail);
		return tail;
	}
	case 109:
	{
		int len;
		term_t bin;

		require(4);
		len = bin2term_be32(data);
		if (len < 0)
			return noval;
		require(len);
		bin = heap_binary(heap, len*8, (*data));
		(*data) += len;
		return bin;
	}
	case 110:
	{
		int len;
		int sign;
		mp_size prec;
		mp_int mp;
		mp_err rs;

		/* one length byte plus one sign byte */
		require(2);
		len = get_byte();
		sign = get_byte();
		require(len);
		prec = (len + (MP_DIGIT_SIZE-1)) / MP_DIGIT_SIZE;
		mp_init_size(&mp, prec, heap);
		//TODO: use mp_read_signed_bin
		rs = mp_read_unsigned_bin_lsb(&mp, *data, len, heap);
		if (rs != MP_OKAY)
			return noval;
		(*data) += len;
		if (sign == 1)
			mp_neg(&mp, &mp, heap);
		return mp_to_term(mp);
	}
	case 111:
	{
		int len;
		int sign;
		mp_size prec;
		mp_int mp;
		mp_err rs;

		require(4);
		len = bin2term_be32(data);
		if (len < 0)
			return noval;
		require(1);
		sign = get_byte();
		require(len);
		prec = (len + (MP_DIGIT_SIZE-1)) / MP_DIGIT_SIZE;
		mp_init_size(&mp, prec, heap);
		rs = mp_read_unsigned_bin_lsb(&mp, *data, len, heap);
		if (rs != MP_OKAY)
			return noval;
		(*data) += len;
		if (sign == 1)
			mp_neg(&mp, &mp, heap);
		return mp_to_term(mp);
	}
	default:
		return noval;	// only a subset of tags are supported; inspired by BERT
	}
}
/*
 * Do modular exponentiation using integer multiply code.
 *
 * This variant precomputes all num_powers powers of the base (not only the
 * odd ones) and stores them in a malloc'ed byte array through
 * mpi_to_weave(); values are read back with weave_to_mpi().
 *
 * Parameters:
 *   montBase         - the base as prepared by the caller.
 *   exponent         - the exponent, consumed window_bits at a time.
 *   modulus          - not referenced directly in this body (the SQR/MUL
 *                      macros defined elsewhere may use it).
 *   result           - receives the final value after s_mp_redc().
 *   mmm              - Montgomery modulus data.
 *   nLen             - digit count used to size temporaries and the table.
 *   bits_in_exponent - exponent length; the top window is read first and
 *                      the main loop starts one window below it.
 *   window_bits      - window width; the main loop handles 1, 4, 5 and 6
 *                      and calls abort() for anything else.
 *   num_powers       - number of powers stored in the table.
 *
 * Returns MP_MEM when the table cannot be allocated, otherwise the status
 * of the last operation; MP_CHECKOK failures jump to CLEANUP.
 */
mp_err mp_exptmod_safe_i(const mp_int *   montBase,
                    const mp_int *   exponent,
		    const mp_int *   modulus,
		    mp_int *         result,
		    mp_mont_modulus *mmm,
		    int              nLen,
		    mp_size          bits_in_exponent,
		    mp_size          window_bits,
		    mp_size          num_powers)
{
  mp_int *pa1, *pa2, *ptmp;
  mp_size i;
  mp_size first_window;
  mp_err  res;
  int     expOff;
  mp_int  accum1, accum2, accum[WEAVE_WORD_SIZE];
  mp_int  tmp;
  unsigned char *powersArray;
  unsigned char *powers;

  /* Null every digit pointer so CLEANUP can clear all temporaries
   * regardless of how far initialization got. */
  MP_DIGITS(&accum1) = 0;
  MP_DIGITS(&accum2) = 0;
  MP_DIGITS(&accum[0]) = 0;
  MP_DIGITS(&accum[1]) = 0;
  MP_DIGITS(&accum[2]) = 0;
  MP_DIGITS(&accum[3]) = 0;
  MP_DIGITS(&tmp) = 0;

  /* Table storage: nLen digits plus one extra byte per power. */
  powersArray = (unsigned char *)malloc(num_powers*(nLen*sizeof(mp_digit)+1));
  if (powersArray == NULL) {
    res = MP_MEM;
    goto CLEANUP;
  }

  /* powers[i] = base ** (i); */
  powers = (unsigned char *)MP_ALIGN(powersArray,num_powers);

  /* grab the first window value. This allows us to preload accumulator1
   * and save a conversion, some squares and a multiply */
  MP_CHECKOK( mpl_get_bits(exponent,
				bits_in_exponent-window_bits, window_bits) );
  first_window = (mp_size)res;

  MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
  MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );
  MP_CHECKOK( mp_init_size(&tmp, 3 * nLen + 2) );

  /* build the first WEAVE_WORD powers inline */
  /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
  if (num_powers > 2) {
    MP_CHECKOK( mp_init_size(&accum[0], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[1], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[2], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[3], 3 * nLen + 2) );
    /* accum[0] = 1 (converted with s_mp_to_mont), accum[1] = base,
     * accum[2] = base**2, accum[3] = base**3 */
    mp_set(&accum[0], 1);
    MP_CHECKOK( s_mp_to_mont(&accum[0], mmm, &accum[0]) );
    MP_CHECKOK( mp_copy(montBase, &accum[1]) );
    SQR(montBase, &accum[2]);
    MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
    MP_CHECKOK( mpi_to_weave(accum, powers, nLen, num_powers) );
    /* If the first window's power is already available, preload accum1;
     * first_window = num_powers then marks it as done. */
    if (first_window < 4) {
      MP_CHECKOK( mp_copy(&accum[first_window], &accum1) );
      first_window = num_powers;
    }
  } else {
    if (first_window == 0) {
      mp_set(&accum1, 1);
      MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
    } else {
      /* assert first_window == 1? */
      MP_CHECKOK( mp_copy(montBase, &accum1) );
    }
  }

  /*
   * calculate all the powers in the powers array.
   * this adds 2**(k-1)-2 square operations over just calculating the
   * odd powers where k is the window size in the two other mp_modexpt
   * implementations in this file. We will get some of that
   * back by not needing the first 'k' squares and one multiply for the
   * first window */
  for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
    int acc_index = i & (WEAVE_WORD_SIZE-1); /* i % WEAVE_WORD_SIZE */
    if ( i & 1 ) {
      /* odd power: previous power times the base */
      MUL_NOWEAVE(montBase, &accum[acc_index-1] , &accum[acc_index]);
      /* we've filled the array do our 'per array' processing */
      if (acc_index == (WEAVE_WORD_SIZE-1)) {
	MP_CHECKOK( mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE-1),
							nLen, num_powers) );

	if (first_window <= i) {
	  MP_CHECKOK( mp_copy(&accum[first_window & (WEAVE_WORD_SIZE-1)],
								&accum1) );
	  first_window = num_powers;
	}
      }
    } else {
      /* up to 8 we can find 2^i-1 in the accum array, but at 8 our source
       * and target are the same so we need to copy.. After that, the
       * value is overwritten, so we need to fetch it from the stored
       * weave array */
      if (i > 2* WEAVE_WORD_SIZE) {
        MP_CHECKOK(weave_to_mpi(&accum2, powers+i/2, nLen, num_powers));
        SQR(&accum2, &accum[acc_index]);
      } else {
	int half_power_index = (i/2) & (WEAVE_WORD_SIZE-1);
	if (half_power_index == acc_index) {
	   /* copy is cheaper than weave_to_mpi */
	   MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
	   SQR(&accum2,&accum[acc_index]);
	} else {
	   SQR(&accum[half_power_index],&accum[acc_index]);
	}
      }
    }
  }
  /* if the accum1 isn't set, Then there is something wrong with our logic
   * above and is an internal programming error.
   */
#if MP_ARGCHK == 2
  assert(MP_USED(&accum1) != 0);
#endif

  /* accum1 already holds the power for the first window (preloaded
   * above); it becomes the running accumulator */
  pa1 = &accum1;
  pa2 = &accum2;

  /* The top window was consumed above, so start one window lower. */
  for (expOff = bits_in_exponent - window_bits*2; expOff >= 0; expOff -= window_bits) {
    mp_size smallExp;
    /* mpl_get_bits returns the window value through res */
    MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
    smallExp = (mp_size)res;

    /* the squaring loops are unrolled per window size */
    switch (window_bits) {
    case 1:
	if (!smallExp) {
	    SQR(pa1,pa2); SWAPPA;
	} else if (smallExp & 1) {
	    SQR(pa1,pa2); MUL_NOWEAVE(montBase,pa2,pa1);
	} else {
	    abort();
	}
	break;
    case 6:
	/* two extra squarings, then share the four of case 4 */
	SQR(pa1,pa2); SQR(pa2,pa1);
	/* fall through */
    case 4:
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
	MUL(smallExp, pa1,pa2); SWAPPA;
	break;
    case 5:
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
	SQR(pa1,pa2); MUL(smallExp,pa2,pa1);
	break;
    default:
	abort(); /* could do a loop? */
    }
  }

  res = s_mp_redc(pa1, mmm);
  mp_exch(pa1, result);

CLEANUP:
  mp_clear(&accum1);
  mp_clear(&accum2);
  mp_clear(&accum[0]);
  mp_clear(&accum[1]);
  mp_clear(&accum[2]);
  mp_clear(&accum[3]);
  mp_clear(&tmp);
  /* PORT_Memset(powers,0,num_powers*nLen*sizeof(mp_digit)); */
  free(powersArray);
  return res;
}
/* integer signed division.
 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
 * HAC pp.598 Algorithm 14.20
 *
 * Note that the description in HAC is horribly
 * incomplete.  For example, it doesn't consider
 * the case where digits are removed from 'x' in
 * the inner loop.  It also doesn't consider the
 * case that y has fewer than three digits, etc..
 *
 * The overall algorithm is as described as
 * 14.20 from HAC but fixed to treat these cases.
 *
 * Either c or d may be NULL when that output is not wanted.
 * Returns MP_VAL when b is zero, the failing helper's error code when an
 * intermediate step fails, and MP_OKAY otherwise.
 */
int mp_div MPA(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
  mp_int  q, x, y, t1, t2;
  int     res, n, t, i, norm, neg;

  /* is divisor zero ? */
  if (mp_iszero (b) == 1) {
    return MP_VAL;
  }

  /* if a < b then q=0, r = a */
  if (mp_cmp_mag (a, b) == MP_LT) {
    if (d != NULL) {
      res = mp_copy (MPST, a, d);
    } else {
      res = MP_OKAY;
    }
    if (c != NULL) {
      mp_zero (c);
    }
    return res;
  }

  /* the quotient starts with every digit slot counted as used; it is
   * clamped before being handed back */
  if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
    return res;
  }
  q.used = a->used + 2;

  /* each later failure unwinds through the matching LBL_* label so that
   * only what was initialized gets cleared */
  if ((res = mp_init (&t1)) != MP_OKAY) {
    goto LBL_Q;
  }

  if ((res = mp_init (&t2)) != MP_OKAY) {
    goto LBL_T1;
  }

  if ((res = mp_init_copy (MPST, &x, a)) != MP_OKAY) {
    goto LBL_T2;
  }

  if ((res = mp_init_copy (MPST, &y, b)) != MP_OKAY) {
    goto LBL_X;
  }

  /* fix the sign */
  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
  x.sign = y.sign = MP_ZPOS;

  /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
  norm = mp_count_bits(&y) % DIGIT_BIT;
  if (norm < (int)(DIGIT_BIT-1)) {
     norm = (DIGIT_BIT-1) - norm;
     if ((res = mp_mul_2d (MPST, &x, norm, &x)) != MP_OKAY) {
       goto LBL_Y;
     }
     if ((res = mp_mul_2d (MPST, &y, norm, &y)) != MP_OKAY) {
       goto LBL_Y;
     }
  } else {
     norm = 0;
  }

  /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
  n = x.used - 1;
  t = y.used - 1;

  /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
  if ((res = mp_lshd (MPST, &y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
    goto LBL_Y;
  }

  while (mp_cmp (&x, &y) != MP_LT) {
    ++(q.dp[n - t]);
    if ((res = mp_sub (MPST, &x, &y, &x)) != MP_OKAY) {
      goto LBL_Y;
    }
  }

  /* reset y by shifting it back down */
  mp_rshd (&y, n - t);

  /* step 3. for i from n down to (t + 1) */
  for (i = n; i >= (t + 1); i--) {
    /* digits may have been removed from x by an earlier iteration */
    if (i > x.used) {
      continue;
    }

    /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
     * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
    if (x.dp[i] == y.dp[t]) {
      q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
    } else {
      mp_word tmp;
      tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
      tmp |= ((mp_word) x.dp[i - 1]);
      tmp /= ((mp_word) y.dp[t]);
      if (tmp > (mp_word) MP_MASK)
        tmp = MP_MASK;
      q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
    }

    /* while (q{i-t-1} * (yt * b + y{t-1})) >
             xi * b**2 + xi-1 * b + xi-2

       do q{i-t-1} -= 1;
    */
    /* pre-increment so that the first pass of the do-loop tests the
     * estimate itself */
    q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
    do {
      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;

      /* find left hand */
      mp_zero (&t1);
      t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
      t1.dp[1] = y.dp[t];
      t1.used = 2;
      if ((res = mp_mul_d (MPST, &t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
        goto LBL_Y;
      }

      /* find right hand */
      t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
      t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
      t2.dp[2] = x.dp[i];
      t2.used = 3;
    } while (mp_cmp_mag(&t1, &t2) == MP_GT);

    /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
    if ((res = mp_mul_d (MPST, &y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
      goto LBL_Y;
    }

    if ((res = mp_lshd (MPST, &t1, i - t - 1)) != MP_OKAY) {
      goto LBL_Y;
    }

    if ((res = mp_sub (MPST, &x, &t1, &x)) != MP_OKAY) {
      goto LBL_Y;
    }

    /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
    if (x.sign == MP_NEG) {
      if ((res = mp_copy (MPST, &y, &t1)) != MP_OKAY) {
        goto LBL_Y;
      }
      if ((res = mp_lshd (MPST, &t1, i - t - 1)) != MP_OKAY) {
        goto LBL_Y;
      }
      if ((res = mp_add (MPST, &x, &t1, &x)) != MP_OKAY) {
        goto LBL_Y;
      }

      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
    }
  }

  /* now q is the quotient and x is the remainder
   * [which we have to normalize]
   */

  /* get sign before writing to c */
  x.sign = x.used == 0 ? MP_ZPOS : a->sign;

  /* NOTE(review): the results of mp_managed_copy() and mp_div_2d() below
   * are discarded and res is set to MP_OKAY unconditionally; confirm
   * whether these helpers can fail. */
  if (c != NULL) {
    mp_clamp (&q);
    mp_managed_copy (MPST, &q, c);
    c->sign = neg;
  }

  if (d != NULL) {
    /* undo the normalization shift on the remainder */
    mp_div_2d (MPST, &x, norm, &x, NULL);
    mp_managed_copy (MPST, &x, d);
  }

  res = MP_OKAY;

LBL_Y:mp_clear (&y);
LBL_X:mp_clear (&x);
LBL_T2:mp_clear (&t2);
LBL_T1:mp_clear (&t1);
LBL_Q:mp_clear (&q);
  return res;
}
/*
 * mpp_pprime(a, nt)
 *
 * Probabilistic primality test: runs nt rounds on the candidate a.  With
 * amo = a - 1 = m * 2**b, each round picks a random x, computes
 * z = x**m mod a and then squares z up to b - 1 times, looking for 1 or
 * a - 1.
 *
 * Returns MP_YES when every round passed, MP_NO when a round failed or
 * when a - 1 has no trailing zero bits, or the error code of the first
 * helper that failed.
 *
 * Changes from the previous revision:
 *   - a random x of 0 or 1 is now discarded and redrawn.  x == 0 gives
 *     z == 0 on every step, which reported any candidate (including a
 *     prime) as composite; x == 1 passes trivially and wasted a round.
 *   - the status returned by s_mp_pad() is checked.
 */
mp_err  mpp_pprime(mp_int *a, int nt)
{
  mp_err   res;
  mp_int   x, amo, m, z;	/* "amo" = "a minus one" */
  int      iter;
  unsigned int jx;
  mp_size  b;

  ARGCHK(a != NULL, MP_BADARG);

  /* Null the digit pointers so CLEANUP can clear every temporary. */
  MP_DIGITS(&x) = 0;
  MP_DIGITS(&amo) = 0;
  MP_DIGITS(&m) = 0;
  MP_DIGITS(&z) = 0;

  /* Initialize temporaries... */
  MP_CHECKOK( mp_init(&amo));
  /* Compute amo = a - 1 for what follows...    */
  MP_CHECKOK( mp_sub_d(a, 1, &amo) );

  b = mp_trailing_zeros(&amo);
  if (!b) { /* a was even ? */
    res = MP_NO;
    goto CLEANUP;
  }

  MP_CHECKOK( mp_init_size(&x, MP_USED(a)) );
  MP_CHECKOK( mp_init(&z) );
  MP_CHECKOK( mp_init(&m) );
  /* m = amo / 2**b */
  MP_CHECKOK( mp_div_2d(&amo, b, &m, 0) );

  /* Do the test nt times... */
  for(iter = 0; iter < nt; iter++) {

    /* Choose a random value for 1 < x < a      */
    MP_CHECKOK( s_mp_pad(&x, USED(a)) );
    mpp_random(&x);
    MP_CHECKOK( mp_mod(&x, a, &x) );
    if(mp_cmp_d(&x, 1) <= 0) {
      iter--;		/* don't count this iteration */
      continue;		/* choose a new x */
    }

    /* Compute z = (x ** m) mod a               */
    MP_CHECKOK( mp_exptmod(&x, &m, a, &z) );

    if(mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
      res = MP_YES;
      continue;
    }

    res = MP_NO;  /* just in case the following for loop never executes. */
    for (jx = 1; jx < b; jx++) {
      /* z = z^2 (mod a) */
      MP_CHECKOK( mp_sqrmod(&z, a, &z) );
      res = MP_NO;	/* previous line set res to MP_YES */

      if(mp_cmp_d(&z, 1) == 0) {
	break;
      }
      if(mp_cmp(&z, &amo) == 0) {
	res = MP_YES;
	break;
      }
    } /* end testing loop */

    /* If the test passes, we will continue iterating, but a failed
       test means the candidate is definitely NOT prime, so we will
       immediately break out of this loop
     */
    if(res == MP_NO)
      break;

  } /* end iterations loop */

CLEANUP:
  mp_clear(&m);
  mp_clear(&z);
  mp_clear(&x);
  mp_clear(&amo);
  return res;

} /* end mpp_pprime() */
/* Karatsuba squaring, computes b = a*a using three
 * half size squarings.
 *
 * With a = hi * R**half + lo (R being the digit radix), the result is
 *
 *   hi*hi * R**(2*half) + ((hi + lo)**2 - (lo*lo + hi*hi)) * R**half + lo*lo
 *
 * Returns MP_OKAY on success and MP_MEM when any allocation or
 * intermediate operation fails.
 */
int mp_karatsuba_sqr(const mp_int *a, mp_int *b)
{
   mp_int lo, hi, mid, sum, lo_sq, hi_sq;
   int    half, k;
   int    status = MP_MEM;

   /* split point: half of the digit count, rounded down */
   half = a->used >> 1;

   /* set up the two halves */
   if (mp_init_size(&lo, half) != MP_OKAY)
      goto done;
   if (mp_init_size(&hi, a->used - half) != MP_OKAY)
      goto free_lo;

   /* set up the work values */
   if (mp_init_size(&mid, a->used * 2) != MP_OKAY)
      goto free_hi;
   if (mp_init_size(&sum, a->used * 2) != MP_OKAY)
      goto free_mid;
   if (mp_init_size(&lo_sq, half * 2) != MP_OKAY)
      goto free_sum;
   if (mp_init_size(&hi_sq, (a->used - half) * 2) != MP_OKAY)
      goto free_lo_sq;

   /* low digits go to lo, the remaining digits to hi */
   for (k = 0; k < half; k++) {
      lo.dp[k] = a->dp[k];
   }
   for (k = half; k < a->used; k++) {
      hi.dp[k - half] = a->dp[k];
   }

   lo.used = half;
   hi.used = a->used - half;

   /* only the low half can have picked up leading zero digits */
   mp_clamp(&lo);

   if (mp_sqr(&lo, &lo_sq) == MP_OKAY &&            /* lo_sq = lo*lo */
       mp_sqr(&hi, &hi_sq) == MP_OKAY &&            /* hi_sq = hi*hi */
       s_mp_add(&hi, &lo, &mid) == MP_OKAY &&       /* mid = hi + lo */
       mp_sqr(&mid, &mid) == MP_OKAY &&             /* mid = (hi + lo)**2 */
       s_mp_add(&lo_sq, &hi_sq, &sum) == MP_OKAY && /* sum = lo_sq + hi_sq */
       s_mp_sub(&mid, &sum, &mid) == MP_OKAY &&     /* mid = (hi+lo)**2 - sum */
       mp_lshd(&mid, half) == MP_OKAY &&            /* mid <<= half digits */
       mp_lshd(&hi_sq, half * 2) == MP_OKAY &&      /* hi_sq <<= 2*half digits */
       mp_add(&lo_sq, &mid, &mid) == MP_OKAY &&     /* mid = lo_sq + mid */
       mp_add(&mid, &hi_sq, b) == MP_OKAY) {        /* b = mid + hi_sq */
      status = MP_OKAY;
   }

   mp_clear(&hi_sq);
free_lo_sq:
   mp_clear(&lo_sq);
free_sum:
   mp_clear(&sum);
free_mid:
   mp_clear(&mid);
free_hi:
   mp_clear(&hi);
free_lo:
   mp_clear(&lo);
done:
   return status;
}
/* Karatsuba squaring, computes b = a*a using three
 * half size squarings.
 *
 * The input is split at half its digit count into a low part and a high
 * part; the low and high squares and the square of their sum are combined
 * as
 *
 *   high**2 << (2*split) + ((high + low)**2 - (low**2 + high**2)) << split
 *     + low**2
 *
 * where the shifts are by whole digits.
 *
 * Returns MP_OKAY on success and MP_MEM when any allocation or
 * intermediate operation fails.
 */
int mp_karatsuba_sqr (mp_int * a, mp_int * b)
{
  mp_int  low, high, cross, squares, low2, high2;
  int     split, pos;
  int     rc = MP_MEM;

  /* split point: half of the digit count, rounded down */
  split = USED(a) >> 1;

  /* allocate the halves and the work values, unwinding on failure */
  if (mp_init_size (&low, split) != MP_OKAY)
    goto out;
  if (mp_init_size (&high, USED(a) - split) != MP_OKAY)
    goto drop_low;
  if (mp_init_size (&cross, USED(a) * 2) != MP_OKAY)
    goto drop_high;
  if (mp_init_size (&squares, USED(a) * 2) != MP_OKAY)
    goto drop_cross;
  if (mp_init_size (&low2, split * 2) != MP_OKAY)
    goto drop_squares;
  if (mp_init_size (&high2, (USED(a) - split) * 2) != MP_OKAY)
    goto drop_low2;

  /* copy the low digits, then the remaining high digits */
  for (pos = 0; pos < split; pos++) {
    DIGITS(&low)[pos] = DIGITS(a)[pos];
  }
  for (pos = split; pos < USED(a); pos++) {
    DIGITS(&high)[pos - split] = DIGITS(a)[pos];
  }

  SET_USED(&low,split);
  SET_USED(&high,USED(a) - split);

  /* only the low half can have picked up leading zero digits */
  mp_clamp (&low);

  if (mp_sqr (&low, &low2) != MP_OKAY ||               /* low2 = low*low */
      mp_sqr (&high, &high2) != MP_OKAY ||             /* high2 = high*high */
      s_mp_add (&high, &low, &cross) != MP_OKAY ||     /* cross = high + low */
      mp_sqr (&cross, &cross) != MP_OKAY ||            /* cross = (high+low)**2 */
      s_mp_add (&low2, &high2, &squares) != MP_OKAY || /* squares = low2 + high2 */
      s_mp_sub (&cross, &squares, &cross) != MP_OKAY ||/* cross -= squares */
      mp_lshd (&cross, split) != MP_OKAY ||            /* cross <<= split digits */
      mp_lshd (&high2, split * 2) != MP_OKAY ||        /* high2 <<= 2*split digits */
      mp_add (&low2, &cross, &cross) != MP_OKAY ||     /* cross = low2 + cross */
      mp_add (&cross, &high2, b) != MP_OKAY)           /* b = cross + high2 */
    goto drop_high2;

  rc = MP_OKAY;

drop_high2:
  mp_clear (&high2);
drop_low2:
  mp_clear (&low2);
drop_squares:
  mp_clear (&squares);
drop_cross:
  mp_clear (&cross);
drop_high:
  mp_clear (&high);
drop_low:
  mp_clear (&low);
out:
  return rc;
}