Example #1
0
int main(int argc, char *argv[])
{
  int           ntests, prec, ix;
  unsigned int  seed;
  char         *senv;
  clock_t       start, stop;
  double        multime;
  mp_int        a, b, c;

  if((senv = getenv("SEED")) != NULL)
    seed = atoi(senv);
  else
    seed = (unsigned int)time(NULL);

  if(argc < 3) {
    fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
    return 1;
  }

  if((ntests = abs(atoi(argv[1]))) == 0) {
    fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
    return 1;
  }
  if((prec = abs(atoi(argv[2]))) < CHAR_BIT) {
    fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
	    CHAR_BIT);
    return 1;
  }

  prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;

  mp_init_size(&a, prec);
  mp_init_size(&b, prec);
  mp_init_size(&c, 2 * prec);

  srand(seed);
  start = clock();
  for(ix = 0; ix < ntests; ix++) {
    mpp_random_size(&a, prec);
    mpp_random_size(&b, prec);
    mp_mul(&a, &a, &c);
  }
  stop = clock();

  multime = (double)(stop - start) / CLOCKS_PER_SEC;

  printf("Total: %.4f\n", multime);
  printf("Individual: %.4f\n", multime / ntests);

  mp_clear(&a); mp_clear(&b); mp_clear(&c);
  return 0;

}
Example #2
0
/* divide by three (based on routine from MPI and the GMP manual) */
int
mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
{
  mp_int   q;
  mp_word  w, t;
  mp_digit b;
  int      res, ix;
  
  /* b = 2**DIGIT_BIT / 3 */
  b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);

  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
     return res;
  }
  
  q.used = a->used;
  q.sign = a->sign;
  w = 0;
  for (ix = a->used - 1; ix >= 0; ix--) {
     w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
     
     if (w >= 3) {
        t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
        w -= (t << ((mp_word)1)) + t;
        while (w >= 3) {
           t += 1;
           w -= 3;
        }
      } else {
extern void
TclBNInitBignumFromWideUInt(
    mp_int *a,			/* Bignum to initialize */
    Tcl_WideUInt v)		/* Initial value */
{
    int status;
    mp_digit *p;

    /*
     * Allocate enough memory to hold the largest possible Tcl_WideUInt.
     */

    status = mp_init_size(a,
	    (CHAR_BIT * sizeof(Tcl_WideUInt) + DIGIT_BIT - 1) / DIGIT_BIT);
    if (status != MP_OKAY) {
	Tcl_Panic("initialization failure in TclBNInitBignumFromWideUInt");
    }

    a->sign = MP_ZPOS;

    /*
     * Store the magnitude in the bignum.
     */

    p = a->dp;
    while (v) {
	*p++ = (mp_digit) (v & MP_MASK);
	v >>= MP_DIGIT_BIT;
    }
    a->used = p - a->dp;
}
Example #4
0
/* divide by three (based on routine from MPI and the GMP manual) */
int mp_div_3(const mp_int *a, mp_int *c, mp_digit *d)
{
   mp_int   q;
   mp_word  w, t;
   mp_digit b;
   int      res, ix;

   /* b = 2**MP_DIGIT_BIT / 3 */
   b = ((mp_word)1 << (mp_word)MP_DIGIT_BIT) / (mp_word)3;

   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
      return res;
   }

   q.used = a->used;
   q.sign = a->sign;
   w = 0;
   for (ix = a->used - 1; ix >= 0; ix--) {
      w = (w << (mp_word)MP_DIGIT_BIT) | (mp_word)a->dp[ix];

      if (w >= 3u) {
         /* multiply w by [1/3] */
         t = (w * (mp_word)b) >> (mp_word)MP_DIGIT_BIT;

         /* now subtract 3 * [w/3] from w, to get the remainder */
         w -= t+t+t;

         /* fixup the remainder as required since
          * the optimization is not exact.
          */
         while (w >= 3u) {
            t += 1u;
            w -= 3u;
         }
      } else {
Example #5
0
/* creates "a" then copies b into it */
int mp_init_copy (mp_int * a, mp_int * b)
{
  int     res;

  if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
    return res;
  }
  return mp_copy (b, a);
}
/* multiplies |a| * |b| and does not compute the lower digs digits
 * [meant to get the higher part of the product]
 */
int
s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs) {
    mp_int   t;
    int      res, pa, pb, ix, iy;
    mp_digit u;
    mp_word  r;
    mp_digit tmpx, *tmpt, *tmpy;

    /* can we use the fast multiplier? */
#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
    if (((a->used + b->used + 1) < MP_WARRAY) &&
            (MIN(a->used, b->used) < (1 << ((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))))) {
        return fast_s_mp_mul_high_digs(a, b, c, digs);
    }
#endif

    if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {
        return res;
    }
    t.used = a->used + b->used + 1;

    pa = a->used;
    pb = b->used;
    for (ix = 0; ix < pa; ix++) {
        /* clear the carry */
        u = 0;

        /* left hand side of A[ix] * B[iy] */
        tmpx = a->dp[ix];

        /* alias to the address of where the digits will be stored */
        tmpt = &(t.dp[digs]);

        /* alias for where to read the right hand side from */
        tmpy = b->dp + (digs - ix);

        for (iy = digs - ix; iy < pb; iy++) {
            /* calculate the double precision result */
            r = (mp_word) * tmpt +
                ((mp_word)tmpx * (mp_word) * tmpy++) +
                (mp_word)u;

            /* get the lower part */
            *tmpt++ = (mp_digit)(r & ((mp_word)MP_MASK));

            /* carry the carry */
            u = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
        }
        *tmpt = u;
    }
    mp_clamp(&t);
    mp_exch(&t, c);
    mp_clear(&t);
    return MP_OKAY;
}
Example #7
0
int main(int argc, char *argv[])
{
    instant_t    start, finish;
    mp_int       prime, gen, expt, res;
    unsigned int ix, diff;
    int          num;

    srand(time(NULL));

    if(argc < 2) {
        fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]);
        return 1;
    }

    if((num = atoi(argv[1])) < 0)
        num = -num;

    if(num == 0)
        ++num;

    mp_init(&prime);
    mp_init(&gen);
    mp_init(&res);
    mp_read_radix(&prime, g_prime, 16);
    mp_read_radix(&gen, g_gen, 16);

    mp_init_size(&expt, USED(&prime) - 1);
    s_mp_pad(&expt, USED(&prime) - 1);

    printf("Testing %d modular exponentations ... \n", num);

    start = now();
    for(ix = 0; ix < num; ix++) {
        mpp_random(&expt);
        mp_exptmod(&gen, &expt, &prime, &res);
    }
    finish = now();

    diff = (finish.sec - start.sec) * 1000000;
    diff += finish.usec;
    diff -= start.usec;

    printf("%d operations took %u usec (%.3f sec)\n",
           num, diff, (double)diff / 1000000.0);
    printf("That is %.3f sec per operation.\n",
           ((double)diff / 1000000.0) / num);

    mp_clear(&expt);
    mp_clear(&res);
    mp_clear(&gen);
    mp_clear(&prime);

    return 0;
}
Example #8
0
/* single digit division (based on routine from MPI) */
int
mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{
  mp_int  q;
  mp_word w;
  mp_digit t;
  int     res, ix;
  
  if (b == 0) {
     return MP_VAL;
  }
  
  if (b == 3) {
     return mp_div_3(a, c, d);
  }
  
  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
     return res;
  }
  
  q.used = a->used;
  q.sign = a->sign;
  w = 0;
  for (ix = a->used - 1; ix >= 0; ix--) {
     w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
     
     if (w >= b) {
        t = (mp_digit)(w / b);
        w = w % b;
      } else {
        t = 0;
      }
      q.dp[ix] = (mp_digit)t;
  }
  
  if (d != NULL) {
     *d = (mp_digit)w;
  }
  
  if (c != NULL) {
     mp_clamp(&q);
     mp_exch(&q, c);
  }
  mp_clear(&q);
  
  return res;
}
Example #9
0
/* d = a * b (mod c) */
int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
  int     res;
  mp_int  t;

  if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
    return res;
  }

  if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
    mp_clear (&t);
    return res;
  }
  res = mp_mod (&t, c, d);
  mp_clear (&t);
  return res;
}
extern void
TclBNInitBignumFromLong(
    mp_int *a,
    long initVal)
{
    int status;
    unsigned long v;
    mp_digit* p;

    /*
     * Allocate enough memory to hold the largest possible long
     */

    status = mp_init_size(a,
	    (CHAR_BIT * sizeof(long) + DIGIT_BIT - 1) / DIGIT_BIT);
    if (status != MP_OKAY) {
	Tcl_Panic("initialization failure in TclBNInitBignumFromLong");
    }

    /*
     * Convert arg to sign and magnitude.
     */

    if (initVal < 0) {
	a->sign = MP_NEG;
	v = -initVal;
    } else {
	a->sign = MP_ZPOS;
	v = initVal;
    }

    /*
     * Store the magnitude in the bignum.
     */

    p = a->dp;
    while (v) {
	*p++ = (mp_digit) (v & MP_MASK);
	v >>= MP_DIGIT_BIT;
    }
    a->used = p - a->dp;
}
Example #11
0
/* c = |a| * |b| using Karatsuba Multiplication using
 * three half size multiplications
 *
 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
 * let n represent half of the number of digits in
 * the min(a,b)
 *
 * a = a1 * B**n + a0
 * b = b1 * B**n + b0
 *
 * Then, a * b =>
   a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
 *
 * Note that a1b1 and a0b0 are used twice and only need to be
 * computed once.  So in total three half size (half # of
 * digit) multiplications are performed, a0b0, a1b1 and
 * (a1+b1)(a0+b0)
 *
 * Note that a multiplication of half the digits requires
 * 1/4th the number of single precision multiplications so in
 * total after one call 25% of the single precision multiplications
 * are saved.  Note also that the call to mp_mul can end up back
 * in this function if the a0, a1, b0, or b1 are above the threshold.
 * This is known as divide-and-conquer and leads to the famous
 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
 * the standard O(N**2) that the baseline/comba methods use.
 * Generally though the overhead of this method doesn't pay off
 * until a certain size (N ~ 80) is reached.
 */
int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
{
    mp_int  x0, x1, y0, y1, t1, x0y0, x1y1;
    int     B, err;

    /* default the return code to an error */
    err = MP_MEM;

    /* min # of digits */
    B = MIN (a->used, b->used);

    /* now divide in two */
    B = B >> 1;

    /* init copy all the temps */
    if (mp_init_size (&x0, B) != MP_OKAY)
        goto ERR;
    if (mp_init_size (&x1, a->used - B) != MP_OKAY)
        goto X0;
    if (mp_init_size (&y0, B) != MP_OKAY)
        goto X1;
    if (mp_init_size (&y1, b->used - B) != MP_OKAY)
        goto Y0;

    /* init temps */
    if (mp_init_size (&t1, B * 2) != MP_OKAY)
        goto Y1;
    if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
        goto T1;
    if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
        goto X0Y0;

    /* now shift the digits */
    x0.used = y0.used = B;
    x1.used = a->used - B;
    y1.used = b->used - B;

    {
        register int x;
        register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;

        /* we copy the digits directly instead of using higher level functions
         * since we also need to shift the digits
         */
        tmpa = a->dp;
        tmpb = b->dp;

        tmpx = x0.dp;
        tmpy = y0.dp;
        for (x = 0; x < B; x++) {
            *tmpx++ = *tmpa++;
            *tmpy++ = *tmpb++;
        }

        tmpx = x1.dp;
        for (x = B; x < a->used; x++) {
            *tmpx++ = *tmpa++;
        }

        tmpy = y1.dp;
        for (x = B; x < b->used; x++) {
            *tmpy++ = *tmpb++;
        }
    }

    /* only need to clamp the lower words since by definition the
     * upper words x1/y1 must have a known number of digits
     */
    mp_clamp (&x0);
    mp_clamp (&y0);

    /* now calc the products x0y0 and x1y1 */
    /* after this x0 is no longer required, free temp [x0==t2]! */
    if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
        goto X1Y1;          /* x0y0 = x0*y0 */
    if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
        goto X1Y1;          /* x1y1 = x1*y1 */

    /* now calc x1+x0 and y1+y0 */
    if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
        goto X1Y1;          /* t1 = x1 - x0 */
    if (s_mp_add (&y1, &y0, &x0) != MP_OKAY)
        goto X1Y1;          /* t2 = y1 - y0 */
    if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
        goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */

    /* add x0y0 */
    if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
        goto X1Y1;          /* t2 = x0y0 + x1y1 */
    if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY)
        goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */

    /* shift by B */
    if (mp_lshd (&t1, B) != MP_OKAY)
        goto X1Y1;          /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
    if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
        goto X1Y1;          /* x1y1 = x1y1 << 2*B */

    if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
        goto X1Y1;          /* t1 = x0y0 + t1 */
    if (mp_add (&t1, &x1y1, c) != MP_OKAY)
        goto X1Y1;          /* t1 = x0y0 + t1 + x1y1 */

    /* Algorithm succeeded set the return code to MP_OKAY */
    err = MP_OKAY;

X1Y1:
    mp_clear (&x1y1);
X0Y0:
    mp_clear (&x0y0);
T1:
    mp_clear (&t1);
Y1:
    mp_clear (&y1);
Y0:
    mp_clear (&y0);
X1:
    mp_clear (&x1);
X0:
    mp_clear (&x0);
ERR:
    return err;
}
Example #12
0
int main(int argc, char *argv[])
{
  int          ix, num, prec = PRECISION;
  mp_int       a, b, c, d;
  instant_t    start, finish;
  time_t       seed;
  unsigned int d1, d2;

  seed = time(NULL);

  if(argc < 2) {
    fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]);
    return 1;
  }

  if((num = atoi(argv[1])) < 0)
    num = -num;

  printf("Test 5a: Euclid vs. Binary, a GCD speed test\n\n"
	 "Number of tests: %d\n"
	 "Precision:       %d digits\n\n", num, prec);

  mp_init_size(&a, prec);
  mp_init_size(&b, prec);
  mp_init(&c);
  mp_init(&d);

  printf("Verifying accuracy ... \n");
  srand((unsigned int)seed);
  for(ix = 0; ix < num; ix++) {
    mpp_random_size(&a, prec);
    mpp_random_size(&b, prec);

    mp_gcd(&a, &b, &c);
    mp_bgcd(&a, &b, &d);

    if(mp_cmp(&c, &d) != 0) {
      printf("Error!  Results not accurate:\n");
      printf("a = "); mp_print(&a, stdout); fputc('\n', stdout);
      printf("b = "); mp_print(&b, stdout); fputc('\n', stdout);
      printf("c = "); mp_print(&c, stdout); fputc('\n', stdout);
      printf("d = "); mp_print(&d, stdout); fputc('\n', stdout);

      mp_clear(&a); mp_clear(&b); mp_clear(&c); mp_clear(&d);
      return 1;
    }
  }
  mp_clear(&d);
  printf("Accuracy confirmed for the %d test samples\n", num);

  printf("Testing Euclid ... \n");
  srand((unsigned int)seed);
  start = now();
  for(ix = 0; ix < num; ix++) {
    mpp_random_size(&a, prec);
    mpp_random_size(&b, prec);
    mp_gcd(&a, &b, &c);

  }
  finish = now();

  d1 = (finish.sec - start.sec) * 1000000;
  d1 -= start.usec; d1 += finish.usec;

  printf("Testing binary ... \n");
  srand((unsigned int)seed);
  start = now();
  for(ix = 0; ix < num; ix++) {
    mpp_random_size(&a, prec);
    mpp_random_size(&b, prec);
    mp_bgcd(&a, &b, &c);
  }
  finish = now();

  d2 = (finish.sec - start.sec) * 1000000;
  d2 -= start.usec; d2 += finish.usec;

  printf("Euclidean algorithm time: %u usec\n", d1);
  printf("Binary algorithm time:    %u usec\n", d2);
  printf("Improvement:              %.2f%%\n",
         (1.0 - ((double)d2 / (double)d1)) * 100.0);

  mp_clear(&c);
  mp_clear(&b);
  mp_clear(&a);

  return 0;
}
Example #13
0
int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
{
  mp_int  M[TAB_SIZE], res;
  mp_digit buf, mp;
  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;

  /* use a pointer to the reduction algorithm.  This allows us to use
   * one of many reduction algorithms without modding the guts of
   * the code with if statements everywhere.
   */
  int     (*redux)(mp_int*,mp_int*,mp_digit);

  /* find window size */
  x = mp_count_bits (X);
  if (x <= 7) {
    winsize = 2;
  } else if (x <= 36) {
    winsize = 3;
  } else if (x <= 140) {
    winsize = 4;
  } else if (x <= 450) {
    winsize = 5;
  } else if (x <= 1303) {
    winsize = 6;
  } else if (x <= 3529) {
    winsize = 7;
  } else {
    winsize = 8;
  }

#ifdef MP_LOW_MEM
  if (winsize > 5) {
     winsize = 5;
  }
#endif

  /* init M array */
  /* init first cell */
  if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
     return err;
  }

  /* now init the second half of the array */
  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
    if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
      for (y = 1<<(winsize-1); y < x; y++) {
        mp_clear (&M[y]);
      }
      mp_clear(&M[1]);
      return err;
    }
  }

  /* determine and setup reduction code */
  if (redmode == 0) {
#ifdef BN_MP_MONTGOMERY_SETUP_C     
     /* now setup montgomery  */
     if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
        goto LBL_M;
     }
#else
     err = MP_VAL;
     goto LBL_M;
#endif

     /* automatically pick the comba one if available (saves quite a few calls/ifs) */
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
     if ((((P->used * 2) + 1) < MP_WARRAY) &&
          (P->used < (1 << ((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))))) {
        redux = fast_mp_montgomery_reduce;
     } else 
#endif
     {
#ifdef BN_MP_MONTGOMERY_REDUCE_C
        /* use slower baseline Montgomery method */
        redux = mp_montgomery_reduce;
#else
        err = MP_VAL;
        goto LBL_M;
#endif
     }
  } else if (redmode == 1) {
#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
     /* setup DR reduction for moduli of the form B**k - b */
     mp_dr_setup(P, &mp);
     redux = mp_dr_reduce;
#else
     err = MP_VAL;
     goto LBL_M;
#endif
  } else {
#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
     /* setup DR reduction for moduli of the form 2**k - b */
     if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
        goto LBL_M;
     }
     redux = mp_reduce_2k;
#else
     err = MP_VAL;
     goto LBL_M;
#endif
  }

  /* setup result */
  if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
    goto LBL_M;
  }

  /* create M table
   *

   *
   * The first half of the table is not computed though accept for M[0] and M[1]
   */

  if (redmode == 0) {
#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
     /* now we need R mod m */
     if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
       goto LBL_RES;
     }

     /* now set M[1] to G * R mod m */
     if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
       goto LBL_RES;
     }
#else
     err = MP_VAL;
     goto LBL_RES;
#endif
  } else {
     mp_set(&res, 1);
     if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
        goto LBL_RES;
     }
  }

  /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
    goto LBL_RES;
  }

  for (x = 0; x < (winsize - 1); x++) {
    if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
      goto LBL_RES;
    }
    if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
      goto LBL_RES;
    }
  }

  /* create upper table */
  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
      goto LBL_RES;
    }
    if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
      goto LBL_RES;
    }
  }

  /* set initial mode and bit cnt */
  mode   = 0;
  bitcnt = 1;
  buf    = 0;
  digidx = X->used - 1;
  bitcpy = 0;
  bitbuf = 0;

  for (;;) {
    /* grab next digit as required */
    if (--bitcnt == 0) {
      /* if digidx == -1 we are out of digits so break */
      if (digidx == -1) {
        break;
      }
      /* read next digit and reset bitcnt */
      buf    = X->dp[digidx--];
      bitcnt = (int)DIGIT_BIT;
    }

    /* grab the next msb from the exponent */
    y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
    buf <<= (mp_digit)1;

    /* if the bit is zero and mode == 0 then we ignore it
     * These represent the leading zero bits before the first 1 bit
     * in the exponent.  Technically this opt is not required but it
     * does lower the # of trivial squaring/reductions used
     */
    if ((mode == 0) && (y == 0)) {
      continue;
    }

    /* if the bit is zero and mode == 1 then we square */
    if ((mode == 1) && (y == 0)) {
      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
        goto LBL_RES;
      }
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
        goto LBL_RES;
      }
      continue;
    }

    /* else we add it to the window */
    bitbuf |= (y << (winsize - ++bitcpy));
    mode    = 2;

    if (bitcpy == winsize) {
      /* ok window is filled so square as required and multiply  */
      /* square first */
      for (x = 0; x < winsize; x++) {
        if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
          goto LBL_RES;
        }
        if ((err = redux (&res, P, mp)) != MP_OKAY) {
          goto LBL_RES;
        }
      }

      /* then multiply */
      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
        goto LBL_RES;
      }
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
        goto LBL_RES;
      }

      /* empty window and reset */
      bitcpy = 0;
      bitbuf = 0;
      mode   = 1;
    }
  }

  /* if bits remain then square/multiply */
  if ((mode == 2) && (bitcpy > 0)) {
    /* square then multiply if the bit is set */
    for (x = 0; x < bitcpy; x++) {
      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
        goto LBL_RES;
      }
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
        goto LBL_RES;
      }

      /* get next bit of the window */
      bitbuf <<= 1;
      if ((bitbuf & (1 << winsize)) != 0) {
        /* then multiply */
        if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
          goto LBL_RES;
        }
        if ((err = redux (&res, P, mp)) != MP_OKAY) {
          goto LBL_RES;
        }
      }
    }
  }

  if (redmode == 0) {
     /* fixup result if Montgomery reduction is used
      * recall that any value in a Montgomery system is
      * actually multiplied by R mod n.  So we have
      * to reduce one more time to cancel out the factor
      * of R.
      */
     if ((err = redux(&res, P, mp)) != MP_OKAY) {
       goto LBL_RES;
     }
  }

  /* swap res with Y */
  mp_exch (&res, Y);
  err = MP_OKAY;
LBL_RES:mp_clear (&res);
LBL_M:
  mp_clear(&M[1]);
  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
    mp_clear (&M[x]);
  }
  return err;
}
Example #14
0
/* this function is less generic than mp_n_root, simpler and faster */
int mp_sqrt(const mp_int *arg, mp_int *ret)
{
   int res;
   mp_int t1, t2;
   int i, j, k;
#ifndef NO_FLOATING_POINT
   volatile double d;
   mp_digit dig;
#endif

   /* must be positive */
   if (arg->sign == MP_NEG) {
      return MP_VAL;
   }

   /* easy out */
   if (mp_iszero(arg) == MP_YES) {
      mp_zero(ret);
      return MP_OKAY;
   }

   i = (arg->used / 2) - 1;
   j = 2 * i;
   if ((res = mp_init_size(&t1, i+2)) != MP_OKAY) {
      return res;
   }

   if ((res = mp_init(&t2)) != MP_OKAY) {
      goto E2;
   }

   for (k = 0; k < i; ++k) {
      t1.dp[k] = (mp_digit) 0;
   }

#ifndef NO_FLOATING_POINT

   /* Estimate the square root using the hardware floating point unit. */

   d = 0.0;
   for (k = arg->used-1; k >= j; --k) {
      d = ldexp(d, DIGIT_BIT) + (double)(arg->dp[k]);
   }

   /*
    * At this point, d is the nearest floating point number to the most
    * significant 1 or 2 mp_digits of arg. Extract its square root.
    */

   d = sqrt(d);

   /* dig is the most significant mp_digit of the square root */

   dig = (mp_digit) ldexp(d, -DIGIT_BIT);

   /*
    * If the most significant digit is nonzero, find the next digit down
    * by subtracting DIGIT_BIT times thie most significant digit.
    * Subtract one from the result so that our initial estimate is always
    * low.
    */

   if (dig) {
      t1.used = i+2;
      d -= ldexp((double) dig, DIGIT_BIT);
      if (d >= 1.0) {
         t1.dp[i+1] = dig;
         t1.dp[i] = ((mp_digit) d) - 1;
      } else {
         t1.dp[i+1] = dig-1;
         t1.dp[i] = MP_DIGIT_MAX;
      }
   } else {
      t1.used = i+1;
      t1.dp[i] = ((mp_digit) d) - 1;
   }

#else

   /* Estimate the square root as having 1 in the most significant place. */

   t1.used = i + 2;
   t1.dp[i+1] = (mp_digit) 1;
   t1.dp[i] = (mp_digit) 0;

#endif

   /* t1 > 0  */
   if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {
      goto E1;
   }
   if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {
      goto E1;
   }
   if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {
      goto E1;
   }
   /* And now t1 > sqrt(arg) */
   do {
      if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {
         goto E1;
      }
      if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {
         goto E1;
      }
      if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {
         goto E1;
      }
      /* t1 >= sqrt(arg) >= t2 at this point */
   } while (mp_cmp_mag(&t1, &t2) == MP_GT);

   mp_exch(&t1, ret);

E1:
   mp_clear(&t2);
E2:
   mp_clear(&t1);
   return res;
}
Example #15
0
/* Do modular exponentiation using floating point multiply code. */
mp_err mp_exptmod_f(const mp_int *   montBase, 
                    const mp_int *   exponent, 
		    const mp_int *   modulus, 
		    mp_int *         result, 
		    mp_mont_modulus *mmm, 
		    int              nLen, 
		    mp_size          bits_in_exponent, 
		    mp_size          window_bits,
		    mp_size          odd_ints)
{
  mp_digit *mResult;
  double   *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp;
  double    dn0;
  mp_size   i;
  mp_err    res;
  int       expOff;
  int       dSize = 0, oddPowSize, dTmpSize;
  mp_int    accum1;
  double   *oddPowers[MAX_ODD_INTS];

  /* function for computing n0prime only works if n0 is odd */

  MP_DIGITS(&accum1) = 0;

  for (i = 0; i < MAX_ODD_INTS; ++i)
    oddPowers[i] = 0;

  MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );

  mp_set(&accum1, 1);
  MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
  MP_CHECKOK( s_mp_pad(&accum1, nLen) );

  oddPowSize = 2 * nLen + 1;
  dTmpSize   = 2 * oddPowSize;
  dSize = sizeof(double) * (nLen * 4 + 1 + 
			    ((odd_ints + 1) * oddPowSize) + dTmpSize);
  dBuf   = (double *)malloc(dSize);
  dm1    = dBuf;		/* array of d32 */
  dn     = dBuf   + nLen;	/* array of d32 */
  dSqr   = dn     + nLen;    	/* array of d32 */
  d16Tmp = dSqr   + nLen;	/* array of d16 */
  dTmp   = d16Tmp + oddPowSize;

  for (i = 0; i < odd_ints; ++i) {
      oddPowers[i] = dTmp;
      dTmp += oddPowSize;
  }
  mResult = (mp_digit *)(dTmp + dTmpSize);	/* size is nLen + 1 */

  /* Make dn and dn0 */
  conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);
  dn0 = (double)(mmm->n0prime & 0xffff);

  /* Make dSqr */
  conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen);
  mont_mulf_noconv(mResult, dm1, oddPowers[0], 
		   dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
  conv_i32_to_d32(dSqr, mResult, nLen);

  for (i = 1; i < odd_ints; ++i) {
    mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1], 
		     dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
    conv_i32_to_d16(oddPowers[i], mResult, nLen);
  }

  s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */

  for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
    mp_size smallExp;
    MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
    smallExp = (mp_size)res;

    if (window_bits == 1) {
      if (!smallExp) {
	SQR;
      } else if (smallExp & 1) {
	SQR; MUL(0); 
      } else {
	ABORT;
      }
    } else if (window_bits == 4) {
      if (!smallExp) {
	SQR; SQR; SQR; SQR;
      } else if (smallExp & 1) {
	SQR; SQR; SQR; SQR; MUL(smallExp/2); 
      } else if (smallExp & 2) {
	SQR; SQR; SQR; MUL(smallExp/4); SQR; 
      } else if (smallExp & 4) {
	SQR; SQR; MUL(smallExp/8); SQR; SQR; 
      } else if (smallExp & 8) {
	SQR; MUL(smallExp/16); SQR; SQR; SQR; 
      } else {
	ABORT;
      }
    } else if (window_bits == 5) {
      if (!smallExp) {
	SQR; SQR; SQR; SQR; SQR; 
      } else if (smallExp & 1) {
	SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2);
      } else if (smallExp & 2) {
	SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR;
      } else if (smallExp & 4) {
	SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR;
      } else if (smallExp & 8) {
	SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR;
      } else if (smallExp & 0x10) {
	SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR;
      } else {
	ABORT;
      }
    } else if (window_bits == 6) {
      if (!smallExp) {
	SQR; SQR; SQR; SQR; SQR; SQR;
      } else if (smallExp & 1) {
	SQR; SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2); 
      } else if (smallExp & 2) {
	SQR; SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR; 
      } else if (smallExp & 4) {
	SQR; SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR; 
      } else if (smallExp & 8) {
	SQR; SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR; 
      } else if (smallExp & 0x10) {
	SQR; SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR; 
      } else if (smallExp & 0x20) {
	SQR; MUL(smallExp/64); SQR; SQR; SQR; SQR; SQR; 
      } else {
	ABORT;
      }
    } else {
      ABORT;
    }
  }

  s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */

  res = s_mp_redc(&accum1, mmm);
  mp_exch(&accum1, result);

CLEANUP:
  mp_clear(&accum1);
  if (dBuf) {
    if (dSize)
      memset(dBuf, 0, dSize);
    free(dBuf);
  }

  return res;
}
Example #16
0
/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
 * k2 * P(x, y), where G is the generator (base point) of the group of
 * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
 * Input and output values are assumed to be NOT field-encoded. Uses
 * algorithm 15 (simultaneous multiple point multiplication) from Brown,
 * Hankerson, Lopez, Menezes. Software Implementation of the NIST
 * Elliptic Curves over Prime Fields. */
mp_err
ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, const mp_int *px,
                                        const mp_int *py, mp_int *rx, mp_int *ry,
                                        const ECGroup *group)
{
        mp_err res = MP_OKAY;
        mp_int precomp[4][4][2];
        const mp_int *a, *b;
        int i, j;
        int ai, bi, d;

        ARGCHK(group != NULL, MP_BADARG);
        ARGCHK(!((k1 == NULL)
                         && ((k2 == NULL) || (px == NULL)
                                 || (py == NULL))), MP_BADARG);

        /* if some arguments are not defined used ECPoint_mul */
        if (k1 == NULL) {
                return ECPoint_mul(group, k2, px, py, rx, ry);
        } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
                return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
        }

        /* initialize precomputation table */
        for (i = 0; i < 4; i++) {
                for (j = 0; j < 4; j++) {
                        MP_DIGITS(&precomp[i][j][0]) = 0;
                        MP_DIGITS(&precomp[i][j][1]) = 0;
                }
        }
        for (i = 0; i < 4; i++) {
                for (j = 0; j < 4; j++) {
                         MP_CHECKOK( mp_init_size(&precomp[i][j][0],
                                         ECL_MAX_FIELD_SIZE_DIGITS, FLAG(k1)) );
                         MP_CHECKOK( mp_init_size(&precomp[i][j][1],
                                         ECL_MAX_FIELD_SIZE_DIGITS, FLAG(k1)) );
                }
        }

        /* fill precomputation table */
        /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */
        if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
                a = k2;
                b = k1;
                if (group->meth->field_enc) {
                        MP_CHECKOK(group->meth->
                                           field_enc(px, &precomp[1][0][0], group->meth));
                        MP_CHECKOK(group->meth->
                                           field_enc(py, &precomp[1][0][1], group->meth));
                } else {
                        MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
                        MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
                }
                MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0]));
                MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
        } else {
                a = k1;
                b = k2;
                MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
                MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
                if (group->meth->field_enc) {
                        MP_CHECKOK(group->meth->
                                           field_enc(px, &precomp[0][1][0], group->meth));
                        MP_CHECKOK(group->meth->
                                           field_enc(py, &precomp[0][1][1], group->meth));
                } else {
                        MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
                        MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
                }
        }
        /* precompute [*][0][*] */
        mp_zero(&precomp[0][0][0]);
        mp_zero(&precomp[0][0][1]);
        MP_CHECKOK(group->
                           point_dbl(&precomp[1][0][0], &precomp[1][0][1],
                                                 &precomp[2][0][0], &precomp[2][0][1], group));
        MP_CHECKOK(group->
                           point_add(&precomp[1][0][0], &precomp[1][0][1],
                                                 &precomp[2][0][0], &precomp[2][0][1],
                                                 &precomp[3][0][0], &precomp[3][0][1], group));
        /* precompute [*][1][*] */
        for (i = 1; i < 4; i++) {
                MP_CHECKOK(group->
                                   point_add(&precomp[0][1][0], &precomp[0][1][1],
                                                         &precomp[i][0][0], &precomp[i][0][1],
                                                         &precomp[i][1][0], &precomp[i][1][1], group));
        }
        /* precompute [*][2][*] */
        MP_CHECKOK(group->
                           point_dbl(&precomp[0][1][0], &precomp[0][1][1],
                                                 &precomp[0][2][0], &precomp[0][2][1], group));
        for (i = 1; i < 4; i++) {
                MP_CHECKOK(group->
                                   point_add(&precomp[0][2][0], &precomp[0][2][1],
                                                         &precomp[i][0][0], &precomp[i][0][1],
                                                         &precomp[i][2][0], &precomp[i][2][1], group));
        }
        /* precompute [*][3][*] */
        MP_CHECKOK(group->
                           point_add(&precomp[0][1][0], &precomp[0][1][1],
                                                 &precomp[0][2][0], &precomp[0][2][1],
                                                 &precomp[0][3][0], &precomp[0][3][1], group));
        for (i = 1; i < 4; i++) {
                MP_CHECKOK(group->
                                   point_add(&precomp[0][3][0], &precomp[0][3][1],
                                                         &precomp[i][0][0], &precomp[i][0][1],
                                                         &precomp[i][3][0], &precomp[i][3][1], group));
        }

        d = (mpl_significant_bits(a) + 1) / 2;

        /* R = inf */
        mp_zero(rx);
        mp_zero(ry);

        for (i = d - 1; i >= 0; i--) {
                ai = MP_GET_BIT(a, 2 * i + 1);
                ai <<= 1;
                ai |= MP_GET_BIT(a, 2 * i);
                bi = MP_GET_BIT(b, 2 * i + 1);
                bi <<= 1;
                bi |= MP_GET_BIT(b, 2 * i);
                /* R = 2^2 * R */
                MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
                MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
                /* R = R + (ai * A + bi * B) */
                MP_CHECKOK(group->
                                   point_add(rx, ry, &precomp[ai][bi][0],
                                                         &precomp[ai][bi][1], rx, ry, group));
        }

        if (group->meth->field_dec) {
                MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
                MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
        }

  CLEANUP:
        for (i = 0; i < 4; i++) {
                for (j = 0; j < 4; j++) {
                        mp_clear(&precomp[i][j][0]);
                        mp_clear(&precomp[i][j][1]);
                }
        }
        return res;
}
Example #17
0
mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent, 
		  const mp_int *modulus, mp_int *result)
{
  const mp_int *base;
  mp_size bits_in_exponent, i, window_bits, odd_ints;
  mp_err  res;
  int     nLen;
  mp_int  montBase, goodBase;
  mp_mont_modulus mmm;
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  static unsigned int max_window_bits;
#endif

  /* function for computing n0prime only works if n0 is odd */
  if (!mp_isodd(modulus))
    return s_mp_exptmod(inBase, exponent, modulus, result);

  MP_DIGITS(&montBase) = 0;
  MP_DIGITS(&goodBase) = 0;

  if (mp_cmp(inBase, modulus) < 0) {
    base = inBase;
  } else {
    MP_CHECKOK( mp_init(&goodBase) );
    base = &goodBase;
    MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
  }

  nLen  = MP_USED(modulus);
  MP_CHECKOK( mp_init_size(&montBase, 2 * nLen + 2) );

  mmm.N = *modulus;			/* a copy of the mp_int struct */

  /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
  **		where n0 = least significant mp_digit of N, the modulus.
  */
  mmm.n0prime = 0 - s_mp_invmod_radix( MP_DIGIT(modulus, 0) );

  MP_CHECKOK( s_mp_to_mont(base, &mmm, &montBase) );

  bits_in_exponent = mpl_significant_bits(exponent);
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  if (mp_using_cache_safe_exp) {
    if (bits_in_exponent > 780)
	window_bits = 6;
    else if (bits_in_exponent > 256)
	window_bits = 5;
    else if (bits_in_exponent > 20)
	window_bits = 4;
       /* RSA public key exponents are typically under 20 bits (common values 
        * are: 3, 17, 65537) and a 4-bit window is inefficient
        */
    else 
	window_bits = 1;
  } else
#endif
  if (bits_in_exponent > 480)
    window_bits = 6;
  else if (bits_in_exponent > 160)
    window_bits = 5;
  else if (bits_in_exponent > 20)
    window_bits = 4;
  /* RSA public key exponents are typically under 20 bits (common values 
   * are: 3, 17, 65537) and a 4-bit window is inefficient
   */
  else 
    window_bits = 1;

#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  /*
   * clamp the window size based on
   * the cache line size.
   */
  if (!max_window_bits) {
    unsigned long cache_size = s_mpi_getProcessorLineSize();
    /* processor has no cache, use 'fast' code always */
    if (cache_size == 0) {
      mp_using_cache_safe_exp = 0;
    } 
    if ((cache_size == 0) || (cache_size >= 64)) {
      max_window_bits = 6;
    } else if (cache_size >= 32) {
      max_window_bits = 5;
    } else if (cache_size >= 16) {
      max_window_bits = 4;
    } else max_window_bits = 1; /* should this be an assert? */
  }

  /* clamp the window size down before we caclulate bits_in_exponent */
  if (mp_using_cache_safe_exp) {
    if (window_bits > max_window_bits) {
      window_bits = max_window_bits;
    }
  }
#endif

  odd_ints = 1 << (window_bits - 1);
  i = bits_in_exponent % window_bits;
  if (i != 0) {
    bits_in_exponent += window_bits - i;
  } 

#ifdef MP_USING_MONT_MULF
  if (mp_using_mont_mulf) {
    MP_CHECKOK( s_mp_pad(&montBase, nLen) );
    res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen, 
		     bits_in_exponent, window_bits, odd_ints);
  } else
#endif
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  if (mp_using_cache_safe_exp) {
    res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen, 
		     bits_in_exponent, window_bits, 1 << window_bits);
  } else
#endif
  res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen, 
		     bits_in_exponent, window_bits, odd_ints);

CLEANUP:
  mp_clear(&montBase);
  mp_clear(&goodBase);
  /* Don't mp_clear mmm.N because it is merely a copy of modulus.
  ** Just zap it.
  */
  memset(&mmm, 0, sizeof mmm);
  return res;
}
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
int s_mp_sqr (mp_int * a, mp_int * b)
{
  mp_int  t;
  int     res, ix, iy, pa;
  mp_word r;
  mp_digit u, tmpx, *tmpt;

  pa = a->used;
  if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
    return res;
  }

  /* default used is maximum possible size */
  t.used = 2*pa + 1;

  for (ix = 0; ix < pa; ix++) {
    /* first calculate the digit at 2*ix */
    /* calculate double precision result */
    r = ((mp_word) t.dp[2*ix]) +
        ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);

    /* store lower part in result */
    t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));

    /* get the carry */
    u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));

    /* left hand side of A[ix] * A[iy] */
    tmpx        = a->dp[ix];

    /* alias for where to store the results */
    tmpt        = t.dp + (2*ix + 1);
    
    for (iy = ix + 1; iy < pa; iy++) {
      /* first calculate the product */
      r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);

      /* now calculate the double precision result, note we use
       * addition instead of *2 since it's easier to optimize
       */
      r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);

      /* store lower part */
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));

      /* get carry */
      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
    }
    /* propagate upwards */
    while (u != ((mp_digit) 0)) {
      r       = ((mp_word) *tmpt) + ((mp_word) u);
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
    }
  }

  mp_clamp (&t);
  mp_exch (&t, b);
  mp_clear (&t);
  return MP_OKAY;
}
Example #19
0
/* single digit division (based on routine from MPI) */
int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{
  mp_int  q;
  mp_word w;
  mp_digit t;
  int     res, ix;

  /* cannot divide by zero */
  if (b == 0) {
     return MP_VAL;
  }

  /* quick outs */
  if (b == 1 || mp_iszero(a) == 1) {
     if (d != NULL) {
        *d = 0;
     }
     if (c != NULL) {
        return mp_copy(a, c);
     }
     return MP_OKAY;
  }

  /* power of two ? */
  if (s_is_power_of_two(b, &ix) == 1) {
     if (d != NULL) {
        *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
     }
     if (c != NULL) {
        return mp_div_2d(a, ix, c, NULL);
     }
     return MP_OKAY;
  }

#ifdef BN_MP_DIV_3_C
  /* three? */
  if (b == 3) {
     return mp_div_3(a, c, d);
  }
#endif

  /* no easy answer [c'est la vie].  Just division */
  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
     return res;
  }

  q.used = a->used;
  q.sign = a->sign;
  w = 0;
  for (ix = a->used - 1; ix >= 0; ix--) {
     w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);

     if (w >= b) {
        t = (mp_digit)(w / b);
        w -= ((mp_word)t) * ((mp_word)b);
      } else {
        t = 0;
      }
      q.dp[ix] = (mp_digit)t;
  }

  if (d != NULL) {
     *d = (mp_digit)w;
  }

  if (c != NULL) {
     mp_clamp(&q);
     mp_exch(&q, c);
  }
  mp_clear(&q);

  return res;
}
Example #20
0
/* this function is less generic than mp_n_root, simpler and faster */
int mp_sqrt(mp_int *arg, mp_int *ret) 
{
  int res;
  mp_int t1,t2;
  int i, j, k;
#ifndef NO_FLOATING_POINT
  double d;
  mp_digit dig;
#endif

  /* must be positive */
  if (arg->sign == MP_NEG) {
    return MP_VAL;
  }

  /* easy out */
  if (mp_iszero(arg) == MP_YES) {
    mp_zero(ret);
    return MP_OKAY;
  }
  
  i = (arg->used / 2) - 1;
  j = 2 * i;
  if ((res = mp_init_size(&t1, i+2)) != MP_OKAY) {
      return res;
  }
  
  if ((res = mp_init(&t2)) != MP_OKAY) {
    goto E2;
  }

  for (k = 0; k < i; ++k) {
      t1.dp[k] = (mp_digit) 0;
  }
      
#ifndef NO_FLOATING_POINT

  /* Estimate the square root using the hardware floating point unit. */

  d = 0.0;
  for (k = arg->used-1; k >= j; --k) {
      d = ldexp(d, DIGIT_BIT) + (double) (arg->dp[k]);
  }
  d = sqrt(d);
  dig = (mp_digit) ldexp(d, -DIGIT_BIT);
  if (dig) {
      t1.used = i+2;
      d -= ldexp((double) dig, DIGIT_BIT);
      if (d != 0.0) {
	  t1.dp[i+1] = dig;
	  t1.dp[i] = ((mp_digit) d) - 1;
      } else {
	  t1.dp[i+1] = dig-1;
	  t1.dp[i] = MP_DIGIT_MAX;
      }
  } else {
      t1.used = i+1;
      t1.dp[i] = ((mp_digit) d) - 1;
  }

#else

  /* Estimate the square root as having 1 in the most significant place. */

  t1.used = i + 2;
  t1.dp[i+1] = (mp_digit) 1;
  t1.dp[i] = (mp_digit) 0;

#endif

  /* t1 > 0  */ 
  if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
    goto E1;
  }
  if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) {
    goto E1;
  }
  if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) {
    goto E1;
  }
  /* And now t1 > sqrt(arg) */
  do { 
    if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
      goto E1;
    }
    if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) {
      goto E1;
    }
    if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) {
      goto E1;
    }
    /* t1 >= sqrt(arg) >= t2 at this point */
  } while (mp_cmp_mag(&t1,&t2) == MP_GT);

  mp_exch(&t1,ret);

E1: mp_clear(&t2);
E2: mp_clear(&t1);
  return res;
}
int main(int argc, char *argv[])
{
  int          ix, num, prec = 8;
  unsigned int seed;
  clock_t      start, stop;
  double       sec;

  mp_int     a, m, c;

  if(getenv("SEED") != NULL)
    seed = abs(atoi(getenv("SEED")));
  else 
    seed = (unsigned int)time(NULL);

  if(argc < 2) {
    fprintf(stderr, "Usage: %s <num-tests> [<nbits>]\n", argv[0]);
    return 1;
  }

  if((num = atoi(argv[1])) < 0)
    num = -num;

  if(!num) {
    fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
    return 1;
  }

  if(argc > 2) {
    if((prec = atoi(argv[2])) <= 0)
      prec = 8;
    else 
      prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;

  }
  
  printf("Modular exponentiation timing test\n"
	 "Precision:  %d digits (%d bits)\n"
	 "# of tests: %d\n\n", prec, prec * DIGIT_BIT, num);

  mp_init_size(&a, prec);
  mp_init_size(&m, prec);
  mp_init_size(&c, prec);

  srand(seed);

  start = clock();
  for(ix = 0; ix < num; ix++) {

    mpp_random_size(&a, prec);
    mpp_random_size(&c, prec);
    mpp_random_size(&m, prec);
    /* set msb and lsb of m */
    DIGIT(&m,0) |= 1;
    DIGIT(&m, USED(&m)-1) |= (mp_digit)1 << (DIGIT_BIT - 1);
    if (mp_cmp(&a, &m) > 0)
      mp_sub(&a, &m, &a);

    mp_exptmod(&a, &c, &m, &c);
  }
  stop = clock();

  sec = clk_to_sec(start, stop);

  printf("Total:      %.3f seconds\n", sec);
  printf("Individual: %.3f seconds\n", sec / num);

  mp_clear(&c);
  mp_clear(&a);
  mp_clear(&m);

  return 0;
}
Example #22
0
/* Do modular exponentiation using integer multiply code. */
mp_err mp_exptmod_i(const mp_int *   montBase, 
                    const mp_int *   exponent, 
		    const mp_int *   modulus, 
		    mp_int *         result, 
		    mp_mont_modulus *mmm, 
		    int              nLen, 
		    mp_size          bits_in_exponent, 
		    mp_size          window_bits,
		    mp_size          odd_ints)
{
  mp_int *pa1, *pa2, *ptmp;
  mp_size i;
  mp_err  res;
  int     expOff;
  mp_int  accum1, accum2, power2, oddPowers[MAX_ODD_INTS];

  /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
  /* oddPowers[i] = base ** (2*i + 1); */

  MP_DIGITS(&accum1) = 0;
  MP_DIGITS(&accum2) = 0;
  MP_DIGITS(&power2) = 0;
  for (i = 0; i < MAX_ODD_INTS; ++i) {
    MP_DIGITS(oddPowers + i) = 0;
  }

  MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
  MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );

  MP_CHECKOK( mp_init_copy(&oddPowers[0], montBase) );

  mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2);
  MP_CHECKOK( mp_sqr(montBase, &power2) );	/* power2 = montBase ** 2 */
  MP_CHECKOK( s_mp_redc(&power2, mmm) );

  for (i = 1; i < odd_ints; ++i) {
    mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2);
    MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) );
    MP_CHECKOK( s_mp_redc(oddPowers + i, mmm) );
  }

  /* set accumulator to montgomery residue of 1 */
  mp_set(&accum1, 1);
  MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
  pa1 = &accum1;
  pa2 = &accum2;

  for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
    mp_size smallExp;
    MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
    smallExp = (mp_size)res;

    if (window_bits == 1) {
      if (!smallExp) {
	SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 1) {
	SQR(pa1,pa2); MUL(0,pa2,pa1);
      } else {
	ABORT;
      }
    } else if (window_bits == 4) {
      if (!smallExp) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 1) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	MUL(smallExp/2, pa1,pa2); SWAPPA;
      } else if (smallExp & 2) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); 
	MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 4) {
	SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2); 
	SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 8) {
	SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); 
	SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else {
	ABORT;
      }
    } else if (window_bits == 5) {
      if (!smallExp) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 1) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1);
      } else if (smallExp & 2) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 4) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); 
	MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 8) {
	SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2); 
	SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 0x10) {
	SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2); 
	SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      } else {
	ABORT;
      }
    } else if (window_bits == 6) {
      if (!smallExp) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	SQR(pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 1) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA;
      } else if (smallExp & 2) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 4) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 8) {
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); 
	MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 0x10) {
	SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2); 
	SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 0x20) {
	SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2); 
	SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else {
	ABORT;
      }
    } else {
      ABORT;
    }
  }

  res = s_mp_redc(pa1, mmm);
  mp_exch(pa1, result);

CLEANUP:
  mp_clear(&accum1);
  mp_clear(&accum2);
  mp_clear(&power2);
  for (i = 0; i < odd_ints; ++i) {
    mp_clear(oddPowers + i);
  }
  return res;
}
Example #23
0
int main(int argc, char *argv[])
{
  int        ix, num, prec = 8;
  unsigned   int d;
  instant_t  start, finish;
  time_t     seed;
  mp_int     a, m, c;

  seed = time(NULL);

  if(argc < 2) {
    fprintf(stderr, "Usage: %s <num-tests> [<precision>]\n", argv[0]);
    return 1;
  }

  if((num = atoi(argv[1])) < 0)
    num = -num;

  if(!num) {
    fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
    return 1;
  }

  if(argc > 2) {
    if((prec = atoi(argv[2])) <= 0)
      prec = 8;
  }
  
  printf("Test 3a: Modular exponentiation timing test\n"
	 "Precision:  %d digits (%d bits)\n"
	 "# of tests: %d\n\n", prec, prec * DIGIT_BIT, num);

  mp_init_size(&a, prec);
  mp_init_size(&m, prec);
  mp_init_size(&c, prec);
  s_mp_pad(&a, prec);
  s_mp_pad(&m, prec);
  s_mp_pad(&c, prec);

  printf("Testing modular exponentiation ... \n");
  srand((unsigned int)seed);

  start = now();
  for(ix = 0; ix < num; ix++) {
    mpp_random(&a);
    mpp_random(&c);
    mpp_random(&m);
    mp_exptmod(&a, &c, &m, &c);
  }
  finish = now();

  d = (finish.sec - start.sec) * 1000000;
  d -= start.usec; d += finish.usec;

  printf("Total time elapsed:        %u usec\n", d);
  printf("Time per exponentiation:   %u usec (%.3f sec)\n", 
	 (d / num), (double)(d / num) / 1000000);

  mp_clear(&c);
  mp_clear(&a);
  mp_clear(&m);

  return 0;
}
Example #24
0
int main(int argc, char *argv[])
{
  int           ntests, prec, ix;
  unsigned int  seed;
  clock_t       start, stop;
  double        multime, sqrtime;
  mp_int        a, c;

  seed = (unsigned int)time(NULL);

  if(argc < 3) {
    fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
    return 1;
  }

  if((ntests = abs(atoi(argv[1]))) == 0) {
    fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
    return 1;
  }
  if((prec = abs(atoi(argv[2]))) < CHAR_BIT) {
    fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
	    CHAR_BIT);
    return 1;
  }

  prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;

  mp_init_size(&a, prec);
  mp_init_size(&c, 2 * prec);

  /* Test multiplication by self */
  srand(seed);
  start = clock();
  for(ix = 0; ix < ntests; ix++) {
    mpp_random_size(&a, prec);
    mp_mul(&a, &a, &c);
  }
  stop = clock();

  multime = (double)(stop - start) / CLOCKS_PER_SEC;

  /* Test squaring */
  srand(seed);
  start = clock();
  for(ix = 0; ix < ntests; ix++) {
    mpp_random_size(&a, prec);
    mp_sqr(&a, &c);
  }
  stop = clock();

  sqrtime = (double)(stop - start) / CLOCKS_PER_SEC;

  printf("Multiply: %.4f\n", multime);
  printf("Square:   %.4f\n", sqrtime);
  if(multime < sqrtime) {
    printf("Speedup:  %.1f%%\n", 100.0 * (1.0 - multime / sqrtime));
    printf("Prefer:   multiply\n");
  } else {
    printf("Speedup:  %.1f%%\n", 100.0 * (1.0 - sqrtime / multime));
    printf("Prefer:   square\n");
  }

  mp_clear(&a); mp_clear(&c);
  return 0;

}
Example #25
0
term_t bin2term(apr_byte_t **data, int *bytes_left, atoms_t *atoms, heap_t *heap)
{

#define require(__n) \
	do { \
		if (*bytes_left < __n) \
			return noval; \
		(*bytes_left) -= __n; \
	} while (0)

#define get_byte() (*(*data)++)

	require(1);
	switch (get_byte())
	{
	case 97:
	{
		require(1);
		return tag_int(get_byte());
	}
	case 98:
	{
		int a, b, c, d;
		require(4);
		a = get_byte();
		b = get_byte();
		c = get_byte();
		d = get_byte();
		return int_to_term((a << 24) | (b << 16) | (c << 8) | d, heap);
	}
	case 99:
	{
		double value;
		require(31);
		sscanf((const char *)*data, "%lf", &value);
		(*data) += 31;
		return heap_float(heap, value);
	}
	case 100:
	{
		int a, b;
		int len;
		cstr_t *s;
		int index;
		require(2);
		a = get_byte();
		b = get_byte();
		len = ((a << 8) | b);
		if (len > 255)
			return noval;
		require(len);
		s = (cstr_t *)heap_alloc(heap, sizeof(cstr_t) + len);
		s->size = len;
		memcpy(s->data, *data, len);
		index = atoms_set(atoms, s);
		(*data) += len;
		return tag_atom(index);
	}
	case 104:
	{
		int arity, i;
		term_t tuple;
		term_box_t *tbox;
		require(1);
		arity = get_byte();
		tuple = heap_tuple(heap, arity);
		tbox = peel(tuple);
		for (i = 0; i < arity; i++)
		{
			term_t e = bin2term(data, bytes_left, atoms, heap);
			if (e == noval)
				return noval;
			tbox->tuple.elts[i] = e;
		}
		return tuple;
	}
	case 105:
	{
		int a, b, c, d;
		int arity, i;
		term_t tuple;
		term_box_t *tbox;
		require(4);
		a = get_byte();
		b = get_byte();
		c = get_byte();
		d = get_byte();
		arity = ((a << 24) | (b << 16) | (c << 8) | d);
		tuple = heap_tuple(heap, arity);
		tbox = peel(tuple);
		for (i = 0; i < arity; i++)
		{
			term_t e = bin2term(data, bytes_left, atoms, heap);
			if (e == noval)
				return noval;
			tbox->tuple.elts[i] = e;
		}
		return tuple;
	}
	case 106:
	{
		return nil;
	}
	case 107:
	{
		int a, b;
		int len, i;
		term_t cons = nil;
		require(2);
		a = get_byte();
		b = get_byte();
		len = ((a << 8) | b);
		require(len);
		i = len-1;
		while (i >= 0)
			cons = heap_cons2(heap, tag_int((*data)[i--]), cons);
		(*data) += len;
		return cons;
	}
	case 108:
	{
		int a, b, c, d;
		int len, i;
		term_t *es;
		term_t tail;
		require(4);
		a = get_byte();
		b = get_byte();
		c = get_byte();
		d = get_byte();
		len = ((a << 24) | (b << 16) | (c << 8) | d);
		es = (term_t *)heap_alloc(heap, len*sizeof(term_t));
		for (i = 0; i < len; i++)
		{
			term_t e = bin2term(data, bytes_left, atoms, heap);
			if (e == noval)
				return noval;
			es[i] = e;
		}
		tail = bin2term(data, bytes_left, atoms, heap);
		if (tail == noval)
			return noval;
		i = len-1;
		while (i >= 0)
			tail = heap_cons2(heap, es[i--], tail);
		return tail;
	}
	case 109:
	{
		int a, b, c, d;
		int len;
		term_t bin;
		require(4);
		a = get_byte();
		b = get_byte();
		c = get_byte();
		d = get_byte();
		len = ((a << 24) | (b << 16) | (c << 8) | d);
		require(len);
		bin = heap_binary(heap, len*8, (*data));
		(*data) += len;
		return bin;
	}
	case 110:
	{
		int len;
		int sign;
		mp_size prec;
		mp_int mp;
		mp_err rs;
		require(1);
		len = get_byte();
		sign = get_byte();
		require(len);
		prec = (len + (MP_DIGIT_SIZE-1)) / MP_DIGIT_SIZE;
		mp_init_size(&mp, prec, heap);
		//TODO: use mp_read_signed_bin
		rs = mp_read_unsigned_bin_lsb(&mp, *data, len, heap);
		if (rs != MP_OKAY)
			return noval;
		(*data) += len;
		if (sign == 1)
			mp_neg(&mp, &mp, heap);
		return mp_to_term(mp);
	}
	case 111:
	{
		int a, b, c, d;
		int len;
		int sign;
		mp_size prec;
		mp_int mp;
		mp_err rs;
		require(4);
		a = get_byte();
		b = get_byte();
		c = get_byte();
		d = get_byte();
		len = ((a << 24) | (b << 16) | (c << 8) | d);
		require(1);
		sign = get_byte();
		require(len);
		prec = (len + (MP_DIGIT_SIZE-1)) / MP_DIGIT_SIZE;
		mp_init_size(&mp, prec, heap);
		rs = mp_read_unsigned_bin_lsb(&mp, *data, len, heap);
		if (rs != MP_OKAY)
			return noval;
		(*data) += len;
		if (sign == 1)
			mp_neg(&mp, &mp, heap);
		return mp_to_term(mp);
	}
	default:
		return noval;	// only a subset of tags are supported; inspired by BERT
	}
}
Example #26
0
/* Do modular exponentiation using integer multiply code. */
mp_err mp_exptmod_safe_i(const mp_int *   montBase, 
                    const mp_int *   exponent, 
		    const mp_int *   modulus, 
		    mp_int *         result, 
		    mp_mont_modulus *mmm, 
		    int              nLen, 
		    mp_size          bits_in_exponent, 
		    mp_size          window_bits,
		    mp_size          num_powers)
{
  mp_int *pa1, *pa2, *ptmp;
  mp_size i;
  mp_size first_window;
  mp_err  res;
  int     expOff;
  mp_int  accum1, accum2, accum[WEAVE_WORD_SIZE];
  mp_int  tmp;
  unsigned char *powersArray;
  unsigned char *powers;

  MP_DIGITS(&accum1) = 0;
  MP_DIGITS(&accum2) = 0;
  MP_DIGITS(&accum[0]) = 0;
  MP_DIGITS(&accum[1]) = 0;
  MP_DIGITS(&accum[2]) = 0;
  MP_DIGITS(&accum[3]) = 0;
  MP_DIGITS(&tmp) = 0;

  powersArray = (unsigned char *)malloc(num_powers*(nLen*sizeof(mp_digit)+1));
  if (powersArray == NULL) {
    res = MP_MEM;
    goto CLEANUP;
  }

  /* powers[i] = base ** (i); */
  powers = (unsigned char *)MP_ALIGN(powersArray,num_powers);

  /* grab the first window value. This allows us to preload accumulator1
   * and save a conversion, some squares and a multiple*/
  MP_CHECKOK( mpl_get_bits(exponent, 
				bits_in_exponent-window_bits, window_bits) );
  first_window = (mp_size)res;

  MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
  MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );
  MP_CHECKOK( mp_init_size(&tmp, 3 * nLen + 2) );

  /* build the first WEAVE_WORD powers inline */
  /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
  if (num_powers > 2) {
    MP_CHECKOK( mp_init_size(&accum[0], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[1], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[2], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[3], 3 * nLen + 2) );
    mp_set(&accum[0], 1);
    MP_CHECKOK( s_mp_to_mont(&accum[0], mmm, &accum[0]) );
    MP_CHECKOK( mp_copy(montBase, &accum[1]) );
    SQR(montBase, &accum[2]);
    MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
    MP_CHECKOK( mpi_to_weave(accum, powers, nLen, num_powers) );
    if (first_window < 4) {
      MP_CHECKOK( mp_copy(&accum[first_window], &accum1) );
      first_window = num_powers;
    }
  } else {
      if (first_window == 0) {
        mp_set(&accum1, 1);
        MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
      } else {
        /* assert first_window == 1? */
        MP_CHECKOK( mp_copy(montBase, &accum1) );
      }
  }

  /*
   * calculate all the powers in the powers array.
   * this adds 2**(k-1)-2 square operations over just calculating the
   * odd powers where k is the window size in the two other mp_modexpt
   * implementations in this file. We will get some of that
   * back by not needing the first 'k' squares and one multiply for the 
   * first window */ 
  for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
    int acc_index = i & (WEAVE_WORD_SIZE-1); /* i % WEAVE_WORD_SIZE */
    if ( i & 1 ) {
      MUL_NOWEAVE(montBase, &accum[acc_index-1] , &accum[acc_index]);
      /* we've filled the array do our 'per array' processing */
      if (acc_index == (WEAVE_WORD_SIZE-1)) {
        MP_CHECKOK( mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE-1),
							 nLen, num_powers) );

        if (first_window <= i) {
          MP_CHECKOK( mp_copy(&accum[first_window & (WEAVE_WORD_SIZE-1)], 
								&accum1) );
          first_window = num_powers;
        }
      }
    } else {
      /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source
       * and target are the same so we need to copy.. After that, the
       * value is overwritten, so we need to fetch it from the stored
       * weave array */
      if (i > 2* WEAVE_WORD_SIZE) {
        MP_CHECKOK(weave_to_mpi(&accum2, powers+i/2, nLen, num_powers));
        SQR(&accum2, &accum[acc_index]);
      } else {
	int half_power_index = (i/2) & (WEAVE_WORD_SIZE-1);
	if (half_power_index == acc_index) {
	   /* copy is cheaper than weave_to_mpi */
	   MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
	   SQR(&accum2,&accum[acc_index]);
	} else {
	   SQR(&accum[half_power_index],&accum[acc_index]);
	}
      }
    }
  }
  /* if the accum1 isn't set, Then there is something wrong with our logic 
   * above and is an internal programming error. 
   */
#if MP_ARGCHK == 2
  assert(MP_USED(&accum1) != 0);
#endif

  /* set accumulator to montgomery residue of 1 */
  pa1 = &accum1;
  pa2 = &accum2;

  for (expOff = bits_in_exponent - window_bits*2; expOff >= 0; expOff -= window_bits) {
    mp_size smallExp;
    MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
    smallExp = (mp_size)res;

    /* handle unroll the loops */
    switch (window_bits) {
    case 1:
	if (!smallExp) {
	    SQR(pa1,pa2); SWAPPA;
	} else if (smallExp & 1) {
	    SQR(pa1,pa2); MUL_NOWEAVE(montBase,pa2,pa1);
	} else {
	    abort();
	}
	break;
    case 6:
	SQR(pa1,pa2); SQR(pa2,pa1); 
	/* fall through */
    case 4:
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
	MUL(smallExp, pa1,pa2); SWAPPA;
	break;
    case 5:
	SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
	SQR(pa1,pa2); MUL(smallExp,pa2,pa1);
	break;
    default:
	abort(); /* could do a loop? */
    }
  }

  res = s_mp_redc(pa1, mmm);
  mp_exch(pa1, result);

CLEANUP:
  mp_clear(&accum1);
  mp_clear(&accum2);
  mp_clear(&accum[0]);
  mp_clear(&accum[1]);
  mp_clear(&accum[2]);
  mp_clear(&accum[3]);
  mp_clear(&tmp);
  /* PORT_Memset(powers,0,num_powers*nLen*sizeof(mp_digit)); */
  free(powersArray);
  return res;
}
Example #27
0
/* integer signed division. 
 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
 * HAC pp.598 Algorithm 14.20
 *
 * Note that the description in HAC is horribly 
 * incomplete.  For example, it doesn't consider 
 * the case where digits are removed from 'x' in 
 * the inner loop.  It also doesn't consider the 
 * case that y has fewer than three digits, etc..
 *
 * The overall algorithm is as described as 
 * 14.20 from HAC but fixed to treat these cases.
*/
int mp_div MPA(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
  mp_int  q, x, y, t1, t2;
  int     res, n, t, i, norm, neg;

  /* is divisor zero ? */
  if (mp_iszero (b) == 1) {
    return MP_VAL;
  }

  /* if a < b then q=0, r = a */
  if (mp_cmp_mag (a, b) == MP_LT) {
    if (d != NULL) {
      res = mp_copy (MPST, a, d);
    } else {
      res = MP_OKAY;
    }
    if (c != NULL) {
      mp_zero (c);
    }
    return res;
  }

  if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
    return res;
  }
  q.used = a->used + 2;

  if ((res = mp_init (&t1)) != MP_OKAY) {
    goto LBL_Q;
  }

  if ((res = mp_init (&t2)) != MP_OKAY) {
    goto LBL_T1;
  }

  if ((res = mp_init_copy (MPST, &x, a)) != MP_OKAY) {
    goto LBL_T2;
  }

  if ((res = mp_init_copy (MPST, &y, b)) != MP_OKAY) {
    goto LBL_X;
  }

  /* fix the sign */
  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
  x.sign = y.sign = MP_ZPOS;

  /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
  norm = mp_count_bits(&y) % DIGIT_BIT;
  if (norm < (int)(DIGIT_BIT-1)) {
     norm = (DIGIT_BIT-1) - norm;
     if ((res = mp_mul_2d (MPST, &x, norm, &x)) != MP_OKAY) {
       goto LBL_Y;
     }
     if ((res = mp_mul_2d (MPST, &y, norm, &y)) != MP_OKAY) {
       goto LBL_Y;
     }
  } else {
     norm = 0;
  }

  /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
  n = x.used - 1;
  t = y.used - 1;

  /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
  if ((res = mp_lshd (MPST, &y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
    goto LBL_Y;
  }

  while (mp_cmp (&x, &y) != MP_LT) {
    ++(q.dp[n - t]);
    if ((res = mp_sub (MPST, &x, &y, &x)) != MP_OKAY) {
      goto LBL_Y;
    }
  }

  /* reset y by shifting it back down */
  mp_rshd (&y, n - t);

  /* step 3. for i from n down to (t + 1) */
  for (i = n; i >= (t + 1); i--) {
    if (i > x.used) {
      continue;
    }

    /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 
     * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
    if (x.dp[i] == y.dp[t]) {
      q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
    } else {
      mp_word tmp;
      tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
      tmp |= ((mp_word) x.dp[i - 1]);
      tmp /= ((mp_word) y.dp[t]);
      if (tmp > (mp_word) MP_MASK)
        tmp = MP_MASK;
      q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
    }

    /* while (q{i-t-1} * (yt * b + y{t-1})) > 
             xi * b**2 + xi-1 * b + xi-2 
     
       do q{i-t-1} -= 1; 
    */
    q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
    do {
      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;

      /* find left hand */
      mp_zero (&t1);
      t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
      t1.dp[1] = y.dp[t];
      t1.used = 2;
      if ((res = mp_mul_d (MPST, &t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
        goto LBL_Y;
      }

      /* find right hand */
      t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
      t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
      t2.dp[2] = x.dp[i];
      t2.used = 3;
    } while (mp_cmp_mag(&t1, &t2) == MP_GT);

    /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
    if ((res = mp_mul_d (MPST, &y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
      goto LBL_Y;
    }

    if ((res = mp_lshd (MPST, &t1, i - t - 1)) != MP_OKAY) {
      goto LBL_Y;
    }

    if ((res = mp_sub (MPST, &x, &t1, &x)) != MP_OKAY) {
      goto LBL_Y;
    }

    /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
    if (x.sign == MP_NEG) {
      if ((res = mp_copy (MPST, &y, &t1)) != MP_OKAY) {
        goto LBL_Y;
      }
      if ((res = mp_lshd (MPST, &t1, i - t - 1)) != MP_OKAY) {
        goto LBL_Y;
      }
      if ((res = mp_add (MPST, &x, &t1, &x)) != MP_OKAY) {
        goto LBL_Y;
      }

      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
    }
  }

  /* now q is the quotient and x is the remainder 
   * [which we have to normalize] 
   */
  
  /* get sign before writing to c */
  x.sign = x.used == 0 ? MP_ZPOS : a->sign;

  if (c != NULL) {
    mp_clamp (&q);
    mp_managed_copy (MPST, &q, c);
    c->sign = neg;
  }

  if (d != NULL) {
    mp_div_2d (MPST, &x, norm, &x, NULL);
    mp_managed_copy (MPST, &x, d);
  }

  res = MP_OKAY;

LBL_Y:mp_clear (&y);
LBL_X:mp_clear (&x);
LBL_T2:mp_clear (&t2);
LBL_T1:mp_clear (&t1);
LBL_Q:mp_clear (&q);
  return res;
}
Example #28
0
mp_err  mpp_pprime(mp_int *a, int nt)
{
  mp_err   res;
  mp_int   x, amo, m, z;	/* "amo" = "a minus one" */
  int      iter;
  unsigned int jx;
  mp_size  b;

  ARGCHK(a != NULL, MP_BADARG);

  MP_DIGITS(&x) = 0;
  MP_DIGITS(&amo) = 0;
  MP_DIGITS(&m) = 0;
  MP_DIGITS(&z) = 0;

  /* Initialize temporaries... */
  MP_CHECKOK( mp_init(&amo));
  /* Compute amo = a - 1 for what follows...    */
  MP_CHECKOK( mp_sub_d(a, 1, &amo) );

  b = mp_trailing_zeros(&amo);
  if (!b) { /* a was even ? */
    res = MP_NO;
    goto CLEANUP;
  }

  MP_CHECKOK( mp_init_size(&x, MP_USED(a)) );
  MP_CHECKOK( mp_init(&z) );
  MP_CHECKOK( mp_init(&m) );
  MP_CHECKOK( mp_div_2d(&amo, b, &m, 0) );

  /* Do the test nt times... */
  for(iter = 0; iter < nt; iter++) {

    /* Choose a random value for x < a          */
    s_mp_pad(&x, USED(a));
    mpp_random(&x);
    MP_CHECKOK( mp_mod(&x, a, &x) );

    /* Compute z = (x ** m) mod a               */
    MP_CHECKOK( mp_exptmod(&x, &m, a, &z) );
    
    if(mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
      res = MP_YES;
      continue;
    }
    
    res = MP_NO;  /* just in case the following for loop never executes. */
    for (jx = 1; jx < b; jx++) {
      /* z = z^2 (mod a) */
      MP_CHECKOK( mp_sqrmod(&z, a, &z) );
      res = MP_NO;	/* previous line set res to MP_YES */

      if(mp_cmp_d(&z, 1) == 0) {
	break;
      }
      if(mp_cmp(&z, &amo) == 0) {
	res = MP_YES;
	break;
      } 
    } /* end testing loop */

    /* If the test passes, we will continue iterating, but a failed
       test means the candidate is definitely NOT prime, so we will
       immediately break out of this loop
     */
    if(res == MP_NO)
      break;

  } /* end iterations loop */
  
CLEANUP:
  mp_clear(&m);
  mp_clear(&z);
  mp_clear(&x);
  mp_clear(&amo);
  return res;

} /* end mpp_pprime() */
Example #29
0
/* Karatsuba squaring, computes b = a*a using three
 * half size squarings
 *
 * See comments of karatsuba_mul for details.  It
 * is essentially the same algorithm but merely
 * tuned to perform recursive squarings.
 */
int mp_karatsuba_sqr(const mp_int *a, mp_int *b)
{
   mp_int  x0, x1, t1, t2, x0x0, x1x1;
   int     B, err;

   err = MP_MEM;

   /* min # of digits */
   B = a->used;

   /* now divide in two */
   B = B >> 1;

   /* init copy all the temps */
   if (mp_init_size(&x0, B) != MP_OKAY)
      goto LBL_ERR;
   if (mp_init_size(&x1, a->used - B) != MP_OKAY)
      goto X0;

   /* init temps */
   if (mp_init_size(&t1, a->used * 2) != MP_OKAY)
      goto X1;
   if (mp_init_size(&t2, a->used * 2) != MP_OKAY)
      goto T1;
   if (mp_init_size(&x0x0, B * 2) != MP_OKAY)
      goto T2;
   if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY)
      goto X0X0;

   {
      int x;
      mp_digit *dst, *src;

      src = a->dp;

      /* now shift the digits */
      dst = x0.dp;
      for (x = 0; x < B; x++) {
         *dst++ = *src++;
      }

      dst = x1.dp;
      for (x = B; x < a->used; x++) {
         *dst++ = *src++;
      }
   }

   x0.used = B;
   x1.used = a->used - B;

   mp_clamp(&x0);

   /* now calc the products x0*x0 and x1*x1 */
   if (mp_sqr(&x0, &x0x0) != MP_OKAY)
      goto X1X1;           /* x0x0 = x0*x0 */
   if (mp_sqr(&x1, &x1x1) != MP_OKAY)
      goto X1X1;           /* x1x1 = x1*x1 */

   /* now calc (x1+x0)**2 */
   if (s_mp_add(&x1, &x0, &t1) != MP_OKAY)
      goto X1X1;           /* t1 = x1 - x0 */
   if (mp_sqr(&t1, &t1) != MP_OKAY)
      goto X1X1;           /* t1 = (x1 - x0) * (x1 - x0) */

   /* add x0y0 */
   if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY)
      goto X1X1;           /* t2 = x0x0 + x1x1 */
   if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY)
      goto X1X1;           /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */

   /* shift by B */
   if (mp_lshd(&t1, B) != MP_OKAY)
      goto X1X1;           /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
   if (mp_lshd(&x1x1, B * 2) != MP_OKAY)
      goto X1X1;           /* x1x1 = x1x1 << 2*B */

   if (mp_add(&x0x0, &t1, &t1) != MP_OKAY)
      goto X1X1;           /* t1 = x0x0 + t1 */
   if (mp_add(&t1, &x1x1, b) != MP_OKAY)
      goto X1X1;           /* t1 = x0x0 + t1 + x1x1 */

   err = MP_OKAY;

X1X1:
   mp_clear(&x1x1);
X0X0:
   mp_clear(&x0x0);
T2:
   mp_clear(&t2);
T1:
   mp_clear(&t1);
X1:
   mp_clear(&x1);
X0:
   mp_clear(&x0);
LBL_ERR:
   return err;
}
Example #30
0
/* Karatsuba squaring, computes b = a*a using three 
 * half size squarings
 *
 * See comments of karatsuba_mul for details.  It 
 * is essentially the same algorithm but merely 
 * tuned to perform recursive squarings.
 */
int mp_karatsuba_sqr (mp_int * a, mp_int * b)
{
  mp_int  x0, x1, t1, t2, x0x0, x1x1;
  int     B, err;

  err = MP_MEM;

  /* min # of digits */
  B = USED(a);

  /* now divide in two */
  B = B >> 1;

  /* init copy all the temps */
  if (mp_init_size (&x0, B) != MP_OKAY)
    goto ERR;
  if (mp_init_size (&x1, USED(a) - B) != MP_OKAY)
    goto X0;

  /* init temps */
  if (mp_init_size (&t1, USED(a) * 2) != MP_OKAY)
    goto X1;
  if (mp_init_size (&t2, USED(a) * 2) != MP_OKAY)
    goto T1;
  if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
    goto T2;
  if (mp_init_size (&x1x1, (USED(a) - B) * 2) != MP_OKAY)
    goto X0X0;

  {
    register int x;
    register mp_digit *dst, *src;

    src = DIGITS(a);

    /* now shift the digits */
    dst = DIGITS(&x0);
    for (x = 0; x < B; x++) {
      *dst++ = *src++;
    }

    dst = DIGITS(&x1);
    for (x = B; x < USED(a); x++) {
      *dst++ = *src++;
    }
  }

  SET_USED(&x0,B);
  SET_USED(&x1,USED(a) - B);

  mp_clamp (&x0);

  /* now calc the products x0*x0 and x1*x1 */
  if (mp_sqr (&x0, &x0x0) != MP_OKAY)
    goto X1X1;           /* x0x0 = x0*x0 */
  if (mp_sqr (&x1, &x1x1) != MP_OKAY)
    goto X1X1;           /* x1x1 = x1*x1 */

  /* now calc (x1+x0)**2 */
  if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
    goto X1X1;           /* t1 = x1 - x0 */
  if (mp_sqr (&t1, &t1) != MP_OKAY)
    goto X1X1;           /* t1 = (x1 - x0) * (x1 - x0) */

  /* add x0y0 */
  if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
    goto X1X1;           /* t2 = x0x0 + x1x1 */
  if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY)
    goto X1X1;           /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */

  /* shift by B */
  if (mp_lshd (&t1, B) != MP_OKAY)
    goto X1X1;           /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
  if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
    goto X1X1;           /* x1x1 = x1x1 << 2*B */

  if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
    goto X1X1;           /* t1 = x0x0 + t1 */
  if (mp_add (&t1, &x1x1, b) != MP_OKAY)
    goto X1X1;           /* t1 = x0x0 + t1 + x1x1 */

  err = MP_OKAY;

X1X1:mp_clear (&x1x1);
X0X0:mp_clear (&x0x0);
T2:mp_clear (&t2);
T1:mp_clear (&t1);
X1:mp_clear (&x1);
X0:mp_clear (&x0);
ERR:
  return err;
}