Beispiel #1
0
/* based on gmp's mpz_import.
 * see http://gmplib.org/manual/Integer-Import-and-Export.html
 */
int mp_import(mp_int* rop, size_t count, int order, size_t size, 
                            int endian, size_t nails, const void* op) {
	int result;
	size_t odd_nails, nail_bytes, i, j;
	unsigned char odd_nail_mask;

	mp_zero(rop);

	if (endian == 0) {
		union {
			unsigned int i;
			char c[4];
		} lint;
		lint.i = 0x01020304;

		endian = (lint.c[0] == 4) ? -1 : 1;
	}

	odd_nails = (nails % 8);
	odd_nail_mask = 0xff;
	for (i = 0; i < odd_nails; ++i) {
		odd_nail_mask ^= (1 << (7 - i));
	}
	nail_bytes = nails / 8;

	for (i = 0; i < count; ++i) {
		for (j = 0; j < (size - nail_bytes); ++j) {
			unsigned char byte = *(
					(unsigned char*)op + 
					(((order == 1) ? i : ((count - 1) - i)) * size) +
					((endian == 1) ? (j + nail_bytes) : (((size - 1) - j) - nail_bytes))
				);

			if (
				(result = mp_mul_2d(rop, ((j == 0) ? (8 - odd_nails) : 8), rop)) != MP_OKAY) {
				return result;
			}

			rop->dp[0] |= (j == 0) ? (byte & odd_nail_mask) : byte;
			rop->used  += 1;
		}
	}

	mp_clamp(rop);

	return MP_OKAY;
}
Beispiel #2
0
/* single digit division (based on routine from MPI) */
int
mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{
  mp_int  q;
  mp_word w;
  mp_digit t;
  int     res, ix;
  
  if (b == 0) {
     return MP_VAL;
  }
  
  if (b == 3) {
     return mp_div_3(a, c, d);
  }
  
  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
     return res;
  }
  
  q.used = a->used;
  q.sign = a->sign;
  w = 0;
  for (ix = a->used - 1; ix >= 0; ix--) {
     w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
     
     if (w >= b) {
        t = (mp_digit)(w / b);
        w = w % b;
      } else {
        t = 0;
      }
      q.dp[ix] = (mp_digit)t;
  }
  
  if (d != NULL) {
     *d = (mp_digit)w;
  }
  
  if (c != NULL) {
     mp_clamp(&q);
     mp_exch(&q, c);
  }
  mp_clear(&q);
  
  return res;
}
Beispiel #3
0
/* b = a/2 */
int
mp_div_2 (mp_int * a, mp_int * b)
{
    int     x, res, oldused;

    /* copy */
    if (b->alloc < a->used) {
        if ((res = mp_grow (b, a->used)) != MP_OKAY) {
            return res;
        }
    }

    oldused = b->used;
    b->used = a->used;
    {
        register mp_digit r, rr, *tmpa, *tmpb;

        /* source alias */
        tmpa = a->dp + b->used - 1;

        /* dest alias */
        tmpb = b->dp + b->used - 1;

        /* carry */
        r = 0;
        for (x = b->used - 1; x >= 0; x--) {
            /* get the carry for the next iteration */
            rr = *tmpa & 1;

            /* shift the current digit, add in carry and store */
            *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));

            /* forward carry to next iteration */
            r = rr;
        }

        /* zero excess digits */
        tmpb = b->dp + b->used;
        for (x = b->used; x < oldused; x++) {
            *tmpb++ = 0;
        }
    }
    b->sign = a->sign;
    mp_clamp (b);
    return MP_OKAY;
}
Beispiel #4
0
/* b = a/2 */
int mp_div_2(mp_int * a, mp_int * b)
{
  int     x, res, oldused;

  /* copy */
  if (ALLOC(b) < USED(a)) {
    if ((res = mp_grow (b, USED(a))) != MP_OKAY) {
      return res;
    }
  }

  oldused = USED(b);
  SET_USED(b,USED(a));
  {
    register mp_digit r, rr, *tmpa, *tmpb;

    /* source alias */
    tmpa = DIGITS(a) + USED(b) - 1;

    /* dest alias */
    tmpb = DIGITS(b) + USED(b) - 1;

    /* carry */
    r = 0;
    for (x = USED(b) - 1; x >= 0; x--) {
      /* get the carry for the next iteration */
      rr = *tmpa & 1;

      /* shift the current digit, add in carry and store */
      *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));

      /* forward carry to next iteration */
      r = rr;
    }

    /* zero excess digits */
    tmpb = DIGITS(b) + USED(b);
    for (x = USED(b); x < oldused; x++) {
      *tmpb++ = 0;
    }
  }
  SET_SIGN(b,SIGN(a));
  mp_clamp (b);
  return MP_OKAY;
}
Beispiel #5
0
static void from_num(MVMnum64 d, mp_int *a) {
    MVMnum64 d_digit = pow(2, DIGIT_BIT);
    MVMnum64 da      = fabs(d);
    MVMnum64 upper;
    MVMnum64 lower;
    MVMnum64 lowest;
    MVMnum64 rest;
    int      digits  = 0;

    mp_zero(a);

    while (da > d_digit * d_digit * d_digit) {;
        da /= d_digit;
        digits++;
    }
    mp_grow(a, digits + 3);

    /* populate the top 3 digits */
    upper = da / (d_digit*d_digit);
    rest = fmod(da, d_digit*d_digit);
    lower = rest / d_digit;
    lowest = fmod(rest,d_digit );
    if (upper >= 1) {
        mp_set_long(a, (unsigned long) upper);
        mp_mul_2d(a, DIGIT_BIT , a);
        DIGIT(a, 0) = (mp_digit) lower;
        mp_mul_2d(a, DIGIT_BIT , a);
    } else {
        if (lower >= 1) {
            mp_set_long(a, (unsigned long) lower);
            mp_mul_2d(a, DIGIT_BIT , a);
            a->used = 2;
        } else {
            a->used = 1;
        }
    }
    DIGIT(a, 0) = (mp_digit) lowest;

    /* shift the rest */
    mp_mul_2d(a, DIGIT_BIT * digits, a);
    if (d < 0)
        mp_neg(a, a);
    mp_clamp(a);
    mp_shrink(a);
}
/* reads a unsigned char array, assumes the msb is stored first [big endian] */
int
mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c)
{
  int     res;
  mp_zero (a);
  while (c-- > 0) {
    if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
      return res;
    }

    if (DIGIT_BIT != 7) {
      a->dp[0] |= *b++;
      a->used += 1;
    } else {
      a->dp[0] = (*b & MP_MASK);
      a->dp[1] |= ((*b++ >> 7U) & 1);
      a->used += 2;
    }
  }
  mp_clamp (a);
  return MP_OKAY;
}
Beispiel #7
0
/* Taken from mp_set_long, but portably accepts a 64-bit number. */
int MVM_bigint_mp_set_uint64(mp_int * a, MVMuint64 b) {
  int     x, res;

  mp_zero (a);

  /* set four bits at a time */
  for (x = 0; x < sizeof(MVMuint64) * 2; x++) {
    /* shift the number up four bits */
    if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
      return res;
    }

    /* OR in the top four bits of the source */
    a->dp[0] |= (b >> ((sizeof(MVMuint64)) * 8 - 4)) & 15;

    /* shift the source up to the next four bits */
    b <<= 4;

    /* ensure that digits are not clamped off */
    a->used += 1;
  }
  mp_clamp(a);
  return MP_OKAY;
}
/* c = |a| * |b| using Karatsuba Multiplication using
 * three half size multiplications
 *
 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
 * let n represent half of the number of digits in
 * the min(a,b)
 *
 * a = a1 * B**n + a0
 * b = b1 * B**n + b0
 *
 * Then, a * b =>
   a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
 *
 * Note that a1b1 and a0b0 are used twice and only need to be
 * computed once.  So in total three half size (half # of
 * digit) multiplications are performed, a0b0, a1b1 and
 * (a1+b1)(a0+b0)
 *
 * Note that a multiplication of half the digits requires
 * 1/4th the number of single precision multiplications so in
 * total after one call 25% of the single precision multiplications
 * are saved.  Note also that the call to mp_mul can end up back
 * in this function if the a0, a1, b0, or b1 are above the threshold.
 * This is known as divide-and-conquer and leads to the famous
 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
 * the standard O(N**2) that the baseline/comba methods use.
 * Generally though the overhead of this method doesn't pay off
 * until a certain size (N ~ 80) is reached.
 */
int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
{
    mp_int  x0, x1, y0, y1, t1, x0y0, x1y1;
    int     B, err;

    /* default the return code to an error */
    err = MP_MEM;

    /* min # of digits */
    B = MIN (a->used, b->used);

    /* now divide in two */
    B = B >> 1;

    /* init copy all the temps */
    if (mp_init_size (&x0, B) != MP_OKAY)
        goto ERR;
    if (mp_init_size (&x1, a->used - B) != MP_OKAY)
        goto X0;
    if (mp_init_size (&y0, B) != MP_OKAY)
        goto X1;
    if (mp_init_size (&y1, b->used - B) != MP_OKAY)
        goto Y0;

    /* init temps */
    if (mp_init_size (&t1, B * 2) != MP_OKAY)
        goto Y1;
    if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
        goto T1;
    if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
        goto X0Y0;

    /* now shift the digits */
    x0.used = y0.used = B;
    x1.used = a->used - B;
    y1.used = b->used - B;

    {
        register int x;
        register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;

        /* we copy the digits directly instead of using higher level functions
         * since we also need to shift the digits
         */
        tmpa = a->dp;
        tmpb = b->dp;

        tmpx = x0.dp;
        tmpy = y0.dp;
        for (x = 0; x < B; x++) {
            *tmpx++ = *tmpa++;
            *tmpy++ = *tmpb++;
        }

        tmpx = x1.dp;
        for (x = B; x < a->used; x++) {
            *tmpx++ = *tmpa++;
        }

        tmpy = y1.dp;
        for (x = B; x < b->used; x++) {
            *tmpy++ = *tmpb++;
        }
    }

    /* only need to clamp the lower words since by definition the
     * upper words x1/y1 must have a known number of digits
     */
    mp_clamp (&x0);
    mp_clamp (&y0);

    /* now calc the products x0y0 and x1y1 */
    /* after this x0 is no longer required, free temp [x0==t2]! */
    if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
        goto X1Y1;          /* x0y0 = x0*y0 */
    if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
        goto X1Y1;          /* x1y1 = x1*y1 */

    /* now calc x1+x0 and y1+y0 */
    if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
        goto X1Y1;          /* t1 = x1 - x0 */
    if (s_mp_add (&y1, &y0, &x0) != MP_OKAY)
        goto X1Y1;          /* t2 = y1 - y0 */
    if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
        goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */

    /* add x0y0 */
    if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
        goto X1Y1;          /* t2 = x0y0 + x1y1 */
    if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY)
        goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */

    /* shift by B */
    if (mp_lshd (&t1, B) != MP_OKAY)
        goto X1Y1;          /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
    if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
        goto X1Y1;          /* x1y1 = x1y1 << 2*B */

    if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
        goto X1Y1;          /* t1 = x0y0 + t1 */
    if (mp_add (&t1, &x1y1, c) != MP_OKAY)
        goto X1Y1;          /* t1 = x0y0 + t1 + x1y1 */

    /* Algorithm succeeded set the return code to MP_OKAY */
    err = MP_OKAY;

X1Y1:
    mp_clear (&x1y1);
X0Y0:
    mp_clear (&x0y0);
T1:
    mp_clear (&t1);
Y1:
    mp_clear (&y1);
Y0:
    mp_clear (&y0);
X1:
    mp_clear (&x1);
X0:
    mp_clear (&x0);
ERR:
    return err;
}
Beispiel #9
0
/* single digit division (based on routine from MPI) */
int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{
  mp_int  q;
  mp_word w;
  mp_digit t;
  int     res, ix;

  /* cannot divide by zero */
  if (b == 0) {
     return MP_VAL;
  }

  /* quick outs */
  if (b == 1 || mp_iszero(a) == 1) {
     if (d != NULL) {
        *d = 0;
     }
     if (c != NULL) {
        return mp_copy(a, c);
     }
     return MP_OKAY;
  }

  /* power of two ? */
  if (s_is_power_of_two(b, &ix) == 1) {
     if (d != NULL) {
        *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
     }
     if (c != NULL) {
        return mp_div_2d(a, ix, c, NULL);
     }
     return MP_OKAY;
  }

#ifdef BN_MP_DIV_3_C
  /* three? */
  if (b == 3) {
     return mp_div_3(a, c, d);
  }
#endif

  /* no easy answer [c'est la vie].  Just division */
  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
     return res;
  }

  q.used = a->used;
  q.sign = a->sign;
  w = 0;
  for (ix = a->used - 1; ix >= 0; ix--) {
     w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);

     if (w >= b) {
        t = (mp_digit)(w / b);
        w -= ((mp_word)t) * ((mp_word)b);
      } else {
        t = 0;
      }
      q.dp[ix] = (mp_digit)t;
  }

  if (d != NULL) {
     *d = (mp_digit)w;
  }

  if (c != NULL) {
     mp_clamp(&q);
     mp_exch(&q, c);
  }
  mp_clear(&q);

  return res;
}
Beispiel #10
0
/* single digit addition */
int
mp_add_d (mp_int * a, mp_digit b, mp_int * c)
{
  int     res, ix, oldused;
  mp_digit *tmpa, *tmpc, mu;

  /* grow c as required */
  if (c->alloc < a->used + 1) {
     if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
        return res;
     }
  }

  /* if a is negative and |a| >= b, call c = |a| - b */
  if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
     /* temporarily fix sign of a */
     a->sign = MP_ZPOS;

     /* c = |a| - b */
     res = mp_sub_d(a, b, c);

     /* fix sign  */
     a->sign = c->sign = MP_NEG;

     /* clamp */
     mp_clamp(c);

     return res;
  }

  /* old number of used digits in c */
  oldused = c->used;

  /* sign always positive */
  c->sign = MP_ZPOS;

  /* source alias */
  tmpa    = a->dp;

  /* destination alias */
  tmpc    = c->dp;

  /* if a is positive */
  if (a->sign == MP_ZPOS) {
     /* add digit, after this we're propagating
      * the carry.
      */
     *tmpc   = *tmpa++ + b;
     mu      = *tmpc >> DIGIT_BIT;
     *tmpc++ &= MP_MASK;

     /* now handle rest of the digits */
     for (ix = 1; ix < a->used; ix++) {
        *tmpc   = *tmpa++ + mu;
        mu      = *tmpc >> DIGIT_BIT;
        *tmpc++ &= MP_MASK;
     }
     /* set final carry */
     ix++;
     *tmpc++  = mu;

     /* setup size */
     c->used = a->used + 1;
  } else {
Beispiel #11
0
/* integer signed division. 
 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
 * HAC pp.598 Algorithm 14.20
 *
 * Note that the description in HAC is horribly 
 * incomplete.  For example, it doesn't consider 
 * the case where digits are removed from 'x' in 
 * the inner loop.  It also doesn't consider the 
 * case that y has fewer than three digits, etc..
 *
 * The overall algorithm is as described as 
 * 14.20 from HAC but fixed to treat these cases.
*/
int mp_div MPA(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
  mp_int  q, x, y, t1, t2;
  int     res, n, t, i, norm, neg;

  /* is divisor zero ? */
  if (mp_iszero (b) == 1) {
    return MP_VAL;
  }

  /* if a < b then q=0, r = a */
  if (mp_cmp_mag (a, b) == MP_LT) {
    if (d != NULL) {
      res = mp_copy (MPST, a, d);
    } else {
      res = MP_OKAY;
    }
    if (c != NULL) {
      mp_zero (c);
    }
    return res;
  }

  if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
    return res;
  }
  q.used = a->used + 2;

  if ((res = mp_init (&t1)) != MP_OKAY) {
    goto LBL_Q;
  }

  if ((res = mp_init (&t2)) != MP_OKAY) {
    goto LBL_T1;
  }

  if ((res = mp_init_copy (MPST, &x, a)) != MP_OKAY) {
    goto LBL_T2;
  }

  if ((res = mp_init_copy (MPST, &y, b)) != MP_OKAY) {
    goto LBL_X;
  }

  /* fix the sign */
  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
  x.sign = y.sign = MP_ZPOS;

  /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
  norm = mp_count_bits(&y) % DIGIT_BIT;
  if (norm < (int)(DIGIT_BIT-1)) {
     norm = (DIGIT_BIT-1) - norm;
     if ((res = mp_mul_2d (MPST, &x, norm, &x)) != MP_OKAY) {
       goto LBL_Y;
     }
     if ((res = mp_mul_2d (MPST, &y, norm, &y)) != MP_OKAY) {
       goto LBL_Y;
     }
  } else {
     norm = 0;
  }

  /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
  n = x.used - 1;
  t = y.used - 1;

  /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
  if ((res = mp_lshd (MPST, &y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
    goto LBL_Y;
  }

  while (mp_cmp (&x, &y) != MP_LT) {
    ++(q.dp[n - t]);
    if ((res = mp_sub (MPST, &x, &y, &x)) != MP_OKAY) {
      goto LBL_Y;
    }
  }

  /* reset y by shifting it back down */
  mp_rshd (&y, n - t);

  /* step 3. for i from n down to (t + 1) */
  for (i = n; i >= (t + 1); i--) {
    if (i > x.used) {
      continue;
    }

    /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 
     * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
    if (x.dp[i] == y.dp[t]) {
      q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
    } else {
      mp_word tmp;
      tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
      tmp |= ((mp_word) x.dp[i - 1]);
      tmp /= ((mp_word) y.dp[t]);
      if (tmp > (mp_word) MP_MASK)
        tmp = MP_MASK;
      q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
    }

    /* while (q{i-t-1} * (yt * b + y{t-1})) > 
             xi * b**2 + xi-1 * b + xi-2 
     
       do q{i-t-1} -= 1; 
    */
    q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
    do {
      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;

      /* find left hand */
      mp_zero (&t1);
      t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
      t1.dp[1] = y.dp[t];
      t1.used = 2;
      if ((res = mp_mul_d (MPST, &t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
        goto LBL_Y;
      }

      /* find right hand */
      t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
      t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
      t2.dp[2] = x.dp[i];
      t2.used = 3;
    } while (mp_cmp_mag(&t1, &t2) == MP_GT);

    /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
    if ((res = mp_mul_d (MPST, &y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
      goto LBL_Y;
    }

    if ((res = mp_lshd (MPST, &t1, i - t - 1)) != MP_OKAY) {
      goto LBL_Y;
    }

    if ((res = mp_sub (MPST, &x, &t1, &x)) != MP_OKAY) {
      goto LBL_Y;
    }

    /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
    if (x.sign == MP_NEG) {
      if ((res = mp_copy (MPST, &y, &t1)) != MP_OKAY) {
        goto LBL_Y;
      }
      if ((res = mp_lshd (MPST, &t1, i - t - 1)) != MP_OKAY) {
        goto LBL_Y;
      }
      if ((res = mp_add (MPST, &x, &t1, &x)) != MP_OKAY) {
        goto LBL_Y;
      }

      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
    }
  }

  /* now q is the quotient and x is the remainder 
   * [which we have to normalize] 
   */
  
  /* get sign before writing to c */
  x.sign = x.used == 0 ? MP_ZPOS : a->sign;

  if (c != NULL) {
    mp_clamp (&q);
    mp_managed_copy (MPST, &q, c);
    c->sign = neg;
  }

  if (d != NULL) {
    mp_div_2d (MPST, &x, norm, &x, NULL);
    mp_managed_copy (MPST, &x, d);
  }

  res = MP_OKAY;

LBL_Y:mp_clear (&y);
LBL_X:mp_clear (&x);
LBL_T2:mp_clear (&t2);
LBL_T1:mp_clear (&t1);
LBL_Q:mp_clear (&q);
  return res;
}
/* computes xR**-1 == x (mod N) via Montgomery Reduction
 *
 * This is an optimized implementation of montgomery_reduce
 * which uses the comba method to quickly calculate the columns of the
 * reduction.
 *
 * Based on Algorithm 14.32 on pp.601 of HAC.
*/
int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{
  int     ix, res, olduse;
  mp_word W[MP_WARRAY] = { 0 };

  /* get old used count */
  olduse = x->used;

  /* grow a as required */
  if (x->alloc < n->used + 1) {
    if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
      return res;
    }
  }

  /* first we have to get the digits of the input into
   * an array of double precision words W[...]
   */
  {
    register mp_word *_W;
    register mp_digit *tmpx;

    /* alias for the W[] array */
    _W   = W;

    /* alias for the digits of  x*/
    tmpx = x->dp;

    /* copy the digits of a into W[0..a->used-1] */
    for (ix = 0; ix < x->used; ix++) {
      *_W++ = *tmpx++;
    }

    /* zero the high words of W[a->used..m->used*2] */
    for (; ix < n->used * 2 + 1; ix++) {
      *_W++ = 0;
    }
  }

  /* now we proceed to zero successive digits
   * from the least significant upwards
   */
  for (ix = 0; ix < n->used; ix++) {
    /* mu = ai * m' mod b
     *
     * We avoid a double precision multiplication (which isn't required)
     * by casting the value down to a mp_digit.  Note this requires
     * that W[ix-1] have  the carry cleared (see after the inner loop)
     */
    register mp_digit mu;
    mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);

    /* a = a + mu * m * b**i
     *
     * This is computed in place and on the fly.  The multiplication
     * by b**i is handled by offseting which columns the results
     * are added to.
     *
     * Note the comba method normally doesn't handle carries in the
     * inner loop In this case we fix the carry from the previous
     * column since the Montgomery reduction requires digits of the
     * result (so far) [see above] to work.  This is
     * handled by fixing up one carry after the inner loop.  The
     * carry fixups are done in order so after these loops the
     * first m->used words of W[] have the carries fixed
     */
    {
      register int iy;
      register mp_digit *tmpn;
      register mp_word *_W;

      /* alias for the digits of the modulus */
      tmpn = n->dp;

      /* Alias for the columns set by an offset of ix */
      _W = W + ix;

      /* inner loop */
      for (iy = 0; iy < n->used; iy++) {
          *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
      }
    }

    /* now fix carry for next digit, W[ix+1] */
    W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
  }

  /* now we have to propagate the carries and
   * shift the words downward [all those least
   * significant digits we zeroed].
   */
  {
    register mp_digit *tmpx;
    register mp_word *_W, *_W1;

    /* nox fix rest of carries */

    /* alias for current word */
    _W1 = W + ix;

    /* alias for next word, where the carry goes */
    _W = W + ++ix;

    for (; ix <= n->used * 2 + 1; ix++) {
      *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
    }

    /* copy out, A = A/b**n
     *
     * The result is A/b**n but instead of converting from an
     * array of mp_word to mp_digit than calling mp_rshd
     * we just copy them in the right order
     */

    /* alias for destination word */
    tmpx = x->dp;

    /* alias for shifted double precision result */
    _W = W + n->used;

    for (ix = 0; ix < n->used + 1; ix++) {
      *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
    }

    /* zero oldused digits, if the input a was larger than
     * m->used+1 we'll have to clear the digits
     */
    for (; ix < olduse; ix++) {
      *tmpx++ = 0;
    }
  }

  /* set the max used and clamp */
  x->used = n->used + 1;
  mp_clamp (x);

  /* if A >= m then A = A - m */
  if (mp_cmp_mag (x, n) != MP_LT) {
    return s_mp_sub (x, n, x);
  }
  return MP_OKAY;
}
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
int s_mp_sqr (mp_int * a, mp_int * b)
{
  mp_int  t;
  int     res, ix, iy, pa;
  mp_word r;
  mp_digit u, tmpx, *tmpt;

  pa = a->used;
  if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
    return res;
  }

  /* default used is maximum possible size */
  t.used = 2*pa + 1;

  for (ix = 0; ix < pa; ix++) {
    /* first calculate the digit at 2*ix */
    /* calculate double precision result */
    r = ((mp_word) t.dp[2*ix]) +
        ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);

    /* store lower part in result */
    t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));

    /* get the carry */
    u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));

    /* left hand side of A[ix] * A[iy] */
    tmpx        = a->dp[ix];

    /* alias for where to store the results */
    tmpt        = t.dp + (2*ix + 1);
    
    for (iy = ix + 1; iy < pa; iy++) {
      /* first calculate the product */
      r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);

      /* now calculate the double precision result, note we use
       * addition instead of *2 since it's easier to optimize
       */
      r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);

      /* store lower part */
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));

      /* get carry */
      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
    }
    /* propagate upwards */
    while (u != ((mp_digit) 0)) {
      r       = ((mp_word) *tmpt) + ((mp_word) u);
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
    }
  }

  mp_clamp (&t);
  mp_exch (&t, b);
  mp_clear (&t);
  return MP_OKAY;
}
Beispiel #14
0
/* low level addition, based on HAC pp.594, Algorithm 14.7 */
int
s_mp_add (mp_int * a, mp_int * b, mp_int * c)
{
  mp_int *x;
  int     olduse, res, min, max;

  /* find sizes, we let |a| <= |b| which means we have to sort
   * them.  "x" will point to the input with the most digits
   */
  if (a->used > b->used) {
    min = b->used;
    max = a->used;
    x = a;
  } else {
    min = a->used;
    max = b->used;
    x = b;
  }

  /* init result */
  if (c->alloc < max + 1) {
    if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
      return res;
    }
  }

  /* get old used digit count and set new one */
  olduse = c->used;
  c->used = max + 1;

  {
    register mp_digit u, *tmpa, *tmpb, *tmpc;
    register int i;

    /* alias for digit pointers */

    /* first input */
    tmpa = a->dp;

    /* second input */
    tmpb = b->dp;

    /* destination */
    tmpc = c->dp;

    /* zero the carry */
    u = 0;
    for (i = 0; i < min; i++) {
      /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
      *tmpc = *tmpa++ + *tmpb++ + u;

      /* U = carry bit of T[i] */
      u = *tmpc >> ((mp_digit)DIGIT_BIT);

      /* take away carry bit from T[i] */
      *tmpc++ &= MP_MASK;
    }

    /* now copy higher words if any, that is in A+B 
     * if A or B has more digits add those in 
     */
    if (min != max) {
      for (; i < max; i++) {
        /* T[i] = X[i] + U */
        *tmpc = x->dp[i] + u;

        /* U = carry bit of T[i] */
        u = *tmpc >> ((mp_digit)DIGIT_BIT);

        /* take away carry bit from T[i] */
        *tmpc++ &= MP_MASK;
      }
    }

    /* add carry */
    *tmpc++ = u;

    /* clear digits above oldused */
    for (i = c->used; i < olduse; i++) {
      *tmpc++ = 0;
    }
  }

  mp_clamp (c);
  return MP_OKAY;
}
int mp_toom_cook_5_mul(mp_int *a, mp_int *b, mp_int *c)
{
   mp_int w1, w2, w3, w4, w5, w6, w7, w8, w9;
   mp_int tmp1, tmp2;
   mp_int a0, a1, a2, a3, a4;
   mp_int b0, b1, b2, b3, b4;
   int e = MP_OKAY;
   int B, count, sign;

   B = (MAX(a->used, b->used)) / 5;


   sign = (a->sign != b->sign) ? MP_NEG : MP_ZPOS;
   if (MIN(a->used, b->used) < TOOM_COOK_5_MUL_CO) {
      if ((e = mp_mul(a, b, c)) != MP_OKAY) {
         return e;
      }
      c->sign = sign;
      return MP_OKAY;
   }



   if ((e =
           mp_init_multi(&w1, &w2, &w3, &w4, &w5, &w6, &w7, &w8, &w9, &tmp1, &tmp2,
                         //&a0, &a1, &a2, &a3, &a4, &b0, &b1, &b2, &b3, &b4,
                         NULL)) != MP_OKAY) {
      goto ERR0;
      //goto ERR;
   }

   if ((e = mp_init_size(&a0, B)) != MP_OKAY) {
      goto ERRa0;
   }
   if ((e = mp_init_size(&a1, B)) != MP_OKAY) {
      goto ERRa1;
   }
   if ((e = mp_init_size(&a2, B)) != MP_OKAY) {
      goto ERRa2;
   }
   if ((e = mp_init_size(&a3, B)) != MP_OKAY) {
      goto ERRa3;
   }
   if ((e = mp_init_size(&a4, B)) != MP_OKAY) {
      goto ERRa4;
   }

   if ((e = mp_init_size(&b0, B)) != MP_OKAY) {
      goto ERRb0;
   }
   if ((e = mp_init_size(&b1, B)) != MP_OKAY) {
      goto ERRb1;
   }
   if ((e = mp_init_size(&b2, B)) != MP_OKAY) {
      goto ERRb2;
   }
   if ((e = mp_init_size(&b3, B)) != MP_OKAY) {
      goto ERRb3;
   }
   if ((e = mp_init_size(&b4, B)) != MP_OKAY) {
      goto ERRb4;
   }
   // A = a4*x^4 + a3*x^3 + a2*x^2 + a1*x + a0
   for (count = 0; count < a->used; count++) {
      switch (count / B) {
      case 0:
         a0.dp[count] = a->dp[count];
         a0.used++;
         break;
      case 1:
         a1.dp[count - B] = a->dp[count];
         a1.used++;
         break;
      case 2:
         a2.dp[count - 2 * B] = a->dp[count];
         a2.used++;
         break;
      case 3:
         a3.dp[count - 3 * B] = a->dp[count];
         a3.used++;
         break;
      case 4:
         a4.dp[count - 4 * B] = a->dp[count];
         a4.used++;
         break;
      default:
         a4.dp[count - 4 * B] = a->dp[count];
         a4.used++;
         break;
      }
   }
   mp_clamp(&a0);
   mp_clamp(&a1);
   mp_clamp(&a2);
   mp_clamp(&a3);
   mp_clamp(&a4);
   // B = b4*x^4 + b3*x^3 + b2*x^2 + b1*x + b0
   for (count = 0; count < b->used; count++) {
      switch (count / B) {
      case 0:
         b0.dp[count] = b->dp[count];
         b0.used++;
         break;
      case 1:
         b1.dp[count - B] = b->dp[count];
         b1.used++;
         break;
      case 2:
         b2.dp[count - 2 * B] = b->dp[count];
         b2.used++;
         break;
      case 3:
         b3.dp[count - 3 * B] = b->dp[count];
         b3.used++;
         break;
      case 4:
         b4.dp[count - 4 * B] = b->dp[count];
         b4.used++;
         break;
      default:
         b4.dp[count - 4 * B] = b->dp[count];
         b4.used++;
         break;
      }
   }
   mp_clamp(&b0);
   mp_clamp(&b1);
   mp_clamp(&b2);
   mp_clamp(&b3);
   mp_clamp(&b4);


   /*
     if ((e = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
       goto ERR;
     }

     if ((e = mp_copy(a, &a1)) != MP_OKAY) {
       goto ERR;
     }
     mp_rshd(&a1, B);
     mp_mod_2d(&a1, DIGIT_BIT * B, &a1);

     if ((e = mp_copy(a, &a2)) != MP_OKAY) {
       goto ERR;
     }
     mp_rshd(&a2, B * 2);
     mp_mod_2d(&a2, DIGIT_BIT * B, &a2);

     if ((e = mp_copy(a, &a3)) != MP_OKAY) {
       goto ERR;
     }
     mp_rshd(&a3, B * 3);

     mp_mod_2d(&a3, DIGIT_BIT * B, &a3);

     if ((e = mp_copy(a, &a4)) != MP_OKAY) {
       goto ERR;
     }
     mp_rshd(&a4, B * 4);



     if ((e = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
       goto ERR;
     }

     if ((e = mp_copy(a, &b1)) != MP_OKAY) {
       goto ERR;
     }
     mp_rshd(&b1, B);
     mp_mod_2d(&b1, DIGIT_BIT * B, &b1);

     if ((e = mp_copy(b, &b2)) != MP_OKAY) {
       goto ERR;
     }
     mp_rshd(&b2, B * 2);
     mp_mod_2d(&b2, DIGIT_BIT * B, &b2);

     if ((e = mp_copy(b, &b3)) != MP_OKAY) {
       goto ERR;
     }
     mp_rshd(&b3, B * 3);

     mp_mod_2d(&b3, DIGIT_BIT * B, &b3);

     if ((e = mp_copy(b, &b4)) != MP_OKAY) {
       goto ERR;
     }
     mp_rshd(&b4, B * 4);
   */

// S1 = a4*b4
   if ((e = mp_mul(&a4, &b4, &w1)) != MP_OKAY) {
      goto ERR;
   }
// S9 = a0*b0
   if ((e = mp_mul(&a0, &b0, &w9)) != MP_OKAY) {
      goto ERR;
   }

// S2 = (a0- 2*a1 +4*a2 -8*a3 +16*a4)

   if ((e = mp_mul_2d(&a1, 1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }            // 2*a1     = tmp1
   if ((e = mp_sub(&a0, &tmp1, &w2)) != MP_OKAY) {
      goto ERR;
   }            // a0- 2*a1 = a0 - tmp1 = w2
   if ((e = mp_mul_2d(&a2, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }            // 4*a2     = tmp1
   if ((e = mp_add(&w2, &tmp1, &w2)) != MP_OKAY) {
      goto ERR;
   }            // a0- 2*a1 +4*a2 = w2 + tmp1 = w2
   if ((e = mp_mul_2d(&a3, 3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }            // 8*a3     = tmp1
   if ((e = mp_sub(&w2, &tmp1, &w2)) != MP_OKAY) {
      goto ERR;
   }            // a0- 2*a1 +4*a2 -8*a3 = w2 - tmp1 = w2
   if ((e = mp_mul_2d(&a4, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }            // 16*a4     = tmp1
   if ((e = mp_add(&w2, &tmp1, &w2)) != MP_OKAY) {
      goto ERR;
   }            // a0- 2*a1 +4*a2 -8*a3 +16*a4 = w2 + tmp1 = w2

//    * (b0- 2*b1 +4*b2 -8*b3 +16*b4)
   if ((e = mp_mul_2d(&b1, 1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&b0, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b2, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b3, 3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b4, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_mul(&tmp2, &w2, &w2)) != MP_OKAY) {
      goto ERR;
   }
// S5 = (a0+ 2*a1+ 4*a2+ 8*a3+ 16*a4)
   if ((e = mp_mul_2d(&a1, 1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&a0, &tmp1, &w5)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a2, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w5, &tmp1, &w5)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a3, 3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w5, &tmp1, &w5)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a4, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w5, &tmp1, &w5)) != MP_OKAY) {
      goto ERR;
   }
// *(b0+ 2*b1+ 4*b2+ 8*b3+ 16*b4)
   if ((e = mp_mul_2d(&b1, 1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&b0, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b2, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b3, 3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b4, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_mul(&tmp2, &w5, &w5)) != MP_OKAY) {
      goto ERR;
   }
// S3 = (a4+ 2*a3+ 4*a2+ 8*a1+ 16*a0)
   if ((e = mp_mul_2d(&a3, 1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&a4, &tmp1, &w3)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a2, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w3, &tmp1, &w3)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a1, 3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w3, &tmp1, &w3)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a0, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w3, &tmp1, &w3)) != MP_OKAY) {
      goto ERR;
   }
// *    (b4+ 2*b3+ 4*b2+ 8*b1+ 16*b0)
   if ((e = mp_mul_2d(&b3, 1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&b4, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b2, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b1, 3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b0, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_mul(&tmp2, &w3, &w3)) != MP_OKAY) {
      goto ERR;
   }
// S8 = (a4- 2*a3+ 4*a2- 8*a1+ 16*a0)
   if ((e = mp_mul_2d(&a3, 1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&a4, &tmp1, &w8)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a2, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w8, &tmp1, &w8)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a1, 3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w8, &tmp1, &w8)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a0, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w8, &tmp1, &w8)) != MP_OKAY) {
      goto ERR;
   }
//*     (b4- 2*b3+ 4*b2- 8*b1+ 16*b0)
   if ((e = mp_mul_2d(&b3, 1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&b4, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b2, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b1, 3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b0, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_mul(&tmp2, &w8, &w8)) != MP_OKAY) {
      goto ERR;
   }
// S4 = (a0+ 4*a1+ 16*a2+ 64*a3+ 256*a4)
   if ((e = mp_mul_2d(&a1, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&a0, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a2, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a3, 6, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&a4, 8, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
//*     (b0+ 4*b1+ 16*b2+ 64*b3+ 256*b4)
   if ((e = mp_mul_2d(&b1, 2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&b0, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b2, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b3, 6, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_mul_2d(&b4, 8, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp2, &tmp1, &tmp2)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_mul(&tmp2, &w4, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S6 = (a0- a1+ a2- a3 +a4)
   if ((e = mp_sub(&a0, &a1, &w6)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w6, &a2, &w6)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w6, &a3, &w6)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w6, &a4, &w6)) != MP_OKAY) {
      goto ERR;
   }
// *    (b0- b1+ b2- b3+ b4)
   if ((e = mp_sub(&b0, &b1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &b2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&tmp1, &b3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &b4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_mul(&tmp1, &w6, &w6)) != MP_OKAY) {
      goto ERR;
   }
// S7 = (a0+ a1+ a2+ a3+ a4)
   if ((e = mp_add(&a0, &a1, &w7)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w7, &a2, &w7)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w7, &a3, &w7)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w7, &a4, &w7)) != MP_OKAY) {
      goto ERR;
   }
// *    (b0+ b1+ b2+ b3+ b4)
   if ((e = mp_add(&b0, &b1, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &b2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &b3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &b4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_mul(&tmp1, &w7, &w7)) != MP_OKAY) {
      goto ERR;
   }
// S6 -= S7
   if ((e = mp_sub(&w6, &w7, &w6)) != MP_OKAY) {
      goto ERR;
   }
// S2 -= S5
   if ((e = mp_sub(&w2, &w5, &w2)) != MP_OKAY) {
      goto ERR;
   }
// S4 -= S9
   if ((e = mp_sub(&w4, &w9, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S4 -= (2^16*S1)
   if ((e = mp_mul_2d(&w1, 16, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S8 -= S3
   if ((e = mp_sub(&w8, &w3, &w8)) != MP_OKAY) {
      goto ERR;
   }
// S6 /= 2
   if ((e = mp_div_2d(&w6, 1, &w6, NULL)) != MP_OKAY) {
      goto ERR;
   }
// S5 *= 2
   if ((e = mp_mul_2d(&w5, 1, &w5)) != MP_OKAY) {
      goto ERR;
   }
// S5 += S2
   if ((e = mp_add(&w5, &w2, &w5)) != MP_OKAY) {
      goto ERR;
   }
// S2 = -S2
   if ((e = mp_neg(&w2, &w2)) != MP_OKAY) {
      goto ERR;
   }
// S8 = -S8
   if ((e = mp_neg(&w8, &w8)) != MP_OKAY) {
      goto ERR;
   }
// S7 += S6
   if ((e = mp_add(&w7, &w6, &w7)) != MP_OKAY) {
      goto ERR;
   }
// S6 = -S6
   if ((e = mp_neg(&w6, &w6)) != MP_OKAY) {
      goto ERR;
   }
// S3 -= S7
   if ((e = mp_sub(&w3, &w7, &w3)) != MP_OKAY) {
      goto ERR;
   }
// S5 -= (512*S7)
   if ((e = mp_mul_2d(&w7, 9, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w5, &tmp1, &w5)) != MP_OKAY) {
      goto ERR;
   }
// S3 *= 2
   if ((e = mp_mul_2d(&w3, 1, &w3)) != MP_OKAY) {
      goto ERR;
   }
// S3 -= S8
   if ((e = mp_sub(&w3, &w8, &w3)) != MP_OKAY) {
      goto ERR;
   }
// S7 -= S1
   if ((e = mp_sub(&w7, &w1, &w7)) != MP_OKAY) {
      goto ERR;
   }
// S7 -= S9
   if ((e = mp_sub(&w7, &w9, &w7)) != MP_OKAY) {
      goto ERR;
   }
// S8 += S2
   if ((e = mp_add(&w8, &w2, &w8)) != MP_OKAY) {
      goto ERR;
   }
// S5 += S3
   if ((e = mp_add(&w5, &w3, &w5)) != MP_OKAY) {
      goto ERR;
   }
// S8 -= (80*S6)
   if ((e = mp_mul_d(&w6, 80, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w8, &tmp1, &w8)) != MP_OKAY) {
      goto ERR;
   }
// S3 -= (510*S9)
   if ((e = mp_mul_d(&w9, 510, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
      goto ERR;
   }
// S4 -= S2
   if ((e = mp_sub(&w4, &w2, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S3 *= 3
   if ((e = mp_mul_d(&w3, 3, &w3)) != MP_OKAY) {
      goto ERR;
   }
// S3 += S5
   if ((e = mp_add(&w3, &w5, &w3)) != MP_OKAY) {
      goto ERR;
   }
// S8 /= 180 \\ division by 180
   if ((e = mp_div_d(&w8, 180, &w8, NULL)) != MP_OKAY) {
      goto ERR;
   }
// S5 += (378*S7)
   if ((e = mp_mul_d(&w7, 378, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w5, &tmp1, &w5)) != MP_OKAY) {
      goto ERR;
   }
// S2 /= 4
   if ((e = mp_div_2d(&w2, 2, &w2, NULL)) != MP_OKAY) {
      goto ERR;
   }
// S6 -= S2
   if ((e = mp_sub(&w6, &w2, &w6)) != MP_OKAY) {
      goto ERR;
   }
// S5 /= (-72) \\ division by -72
   if ((e = mp_div_d(&w5, 72, &w5, NULL)) != MP_OKAY) {
      goto ERR;
   }
   if (&w5.sign == MP_ZPOS)
      (&w5)->sign = MP_NEG;
   (&w5)->sign = MP_ZPOS;
// S3 /= (-360) \\ division by -360
   if ((e = mp_div_d(&w3, 360, &w3, NULL)) != MP_OKAY) {
      goto ERR;
   }
   if (&w3.sign == MP_ZPOS)
      (&w3)->sign = MP_NEG;
   (&w3)->sign = MP_ZPOS;
// S2 -= S8
   if ((e = mp_sub(&w2, &w8, &w2)) != MP_OKAY) {
      goto ERR;
   }
// S7 -= S3
   if ((e = mp_sub(&w7, &w3, &w7)) != MP_OKAY) {
      goto ERR;
   }
// S4 -= (256*S5)
   if ((e = mp_mul_2d(&w5, 8, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S3 -= S5
   if ((e = mp_sub(&w3, &w5, &w3)) != MP_OKAY) {
      goto ERR;
   }
// S4 -= (4096*S3)
   if ((e = mp_mul_2d(&w3, 12, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S4 -= (16*S7)
   if ((e = mp_mul_2d(&w7, 4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_sub(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S4 += (256*S6)
   if ((e = mp_mul_2d(&w6, 8, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S6 += S2
   if ((e = mp_add(&w6, &w2, &w6)) != MP_OKAY) {
      goto ERR;
   }
// S2 *= 180
   if ((e = mp_mul_d(&w2, 180, &w2)) != MP_OKAY) {
      goto ERR;
   }
// S2 += S4
   if ((e = mp_add(&w2, &w4, &w2)) != MP_OKAY) {
      goto ERR;
   }
// S2 /= 11340 \\ division by 11340
   if ((e = mp_div_d(&w2, 11340, &w2, NULL)) != MP_OKAY) {
      goto ERR;
   }
// S4 += (720*S6)
   if ((e = mp_mul_d(&w6, 720, &tmp1)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&w4, &tmp1, &w4)) != MP_OKAY) {
      goto ERR;
   }
// S4 /= (-2160) \\ division by -2160
   if ((e = mp_div_d(&w4, 2160, &w4, NULL)) != MP_OKAY) {
      goto ERR;
   }
   if (&w4.sign == MP_ZPOS)
      (&w4)->sign = MP_NEG;
   (&w4)->sign = MP_ZPOS;
// S6 -= S4
   if ((e = mp_sub(&w6, &w4, &w6)) != MP_OKAY) {
      goto ERR;
   }
// S8 -= S2
   if ((e = mp_sub(&w8, &w2, &w8)) != MP_OKAY) {
      goto ERR;
   }
// P = S1*x^8 + S2*x^7 + S3*x^6 + S4*x^5 + S5*x^4 + S6*x^3 + S7*x^2 + S8*x + S9
   if ((e = mp_copy(&w9, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_lshd(&w8, B)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &w8, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_lshd(&w7, 2 * B)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &w7, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_lshd(&w6, 3 * B)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &w6, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_lshd(&w5, 4 * B)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &w5, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_lshd(&w4, 5 * B)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &w4, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_lshd(&w3, 6 * B)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &w3, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_lshd(&w2, 7 * B)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &w2, &tmp1)) != MP_OKAY) {
      goto ERR;
   }

   if ((e = mp_lshd(&w1, 8 * B)) != MP_OKAY) {
      goto ERR;
   }
   if ((e = mp_add(&tmp1, &w1, c)) != MP_OKAY) {
      goto ERR;
   }
// P - A*B \\ == zero
   c->sign = sign;
ERR:

ERRb4:
   mp_clear(&b4);
ERRb3:
   mp_clear(&b3);
ERRb2:
   mp_clear(&b2);
ERRb1:
   mp_clear(&b1);
ERRb0:
   mp_clear(&b0);
ERRa4:
   mp_clear(&a4);
ERRa3:
   mp_clear(&a3);
ERRa2:
   mp_clear(&a2);
ERRa1:
   mp_clear(&a1);
ERRa0:
   mp_clear(&a0);

ERR0:

   mp_clear_multi(&w1, &w2, &w3, &w4, &w5, &w6, &w7, &w8, &w9, &tmp1, &tmp2,
                  // &a0, &a1, &a2, &a3, &a4, &b0, &b1, &b2, &b3, &b4,
                  NULL);
   return e;
}
/* this is a modified version of fast_s_mul_digs that only produces
 * output digits *above* digs.  See the comments for fast_s_mul_digs
 * to see how it works.
 *
 * This is used in the Barrett reduction since for one of the multiplications
 * only the higher digits were needed.  This essentially halves the work.
 *
 * Based on Algorithm 14.12 on pp.595 of HAC.
 */
int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
  int     olduse, res, pa, ix, iz;
  mp_digit W[MP_WARRAY];
  mp_word  _W;

  /* grow the destination as required */
  pa = a->used + b->used;
  if (c->alloc < pa) {
    if ((res = mp_grow (c, pa)) != MP_OKAY) {
      return res;
    }
  }

  /* number of output digits to produce */
  pa = a->used + b->used;
  _W = 0;
  for (ix = digs; ix < pa; ix++) { 
      int      tx, ty, iy;
      mp_digit *tmpx, *tmpy;

      /* get offsets into the two bignums */
      ty = MIN(b->used-1, ix);
      tx = ix - ty;

      /* setup temp aliases */
      tmpx = a->dp + tx;
      tmpy = b->dp + ty;

      /* this is the number of times the loop will iterrate, essentially its 
         while (tx++ < a->used && ty-- >= 0) { ... }
       */
      iy = MIN(a->used-tx, ty+1);

      /* execute loop */
      for (iz = 0; iz < iy; iz++) {
         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
      }

      /* store term */
      W[ix] = ((mp_digit)_W) & MP_MASK;

      /* make next carry */
      _W = _W >> ((mp_word)DIGIT_BIT);
  }
  
  /* setup dest */
  olduse  = c->used;
  c->used = pa;

  {
    mp_digit *tmpc;

    tmpc = c->dp + digs;
    for (ix = digs; ix < pa; ix++) {
      /* now extract the previous digit [below the carry] */
      *tmpc++ = W[ix];
    }

    /* clear unused digits [that existed in the old copy of c] */
    for (; ix < olduse; ix++) {
      *tmpc++ = 0;
    }
  }
  mp_clamp (c);
  return MP_OKAY;
}
/* computes xR**-1 == x (mod N) via Montgomery Reduction */
int
mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{
  int     ix, res, digs;
  mp_digit mu;

  /* can the fast reduction [comba] method be used?
   *
   * Note that unlike in mul you're safely allowed *less*
   * than the available columns [255 per default] since carries
   * are fixed up in the inner loop.
   */
  digs = n->used * 2 + 1;
  if ((digs < MP_WARRAY) &&
      n->used <
      (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
    return fast_mp_montgomery_reduce (x, n, rho);
  }

  /* grow the input as required */
  if (x->alloc < digs) {
    if ((res = mp_grow (x, digs)) != MP_OKAY) {
      return res;
    }
  }
  x->used = digs;

  for (ix = 0; ix < n->used; ix++) {
    /* mu = ai * rho mod b
     *
     * The value of rho must be precalculated via
     * montgomery_setup() such that
     * it equals -1/n0 mod b this allows the
     * following inner loop to reduce the
     * input one digit at a time
     */
    mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);

    /* a = a + mu * m * b**i */
    {
      register int iy;
      register mp_digit *tmpn, *tmpx, u;
      register mp_word r;

      /* alias for digits of the modulus */
      tmpn = n->dp;

      /* alias for the digits of x [the input] */
      tmpx = x->dp + ix;

      /* set the carry to zero */
      u = 0;

      /* Multiply and add in place */
      for (iy = 0; iy < n->used; iy++) {
        /* compute product and sum */
        r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
                  ((mp_word) u) + ((mp_word) * tmpx);

        /* get carry */
        u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));

        /* fix digit */
        *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
      }
      /* At this point the ix'th digit of x should be zero */


      /* propagate carries upwards as required*/
      while (u) {
        *tmpx   += u;
        u        = *tmpx >> DIGIT_BIT;
        *tmpx++ &= MP_MASK;
      }
    }
  }

  /* at this point the n.used'th least
   * significant digits of x are all zero
   * which means we can shift x to the
   * right by n.used digits and the
   * residue is unchanged.
   */

  /* x = x/b**n.used */
  mp_clamp(x);
  mp_rshd (x, n->used);

  /* if x >= n then x = x - n */
  if (mp_cmp_mag (x, n) != MP_LT) {
    return s_mp_sub (x, n, x);
  }

  return MP_OKAY;
}
Beispiel #18
0
/* single digit subtraction */
int
mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
{
  mp_digit *tmpa, *tmpc, mu;
  int       res, ix, oldused;

  /* grow c as required */
  if (c->alloc < a->used + 1) {
     if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
        return res;
     }
  }

  /* if a is negative just do an unsigned
   * addition [with fudged signs]
   */
  if (a->sign == MP_NEG) {
     a->sign = MP_ZPOS;
     res     = mp_add_d(a, b, c);
     a->sign = c->sign = MP_NEG;

     /* clamp */
     mp_clamp(c);

     return res;
  }

  /* setup regs */
  oldused = c->used;
  tmpa    = a->dp;
  tmpc    = c->dp;

  /* if a <= b simply fix the single digit */
  if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
     if (a->used == 1) {
        *tmpc++ = b - *tmpa;
     } else {
        *tmpc++ = b;
     }
     ix      = 1;

     /* negative/1digit */
     c->sign = MP_NEG;
     c->used = 1;
  } else {
     /* positive/size */
     c->sign = MP_ZPOS;
     c->used = a->used;

     /* subtract first digit */
     *tmpc    = *tmpa++ - b;
     mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
     *tmpc++ &= MP_MASK;

     /* handle rest of the digits */
     for (ix = 1; ix < a->used; ix++) {
        *tmpc    = *tmpa++ - mu;
        mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
        *tmpc++ &= MP_MASK;
     }
  }

  /* zero excess digits */
  while (ix++ < oldused) {
     *tmpc++ = 0;
  }
  mp_clamp(c);
  return MP_OKAY;
}
/* Fast (comba) multiplier
 *
 * This is the fast column-array [comba] multiplier.  It is 
 * designed to compute the columns of the product first 
 * then handle the carries afterwards.  This has the effect 
 * of making the nested loops that compute the columns very
 * simple and schedulable on super-scalar processors.
 *
 * This has been modified to produce a variable number of 
 * digits of output so if say only a half-product is required 
 * you don't have to compute the upper half (a feature 
 * required for fast Barrett reduction).
 *
 * Based on Algorithm 14.12 on pp.595 of HAC.
 *
 */
int fast_s_mp_mul_digs(mp_int * a, mp_int * b, mp_int * c, int digs)
{
	int olduse, res, pa, ix;
	extern mp_word *W;

	/* grow the destination as required */
	if (c->alloc < digs) {
		if ((res = mp_grow(c, digs)) != MP_OKAY) {
			return res;
		}
	}

	/* clear temp buf (the columns) */
	memset(W, 0, sizeof(mp_word) * digs);

	/* calculate the columns */
	pa = a->used;
	for (ix = 0; ix < pa; ix++) {
		/* this multiplier has been modified to allow you to 
		 * control how many digits of output are produced.  
		 * So at most we want to make upto "digs" digits of output.
		 *
		 * this adds products to distinct columns (at ix+iy) of W
		 * note that each step through the loop is not dependent on
		 * the previous which means the compiler can easily unroll
		 * the loop without scheduling problems
		 */
		{
			register mp_digit tmpx, *tmpy;
			register mp_word *_W;
			register int iy, pb;

			/* alias for the the word on the left e.g. A[ix] * A[iy] */
			tmpx = a->dp[ix];

			/* alias for the right side */
			tmpy = b->dp;

			/* alias for the columns, each step through the loop adds a new
			   term to each column
			 */
			_W = W + ix;

			/* the number of digits is limited by their placement.  E.g.
			   we avoid multiplying digits that will end up above the # of
			   digits of precision requested
			 */
			pb = MIN(b->used, digs - ix);

			for (iy = 0; iy < pb; iy++) {
				*_W++ +=
				    ((mp_word) tmpx) * ((mp_word) *
							tmpy++);
			}
		}

	}

	/* setup dest */
	olduse = c->used;
	c->used = digs;

	{
		register mp_digit *tmpc;

		/* At this point W[] contains the sums of each column.  To get the
		 * correct result we must take the extra bits from each column and
		 * carry them down
		 *
		 * Note that while this adds extra code to the multiplier it 
		 * saves time since the carry propagation is removed from the 
		 * above nested loop.This has the effect of reducing the work 
		 * from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the 
		 * cost of the shifting.  On very small numbers this is slower 
		 * but on most cryptographic size numbers it is faster.
		 *
		 * In this particular implementation we feed the carries from
		 * behind which means when the loop terminates we still have one
		 * last digit to copy
		 */
		tmpc = c->dp;
		for (ix = 1; ix < digs; ix++) {
			/* forward the carry from the previous temp */
			W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));

			/* now extract the previous digit [below the carry] */
			*tmpc++ =
			    (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
		}
		/* fetch the last digit */
		*tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));

		/* clear unused digits [that existed in the old copy of c] */
		for (; ix < olduse; ix++) {
			*tmpc++ = 0;
		}
	}
	mp_clamp(c);
	return MP_OKAY;
}
Beispiel #20
0
/* Karatsuba squaring, computes b = a*a using three 
 * half size squarings
 *
 * See comments of karatsuba_mul for details.  It 
 * is essentially the same algorithm but merely 
 * tuned to perform recursive squarings.
 */
int mp_karatsuba_sqr (mp_int * a, mp_int * b)
{
  mp_int  x0, x1, t1, t2, x0x0, x1x1;
  int     B, err;

  err = MP_MEM;

  /* min # of digits */
  B = USED(a);

  /* now divide in two */
  B = B >> 1;

  /* init copy all the temps */
  if (mp_init_size (&x0, B) != MP_OKAY)
    goto ERR;
  if (mp_init_size (&x1, USED(a) - B) != MP_OKAY)
    goto X0;

  /* init temps */
  if (mp_init_size (&t1, USED(a) * 2) != MP_OKAY)
    goto X1;
  if (mp_init_size (&t2, USED(a) * 2) != MP_OKAY)
    goto T1;
  if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
    goto T2;
  if (mp_init_size (&x1x1, (USED(a) - B) * 2) != MP_OKAY)
    goto X0X0;

  {
    register int x;
    register mp_digit *dst, *src;

    src = DIGITS(a);

    /* now shift the digits */
    dst = DIGITS(&x0);
    for (x = 0; x < B; x++) {
      *dst++ = *src++;
    }

    dst = DIGITS(&x1);
    for (x = B; x < USED(a); x++) {
      *dst++ = *src++;
    }
  }

  SET_USED(&x0,B);
  SET_USED(&x1,USED(a) - B);

  mp_clamp (&x0);

  /* now calc the products x0*x0 and x1*x1 */
  if (mp_sqr (&x0, &x0x0) != MP_OKAY)
    goto X1X1;           /* x0x0 = x0*x0 */
  if (mp_sqr (&x1, &x1x1) != MP_OKAY)
    goto X1X1;           /* x1x1 = x1*x1 */

  /* now calc (x1+x0)**2 */
  if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
    goto X1X1;           /* t1 = x1 - x0 */
  if (mp_sqr (&t1, &t1) != MP_OKAY)
    goto X1X1;           /* t1 = (x1 - x0) * (x1 - x0) */

  /* add x0y0 */
  if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
    goto X1X1;           /* t2 = x0x0 + x1x1 */
  if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY)
    goto X1X1;           /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */

  /* shift by B */
  if (mp_lshd (&t1, B) != MP_OKAY)
    goto X1X1;           /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
  if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
    goto X1X1;           /* x1x1 = x1x1 << 2*B */

  if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
    goto X1X1;           /* t1 = x0x0 + t1 */
  if (mp_add (&t1, &x1x1, b) != MP_OKAY)
    goto X1X1;           /* t1 = x0x0 + t1 + x1x1 */

  err = MP_OKAY;

X1X1:mp_clear (&x1x1);
X0X0:mp_clear (&x0x0);
T2:mp_clear (&t2);
T1:mp_clear (&t1);
X1:mp_clear (&x1);
X0:mp_clear (&x0);
ERR:
  return err;
}
Beispiel #21
0
/* Karatsuba squaring, computes b = a*a using three
 * half size squarings
 *
 * See comments of karatsuba_mul for details.  It
 * is essentially the same algorithm but merely
 * tuned to perform recursive squarings.
 */
int mp_karatsuba_sqr(const mp_int *a, mp_int *b)
{
   mp_int  x0, x1, t1, t2, x0x0, x1x1;
   int     B, err;

   err = MP_MEM;

   /* min # of digits */
   B = a->used;

   /* now divide in two */
   B = B >> 1;

   /* init copy all the temps */
   if (mp_init_size(&x0, B) != MP_OKAY)
      goto LBL_ERR;
   if (mp_init_size(&x1, a->used - B) != MP_OKAY)
      goto X0;

   /* init temps */
   if (mp_init_size(&t1, a->used * 2) != MP_OKAY)
      goto X1;
   if (mp_init_size(&t2, a->used * 2) != MP_OKAY)
      goto T1;
   if (mp_init_size(&x0x0, B * 2) != MP_OKAY)
      goto T2;
   if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY)
      goto X0X0;

   {
      int x;
      mp_digit *dst, *src;

      src = a->dp;

      /* now shift the digits */
      dst = x0.dp;
      for (x = 0; x < B; x++) {
         *dst++ = *src++;
      }

      dst = x1.dp;
      for (x = B; x < a->used; x++) {
         *dst++ = *src++;
      }
   }

   x0.used = B;
   x1.used = a->used - B;

   mp_clamp(&x0);

   /* now calc the products x0*x0 and x1*x1 */
   if (mp_sqr(&x0, &x0x0) != MP_OKAY)
      goto X1X1;           /* x0x0 = x0*x0 */
   if (mp_sqr(&x1, &x1x1) != MP_OKAY)
      goto X1X1;           /* x1x1 = x1*x1 */

   /* now calc (x1+x0)**2 */
   if (s_mp_add(&x1, &x0, &t1) != MP_OKAY)
      goto X1X1;           /* t1 = x1 - x0 */
   if (mp_sqr(&t1, &t1) != MP_OKAY)
      goto X1X1;           /* t1 = (x1 - x0) * (x1 - x0) */

   /* add x0y0 */
   if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY)
      goto X1X1;           /* t2 = x0x0 + x1x1 */
   if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY)
      goto X1X1;           /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */

   /* shift by B */
   if (mp_lshd(&t1, B) != MP_OKAY)
      goto X1X1;           /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
   if (mp_lshd(&x1x1, B * 2) != MP_OKAY)
      goto X1X1;           /* x1x1 = x1x1 << 2*B */

   if (mp_add(&x0x0, &t1, &t1) != MP_OKAY)
      goto X1X1;           /* t1 = x0x0 + t1 */
   if (mp_add(&t1, &x1x1, b) != MP_OKAY)
      goto X1X1;           /* t1 = x0x0 + t1 + x1x1 */

   err = MP_OKAY;

X1X1:
   mp_clear(&x1x1);
X0X0:
   mp_clear(&x0x0);
T2:
   mp_clear(&t2);
T1:
   mp_clear(&t1);
X1:
   mp_clear(&x1);
X0:
   mp_clear(&x0);
LBL_ERR:
   return err;
}
/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
int
s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
{
  int     olduse, res, min, max;

  /* find sizes */
  min = b->used;
  max = a->used;

  /* init result */
  if (c->alloc < max) {
    if ((res = mp_grow (c, max)) != MP_OKAY) {
      return res;
    }
  }
  olduse = c->used;
  c->used = max;

  {
    register mp_digit u, *tmpa, *tmpb, *tmpc;
    register int i;

    /* alias for digit pointers */
    tmpa = a->dp;
    tmpb = b->dp;
    tmpc = c->dp;

    /* set carry to zero */
    u = 0;
    for (i = 0; i < min; i++) {
      /* T[i] = A[i] - B[i] - U */
      *tmpc = *tmpa++ - *tmpb++ - u;

      /* U = carry bit of T[i]
       * Note this saves performing an AND operation since
       * if a carry does occur it will propagate all the way to the
       * MSB.  As a result a single shift is enough to get the carry
       */
      u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));

      /* Clear carry from T[i] */
      *tmpc++ &= MP_MASK;
    }

    /* now copy higher words if any, e.g. if A has more digits than B  */
    for (; i < max; i++) {
      /* T[i] = A[i] - U */
      *tmpc = *tmpa++ - u;

      /* U = carry bit of T[i] */
      u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));

      /* Clear carry from T[i] */
      *tmpc++ &= MP_MASK;
    }

    /* clear digits above used (since we may not have grown result above) */
    for (i = c->used; i < olduse; i++) {
      *tmpc++ = 0;
    }
  }

  mp_clamp (c);
  return MP_OKAY;
}