Пример #1
0
int z_pull_twos(z *n, int *j, z *p)
{
	/* Removes every factor of two from n (n is overwritten) and returns the
	 * parity of the number of halvings performed: 1 if odd, 0 if even.
	 * If that parity is 1 and the remainder computed from p below equals 8,
	 * the sign of *j is flipped.
	 * NOTE(review): the halving loop only stops once the low bit of n is set,
	 * so callers presumably guarantee n != 0 -- confirm. */
	z work, sq;
	fp_digit rem16;
	int odd_shifts = 0;

	zInit(&work);
	zInit(&sq);

	/* shift right one bit at a time, toggling the parity on every shift */
	for (; (n->val[0] & 1) == 0; odd_shifts ^= 1)
		zShiftRight(n,n,1);

	/* rem16 = (zExp(2,p) - 1) mod 16; the quotient written to sq is unused */
	zExp(2,p,&sq);
	zSub(&sq,&zOne,&work);
	rem16 = zShortDiv(&work,16,&sq);

	if (odd_shifts && rem16 == 8)
		*j = -(*j);

	zFree(&work);
	zFree(&sq);
	return odd_shifts;
}
Пример #2
0
void zHex2Dec(z *u, z *v)
{
	/* Convert u (binary limbs) into v, whose limbs are base-MAX_DEC_WORD
	 * "decimal words" stored least significant first.
	 *
	 * u is repeatedly divided by MAX_DEC_WORD; the remainder of the i-th
	 * division is the i-th decimal word, and the loop stops when the
	 * quotient reaches zero.  The sign of u is carried over to v->size.
	 *
	 * u is copied and its sign recorded before v is modified, so the call
	 * also works when u and v are the same object. */
	z a,b;
	fp_digit r;
	int su = abs(u->size);
	int negative = (u->size < 0);
	/* Decimal words hold less than binary limbs, so the output needs more
	 * of them.  The +1 matters for one-limb inputs: (int)(1 * 1.5) == 1, yet
	 * a full limb can exceed MAX_DEC_WORD and then needs two decimal words. */
	int approx_words = (int)((double)su * 1.5) + 1;

	zInit(&a);
	zInit(&b);

	if (a.alloc < approx_words)
	{
		zGrow(&a,approx_words);
		zClear(&a);
	}

	if (b.alloc < approx_words)
	{
		zGrow(&b,approx_words);
		zClear(&b);
	}

	/* take the working copy before the destination is cleared */
	zCopy(u,&a);

	if (v->alloc < approx_words)
		zGrow(v,approx_words);
	zClear(v);

	/* v->size runs one ahead of the last word written; corrected below */
	v->size = 1;
	do
	{
		r = zShortDiv(&a,MAX_DEC_WORD,&b);	/* b = quotient, r = remainder */
		v->val[v->size - 1] = r;
		v->size++;
		zCopy(&b,&a);
	} while (zCompare(&a,&zZero) != 0);
	v->size--;

	if (negative)
		v->size *= -1;

	zFree(&a);
	zFree(&b);
	return;
}
Пример #3
0
void monty_init(z *n)
{
	/* Fill the global montyconst structure (nhat, r, rhat, one) for the
	 * modulus n.  n is assumed to be odd, i.e. coprime to the limb base.
	 *
	 * Moduli of at most 16 limbs are set up through the fp_montgomery_*
	 * routines and TFM_MONTY is set to 1.  Larger moduli take the generic
	 * path below with TFM_MONTY set to 0. */
	z gcd, base, diff, spare;

	zInit(&montyconst.nhat);
	zInit(&montyconst.r);
	zInit(&montyconst.rhat);
	zInit(&montyconst.one);

	if (abs(n->size) > 16)
	{
		TFM_MONTY = 0;
	}
	else
	{
		fp_montgomery_setup(n,&montyconst.nhat.val[0]);
		fp_montgomery_calc_normalization(&montyconst.r,n);
		montyconst.one.size = 1;
		montyconst.one.val[0] = 1;
		to_monty(&montyconst.one,n);
		TFM_MONTY = 1;
		return;
	}

	zInit(&gcd);
	zInit(&base);
	zInit(&diff);
	zInit(&spare);

	/* base = the limb base: a single 1 in limb index 1 */
	base.size = 2;
	base.val[1] = 1;

	/* r = base^(n->size): a 1 placed one limb above the top limb of n */
	if (montyconst.r.alloc < n->size + 1)
		zGrow(&montyconst.r,n->size + 1);

	zClear(&montyconst.r);
	montyconst.r.size = n->size + 1;
	montyconst.r.val[montyconst.r.size - 1] = 1;

	/* nhat = -(n^-1) mod base, computed as base - (n^-1 mod base) */
	xGCD(n,&base,&montyconst.nhat,&montyconst.rhat,&gcd);
	zSub(&base,&montyconst.nhat,&diff);
	zCopy(&diff,&montyconst.nhat);

	/* the value 1, converted with to_monty */
	zCopy(&zOne,&montyconst.one);
	to_monty(&montyconst.one,n);

	zFree(&gcd);
	zFree(&base);
	zFree(&diff);
	zFree(&spare);
}
Пример #4
0
void to_monty(z *x, z *n)
{
	/* Replace x with (x * montyconst.r) mod n.
	 * montyconst.r must already have been set up for the modulus n. */
	z product, quotient;

	zInit(&product);
	zInit(&quotient);

	zMul(x,&montyconst.r,&product);
	/* the quotient is discarded; the remainder is written back into x */
	zDiv(&product,n,&quotient,x);

	zFree(&product);
	zFree(&quotient);
}
Пример #5
0
void zmModExp(z *a, z *b, z *u, z *nn)
{
	/* u = a^b mod nn by the right-to-left binary method, carried out with
	 * monty_mul / monty_sqr.  a must already be in Montgomery form; the
	 * exponent b is an ordinary integer.  The result starts from
	 * montyconst.one and is copied to u at the end. */
	z result, expo, base, scratch;

	zInit(&base);
	zInit(&expo);
	zInit(&result);
	zInit(&scratch);

	zCopy(&montyconst.one,&result);
	zCopy(a,&base);
	zCopy(b,&expo);

	for (;;)
	{
		if (isZero(&expo))
			break;

		/* low exponent bit set: fold the current power into the result */
		if (expo.val[0] & 0x1)
		{
			monty_mul(&result,&base,&scratch,nn);
			zCopy(&scratch,&result);
		}
		zShiftRight(&expo,&expo,1);

		/* advance to the next successive square of a */
		monty_sqr(&base,&scratch,nn);
		zCopy(&scratch,&base);
		if (base.size < 0)
			base.size = -base.size;
	}
	zCopy(&result,u);

	zFree(&base);
	zFree(&expo);
	zFree(&result);
	zFree(&scratch);
}
Пример #6
0
void swap(z *a, z *b)
{
	/* Exchange the values of a and b.  This is a deep copy through a
	 * temporary, not an exchange of pointers. */
	z held;

	zInit(&held);

	zCopy(a,&held);		/* held = a */
	zCopy(b,a);			/* a = b */
	zCopy(&held,b);		/* b = old a */

	zFree(&held);
}
Пример #7
0
int ndigits(z *n)
{
	/* Return the number of decimal digits in |n|.
	 *
	 * The value is divided by MAX_DEC_WORD, adding DEC_DIGIT_PER_WORD digits
	 * per division, until it fits in one limb; ndigits_1 counts the rest.
	 * The sign is stripped from the working copy first.  A negative size
	 * would otherwise skip the loop and count only the lowest limb. */
	int count = 0;
	z dividend, rest;

	//can get within one digit using zBits and logs, which would
	//be tons faster.  Any way to 'correct' the +/- 1 error?
	zInit(&dividend);
	zInit(&rest);
	zCopy(n,&rest);
	if (rest.size < 0)
		rest.size = -rest.size;

	while (rest.size > 1)
	{
		zCopy(&rest,&dividend);
		/* only the quotient matters here; the remainder is dropped */
		(void)zShortDiv(&dividend,MAX_DEC_WORD,&rest);
		count += DEC_DIGIT_PER_WORD;
	}
	count += ndigits_1(rest.val[0]);

	zFree(&dividend);
	zFree(&rest);
	return count;
}
Пример #8
0
void xGCD(z *a, z *b, z *x, z *y, z *g)
{
	/* Extended Euclidean algorithm.
	 *
	 * On return g = gcd(a,b).  When the gcd is 1, x and y satisfy
	 * a*x + b*y = 1 (mod the other operand): a negative x is replaced by
	 * b - |x| and a negative y by a - |y|.  When the remainder sequence
	 * reaches 0 instead (no inverses exist), x and y are set to zero.
	 *
	 * Throughout the loop two (coefficient, coefficient, remainder) triples
	 * are kept: the previous one in (prevX, prevY, prevR) and the current one
	 * in (x, y, curR), with a*x + b*y = curR.  Each step computes
	 *     quot = prevR / curR
	 *     next = prev - quot * current     (for all three components)
	 * and then shifts current -> prev, next -> current.
	 * The larger of a, b seeds the previous triple. */
	z prod, nextX, nextY, nextR, quot, prevR, curR, prevX, prevY;

	zInit(&prod);
	zInit(&nextX);
	zInit(&nextY);
	zInit(&nextR);
	zInit(&quot);
	zInit(&prevR);
	zInit(&curR);
	zInit(&prevX);
	zInit(&prevY);

	//need to check for temp allocation

	zClear(x);
	zClear(y);

	if (zCompare(a,b) >= 0)
	{
		/* previous = (1, 0, a), current = (0, 1, b) */
		prevX.val[0]=1;
		prevY.val[0]=0;
		zCopy(a,&prevR);
		x->val[0]=0;
		y->val[0]=1;
		zCopy(b,&curR);
	}
	else
	{
		/* previous = (0, 1, b), current = (1, 0, a) */
		prevX.val[0]=0;
		prevY.val[0]=1;
		zCopy(b,&prevR);
		x->val[0]=1;
		y->val[0]=0;
		zCopy(a,&curR);
	}

	for (;;)
	{
		/* remainder 0: the gcd is the previous remainder, no inverses */
		if (zCompare(&zZero,&curR) == 0)
		{
			zCopy(&prevR,g);
			zCopy(&zZero,x);
			zCopy(&zZero,y);
			break;
		}

		/* remainder 1: coprime, x and y hold the coefficients */
		if (zCompare(&zOne,&curR) == 0)
		{
			zCopy(&curR,g);
			break;
		}

		/* quot = prevR / curR, nextR = prevR % curR */
		zCopy(&prevR,&prod);
		zDiv(&prod,&curR,&quot,&nextR);

		/* nextX = prevX - quot * x */
		zMul(&quot,x,&prod);
		zSub(&prevX,&prod,&nextX);

		/* nextY = prevY - quot * y */
		zMul(&quot,y,&prod);
		zSub(&prevY,&prod,&nextY);

		/* rotate: current becomes previous, next becomes current */
		zCopy(x,&prevX);
		zCopy(&nextX,x);

		zCopy(y,&prevY);
		zCopy(&nextY,y);

		zCopy(&curR,&prevR);
		zCopy(&nextR,&curR);
	}

	/* map negative coefficients into the positive range */
	if (x->size < 0)
	{
		x->size = -x->size;
		zSub(b,x,x);
	}

	if (y->size < 0)
	{
		y->size = -y->size;
		zSub(a,y,y);
	}

	zFree(&prod);
	zFree(&nextX);
	zFree(&nextY);
	zFree(&nextR);
	zFree(&quot);
	zFree(&prevR);
	zFree(&curR);
	zFree(&prevX);
	zFree(&prevY);

	x->type = UNKNOWN;
	y->type = UNKNOWN;
	g->type = UNKNOWN;
}
Пример #9
0
int isSquare(z *n)
{
	/* Returns 1 if n is a perfect square, 0 otherwise.
	 *
	 * A chain of cheap residue filters rejects most non-squares first; only
	 * the survivors pay for the exact test (integer square root, square it,
	 * compare).  Each filter multiplies a small residue by two constants and
	 * masks the AND of the products; a nonzero result means the residue is
	 * not a quadratic residue for that modulus.
	 * Filter constants: thanks fenderbender @ mersenneforum.org */
	unsigned long res;
	unsigned long rem;
	z root, square;
	int equal;

	/* mod 128, read straight from the low limb (82% rejection) */
	res = n->val[0] & 127;
	if ((res*0x8bc40d7d) & (res*0xa1e2f5d1) & 0x14020a)
		return 0;

	/* one bigint reduction is shared by all the remaining small moduli */
	rem = zShortMod(n,(63UL*25*11*17*19*23*31));

	/* mod 63 (75% rejection) */
	res = rem%63;
	if ((res*0x3d491df7) & (res*0xc824a9f9) & 0x10f14008)
		return 0;

	/* mod 25 (56% rejection) */
	res = rem%25;
	if ((res*0x1929fc1b) & (res*0x4c9ea3b2) & 0x51001005)
		return 0;

	/* mod 31 (48.4% rejection); this filter has a slightly different form
	 * to keep it perfect */
	res = 0xd10d829a*(rem%31);
	if (res & (res+0x672a5354) & 0x21025115)
		return 0;

	/* mod 23 (47.8% rejection) */
	res = rem%23;
	if ((res*0x7bd28629) & (res*0xe7180889) & 0xf8300)
		return 0;

	/* mod 19 (47.3% rejection) */
	res = rem%19;
	if ((res*0x1b8bead3) & (res*0x4d75a124) & 0x4280082b)
		return 0;

	/* mod 17 (47.1% rejection) */
	res = rem%17;
	if ((res*0x6736f323) & (res*0x9b1d499) & 0xc0000300)
		return 0;

	/* mod 11 (45.5% rejection) */
	res = rem%11;
	if ((res*0xabf1a3a7) & (res*0x2612bf93) & 0x45854000)
		return 0;

	/* Net non-square rejection so far: 99.92%.
	 * A second round using the primes 13 29 37 41 43 53 would reject about
	 * 98% of what is left; timing suggests it only pays off for n > 10^100
	 * or so. */

	/* expensive definitive test: n == floor(sqrt(n))^2 ? */
	zInit(&root);
	zInit(&square);
	zNroot(n,&root,2);
	zSqr(&root,&square);
	equal = (zCompare(n,&square) == 0);
	zFree(&root);
	zFree(&square);
	return equal;
}
Пример #10
0
void zDec2Hex(z *u, z *v)
{
	/* Convert u, whose limbs are base-MAX_DEC_WORD decimal words, into the
	 * binary value v:  v = sum over i of u[i] * MAX_DEC_WORD^i.
	 * The sign of u->size is carried over; a zero result gets size 1. */
	z power, term, acc;
	int idx;
	int nwords = abs(u->size);

	zInit(&power);
	zInit(&term);
	zInit(&acc);

	/* the binary form never needs more limbs than the decimal form has */
	if (v->alloc < nwords)
		zGrow(v,nwords);

	if (power.alloc < nwords)
	{
		zGrow(&power,nwords);
		zClear(&power);
	}

	if (term.alloc < nwords)
	{
		zGrow(&term,nwords);
		zClear(&term);
	}

	if (acc.alloc < nwords)
	{
		zGrow(&acc,nwords);
		zClear(&acc);
	}
	acc.size = nwords;

	/* power holds MAX_DEC_WORD^idx at the top of each iteration */
	power.val[0] = 1;
	power.size = 1;
	for (idx = 0; idx < nwords; idx++)
	{
		zShortMul(&power,u->val[idx],&term);
		zAdd(&acc,&term,&acc);
		zShortMul(&power,MAX_DEC_WORD,&power);
	}

	/* drop unused high-order limbs */
	idx = nwords - 1;
	while (idx >= 0 && acc.val[idx] == 0)
		idx--;
	acc.size = idx + 1;

	if (u->size < 0)
		acc.size = -acc.size;

	if (acc.size == 0)
		acc.size = 1;

	zCopy(&acc,v);

	zFree(&acc);
	zFree(&power);
	zFree(&term);
}
Пример #11
0
char *z2decstr(z *n, str_t *s)
{
	/* Write the decimal representation of n into the string object s.
	 *
	 * If s is too small it is reallocated, s->s and s->alloc are updated,
	 * and the (possibly moved) buffer is returned.  s->nchars is set to the
	 * string length plus one for the terminating NUL.
	 *
	 * Returns s->s on success.  If the buffer cannot be grown, s is left
	 * untouched and NULL is returned.
	 *
	 * For really long inputs a significant amount of time is spent here:
	 * for 10000!, computing the factorial takes ~0.047 sec, the zHex2Dec
	 * conversion ~0.5 sec and printing to the string ~0.8 sec. */
	z a;
	int i, sza, len;
	char *grown;
	char chunk[32];		/* one zero-padded decimal word plus NUL */

	zInit(&a);
	zHex2Dec(n,&a);

	sza = abs(a.size);

	/* room for every decimal word, a sign and the terminator */
	if (s->alloc < DEC_DIGIT_PER_WORD*sza + 2)
	{
		/* keep the old pointer until the new one is known to be valid */
		grown = (char *)realloc(s->s,(DEC_DIGIT_PER_WORD*sza + 10)*sizeof(char));
		if (grown == NULL)
		{
			zFree(&a);
			return NULL;
		}
		s->s = grown;
		s->alloc = (DEC_DIGIT_PER_WORD*sza + 10);
	}

	/* len = characters written so far, not counting the terminator */
	len = 0;

	//print negative sign, if necessary
	if (n->size < 0)
		s->s[len++] = '-';

	/* Each piece is formatted at the current end of the string.  Passing
	 * s->s as both destination and "%s" source of sprintf is undefined. */
#if BITS_PER_DIGIT == 32
	//most significant word, no zero padding
	len += sprintf(s->s + len,"%u",(uint32)a.val[sza - 1]);

	//remaining words, zero padded to 9 digits each
	for (i=sza - 2; i>=0; i--)
	{
		sprintf(chunk,"%09u",(uint32)a.val[i]);
		memcpy(s->s + len, chunk, 9);
		len += 9;
	}
#else
	//most significant word, no zero padding
	len += sprintf(s->s + len,"%" PRIu64,a.val[sza - 1]);

	//remaining words, zero padded to 19 digits each
	for (i=sza - 2; i>=0; i--)
	{
		sprintf(chunk,"%019" PRIu64,a.val[i]);
		memcpy(s->s + len, chunk, 19);
		len += 19;
	}
#endif

	s->s[len] = '\0';
	s->nchars = len + 1;

	zFree(&a);
	return s->s;
}
Пример #12
0
void zmModExpw(z *a, z *e, z *u, z *n, int k)
{
	//computes u = a^e mod n using the sliding window left to right binary method
	//see, for instance, the handbook of applied cryptography
	//uses monty arith (monty_sqr / monty_mul)
	//a is already in monty rep, e doesn't need to be.  k is the window size
	//the result is built up in u, starting from montyconst.one
	//NOTE(review): k < 1 makes the shift in the numg computation undefined, and
	//neither malloc result is checked -- confirm callers guarantee both.

	/*
	INPUT: g, e = (etet-1 . . . e1e0)2 with et = 1, and an integer k >= 1.
	OUTPUT: g^e.
	1. Precomputation.
	1.1 g1 = g, g2 = g^2.
	1.2 For i from 1 to (2^(k-1) - 1) do: g_{2i+1} =  g_{2i-1} * g2.
	2. A = 1, i = t.
	3. While i >= 0 do the following:
	3.1 If ei = 0 then do: A = A^2, i = i - 1.
	3.2 Otherwise (ei != 0), find the longest bitstring eiei-1 . . . el such that i-l+1 <= k
	and el = 1, and do the following:
	A = A^{2^{i-l+1}} * g_{eiei-1...el}2 , i = l - 1.
	4. Return(A).

	test -> 11749.  3 multiplications at i=7,4,0
	*/

	//(2^(k-1) + 1) table entries are allocated for the precomputation.
	z *g, g2, ztmp;
	int numg, i, j, l, t, tmp1, tmp2;
	fp_digit utmp1;
	uint8 *bitarray;

	//overflow possibilities:
	//t ranges to 2x input 'a'
	//u needs at least as much space as modulus

	numg = (int)((1<<(k-1))+1);
	g = (z *)malloc(numg*sizeof(z));
	for (i=0;i<numg;i++)
		zInit(&g[i]);
	zInit(&g2);
	zInit(&ztmp);

	//precomputation: table of odd powers of a
	zCopy(a,&g[0]);						//g[0] = a
	monty_sqr(a,&g2,n);					//g2 = a^2

	for (i=1;i<numg;i++)
		monty_mul(&g[i-1],&g2,&g[i],n);	//g[i] = g[i-1] * g2, where g[i] holds g^{2*i+1}

	zCopy(&montyconst.one,u);
	t = zBits(e);

	//one byte per exponent bit, least significant bit at index 0
	bitarray = (uint8 *)malloc(t * sizeof(uint8));
	//get e in one array: every limb below the top one contributes all of its bits
	for (i=0;i< e->size - 1;i++)
	{
		utmp1 = e->val[i];
		j=0;
		while (j<BITS_PER_DIGIT)
		{
			bitarray[BITS_PER_DIGIT*i+j] = (uint8)(utmp1 & 0x1);
			utmp1 >>= 1;
			j++;
		}
	}
	//the top limb contributes bits only up to its highest set bit
	utmp1 = e->val[i];
	j=0;
	while (utmp1)
	{
		bitarray[BITS_PER_DIGIT*i+j] = (uint8)(utmp1 & 0x1);
		utmp1 >>= 1;
		j++;
	}

	//scan the exponent from the most significant bit downwards
	i=t-1;
	while (i >= 0)
	{
		if (bitarray[i])
		{
			//find the longest bitstring ei,e1-1,...el such that i-l+1 <= k and el == 1
			//(default is a window of the single bit i)
			l=i;
			if (i >= (k-1))
			{
				//protect against accessing bitarray past its boundaries
				for (j=k-1;j>0;j--)
				{
					if (bitarray[i-j])
					{
						//this is the longest possible string, exit
						l=i-j;
						break;
					}
				}
			}
			//now, bitarray[i] down to bitarray[l] is the chosen window
			//figure out the g value to use corresponding to this bitstring:
			//tmp2 becomes the (odd) window value, then its table index (value-1)/2
			tmp1 = 1;
			tmp2 = 0;
			for (j=l;j<=i;j++)
			{
				tmp2 += tmp1 * bitarray[j];
				tmp1 <<= 1;
			}
			tmp2 = (tmp2-1)/2;

			//do the operation A = A^{2^{i-l+1}} * g_{eiei-1...el}2
			for (j=0;j<(i-l+1);j++)
			{
				monty_sqr(u,&ztmp,n);
				zCopy(&ztmp,u);
			}
			monty_mul(u,&g[tmp2],&ztmp,n);
			zCopy(&ztmp,u);

			//decrement bit pointer past the whole window
			i = l-1;
		}
		else
		{
			//zero bit: just square
			monty_sqr(u,&ztmp,n);
			zCopy(&ztmp,u);
			i--;
		}
	}

	for (i=0;i<numg;i++)
		zFree(&g[i]);
	free(g);
	zFree(&g2);
	zFree(&ztmp);
	free(bitarray);
	return;
}
Пример #13
0
void zREDC(z *T, z *n)
{
	/* Montgomery reduction, in place: T <- T * R^-1 mod n with R = b^(n->size),
	 * b being the limb base.  From the handbook of applied cryptography, ch. 14:
	 *
	 * INPUT: integers m = (mn-1 . . . m1m0)b with gcd(m, b) = 1, R = b^n,
	 *        m' = -m^-1 mod b, and T = (t2n-1 . . . t1t0)b < mR.
	 * OUTPUT: TR^-1 mod m.
	 * 1. A = T.
	 * 2. For i from 0 to (n - 1) do the following:
	 * 2.1 ui = ai*m' mod b.
	 * 2.2 A = A + ui*m*b^i.
	 * 3. A = A/b^n.
	 * 4. If A > m then A = A - m.
	 * 5. Return(A).
	 *
	 * When TFM_MONTY is 1 the work is delegated to fp_montgomery_reduce.
	 * Note that step 4 uses a strict comparison and an empty result is
	 * replaced by n, so a residue of zero is returned as n itself. */
	int i,j,ix,su,shifted;
	fp_digit nhat = montyconst.nhat.val[0], ui,k;
	z mtmp3;

	if (TFM_MONTY == 1)
	{
		fp_montgomery_reduce(T,n,montyconst.nhat.val[0]);
		return;
	}

	zInit(&mtmp3);

	/* ui*n shifted by up to n->size - 1 limbs needs at most 2*n->size limbs */
	if (mtmp3.alloc < n->size * 2)
		zGrow(&mtmp3,n->size * 2);

	//T needs to have allocated n.size + T.size
	if (T->alloc < n->size + T->size)
		zGrow(T,n->size + T->size + 1);

	for (i=0;i<n->size;i++)
	{
		//the mod b happens automatically because only the
		//low limb of the product is kept.
		ui = T->val[i] * nhat;						//ui = ai*nhat mod b

		//short mul: mtmp3 = ui * n
		k=0;
		su = n->size;
		for (ix=0;ix<su;++ix)
			spMulAdd(n->val[ix],ui,0,k,&mtmp3.val[ix],&k);

		//if still have a carry, add a limb
		if (k)
		{
			mtmp3.val[su]=k;
			su++;
		}

		//trim leading zero limbs
		for (ix = su - 1;ix>=0;--ix)
		{
			if (mtmp3.val[ix] != 0)
				break;
		}
		mtmp3.size = ix+1;

		//a zero product contributes nothing to A
		if (mtmp3.size == 0)
			continue;

		//mtmp3 *= b^i: move the limbs up by i places ...
		for (j=mtmp3.size - 1;j>=0;j--)
			mtmp3.val[j+i] = mtmp3.val[j];
		//... and clear the i vacated low limbs.  Without this the stale
		//low limbs of the product would be added into A as well.
		for (j=0;j<i;j++)
			mtmp3.val[j] = 0;
		mtmp3.size += i;
		zAdd(T,&mtmp3,T);								//A += ui*n*b^i
	}

	//A /= b^n: drop the low n->size limbs.  Only limbs that are actually in
	//use are read, and the limbs vacated at the top are cleared.
	shifted = T->size - n->size;
	if (shifted < 0)
		shifted = 0;
	for (j=0; j<shifted; j++)
		T->val[j] = T->val[j+n->size];
	for (; j<T->size; j++)
		T->val[j] = 0;
	T->size = shifted;

	if (zCompare(T,n) > 0)				//if A > n, A = A-n
		zSub(T,n,T);

	if (T->size == 0)
		zCopy(n,T);

	zFree(&mtmp3);

	return;
}
Пример #14
0
void monty_mul_interleaved(z *a, z *b, z *c, z *n)
{
	//Interleaved Montgomery multiplication of a and b modulo n, result in c.
	//For each of the n->size limb positions i the running value A is updated as
	//   A = (A + a[i]*b + u*n) / base
	//where u is chosen so that the lowest limb of the sum cancels (and can
	//therefore be dropped by the one-limb right shift).  A final conditional
	//subtraction of n follows.  nhat comes from montyconst.nhat.
	//
	//Two scratch numbers are used alternately: t1 holds A at the top of each
	//iteration, t2 receives A + a[i]*b, and the second half writes the
	//shifted result back into t1.
	//
	//NOTE(review): s1/s2 are never grown here; presumably zInit allocates
	//enough limbs for n->size + 2 -- confirm.
	//NOTE(review): a->val[i] is read for every i < n->size regardless of
	//a->size; presumably limbs above a->size are zero -- confirm.
	//NOTE(review): spMulAdd(x,y,add,cin,&lo,&cout) presumably yields
	//x*y + add + cin split into low limb and carry; spAdd likewise -- confirm.

	fp_digit nhat = montyconst.nhat.val[0], u;
	int i,j,t=n->size;
	int szb = abs(b->size);
	fp_digit k;
	z *t1,*t2;
	z s1,s2;

	zInit(&s1);
	zInit(&s2);
	t1 = &s1;
	t2 = &s2;
	zClear(t1);
	zClear(t2);

	for (i=0;i<t;i++)
	{
		//u = (A[0] + a[i]*b[0]) * nhat, reduced to one limb
		u = (t1->val[0] + a->val[i] * b->val[0]) * nhat;	//truncation will provide mod b
		
		/****** short mul of b with ai, simultaneous with addition of A (in t1) ********/
		for (j=t1->size;j<szb;j++)
			t1->val[j] = 0;		//zero any unused words up to size of b, so we can add
		//mul and add up to size of b
		k=0;
		for (j=0;j<szb ;j++)
			spMulAdd(b->val[j],a->val[i],t1->val[j],k,t2->val + j,&k);
		//continue with add if A has more words
		for (;j<t1->size;j++)
			spAdd(t1->val[j],k,t2->val+j,&k);

		//adjust size: t2 is as long as the longer of A and b
		if (t1->size > szb)
			t2->size = t1->size;
		else
			t2->size = szb;

		//account for carry (j is advanced too so the zero fill below starts above it)
		if (k)
		{
			t2->val[t2->size]=k;
			t2->size++;
			j++;
		}
		/****** short mul of b with ai, simultaneous with addition of A (in t1) ********/


		/****** short mul of n with u, simultaneous with add. of prev step (in t2) 
		 and with right shift of one word                                       ********/

		for (;j<t;j++)
			t2->val[j] = 0;		//zero any unused words up to size of n, so we can add
		//mul and add up to size of n, store into one word previous
		k=0;
		//needs first mul to get k set right, answer gets shifted to oblivion
		//(the low limb written to t1->val[0] here is overwritten by the j=1 step)
		spMulAdd(n->val[0],u,t2->val[0],k,t1->val,&k);
		for (j=1;j<t;j++)
			spMulAdd(n->val[j],u,t2->val[j],k,t1->val + j - 1,&k);
		//continue if t2 is bigger than n
		for (;j<t2->size;j++)
			spAdd(t2->val[j],k,t1->val+j-1,&k);

		//adjust size: one limb shorter than the longer operand because of the shift
		if (t2->size > t)
			t1->size = t2->size - 1;
		else
			t1->size = t - 1;

		//account for carry
		if (k)
		{
			t1->val[t1->size]=k;
			t1->size++;
		}
		/****** short mul of n with u, simultaneous with add. of prev step (in t2) 
		 and with right shift of one word                                       ********/

	}

	//almost done: final conditional subtraction, result goes to c
	if (zCompare(t1,n) >= 0)
		zSub(t1,n,c);
	else
		zCopy(t1,c);

	zFree(&s1);
	zFree(&s2);
	return;
}
Пример #15
0
/* Column-wise ("comba") multiplication: C = A * B.
 *
 * Each output limb ix is produced by accumulating the products
 * A[tx] * B[ty] over the pairs with tx + ty == ix.  The result size is
 * |A->size| + |B->size| before clamping, and its sign is negative when the
 * operand signs differ.
 *
 * C may alias A or B: in that case the product is built in a local
 * temporary and copied into C at the end.
 *
 * NOTE(review): the COMBA_* and MULADD macros are defined elsewhere;
 * presumably they maintain the column accumulator in c0, c1, c2 -- confirm. */
void fp_mul_comba(z *A, z *B, z *C)
{
   int       ix, iy, iz, tx, ty, pa, sA, sB;
   fp_digit  c0, c1, c2, *tmpx, *tmpy;
   z    *dst;
   z loc;

   COMBA_START;
   COMBA_CLEAR;
   
   /* get size of output and trim */
   sA = abs(A->size);
   sB = abs(B->size);
   pa = sA + sB;

   /* write into a temporary when the destination is also an input */
   if (A == C || B == C) {
	   zInit(&loc);
      //dst = &atmp1;
	   dst = &loc;
   } else {
      dst = C;
   }

	if (dst->alloc < pa)
		zGrow(dst,pa + LIMB_BLKSZ);
	zClear(dst);

   for (ix = 0; ix < pa; ix++) {
      /* get offsets into the two bignums */
      ty = MIN(ix, sB-1);
      tx = ix - ty;

      /* setup temp aliases */
      tmpx = A->val + tx;
      tmpy = B->val + ty;

      /* this is the number of times the loop will iterate, essentially it's 
         while (tx++ < a->used && ty-- >= 0) { ... }
       */
      iy = MIN(sA-tx, ty+1);

      /* execute loop: tmpx walks up through A while tmpy walks down through B */
      COMBA_FORWARD;
      for (iz = 0; iz < iy; ++iz) {
		  MULADD(*tmpx++, *tmpy--);
      }

      /* store term */
      COMBA_STORE(dst->val[ix]);
  }
  COMBA_FINI;

  dst->size = pa;
  /* operands of opposite sign give a negative product */
  if ((A->size * B->size) < 0)
	  dst->size *= -1;

  fp_clamp(dst);
  /* aliased case: move the result into C and release the temporary */
  if (dst != C) {
	zCopy(dst, C);
	zFree(&loc);
  }
}
void fp_sqr_comba(z *A, z *B)
{
  int       pa, ix, iz, sA;
  fp_digit  c0, c1, c2;
  z    *dst;
  z loc;
#ifdef TFM_ISO
  uint64   tt;
#endif    

  /* get size of output and trim */
  sA = abs(A->size);
   pa = sA + sA;

  /* number of output digits to produce */
  COMBA_START;
  CLEAR_CARRY;

  if (A == B) {
     //zClear(&atmp1);
	  zInit(&loc);
     //dst = &atmp1;
	  dst = &loc;
  } else {
     zClear(B);
     dst = B;
  }

	if (dst->alloc < pa)
	{
		zGrow(dst,pa + LIMB_BLKSZ);
	}
	zClear(dst);
	
  for (ix = 0; ix < pa; ix++) { 
      int      tx, ty, iy;
      fp_digit *tmpy, *tmpx;

      /* get offsets into the two bignums */
      ty = MIN(sA-1, ix);
      tx = ix - ty;

      /* setup temp aliases */
      tmpx = A->val + tx;
      tmpy = A->val + ty;

      /* this is the number of times the loop will iterrate,
         while (tx++ < a->used && ty-- >= 0) { ... }
       */
      iy = MIN(sA-tx, ty+1);

      /* now for squaring tx can never equal ty 
       * we halve the distance since they approach 
       * at a rate of 2x and we have to round because 
       * odd cases need to be executed
       */
      iy = MIN(iy, (ty-tx+1)>>1);

      /* forward carries */
      CARRY_FORWARD;

      /* execute loop */
      for (iz = 0; iz < iy; iz++) {
		  SQRADD2(*tmpx++, *tmpy--);
      }

      /* even columns have the square term in them */
      if ((ix&1) == 0) {
		  SQRADD(A->val[ix>>1],A->val[ix>>1]);
      }

      /* store it */
      COMBA_STORE(dst->val[ix]);
  }