/*
 * monty_add - modular addition of two values in Montgomery representation.
 *
 * u, v : the addends
 * w    : receives the sum
 * n    : the modulus
 *
 * The sum u + v is written to w and n is subtracted from it at most once,
 * when the sum is not below n.  The result is therefore below n only when
 * both inputs already were.  The modulus is taken from the n argument.
 */
void monty_add(z *u, z *v, z *w, z *n)
{
    zAdd(u, v, w);

    /* Sum is already below the modulus: nothing to correct. */
    if (zCompare(w, n) < 0) {
        return;
    }

    zSub(w, n, w);
}
/*
 * zHex2Dec - re-express u as a sequence of base-MAX_DEC_WORD digits in v.
 *
 * u : value to convert (stored in the native word base)
 * v : receives the converted digits, least significant first
 *
 * u is divided repeatedly by MAX_DEC_WORD (1e9 = 0x3b9aca00 according to the
 * original author's note); the remainder of the i-th division becomes
 * v->val[i].  The loop ends when the quotient reaches zero.  On return
 * |v->size| is the number of digits produced and v->size is negative when
 * u->size was negative.
 *
 * Changes relative to the previous version:
 *  - The size estimate gets one extra word.  (int)(su * 1.5) truncates to 1
 *    for a one-word input, yet a single word >= MAX_DEC_WORD needs two
 *    output digits.
 *  - u is copied and its sign recorded before v is cleared, so the result no
 *    longer depends on u and v being distinct objects.
 *
 * NOTE(review): the 1.5 factor matches 32-bit words; confirm it is still
 * sufficient if fp_digit is wider.
 */
void zHex2Dec(z *u, z *v)
{
    z a, b;
    fp_digit r = 0;
    int su = abs(u->size);
    int negative = (u->size < 0);
    /* decimal storage needs more words than the native base; +1 covers truncation */
    int approx_words = (int)((double)su * 1.5) + 1;

    zInit(&a);
    zInit(&b);

    if (v->alloc < approx_words) {
        zGrow(v, approx_words);
    }

    if (a.alloc < approx_words) {
        zGrow(&a, approx_words);
        zClear(&a);
    }

    if (b.alloc < approx_words) {
        zGrow(&b, approx_words);
        zClear(&b);
    }

    /* take the working copy before touching v, in case v is the same object as u */
    zCopy(u, &a);
    zClear(v);

    v->size = 1;
    do {
        /* b = a / MAX_DEC_WORD, r = a % MAX_DEC_WORD: r is the next output digit */
        r = zShortDiv(&a, MAX_DEC_WORD, &b);
        v->val[v->size - 1] = r;
        v->size++;
        zCopy(&b, &a);
    } while (zCompare(&a, &zZero) != 0);

    /* the loop increments once more than the number of digits stored */
    v->size--;

    if (negative) {
        v->size *= -1;
    }

    zFree(&a);
    zFree(&b);
    return;
}
/*
 * xGCD - extended Euclidean algorithm on a and b.
 *
 * a, b : inputs
 * x, y : receive the cofactors (x multiplies a, y multiplies b)
 * g    : receives the gcd
 *
 * Two rows (coefficient of a, coefficient of b, remainder) are maintained.
 * The "previous" row lives in locals; the "current" row uses x, y and a local
 * remainder.  The larger input seeds the previous row, the smaller one the
 * current row.  Each step divides the previous remainder by the current one
 * and forms new = previous - quotient * current for all three columns.
 *
 * Termination:
 *  - current remainder == 0: g = previous remainder, x and y are set to zero
 *    (no cofactors are reported);
 *  - current remainder == 1: g = 1 and x, y are the current coefficients.
 *
 * Afterwards a negative x is replaced by b - |x| and a negative y by
 * a - |y|.  The type field of x, y and g is set to UNKNOWN.
 */
void xGCD(z *a, z *b, z *x, z *y, z *g)
{
    z scratch, next_x, next_y, next_rem, quot, prev_rem, rem, prev_x, prev_y;
    int a_is_smaller;
    int finished = 0;

    zInit(&scratch);
    zInit(&next_x);
    zInit(&next_y);
    zInit(&next_rem);
    zInit(&quot);
    zInit(&prev_rem);
    zInit(&rem);
    zInit(&prev_x);
    zInit(&prev_y);

    zClear(x);
    zClear(y);

    /* Seed the two rows: the larger input goes into the previous row. */
    a_is_smaller = (zCompare(a, b) < 0);
    prev_x.val[0] = a_is_smaller ? 0 : 1;
    prev_y.val[0] = a_is_smaller ? 1 : 0;
    zCopy(a_is_smaller ? b : a, &prev_rem);
    x->val[0] = a_is_smaller ? 1 : 0;
    y->val[0] = a_is_smaller ? 0 : 1;
    zCopy(a_is_smaller ? a : b, &rem);

    while (!finished) {
        if (zCompare(&zZero, &rem) == 0) {
            /* remainder hit zero: gcd is the previous remainder, no cofactors */
            zCopy(&prev_rem, g);
            zCopy(&zZero, x);
            zCopy(&zZero, y);
            finished = 1;
        } else if (zCompare(&zOne, &rem) == 0) {
            /* remainder hit one: x and y already hold the cofactors */
            zCopy(&rem, g);
            finished = 1;
        } else {
            /* divide a copy of the previous remainder; scratch is reused below */
            zCopy(&prev_rem, &scratch);
            zDiv(&scratch, &rem, &quot, &next_rem);

            /* next_x = prev_x - quot * x */
            zMul(&quot, x, &scratch);
            zSub(&prev_x, &scratch, &next_x);

            /* next_y = prev_y - quot * y */
            zMul(&quot, y, &scratch);
            zSub(&prev_y, &scratch, &next_y);

            /* current row becomes the previous row */
            zCopy(x, &prev_x);
            zCopy(y, &prev_y);
            zCopy(&rem, &prev_rem);

            /* freshly computed row becomes the current row */
            zCopy(&next_x, x);
            zCopy(&next_y, y);
            zCopy(&next_rem, &rem);
        }
    }

    /* map negative cofactors to non-negative ones */
    if (x->size < 0) {
        x->size *= -1;
        zSub(b, x, x);
    }

    if (y->size < 0) {
        y->size *= -1;
        zSub(a, y, y);
    }

    zFree(&scratch);
    zFree(&next_x);
    zFree(&next_y);
    zFree(&next_rem);
    zFree(&quot);
    zFree(&prev_rem);
    zFree(&rem);
    zFree(&prev_x);
    zFree(&prev_y);

    x->type = UNKNOWN;
    y->type = UNKNOWN;
    g->type = UNKNOWN;
    return;
}
/*
 * isSquare - return 1 when n equals the square of its computed root, else 0.
 *
 * Residue filters credited to fenderbender @ mersenneforum.org.  Cheap
 * modular filters reject most non-squares before the definitive test:
 *
 *  1. n mod 128, read directly from the low word (author quotes 82%
 *     rejection).
 *  2. One multi-word reduction modulo 63*25*11*17*19*23*31, whose result is
 *     then reduced by each small modulus in turn.  Every filter is a pair of
 *     multiplications ANDed with a mask; the mod-31 filter uses a slightly
 *     different form.  The author quotes a combined 99.92% rejection rate.
 *  3. Survivors get the expensive exact test: take the root with
 *     zNroot(n, ., 2), square it and compare against n.
 *
 * Per the author's notes, a second filter round over the primes
 * 13 29 37 41 43 53 would pay off only for n above roughly 10^100.
 */
int isSquare(z *n)
{
    /* Filters of the common form (r*mul_a) & (r*mul_b) & mask, r = n mod `mod`. */
    static const struct {
        unsigned long mod;
        unsigned long mul_a;
        unsigned long mul_b;
        unsigned long mask;
    } sieve[] = {
        { 63, 0x3d491df7, 0xc824a9f9, 0x10f14008 },
        { 25, 0x1929fc1b, 0x4c9ea3b2, 0x51001005 },
        { 23, 0x7bd28629, 0xe7180889, 0xf8300 },
        { 19, 0x1b8bead3, 0x4d75a124, 0x4280082b },
        { 17, 0x6736f323, 0x9b1d499, 0xc0000300 },
        { 11, 0xabf1a3a7, 0x2612bf93, 0x45854000 },
    };
    unsigned long residue;
    unsigned long big_residue;
    unsigned int idx;
    z root, root_sq;
    int cmp;

    /* mod 128: needs only the low bits of the low word */
    residue = n->val[0] & 127;
    if ((residue * 0x8bc40d7d) & (residue * 0xa1e2f5d1) & 0x14020a) {
        return 0;
    }

    /* the only multi-word reduction; all remaining filters reuse its result */
    big_residue = zShortMod(n, (63UL*25*11*17*19*23*31));

    for (idx = 0; idx < sizeof(sieve) / sizeof(sieve[0]); idx++) {
        residue = big_residue % sieve[idx].mod;
        if ((residue * sieve[idx].mul_a) & (residue * sieve[idx].mul_b) & sieve[idx].mask) {
            return 0;
        }
    }

    /* mod 31: multiply first, then combine with an additive offset */
    residue = 0xd10d829a * (big_residue % 31);
    if (residue & (residue + 0x672a5354) & 0x21025115) {
        return 0;
    }

    /* definitive test: root, square it, compare with n */
    zInit(&root);
    zInit(&root_sq);

    zNroot(n, &root, 2);
    zSqr(&root, &root_sq);
    cmp = zCompare(n, &root_sq);

    zFree(&root);
    zFree(&root_sq);

    return (cmp == 0);
}
/*
 * zREDC - Montgomery reduction of T modulo n, in place.
 *
 * From the Handbook of Applied Cryptography, ch. 14:
 *   INPUT : m = (m_{n-1} ... m_0)_b with gcd(m, b) = 1, R = b^n,
 *           m' = -m^-1 mod b, and T = (t_{2n-1} ... t_0)_b < mR.
 *   OUTPUT: T * R^-1 mod m.
 *     1. A = T.
 *     2. For i from 0 to n-1:
 *          2.1 u_i = a_i * m' mod b
 *          2.2 A = A + u_i * m * b^i
 *     3. A = A / b^n.
 *     4. If A > m then A = A - m.
 *     5. Return A.
 *
 * T : value to reduce; overwritten with the result
 * n : modulus; m' is read from montyconst.nhat.val[0]
 *
 * When TFM_MONTY == 1 the work is delegated to fp_montgomery_reduce and the
 * rest of this function is skipped.
 *
 * As in the previous version, only a result strictly greater than n is
 * reduced (a result equal to n is left as n), and a result whose size ends
 * up 0 is replaced by n.
 *
 * Changes relative to the previous version:
 *  - T is grown to hold the whole accumulator.  The old check compared
 *    against n->size + T->size but grew to one word more, and it did not
 *    account for T being shorter than n, where the additions still reach
 *    2 * n->size + 1 words.
 *  - The final shift by n->size words moves only words inside T->size and
 *    zeroes the vacated top words.  The old loop read T->val[j + n->size] for
 *    every j < T->size, i.e. up to n->size words past the used length.
 */
void zREDC(z *T, z *n)
{
    int i, j, ix, su, need, keep;
    fp_digit nhat = montyconst.nhat.val[0], ui, k;
    z mtmp3;

    if (TFM_MONTY == 1) {
        fp_montgomery_reduce(T, n, nhat);
        return;
    }

    zInit(&mtmp3);
    if (mtmp3.alloc < n->size * 2) {
        zGrow(&mtmp3, n->size * 2);
    }

    /* room for the larger of T and a 2n-word accumulator, plus one carry word */
    need = (T->size > n->size ? T->size + n->size : 2 * n->size) + 1;
    if (T->alloc < need) {
        zGrow(T, need);
    }

    for (i = 0; i < n->size; i++) {
        /* the "mod b" is implicit: only the low word of the product is kept */
        ui = T->val[i] * nhat;

        /* short multiply: mtmp3 = ui * n */
        k = 0;
        su = n->size;
        for (ix = 0; ix < su; ++ix) {
            spMulAdd(n->val[ix], ui, 0, k, &mtmp3.val[ix], &k);
        }

        /* a leftover carry becomes one more word */
        if (k) {
            mtmp3.val[su] = k;
            su++;
        }

        /* drop leading zero words */
        for (ix = su - 1; ix >= 0; --ix) {
            if (mtmp3.val[ix] != 0) {
                break;
            }
        }
        mtmp3.size = ix + 1;

        /*
         * mtmp3 *= b^i.  The low i words are not cleared; what they hold
         * stays below b^i in total across all rounds, so it cannot carry
         * into word i or above, and those low words are shifted out at the
         * end.
         */
        for (j = mtmp3.size - 1; j >= 0; j--) {
            mtmp3.val[j + i] = mtmp3.val[j];
        }
        mtmp3.size += i;

        zAdd(T, &mtmp3, T);     /* A += ui * n * b^i */
    }

    /* A /= b^n: move the surviving words down, then clear what they vacated */
    keep = T->size - n->size;
    for (j = 0; j < keep; j++) {
        T->val[j] = T->val[j + n->size];
    }
    for (j = (keep > 0 ? keep : 0); j < T->size; j++) {
        T->val[j] = 0;
    }
    T->size = keep;

    if (zCompare(T, n) > 0) {   /* if A > n, A = A - n */
        zSub(T, n, T);
    }

    if (T->size == 0) {
        zCopy(n, T);
    }

    zFree(&mtmp3);
    return;
}
/*
 * monty_mul_interleaved - Montgomery multiplication with the reduction
 * interleaved into the multiplication, one word of a per round.
 *
 * a, b : operands
 * c    : receives the result
 * n    : modulus; -n^-1 mod b is read from montyconst.nhat.val[0]
 *
 * Two scratch values alternate as source and destination.  For each of the
 * n->size rounds:
 *   u  = (A[0] + a[i] * b[0]) * nhat      (low word only)
 *   t2 = A + a[i] * b
 *   A  = (t2 + u * n) / b                 (stored one word lower)
 * After the last round n is subtracted once if A >= n, and the result is
 * written to c.
 *
 * Changes relative to the previous version:
 *  - The scratch values are grown to max(n->size, |b->size|) + 2 words before
 *    use; they were previously written, carry word included, without any
 *    capacity check.
 *  - Words of a at or beyond |a->size| are taken as zero instead of being
 *    read from storage outside the used length.
 */
void monty_mul_interleaved(z *a, z *b, z *c, z *n)
{
    fp_digit nhat = montyconst.nhat.val[0], u, ai;
    int i, j, t = n->size;
    int sza = abs(a->size);
    int szb = abs(b->size);
    /* highest index written is max(t, szb) + 1 (the carry word) */
    int need = (t > szb ? t : szb) + 2;
    fp_digit k;
    z *t1, *t2;
    z s1, s2;

    zInit(&s1);
    zInit(&s2);
    if (s1.alloc < need) {
        zGrow(&s1, need);
    }
    if (s2.alloc < need) {
        zGrow(&s2, need);
    }
    t1 = &s1;
    t2 = &s2;
    zClear(t1);
    zClear(t2);

    for (i = 0; i < t; i++) {
        /* a may be shorter than n: missing high words are zero */
        ai = (i < sza) ? a->val[i] : 0;

        /* truncation to one word provides the "mod b" */
        u = (t1->val[0] + ai * b->val[0]) * nhat;

        /****** t2 = t1 + ai * b ******/

        /* zero any unused words of t1 up to the size of b, so they can be added */
        for (j = t1->size; j < szb; j++) {
            t1->val[j] = 0;
        }

        /* multiply-and-add across the words of b */
        k = 0;
        for (j = 0; j < szb; j++) {
            spMulAdd(b->val[j], ai, t1->val[j], k, t2->val + j, &k);
        }

        /* keep propagating the carry if t1 has more words than b */
        for (; j < t1->size; j++) {
            spAdd(t1->val[j], k, t2->val + j, &k);
        }

        t2->size = (t1->size > szb) ? t1->size : szb;

        /* a leftover carry becomes one more word */
        if (k) {
            t2->val[t2->size] = k;
            t2->size++;
            j++;
        }

        /****** t1 = (t2 + u * n) / b ******/

        /* zero any unused words of t2 up to the size of n, so they can be added */
        for (; j < t; j++) {
            t2->val[j] = 0;
        }

        /*
         * The first multiply-add is needed only for its carry: its low word
         * is overwritten by the next store, which is how the one-word right
         * shift happens.
         */
        k = 0;
        spMulAdd(n->val[0], u, t2->val[0], k, t1->val, &k);
        for (j = 1; j < t; j++) {
            spMulAdd(n->val[j], u, t2->val[j], k, t1->val + j - 1, &k);
        }

        /* keep propagating the carry if t2 has more words than n */
        for (; j < t2->size; j++) {
            spAdd(t2->val[j], k, t1->val + j - 1, &k);
        }

        t1->size = (t2->size > t) ? t2->size - 1 : t - 1;

        /* a leftover carry becomes one more word */
        if (k) {
            t1->val[t1->size] = k;
            t1->size++;
        }
    }

    /* final conditional subtraction */
    if (zCompare(t1, n) >= 0) {
        zSub(t1, n, c);
    } else {
        zCopy(t1, c);
    }

    zFree(&s1);
    zFree(&s2);
    return;
}