/* SU= 72 */
/* Set c=a*b, writing the double-length product into c.
   a and b are normalised in place before multiplying. */
void BIG_mul(DBIG c,BIG a,BIG b)
{
    int s,k;
    chunk carry;
#ifdef dchunk
    dchunk acc,cy;
#endif

    /* normalise first so that summing partial products cannot overflow */
    BIG_norm(a);
    BIG_norm(b);

#ifdef COMBA
    /* Column-wise (Comba) form; the loops are left for the compiler to unroll */

    /* columns 0 and 1 are written out explicitly */
    acc=(dchunk)a[0]*b[0];
    c[0]=(chunk)acc&MASK;
    cy=acc>>BASEBITS;

    acc=(dchunk)a[1]*b[0]+(dchunk)a[0]*b[1]+cy;
    c[1]=(chunk)acc&MASK;
    cy=acc>>BASEBITS;

    /* lower columns: every a[s-k]*b[k] with 0<=k<=s, plus the incoming carry */
    for (s=2;s<NLEN;s++)
    {
        acc=cy;
        for (k=0;k<=s;k++)
            acc+=(dchunk)a[s-k]*b[k];
        c[s]=(chunk)acc&MASK;
        cy=acc>>BASEBITS;
    }

    /* upper columns: k starts at s-NLEN+1 so that the index s-k stays below NLEN */
    for (s=NLEN;s<DNLEN-2;s++)
    {
        acc=cy;
        for (k=s-NLEN+1;k<NLEN;k++)
            acc+=(dchunk)a[s-k]*b[k];
        c[s]=(chunk)acc&MASK;
        cy=acc>>BASEBITS;
    }

    /* last product column; the carry left over becomes the top limb */
    acc=(dchunk)a[NLEN-1]*b[NLEN-1]+cy;
    c[DNLEN-2]=(chunk)acc&MASK;
    cy=acc>>BASEBITS;
    c[DNLEN-1]=(chunk)cy;
#else
    /* Row-wise form: clear c, then accumulate one row of partial products
       per limb of a through muladd, storing the row's final carry above it */
    BIG_dzero(c);
    for (s=0;s<NLEN;s++)
    {
        carry=0;
        for (k=0;k<NLEN;k++)
            carry=muladd(a[s],b[k],carry,&c[s+k]);
        c[NLEN+s]=carry;
    }
#endif

#ifdef DEBUG_NORM
    c[DNLEN]=0;
#endif
}
/* Set c=c-d.
   c is normalised first, then d is subtracted from the least significant
   limb only; no normalisation is performed after the subtraction. */
void BIG_dec(BIG c,int d)
{
    BIG_norm(c);
    c[0]=c[0]-(chunk)d;
#ifdef DEBUG_NORM
    c[NLEN]=1;
#endif
}
/* SU= 40 */
/* Short multiplication: c receives only the partial products of a*b that
   land in limbs 0..NLEN-1. a and b are normalised in place first. */
void BIG_smul(BIG c,BIG a,BIG b)
{
    int row,col;
    chunk carry;

    BIG_norm(a);
    BIG_norm(b);
    BIG_zero(c);

    for (row=0;row<NLEN;row++)
    {
        carry=0;
        /* stop as soon as the target limb row+col would reach NLEN;
           the carry out of the last limb of each row is discarded */
        for (col=0;row+col<NLEN;col++)
            carry=muladd(a[row],b[col],carry,&c[row+col]);
    }
#ifdef DEBUG_NORM
    c[NLEN]=0;
#endif
}
/* r=x^n using XTR method on traces of FP12s.
   n is normalised in place. x is conjugated and then restored inside the
   loop, and r is written only after the loop has finished. */
void FP4_xtr_pow(FP4 *r,FP4 *x,BIG n)
{
    int pos,odd,len;
    BIG k;
    FP2 three;
    FP4 sav,lo,mid,hi;

    /* starting triple: lo = 3 (lifted through FP2 into FP4), mid = x, hi = FP4_xtr_D(x) */
    BIG_zero(k);
    BIG_inc(k,3);
    FP2_from_BIG(&three,k);
    FP4_from_FP2(&lo,&three);
    FP4_copy(&mid,x);
    FP4_xtr_D(&hi,x);

    /* working exponent k = n>>1, reduced by one more when n is even */
    BIG_norm(n);
    odd=BIG_parity(n);
    BIG_copy(k,n);
    BIG_shr(k,1);
    if (odd==0)
    {
        BIG_dec(k,1);
        BIG_norm(k);
    }

    /* scan k from its top bit down to bit 0 */
    len=BIG_nbits(k);
    for (pos=len-1;pos>=0;pos--)
    {
        if (BIG_bit(k,pos))
        {
            FP4_conj(&sav,&lo);
            FP4_xtr_D(&lo,&mid);
            FP4_xtr_A(&mid,&hi,&mid,x,&sav);
            FP4_xtr_D(&hi,&hi);
        }
        else
        {
            FP4_copy(&sav,&mid);
            FP4_conj(x,x);              /* x is conjugated here ... */
            FP4_conj(&hi,&hi);
            FP4_xtr_A(&mid,&lo,&mid,x,&hi);
            FP4_conj(x,x);              /* ... and conjugated back here */
            FP4_xtr_D(&hi,&sav);
            FP4_xtr_D(&lo,&lo);
        }
    }

    /* even n takes the top element of the triple, odd n the middle one */
    if (odd==0) FP4_copy(r,&hi);
    else FP4_copy(r,&mid);
    FP4_reduce(r);
}
/* SU= 64 */
/* Write a into b[0..MODBYTES-1].
   a is normalised in place; the byte extraction works on a local copy.
   The low 8 bits of the copy go to the last position of b first, and
   BIG_fshr(copy,8) is applied before each earlier position is filled. */
void BIG_toBytes(char *b,BIG a)
{
    int pos;
    BIG tmp;

    BIG_norm(a);
    BIG_copy(tmp,a);

    for (pos=MODBYTES;pos>0;pos--)
    {
        b[pos-1]=tmp[0]&0xff;
        BIG_fshr(tmp,8);
    }
}
/* SU= 16 */
/* Divide r by 3 in place and return the remainder (0, 1 or 2).
   r is normalised first; limbs are processed from the most significant
   down, carrying each limb's remainder into the next lower limb. */
int BIG_div3(BIG r)
{
    int k;
    chunk limb,radix,rem;

    BIG_norm(r);
    radix=((chunk)1<<BASEBITS);
    rem=0;

    k=NLEN;
    while (k>0)
    {
        k--;
        limb=(rem*radix+r[k]);
        r[k]=limb/3;
        rem=limb%3;
    }
    return (int)rem;
}
/* SU= 24 */
/* Set r=a*c for a small integer c and return the carry out of the top limb.
   a is normalised in place first. */
chunk BIG_pmul(BIG r,BIG a,int c)
{
    int k;
    chunk limb;
    chunk cy=0;
    chunk mult=(chunk)c;

    BIG_norm(a);
    for (k=0;k<NLEN;k++)
    {
        /* a[k] is fetched before r[k] is cleared, so the result is still
           correct when r and a are the same array */
        limb=a[k];
        r[k]=0;
        cy=muladd(limb,mult,cy,&r[k]);
    }
#ifdef DEBUG_NORM
    r[NLEN]=0;
#endif
    return cy;
}
/* SU= 240 */
/* Set r=a^b, scanning the exponent from its least significant bit.
   b is normalised in place; a is copied before r is initialised. */
void FP4_pow(FP4 *r,FP4* a,BIG b)
{
    FP4 sq;
    BIG e,zero;
    int low;

    BIG_zero(zero);
    BIG_norm(b);
    BIG_copy(e,b);
    FP4_copy(&sq,a);
    FP4_one(r);

    for (;;)
    {
        /* take the low bit of e, then drop it */
        low=BIG_parity(e);
        BIG_shr(e,1);
        if (low) FP4_mul(r,r,&sq);

        /* finished once no exponent bits remain; this skips the final squaring */
        if (BIG_comp(e,zero)==0) break;
        FP4_sqr(&sq,&sq);
    }
    FP4_reduce(r);
}
/* Optimal R-ate pairing r=e(P,Q).
   P and Q are converted to affine form in place. */
void PAIR_ate(FP12 *r,ECP2 *P,ECP *Q)
{
    FP2 fx;
    BIG bnx,e,qx,qy;
    int k,len;
    ECP2 T;
    FP12 ln;
#if CHOICE<BLS_CURVES
    ECP2 K;
#endif

    /* FP2 constant (CURVE_Fra,CURVE_Frb) handed to ECP2_frob below;
       qx and qy are only scratch here and are reloaded from Q later */
    BIG_rcopy(qx,CURVE_Fra);
    BIG_rcopy(qy,CURVE_Frb);
    FP2_from_BIGs(&fx,qx,qy);

    BIG_rcopy(bnx,CURVE_Bnx);

    /* loop exponent e: 6*CURVE_Bnx-2 when CHOICE<BLS_CURVES, else CURVE_Bnx */
#if CHOICE<BLS_CURVES
    BIG_pmul(e,bnx,6);
    BIG_dec(e,2);
#else
    BIG_copy(e,bnx);
#endif
    BIG_norm(e);

    ECP2_affine(P);
    ECP_affine(Q);

    BIG_copy(qx,Q->x);
    BIG_copy(qy,Q->y);

    ECP2_copy(&T,P);
    FP12_one(r);
    len=BIG_nbits(e);

    /* Main Miller Loop: bits len-2 down to 1 of e */
    k=len-2;
    while (k>=1)
    {
        PAIR_line(&ln,&T,&T,qx,qy);
        FP12_smul(r,&ln);
        if (BIG_bit(e,k))
        {
            PAIR_line(&ln,&T,P,qx,qy);
            FP12_smul(r,&ln);
        }
        FP12_sqr(r,r);
        k--;
    }

    /* bit 0: same line evaluations, but without the trailing squaring */
    PAIR_line(&ln,&T,&T,qx,qy);
    FP12_smul(r,&ln);
    if (BIG_parity(e))
    {
        PAIR_line(&ln,&T,P,qx,qy);
        FP12_smul(r,&ln);
    }

    /* R-ate fixup required for BN curves */
#if CHOICE<BLS_CURVES
    ECP2_copy(&K,P);
    ECP2_frob(&K,&fx);

    ECP2_neg(&T);
    FP12_conj(r,r);

    PAIR_line(&ln,&T,&K,qx,qy);
    FP12_smul(r,&ln);

    ECP2_frob(&K,&fx);
    ECP2_neg(&K);
    PAIR_line(&ln,&T,&K,qx,qy);
    FP12_smul(r,&ln);
#endif
}
/* SU= 80 */
/* Set c=a*a, writing the double-length square into c.
   a is normalised in place first. Two implementations are selected by COMBA:
   a column-wise one using the double-width type dchunk, and a row-wise one
   built on muladd. */
void BIG_sqr(DBIG c,BIG a)
{
    int i,j;
    chunk carry;
#ifdef dchunk
    dchunk t,co;
#endif

    BIG_norm(a);

    /* Note 2*a[i] in loop below and extra addition */

#ifdef COMBA
    /* Each column j sums the off-diagonal products a[j-i]*a[i] once and then
       doubles the sum (t+=t). The incoming carry co is added after the
       doubling, and even columns also add the diagonal term a[j/2]*a[j/2]. */

    /* column 0: diagonal term only */
    t=(dchunk)a[0]*a[0];
    c[0]=(chunk)t&MASK;
    co=t>>BASEBITS;
    /* column 1: a single doubled cross term */
    t=(dchunk)a[1]*a[0];
    t+=t;
    t+=co;
    c[1]=(chunk)t&MASK;
    co=t>>BASEBITS;

    /* Lower half, columns 2..NLEN-1, handled as (even, odd) pairs */
#if NLEN%2==1
    for (j=2;j<NLEN-1;j+=2)
    {
        /* even column j */
        t=(dchunk)a[j]*a[0];
        for (i=1;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i];
        t+=t;
        t+=co;
        t+=(dchunk)a[j/2]*a[j/2];
        c[j]=(chunk)t&MASK;
        co=t>>BASEBITS;
        /* odd column j+1: no diagonal term */
        t=(dchunk)a[j+1]*a[0];
        for (i=1;i<(j+2)/2;i++) t+=(dchunk)a[j+1-i]*a[i];
        t+=t;
        t+=co;
        c[j+1]=(chunk)t&MASK;
        co=t>>BASEBITS;
    }
    /* NLEN is odd, so the last lower column NLEN-1 is even and has no
       partner; it is handled on its own */
    j=NLEN-1;
    t=(dchunk)a[j]*a[0];
    for (i=1;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i];
    t+=t;
    t+=co;
    t+=(dchunk)a[j/2]*a[j/2];
    c[j]=(chunk)t&MASK;
    co=t>>BASEBITS;
#else
    for (j=2;j<NLEN;j+=2)
    {
        /* even column j */
        t=(dchunk)a[j]*a[0];
        for (i=1;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i];
        t+=t;
        t+=co;
        t+=(dchunk)a[j/2]*a[j/2];
        c[j]=(chunk)t&MASK;
        co=t>>BASEBITS;
        /* odd column j+1: no diagonal term */
        t=(dchunk)a[j+1]*a[0];
        for (i=1;i<(j+2)/2;i++) t+=(dchunk)a[j+1-i]*a[i];
        t+=t;
        t+=co;
        c[j+1]=(chunk)t&MASK;
        co=t>>BASEBITS;
    }
#endif

    /* Upper half, columns NLEN..DNLEN-3: the first cross term pairs a[NLEN-1]
       with a[j-NLEN+1], the lowest index whose partner is still below NLEN */
#if NLEN%2==1
    /* NLEN is odd, so column NLEN is an odd column handled on its own */
    j=NLEN;
    t=(dchunk)a[NLEN-1]*a[j-NLEN+1];
    for (i=j-NLEN+2;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i];
    t+=t;
    t+=co;
    c[j]=(chunk)t&MASK;
    co=t>>BASEBITS;
    for (j=NLEN+1;j<DNLEN-2;j+=2)
    {
        /* even column j */
        t=(dchunk)a[NLEN-1]*a[j-NLEN+1];
        for (i=j-NLEN+2;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i];
        t+=t;
        t+=co;
        t+=(dchunk)a[j/2]*a[j/2];
        c[j]=(chunk)t&MASK;
        co=t>>BASEBITS;
        /* odd column j+1 */
        t=(dchunk)a[NLEN-1]*a[j-NLEN+2];
        for (i=j-NLEN+3;i<(j+2)/2;i++) t+=(dchunk)a[j+1-i]*a[i];
        t+=t;
        t+=co;
        c[j+1]=(chunk)t&MASK;
        co=t>>BASEBITS;
    }
#else
    for (j=NLEN;j<DNLEN-2;j+=2)
    {
        /* even column j */
        t=(dchunk)a[NLEN-1]*a[j-NLEN+1];
        for (i=j-NLEN+2;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i];
        t+=t;
        t+=co;
        t+=(dchunk)a[j/2]*a[j/2];
        c[j]=(chunk)t&MASK;
        co=t>>BASEBITS;
        /* odd column j+1 */
        t=(dchunk)a[NLEN-1]*a[j-NLEN+2];
        for (i=j-NLEN+3;i<(j+2)/2;i++) t+=(dchunk)a[j+1-i]*a[i];
        t+=t;
        t+=co;
        c[j+1]=(chunk)t&MASK;
        co=t>>BASEBITS;
    }
#endif

    /* column DNLEN-2: top diagonal term; the final carry is the top limb */
    t=(dchunk)a[NLEN-1]*a[NLEN-1]+co;
    c[DNLEN-2]=(chunk)t&MASK;
    co=t>>BASEBITS;
    c[DNLEN-1]=(chunk)co;
#else
    /* Row-wise form: accumulate each cross product a[i]*a[j] (j>i) once,
       double every limb, add the diagonal squares, then normalise */
    BIG_dzero(c);
    for (i=0;i<NLEN;i++)
    {
        carry=0;
        for (j=i+1;j<NLEN;j++)
            carry=muladd(a[i],a[j],carry,&c[i+j]);
        c[NLEN+i]=carry;
    }

    for (i=0;i<DNLEN;i++) c[i]*=2;

    /* a[i]*a[i] is added into limb 2*i; muladd's returned carry goes into limb 2*i+1 */
    for (i=0;i<NLEN;i++)
        c[2*i+1]+=muladd(a[i],a[i],0,&c[2*i]);

    BIG_dnorm(c);
#endif

#ifdef DEBUG_NORM
    c[DNLEN]=0;
#endif
}
/* Four-way simultaneous exponentiation over q[0..3] with exponents u[0..3];
   the result is written to p and reduced.
   In the comments below P,Q,R,S stand for q[0],q[1],q[2],q[3].
   NOTE(review): the table uses FP12_conj where an inverse is meant
   (P/Q, R/S); presumably the q[i] are unitary so that conjugation equals
   inversion - confirm against callers.
   The exponents are copied into t[] before being modified. */
void FP12_pow4(FP12 *p,FP12 *q,BIG u[4])
{
    int i,j,a[4],nb,m;
    FP12 g[8],c,s[2];
    BIG t[4],mt;
    sign8 w[NLEN*BASEBITS+1];

    for (i=0;i<4;i++)
        BIG_copy(t[i],u[i]);

    /* Build the table g[0..7]: g[k] = P * Q^(+/-1) * R^(+/-1) * S^(+/-1),
       where bits 4, 2 and 1 of k select the positive power of Q, R and S
       respectively (so g[0]=P/(Q*R*S) and g[7]=P*Q*R*S) */
    FP12_copy(&g[0],&q[0]);
    FP12_conj(&s[0],&q[1]);
    FP12_mul(&g[0],&s[0]); /* P/Q */
    FP12_copy(&g[1],&g[0]);
    FP12_copy(&g[2],&g[0]);
    FP12_copy(&g[3],&g[0]);
    FP12_copy(&g[4],&q[0]);
    FP12_mul(&g[4],&q[1]); /* P*Q */
    FP12_copy(&g[5],&g[4]);
    FP12_copy(&g[6],&g[4]);
    FP12_copy(&g[7],&g[4]);
    FP12_copy(&s[1],&q[2]);
    FP12_conj(&s[0],&q[3]);
    FP12_mul(&s[1],&s[0]); /* R/S */
    FP12_conj(&s[0],&s[1]); /* s[0] = conjugate of R/S */
    FP12_mul(&g[1],&s[0]);
    FP12_mul(&g[2],&s[1]);
    FP12_mul(&g[5],&s[0]);
    FP12_mul(&g[6],&s[1]);
    FP12_copy(&s[1],&q[2]);
    FP12_mul(&s[1],&q[3]); /* R*S */
    FP12_conj(&s[0],&s[1]); /* s[0] = conjugate of R*S */
    FP12_mul(&g[0],&s[0]);
    FP12_mul(&g[3],&s[1]);
    FP12_mul(&g[4],&s[0]);
    FP12_mul(&g[7],&s[1]);

    /* if power is even add 1 to power, and add q to correction */
    FP12_one(&c);

    BIG_zero(mt);
    for (i=0;i<4;i++)
    {
        if (BIG_parity(t[i])==0)
        {
            BIG_inc(t[i],1);
            BIG_norm(t[i]);
            FP12_mul(&c,&q[i]);
        }
        /* mt accumulates the sum of the (now odd) exponents; its bit length sets nb */
        BIG_add(mt,mt,t[i]);
        BIG_norm(mt);
    }

    /* c becomes the conjugate of the product of the q[i] whose exponent was bumped */
    FP12_conj(&c,&c);
    /* w[0..nb] is written below, so nb must not exceed NLEN*BASEBITS.
       NOTE(review): nothing in this function checks that bound - confirm
       that callers keep the exponents short enough. */
    nb=1+BIG_nbits(mt);

    /* convert exponent to signed 1-bit window */
    for (j=0;j<nb;j++)
    {
        for (i=0;i<4;i++)
        {
            /* digit is the low two bits of t[i] minus 2; it is removed from
               t[i] before the one-bit shift */
            a[i]=BIG_lastbits(t[i],2)-2;
            BIG_dec(t[i],a[i]);
            BIG_norm(t[i]);
            BIG_fshr(t[i],1);
        }
        w[j]=8*a[0]+4*a[1]+2*a[2]+a[3];
    }
    /* top window is taken from what remains of the exponents, without the -2 offset */
    w[nb]=8*BIG_lastbits(t[0],2)+4*BIG_lastbits(t[1],2)+2*BIG_lastbits(t[2],2)+BIG_lastbits(t[3],2);
    FP12_copy(p,&g[(w[nb]-1)/2]);

    for (i=nb-1;i>=0;i--)
    {
        /* m is 0 for w[i]>=0; for negative w[i] this relies on >> being an
           arithmetic shift, giving m=-1 */
        m=w[i]>>7;
        j=(w[i]^m)-m; /* j=abs(w[i]) */
        j=(j-1)/2;
        /* both the table entry and its conjugate are prepared; m&1 picks
           the conjugate when w[i] is negative */
        FP12_copy(&s[0],&g[j]);
        FP12_conj(&s[1],&g[j]);
        FP12_usqr(p,p);
        FP12_mul(p,&s[m&1]);
    }
    FP12_mul(p,&c); /* apply correction */
    FP12_reduce(p);
}
/* r=ck^a.cl^n using XTR double exponentiation method on traces of FP12s. See Stam thesis. */ void FP4_xtr_pow2(FP4 *r,FP4 *ck,FP4 *cl,FP4 *ckml,FP4 *ckm2l,BIG a,BIG b) { int i,f2,nb; BIG d,e,w; FP4 t,cu,cv,cumv,cum2v; BIG_norm(a); BIG_norm(b); BIG_copy(e,a); BIG_copy(d,b); FP4_copy(&cu,ck); FP4_copy(&cv,cl); FP4_copy(&cumv,ckml); FP4_copy(&cum2v,ckm2l); f2=0; while (BIG_parity(d)==0 && BIG_parity(e)==0) { BIG_shr(d,1); BIG_shr(e,1); f2++; } while (BIG_comp(d,e)!=0) { if (BIG_comp(d,e)>0) { BIG_imul(w,e,4); BIG_norm(w); if (BIG_comp(d,w)<=0) { BIG_copy(w,d); BIG_copy(d,e); BIG_sub(e,w,e); BIG_norm(e); FP4_xtr_A(&t,&cu,&cv,&cumv,&cum2v); FP4_conj(&cum2v,&cumv); FP4_copy(&cumv,&cv); FP4_copy(&cv,&cu); FP4_copy(&cu,&t); } else if (BIG_parity(d)==0) { BIG_shr(d,1); FP4_conj(r,&cum2v); FP4_xtr_A(&t,&cu,&cumv,&cv,r); FP4_xtr_D(&cum2v,&cumv); FP4_copy(&cumv,&t); FP4_xtr_D(&cu,&cu); } else if (BIG_parity(e)==1) { BIG_sub(d,d,e); BIG_norm(d); BIG_shr(d,1); FP4_xtr_A(&t,&cu,&cv,&cumv,&cum2v); FP4_xtr_D(&cu,&cu); FP4_xtr_D(&cum2v,&cv); FP4_conj(&cum2v,&cum2v); FP4_copy(&cv,&t); } else { BIG_copy(w,d); BIG_copy(d,e); BIG_shr(d,1); BIG_copy(e,w); FP4_xtr_D(&t,&cumv); FP4_conj(&cumv,&cum2v); FP4_conj(&cum2v,&t); FP4_xtr_D(&t,&cv); FP4_copy(&cv,&cu); FP4_copy(&cu,&t); } } if (BIG_comp(d,e)<0) { BIG_imul(w,d,4); BIG_norm(w); if (BIG_comp(e,w)<=0) { BIG_sub(e,e,d); BIG_norm(e); FP4_xtr_A(&t,&cu,&cv,&cumv,&cum2v); FP4_copy(&cum2v,&cumv); FP4_copy(&cumv,&cu); FP4_copy(&cu,&t); } else if (BIG_parity(e)==0) { BIG_copy(w,d); BIG_copy(d,e); BIG_shr(d,1); BIG_copy(e,w); FP4_xtr_D(&t,&cumv); FP4_conj(&cumv,&cum2v); FP4_conj(&cum2v,&t); FP4_xtr_D(&t,&cv); FP4_copy(&cv,&cu); FP4_copy(&cu,&t); } else if (BIG_parity(d)==1) { BIG_copy(w,e); BIG_copy(e,d); BIG_sub(w,w,d); BIG_norm(w); BIG_copy(d,w); BIG_shr(d,1); FP4_xtr_A(&t,&cu,&cv,&cumv,&cum2v); FP4_conj(&cumv,&cumv); FP4_xtr_D(&cum2v,&cu); FP4_conj(&cum2v,&cum2v); FP4_xtr_D(&cu,&cv); FP4_copy(&cv,&t); } else { BIG_shr(d,1); 
FP4_conj(r,&cum2v); FP4_xtr_A(&t,&cu,&cumv,&cv,r); FP4_xtr_D(&cum2v,&cumv); FP4_copy(&cumv,&t); FP4_xtr_D(&cu,&cu); } } } FP4_xtr_A(r,&cu,&cv,&cumv,&cum2v); for (i=0;i<f2;i++) FP4_xtr_D(r,r); FP4_xtr_pow(r,r,d); }
/* Optimal R-ate pairing r=e(P,Q).
   P and Q are converted to affine form in place. The loop exponent is
   6*CURVE_Bnx-2. */
void PAIR_ate(FP12 *r,ECP2 *P,ECP *Q)
{
    FP2 frb;
    BIG bnx,cnt,qx,qy;
    int bitno,nbits;
    ECP2 acc,frobP;
    FP12 line;

    /* FP2 constant (CURVE_Fra,CURVE_Frb) handed to ECP2_frob in the fixup;
       qx and qy are only scratch here and are reloaded from Q below */
    BIG_rcopy(qx,CURVE_Fra);
    BIG_rcopy(qy,CURVE_Frb);
    FP2_from_BIGs(&frb,qx,qy);

    /* cnt = 6*CURVE_Bnx-2 */
    BIG_rcopy(bnx,CURVE_Bnx);
    BIG_pmul(cnt,bnx,6);
    BIG_dec(cnt,2);
    BIG_norm(cnt);

    ECP2_affine(P);
    ECP_affine(Q);

    BIG_copy(qx,Q->x);
    BIG_copy(qy,Q->y);

    ECP2_copy(&acc,P);
    FP12_one(r);
    nbits=BIG_nbits(cnt);

    /* Main Miller Loop: bits nbits-2 down to 1 of cnt */
    for (bitno=nbits-2;bitno>0;bitno--)
    {
        PAIR_line(&line,&acc,&acc,qx,qy);
        FP12_smul(r,&line);
        if (BIG_bit(cnt,bitno))
        {
            PAIR_line(&line,&acc,P,qx,qy);
            FP12_smul(r,&line);
        }
        FP12_sqr(r,r);
    }

    /* last step: one more line on (acc,acc), with no squaring afterwards */
    PAIR_line(&line,&acc,&acc,qx,qy);
    FP12_smul(r,&line);

    /* R-ate fixup */
    ECP2_copy(&frobP,P);
    ECP2_frob(&frobP,&frb);

    ECP2_neg(&acc);
    FP12_conj(r,r);

    PAIR_line(&line,&acc,&frobP,qx,qy);
    FP12_smul(r,&line);

    ECP2_frob(&frobP,&frb);
    ECP2_neg(&frobP);
    PAIR_line(&line,&acc,&frobP,qx,qy);
    FP12_smul(r,&line);
}