/// Fill r[0..n) with extreme-value (Gumbel-type) variates with location `a`
/// and scale `b`, transforming the buffer in place.
///
/// The buffer is first filled by u01_oo_distribution(rng, n, r) and then run
/// through the helper sequence log -> mul by -1 -> log -> muladd(-b, a).
/// NOTE(review): the helpers are defined elsewhere; assuming they are
/// element-wise vector operations (log(n, x, y): y = log(x);
/// mul(n, s, x, y): y = s * x; muladd(n, x, s, t, y): y = x * s + t), the net
/// effect is r[i] = a - b * log(-log(u[i])) -- confirm against their
/// definitions.
///
/// \param rng  random number generator passed to u01_oo_distribution
/// \param n    number of elements in r
/// \param r    output buffer, also used as scratch space for every step
/// \param a    additive term of the final muladd
/// \param b    its negation is the multiplicative term of the final muladd
inline void extreme_value_distribution_impl(
    RNGType &rng, std::size_t n, RealType *r, RealType a, RealType b)
{
    const RealType minus_one = static_cast<RealType>(-1);
    const RealType neg_scale = -b;

    // Draw the uniforms straight into the output buffer.
    u01_oo_distribution(rng, n, r);

    // Every step below reads and writes r in place.
    log(n, r, r);
    mul(n, minus_one, r, r);
    log(n, r, r);
    muladd(n, r, neg_scale, a, r);
}
/* SU= 24 */
/*
 * Multiply the BIG a by the small integer b, leaving the result in the
 * double-length c.
 *
 * c is cleared first; each limb a[k] is then fed through muladd() together
 * with the running carry, and the carry left after the last limb is stored
 * in c[NLEN]. With DEBUG_NORM defined, c[DNLEN] is cleared as well.
 * NOTE(review): muladd() is defined elsewhere; presumably it stores the low
 * part of a[k]*b+carry through its pointer argument and returns the high
 * part -- confirm.
 */
void BIG_pxmul(DBIG c,BIG a,int b)
{
    const chunk factor=(chunk)b;
    chunk hi=0;
    int k=0;

    BIG_dzero(c);
    while (k<NLEN)
    {
        hi=muladd(a[k],factor,hi,&c[k]);
        k++;
    }
    c[NLEN]=hi;
#ifdef DEBUG_NORM
    c[DNLEN]=0;
#endif
}
/* SU= 72 */
/*
 * Multiply the BIGs a and b, leaving the double-length product in c.
 *
 * Both operands are passed through BIG_norm() before any limb is read.
 * Two compile-time variants:
 *  - COMBA defined: product-scanning form. Each output column sums every
 *    a[col-k]*b[k] that lands in it, plus the carry from the previous
 *    column, in a dchunk accumulator; the column value masked with MASK is
 *    stored and the bits above BASEBITS carry into the next column.
 *  - otherwise: schoolbook form built on muladd(), one row of partial
 *    products per limb of a.
 * With DEBUG_NORM defined, c[DNLEN] is cleared at the end.
 */
void BIG_mul(DBIG c,BIG a,BIG b)
{
    int outer,inner;
    chunk carry;
#ifdef dchunk
    dchunk acc,hi;
#endif

    /* normalise first so that adding up partial products cannot overflow */
    BIG_norm(a);
    BIG_norm(b);

#ifdef COMBA
    /* Comba is the faster route; the compiler is left to unroll the loops. */

    /* Lower columns 0..NLEN-1: the column index bounds the b index from above. */
    hi=0;
    for (outer=0;outer<NLEN;outer++)
    {
        acc=hi;
        for (inner=0;inner<=outer;inner++)
            acc+=(dchunk)a[outer-inner]*b[inner];
        c[outer]=(chunk)acc&MASK;
        hi=acc>>BASEBITS;
    }

    /* Upper columns NLEN..DNLEN-3: the b index now starts above zero. */
    for (outer=NLEN;outer<DNLEN-2;outer++)
    {
        acc=hi;
        for (inner=outer-NLEN+1;inner<NLEN;inner++)
            acc+=(dchunk)a[outer-inner]*b[inner];
        c[outer]=(chunk)acc&MASK;
        hi=acc>>BASEBITS;
    }

    /* Last product column holds only the top-limb product; its carry fills the final limb. */
    acc=(dchunk)a[NLEN-1]*b[NLEN-1]+hi;
    c[DNLEN-2]=(chunk)acc&MASK;
    hi=acc>>BASEBITS;
    c[DNLEN-1]=(chunk)hi;
#else
    BIG_dzero(c);
    for (outer=0;outer<NLEN;outer++)
    {
        carry=0;
        for (inner=0;inner<NLEN;inner++)
            carry=muladd(a[outer],b[inner],carry,&c[outer+inner]);
        c[NLEN+outer]=carry;
    }
#endif

#ifdef DEBUG_NORM
    c[DNLEN]=0;
#endif
}
/* SU= 24 */
/*
 * Multiply the BIG a by the small integer c, leaving the NLEN low limbs in r
 * and returning the carry that remains after the top limb.
 *
 * a is passed through BIG_norm() first. With DEBUG_NORM defined, r[NLEN] is
 * cleared before returning.
 */
chunk BIG_pmul(BIG r,BIG a,int c)
{
    const chunk factor=(chunk)c;
    chunk hi=0;
    chunk limb;
    int k;

    BIG_norm(a);
    for (k=0;k<NLEN;k++)
    {
        /* a[k] is copied out before r[k] is zeroed -- presumably so that r
           may be the same array as a */
        limb=a[k];
        r[k]=0;
        hi=muladd(limb,factor,hi,&r[k]);
    }
#ifdef DEBUG_NORM
    r[NLEN]=0;
#endif
    return hi;
}
/* SU= 40 */
/*
 * Short multiply: accumulate into the single-length c only those partial
 * products a[row]*b[col] whose target limb row+col lies below NLEN.
 *
 * Both operands are passed through BIG_norm() and c is zeroed first. The
 * carry left at the end of each row is discarded rather than stored. With
 * DEBUG_NORM defined, c[NLEN] is cleared at the end.
 */
void BIG_smul(BIG c,BIG a,BIG b)
{
    int row,col;
    chunk carry;

    BIG_norm(a);
    BIG_norm(b);
    BIG_zero(c);

    for (row=0;row<NLEN;row++)
    {
        carry=0;
        /* stopping at NLEN-row keeps row+col inside the result */
        for (col=0;col<NLEN-row;col++)
            carry=muladd(a[row],b[col],carry,&c[row+col]);
    }

#ifdef DEBUG_NORM
    c[NLEN]=0;
#endif
}
double dot(const fvec4d &ov) const { vector_type muladd(v[0] * ov.v[0] + v[1] * ov.v[1]); return muladd[0] + muladd[1]; }
/*
 * BIG_sqr: writes the double-length square a*a into c.
 *
 * BIG_norm(a) is called on the caller's array before any limb is read.
 *
 * Two compile-time variants:
 *
 *  - COMBA defined: column-by-column accumulation in the dchunk temporaries
 *    t and co. For each column j the off-diagonal products a[j-i]*a[i] are
 *    summed once and doubled (t+=t), then the carry co from the previous
 *    column is added. Even columns additionally add the diagonal term
 *    a[j/2]*a[j/2]. The value masked with MASK goes to c[j] and t>>BASEBITS
 *    becomes the next carry. Columns 0 and 1 and the final column DNLEN-2
 *    are written out by hand; the carry out of DNLEN-2 is stored in
 *    c[DNLEN-1].
 *    The loops handle columns in even/odd pairs. The NLEN%2 branches only
 *    change where the pairing breaks: for odd NLEN, column NLEN-1 (even) and
 *    column NLEN (odd) are each handled on their own between the paired
 *    loops; for even NLEN the paired loops cover everything.
 *
 *  - otherwise: c is zeroed, the products a[i]*a[j] with j>i are accumulated
 *    through muladd(), every limb of c is doubled, the squares a[i]*a[i] are
 *    added in at limbs 2*i and 2*i+1, and c is passed to BIG_dnorm().
 *    NOTE(review): muladd() is defined elsewhere; presumably it stores the
 *    low part of the multiply-accumulate through its pointer argument and
 *    returns the high part -- confirm.
 *
 * With DEBUG_NORM defined, c[DNLEN] is cleared at the end.
 */
/* SU= 80 */ void BIG_sqr(DBIG c,BIG a) { int i,j; chunk carry; #ifdef dchunk dchunk t,co; #endif BIG_norm(a); /* Note 2*a[i] in loop below and extra addition */ #ifdef COMBA t=(dchunk)a[0]*a[0]; c[0]=(chunk)t&MASK; co=t>>BASEBITS; t=(dchunk)a[1]*a[0]; t+=t; t+=co; c[1]=(chunk)t&MASK; co=t>>BASEBITS; #if NLEN%2==1 for (j=2;j<NLEN-1;j+=2) { t=(dchunk)a[j]*a[0]; for (i=1;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i]; t+=t; t+=co; t+=(dchunk)a[j/2]*a[j/2]; c[j]=(chunk)t&MASK; co=t>>BASEBITS; t=(dchunk)a[j+1]*a[0]; for (i=1;i<(j+2)/2;i++) t+=(dchunk)a[j+1-i]*a[i]; t+=t; t+=co; c[j+1]=(chunk)t&MASK; co=t>>BASEBITS; } j=NLEN-1; t=(dchunk)a[j]*a[0]; for (i=1;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i]; t+=t; t+=co; t+=(dchunk)a[j/2]*a[j/2]; c[j]=(chunk)t&MASK; co=t>>BASEBITS; #else for (j=2;j<NLEN;j+=2) { t=(dchunk)a[j]*a[0]; for (i=1;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i]; t+=t; t+=co; t+=(dchunk)a[j/2]*a[j/2]; c[j]=(chunk)t&MASK; co=t>>BASEBITS; t=(dchunk)a[j+1]*a[0]; for (i=1;i<(j+2)/2;i++) t+=(dchunk)a[j+1-i]*a[i]; t+=t; t+=co; c[j+1]=(chunk)t&MASK; co=t>>BASEBITS; } #endif #if NLEN%2==1 j=NLEN; t=(dchunk)a[NLEN-1]*a[j-NLEN+1]; for (i=j-NLEN+2;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i]; t+=t; t+=co; c[j]=(chunk)t&MASK; co=t>>BASEBITS; for (j=NLEN+1;j<DNLEN-2;j+=2) { t=(dchunk)a[NLEN-1]*a[j-NLEN+1]; for (i=j-NLEN+2;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i]; t+=t; t+=co; t+=(dchunk)a[j/2]*a[j/2]; c[j]=(chunk)t&MASK; co=t>>BASEBITS; t=(dchunk)a[NLEN-1]*a[j-NLEN+2]; for (i=j-NLEN+3;i<(j+2)/2;i++) t+=(dchunk)a[j+1-i]*a[i]; t+=t; t+=co; c[j+1]=(chunk)t&MASK; co=t>>BASEBITS; } #else for (j=NLEN;j<DNLEN-2;j+=2) { t=(dchunk)a[NLEN-1]*a[j-NLEN+1]; for (i=j-NLEN+2;i<(j+1)/2;i++) t+=(dchunk)a[j-i]*a[i]; t+=t; t+=co; t+=(dchunk)a[j/2]*a[j/2]; c[j]=(chunk)t&MASK; co=t>>BASEBITS; t=(dchunk)a[NLEN-1]*a[j-NLEN+2]; for (i=j-NLEN+3;i<(j+2)/2;i++) t+=(dchunk)a[j+1-i]*a[i]; t+=t; t+=co; c[j+1]=(chunk)t&MASK; co=t>>BASEBITS; } #endif t=(dchunk)a[NLEN-1]*a[NLEN-1]+co; c[DNLEN-2]=(chunk)t&MASK; co=t>>BASEBITS; 
c[DNLEN-1]=(chunk)co; #else BIG_dzero(c); for (i=0;i<NLEN;i++) { carry=0; for (j=i+1;j<NLEN;j++) carry=muladd(a[i],a[j],carry,&c[i+j]); c[NLEN+i]=carry; } for (i=0;i<DNLEN;i++) c[i]*=2; for (i=0;i<NLEN;i++) c[2*i+1]+=muladd(a[i],a[i],0,&c[2*i]); BIG_dnorm(c); #endif #ifdef DEBUG_NORM c[DNLEN]=0; #endif }