/// Natural logarithm of `_a`, computed from a polynomial approximation.
///
/// The input is split with frexp() into a fraction and a binary exponent.
/// The exponent contributes `exponent * ln(2)` (kept as a hi/lo pair), and the
/// fraction contributes log(1 + f), approximated by two interleaved
/// polynomials in s = f / (2 + f) built from kLogC0..kLogC6.
BX_CONST_FUNC float log(float _a)
{
	int32_t exponent;
	float frac = frexp(_a, &exponent);

	// Fractions below sqrt(2)/2 are doubled and the exponent is decremented
	// to compensate.
	if (frac < kSqrt2*0.5f)
	{
		frac *= 2.0f;
		--exponent;
	}

	frac -= 1.0f;

	const float expAsFloat = float(exponent);
	const float ln2Hi      = expAsFloat*kLogNat2Hi;
	const float ln2Lo      = expAsFloat*kLogNat2Lo;

	const float ratio      = frac / (2.0f + frac);
	const float ratioSq    = square(ratio);
	const float ratioQd    = square(ratioSq);

	// Even-indexed coefficients (C0, C2, C4, C6), Horner form in ratio^4.
	const float evenPoly   = mad(mad(mad(kLogC6, ratioQd, kLogC4), ratioQd, kLogC2), ratioQd, kLogC0);
	const float evenTerm   = ratioSq*evenPoly;

	// Odd-indexed coefficients (C1, C3, C5), Horner form in ratio^4.
	const float oddPoly    = mad(mad(kLogC5, ratioQd, kLogC3), ratioQd, kLogC1);
	const float oddTerm    = ratioQd*oddPoly;

	const float polySum    = evenTerm + oddTerm;
	const float halfFracSq = 0.5f*square(frac);

	return ln2Hi - ( (halfFracSq - (ratio*(halfFracSq+polySum) + ln2Lo) ) - frac);
}
/// Two-argument arctangent of `_y / _x`, via a polynomial approximation.
///
/// The ratio min(|x|,|y|) / max(|x|,|y|) is fed into an odd polynomial
/// (kAtan2C0..kAtan2C5). The result is then reflected about pi/2 when
/// |y| > |x|, reflected about pi when x is negative, and finally given the
/// sign of `_y`.
BX_CONST_FUNC float atan2(float _y, float _x)
{
	const float absX    = abs(_x);
	const float absY    = abs(_y);
	const float larger  = max(absX, absY);
	const float smaller = min(absX, absY);

	// Both inputs are zero: return a zero multiplied by sign(_y).
	if (larger == 0.0f)
	{
		return 0.0f*sign(_y);
	}

	const float ratio   = smaller / larger;
	const float ratioSq = square(ratio);

	// Horner evaluation in ratio^2, then one multiply by ratio.
	float poly = mad(kAtan2C0, ratioSq, kAtan2C1);
	poly = mad(poly, ratioSq, kAtan2C2);
	poly = mad(poly, ratioSq, kAtan2C3);
	poly = mad(poly, ratioSq, kAtan2C4);
	poly = mad(poly, ratioSq, kAtan2C5);
	const float base = poly * ratio;

	const float octantFixed = absY > absX ? kPiHalf - base : base;
	const float halfFixed   = _x < 0.0f ? kPi - octantFixed : octantFixed;

	return sign(_y)*halfFixed;
}
// Square root of the evaluated expression `a`, refined from native_rsqrt().
//
// With y = native_rsqrt(w):
//   est  = y * (3 - w*y^2)                    (twice a Newton-refined 1/sqrt(w))
//   root = est * w                            (about 2*sqrt(w))
//   root = root * (0.75 - 0.0625*root*est)    (second refinement, scale folded in)
// The unnamed double2_type parameter is only a tag and is not read.
double2 fast_sqrt( const expression<E1>& a, double2_type )
{
    double2 value;
    double2 est;
    double2 root;

    value = a();

    est = native_rsqrt(value);
    est = est*mad(est*est,-value,3);

    root = est*value;
    root = root*mad(-0.0625,root*est,0.75);

    return root;
}
/// Arc cosine of `_a`, via a polynomial approximation.
///
/// A cubic in |a| (kAcosC0..kAcosC3) is multiplied by sqrt(1 - |a|) to give
/// the value for the non-negative argument. For negative `_a` the result is
/// mirrored as pi - value, done branch-free with a 0/1 flag.
BX_CONST_FUNC float acos(float _a)
{
	const float magnitude = abs(_a);

	// Horner evaluation of the cubic in |a|.
	float poly = mad(kAcosC3, magnitude, kAcosC2);
	poly = mad(poly, magnitude, kAcosC1);
	poly = mad(poly, magnitude, kAcosC0);

	const float positive = poly * sqrt(1.0f - magnitude);

	// 1.0 when the input is negative, 0.0 otherwise.
	const float isNegative = float(_a < 0.0f);

	// Flips the sign of `positive` when isNegative is 1.
	const float signedValue = positive - 2.0f*isNegative*positive;

	return isNegative*kPi + signedValue;
}
/**\brief SureShrink threshold; procedure described in Donoho et al., 1995.
   \ingroup grpwavelet

   The noise level is estimated as sigma = mad(data, n)/0.6745. The
   coefficients are scaled to |data[i]|/sigma and sorted with abscmp. For
   every candidate threshold t_k = tmp[k] the risk estimate

      sure(k) = n - 2*(k+1) + (n-k)*t_k^2 + sum_{i<k} t_i^2

   is evaluated, and the candidate with the smallest risk is returned after
   scaling back by sigma.

   The sum of squares is carried as a running total, so the search is linear
   in n after the sort.

   \param data coefficients to threshold (not modified)
   \param n    number of coefficients
   \return the threshold lambda; 0.0 if n <= 0 or if the scratch buffer
           cannot be allocated
*/
double sureshrink(const double *data, int n){
  int i,k;
  double lambda, sigma, sure, suremin;
  double sq, cumsq;
  double *tmp;
  dprintf("Db: sureshrink\n");

  /* nothing to estimate from */
  if(n<=0)
	 return 0.0;

  tmp = (double*)malloc((size_t)n*sizeof(double));
  if(tmp==NULL)
	 return 0.0;

  sigma = mad(data, n)/0.6745;

  for(i=0; i<n; i++)
	 tmp[i] = fabs(data[i])/sigma;

  /* compute the SURESHRINK threshold */
  suremin = DBL_MAX;
  qsort(tmp, n, sizeof(double), abscmp);

  lambda = 0.0;
  cumsq  = 0.0; /* sum of tmp[i]^2 for i<k */
  for(k=0; k<n; k++){
	 sq   = tmp[k]*tmp[k];
	 sure = n - 2*(k+1) + (n-k)*sq + cumsq;
	 if(sure<suremin){
		suremin = sure;
		lambda = fabs(tmp[k]);
	 }
	 cumsq += sq;
  }
  lambda = sigma * lambda;

  free(tmp);
  return lambda;
}
// Write the inverse of A into dst over the given roi, using nthreads.
// Returns whatever the underlying mad() call reports.
bool
ImageBufAlgo::invert (ImageBuf &dst, const ImageBuf &A,
                      ROI roi, int nthreads)
{
    // Inversion is 1-A, expressed as the multiply-add A*(-1) + 1.
    const bool ok = mad (dst, A, -1.0, 1.0, roi, nthreads);
    return ok;
}
// Builds the vertical weight image for the given channels.
//
// For every pair of vertically adjacent rows (i, i+1), the squared
// differences are summed over all channels, then transformed as
//     dst = exp( lnAlpha * sqrt( sigmaRatioSqr * sum + 1 ) )
// where sigmaRatioSqr = (sigma_s/sigma_r)^2 and lnAlpha = -sqrt(2)/sigma_s.
// dst is (re)created as a CV_32F image with one row fewer than the source.
void AdaptiveManifoldFilterN::computeDTVer(vector<Mat>& srcCn, Mat& dst, float sigma_s, float sigma_r)
{
    const int channelCount = (int)srcCn.size();
    const int rows = srcCn[0].rows;
    const int cols = srcCn[0].cols;

    dst.create(rows-1, cols, CV_32F);

    const float sigmaRatioSqr = (float) SQR(sigma_s / sigma_r);
    const float lnAlpha       = (float) (-sqrt(2.0) / sigma_s);

    for (int row = 0; row < rows-1; ++row)
    {
        float *out = dst.ptr<float>(row);

        for (int ch = 0; ch < channelCount; ++ch)
        {
            float *upper = srcCn[ch].ptr<float>(row);
            float *lower = srcCn[ch].ptr<float>(row+1);

            // The first channel initialises the row, later ones accumulate.
            if (ch != 0)
                add_sqr_dif(out, upper, lower, cols);
            else
                sqr_dif(out, upper, lower, cols);
        }

        mad(out, out, sigmaRatioSqr, 1.0f, cols);
        sqrt_(out, out, cols);
        mul(out, out, lnAlpha, cols);
        //Exp_32f(out, out, cols);
    }

    // Exponentiate the whole image at once.
    cv::exp(dst, dst);
}
/* Solve for dl given id by handling each entry of pp[] separately and
 * combining the partial results with the Chinese Remainder Theorem.
 *
 * Relies on file-level state visible in the calls below: np, pp[], p, p1,
 * PROOT, order and rem[].  All temporaries live in a zeroed stack buffer. */
void pollard(big id,big dl)
{
    int idx;
    long steps;
    big_chinese crt_ctx;
    big scratch,tgt,gen,ea,eb,idc;
    char stack_mem[mr_big_reserve(6,50)];

    memset(stack_mem,0,mr_big_reserve(6,50));

    scratch=mirvar_mem(stack_mem,0);
    tgt=mirvar_mem(stack_mem,1);
    gen=mirvar_mem(stack_mem,2);
    ea=mirvar_mem(stack_mem,3);
    eb=mirvar_mem(stack_mem,4);
    idc=mirvar_mem(stack_mem,5);

    copy(id,idc);
    crt_init(&crt_ctx,np,pp);

    for (idx=0;idx<np;idx++)
    { /* accumulate solutions for each pp */
        copy(p1,scratch);
        divide(scratch,pp[idx],scratch);
        powmod(idc,scratch,p,tgt);
        powltr(PROOT,scratch,p,gen);
        copy(pp[idx],order);

        steps=rho(tgt,gen,ea,eb);

        /* all three outputs alias scratch; presumably this leaves the
           inverse of ea modulo order in scratch - verify against xgcd */
        xgcd(ea,order,scratch,scratch,scratch);
        mad(scratch,eb,eb,order,order,rem[idx]);

        printf("%9ld iterations needed\n",steps);
    }

    crt(&crt_ctx,rem,dl); /* apply chinese remainder theorem */
    crt_end(&crt_ctx);
}
int post_squelch(struct fm_state *fm)
/* returns 1 for active signal, 0 for no signal */
{
	/* only for small samples, big samples need chunk processing */
	const int count = fm->signal_len;
	const int threshold = fm->squelch_level;

	/* deviation of the two interleaved streams (stride 2, offsets 0 and 1) */
	const int dev_even = mad(&(fm->signal[0]), count, 2);
	const int dev_odd = mad(&(fm->signal[1]), count, 2);

	/* both streams at or below the threshold: count one more quiet block */
	if ((dev_even <= threshold) && (dev_odd <= threshold)) {
		fm->squelch_hits++;
		return 0;
	}

	fm->squelch_hits = 0;
	return 1;
}
// Smooth Hermite step between a and b.
// x is mapped to u = clamp((x - a) / (b - a), 0, 1) and the result is
// u*u*(3 - 2*u).
ShGeneric<N, T> smoothstep(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b,
                           const ShGeneric<N, T> x)
{
  ShGeneric<N, T> u = (x - a) / (b - a);

  // TODO fix this for other types
  u = clamp(u, 0.0f, 1.0f);

  // mad(-2, u, 3) == 3 - 2*u
  return u * u * mad(-2.0f, u, ShConstAttrib1f(3.0f));
}
/*
 * Restores saved state by issuing a fixed sequence of read_fm() calls.
 *
 * The data is organised in numbered sections (0-4 and 7-9; no sections 5 or
 * 6 are read here).  Each section starts by reading one item into rr, which
 * is not otherwise used in this function.  A section then reads one or more
 * counts, followed by tables sized by those counts, filled from index 1.
 *
 * NOTE(review): read_fm, vad, vtt, mtt and mad are defined elsewhere.
 * read_fm(dst, n) presumably reads n items starting at dst from the
 * currently open dump file - confirm against its definition.  The read order
 * must match the order used when the file was written.
 */
restore_bplatform_bin_file ()
{
  int rr;

  /* 0 : vnbs, ttms and the tables sized by them */
  read_fm( rr, 1 );
  read_fm(vnbs,1);
  read_fm(ttms,1);
  read_fm(vad(1),vnbs+1);
  /* allocation is sized from vad(vnbs+1), which was read just above */
  restore_alloc ( vad(vnbs+1) );
  read_fm(mmts[1],vad(vnbs+1));
  read_fm(vtt(1),vnbs);
  read_fm(tmbs[1],ttms);
  /* the next two calls are flagged "!!!" by the original author; they
     involve entries vnbs+1 and vnbs+2, just past the vnbs values read above */
  mtt(vnbs+1,0); /* !!! */
  mad(vnbs+2,max_mem); /* !!! */

  /* 1 : sym and the per-symbol tables */
  read_fm( rr, 1 );
  read_fm(sym,1);
  read_fm(stri[1],sym);
  read_fm(lexi[1],sym);
  read_fm(edit[1],sym);
  read_fm(prio[1],sym);
  read_fm(righ[1],sym);
  read_fm(clos[1],sym);
  read_fm(arity[1],sym);

  /* 2 : hcod, sized by max_sym rather than by a count read from the file */
  read_fm( rr, 1 );
  read_fm(hcod[1],max_sym);

  /* 3 : rul and the per-rule tables */
  read_fm( rr, 1 );
  read_fm(rul,1);
  read_fm(ant[1],rul);
  read_fm(cns[1],rul);
  read_fm(rth[1],rul);
  read_fm(num[1],rul);
  read_fm(trl[1],rul);
  read_fm(lsb[1],rul);
  read_fm(pvd[1],rul);
  read_fm(sts[1],rul);

  /* 4 : rttm and rtmb */
  read_fm( rr, 1 );
  read_fm(rttm,1);
  read_fm(rtmb[1],rttm);

  /* 7 : the and the tables sized by it */
  read_fm( rr, 1 );
  read_fm(the,1);
  read_fm(tru[1],the);
  read_fm(tna[1],the);
  read_fm(tnm[1],the);
  read_fm(ttc[1],the);
  read_fm(tft[1],the);

  /* 8 : tttm and ttmb */
  read_fm( rr, 1 );
  read_fm(tttm,1);
  read_fm(ttmb[1],tttm);

  /* 9 : ttdi and tdir */
  read_fm( rr, 1 );
  read_fm(ttdi,1);
  read_fm(tdir[1],ttdi);
}
/**\brief Conventional (universal) threshold for wavelet estimation of
 * single trial ERP's.
 * \ingroup grpwavelet
 *
 * Formula \f[ \lambda = \sigma \sqrt{2\log_e{n}} \f], with
 * \f$\sigma\f$ estimated as mad(data, n)/0.6745.
 *
 * \param data coefficients
 * \param n    number of coefficients
 * \return threshold \f$\lambda\f$
 */
double conventional_thresholding(const double *data, int n){
  dprintf("Db: conventional_thresholding\n");

  /* noise level estimate: mad() rescaled by the constant 0.6745 */
  const double noise_sd = mad(data,n)/0.6745;
  const double threshold = noise_sd * sqrt(2*log(n));

  return threshold;
}
/** \brief Threshold for wavelet estimation of single trial ERP's using
	 Wang et al.'s (2007) technique.
	 \ingroup grpwavelet

	 Formula \f[ \lambda = \sigma \sqrt{2\log_e{n\log_2{n}}} \f], with
	 \f$\sigma\f$ estimated as mad(data, n)/0.6745.

	 \param data coefficients
	 \param n    number of coefficients
	 \return threshold \f$\lambda\f$
*/
double translation_invariant_thresholding(const double *data, int n){
  dprintf("Db: translation_invariant_thresholding\n");

  /* noise level estimate: mad() rescaled by the constant 0.6745 */
  const double noise_sd = mad(data, n)/0.6745;
  const double threshold = noise_sd * sqrt(2*log(n*glog(n, 2)));

  return threshold;
}
/// Cosine of `_a` (radians), via quadrant reduction and a polynomial.
///
/// `real` is floor(_a * 2/pi), i.e. the quarter-turn the angle falls in, and
/// `xx` is the offset of the angle inside that quarter-turn. In quarter-turns
/// 0 and 2 the cosine polynomial (kCosC*) is evaluated. In quarter-turns 1
/// and 3 the sine polynomial (kSinC*) is evaluated and multiplied by `xx`.
/// The result is negated in quarter-turns 1 and 2.
BX_CONST_FUNC float cos(float _a)
{
	const float scaled = _a * 2.0f*kInvPi;
	const float real   = floor(scaled);
	const float xx     = _a - real * kPiHalf;
	const int32_t bits = int32_t(real) & 3;

	float c0, c2, c4, c6, c8, c10;

	if (bits == 0
	||  bits == 2)
	{
		c0  = 1.0f;
		c2  = kCosC2;
		c4  = kCosC4;
		c6  = kCosC6;
		c8  = kCosC8;
		c10 = kCosC10;
	}
	else
	{
		c0  = xx;
		c2  = kSinC2;
		c4  = kSinC4;
		c6  = kSinC6;
		c8  = kSinC8;
		c10 = kSinC10;
	}

	// Horner evaluation in xx^2. The constant term is a float literal, like
	// every other constant in this function, so no double is converted in
	// the call.
	const float xsq    = square(xx);
	const float tmp0   = mad(c10,  xsq, c8 );
	const float tmp1   = mad(tmp0, xsq, c6 );
	const float tmp2   = mad(tmp1, xsq, c4 );
	const float tmp3   = mad(tmp2, xsq, c2 );
	const float tmp4   = mad(tmp3, xsq, 1.0f);
	const float result = tmp4 * c0;

	return bits == 1 || bits == 2
		? -result
		:  result
		;
}
int post_squelch(struct fm_state *fm)
/* returns 1 for active signal, 0 for no signal.
 * On an inactive block the hit counter is incremented; unless
 * term_squelch_hits is set, the first signal_len entries of signal2 are
 * also zeroed. */
{
	int i, dev_r, dev_j, len, sq_l;
	/* only for small samples, big samples need chunk processing */
	len = fm->signal_len;
	sq_l = fm->squelch_level;
	/* deviation of the two interleaved streams (stride 2, offsets 0 and 1) */
	dev_r = mad(&(fm->signal[0]), len, 2);
	dev_j = mad(&(fm->signal[1]), len, 2);
	if ((dev_r > sq_l) || (dev_j > sq_l)) {
		fm->squelch_hits = 0;
		return 1;
	}
	fm->squelch_hits++;
	if (fm->term_squelch_hits) {
		return 0;}
	/* weak signal, kill it entirely */
	for (i=0; i<len; i++) {
		fm->signal2[i] = 0;
	}
	return 0;
}
void Explode::dispatch(double x, double y) { std::cout << "ecplode on (" << x << ")(" << y << ")" << std::endl; std::function<void(Effects::type, Effects::level)> mad = [this, x, y] (Effects::type t, Effects::level lvl) { if (t == Effects::Fire) dispatchAll(new Event::Type::FireExplosion(x, y, lvl)); if (t == Effects::Ice) dispatchAll(new Event::Type::IceExplosion(x, y, lvl)); if (t == Effects::Life) dispatchAll(new Event::Type::LifeExplosion(x, y, lvl)); if (t == Effects::Electricity) dispatchAll(new Event::Type::ElectricityExplosion(x, y, lvl)); if (t == Effects::Glyph) dispatchAll(new Event::Type::GlyphExplosion(x, y, lvl)); }; std::array<bool, 3> it = {true, false, false}; Effects::level lvl = Effects::low; if (elements[0] == elements[1]) { lvl = Effects::med; it[1] = true; } if (elements[0] == elements[2]) { lvl = (lvl == Effects::med) ? (Effects::high) : (Effects::med); it[2] = true; } mad(elements[0], lvl); if (!it[1]) { lvl = Effects::low; if (!it[2] && elements[1] == elements[2]) { lvl = Effects::med; it[2] = true; } mad(elements[1], lvl); } if (!it[2]) mad(elements[2], Effects::low); dispatchAll(new Event::Type::Explosion(x, y)); }
/* Build a summary of v: mean, variance, the five quantiles at
 * 0, 0.25, 0.5, 0.75 and 1, and mad(v).
 *
 * The returned struct is allocated with malloc and stores v itself in
 * s->data (v is not copied).  Returns NULL if the struct cannot be
 * allocated; the allocation is done first so that nothing else has been
 * created when that happens.
 *
 * NOTE(review): `quant` is created here and never released in this
 * function; whether quantiles() takes ownership of it is not visible from
 * here - confirm. */
struct summary * summarise_vec( VEC v){
    assert(NULL!=v);

    struct summary * s = malloc(sizeof(struct summary));
    if(NULL==s){
        return NULL;
    }

    /* probabilities at which the quantiles are requested */
    VEC quant = create_vec(5);
    vset(quant,0,0.);
    vset(quant,1,0.25);
    vset(quant,2,0.5);
    vset(quant,3,0.75);
    vset(quant,4,1.);

    s->mean = mean(v);
    s->var = variance(v);
    s->quantiles = quantiles(v,quant);
    s->mad = mad(v);
    s->data = v;

    return s;
}
/* Apply Pollards random mapping: one step of the walk on x, with the
 * counters a and b updated to match.  Which of the three rules is used
 * depends on where x lies relative to lim1 and lim2:
 *
 *   x < lim1          : x combined with q (mod p), a incremented
 *   lim1 <= x < lim2  : x combined with itself (mod p), a and b doubled
 *   otherwise         : x combined with r (mod p), b incremented
 *
 * a and b are kept reduced below `order`.  In MIRACL's mad() the third
 * argument is not added when it is the same variable as a factor, so each
 * combination is presumably a plain modular product - confirm against the
 * MIRACL manual. */
void iterate(big x,big q,big r,big a,big b)
{
    if (compare(x,lim1)<0)
    {
        mad(x,q,q,p,p,x);
        incr(a,1,a);
        if (compare(a,order)==0) zero(a);
    }
    else if (compare(x,lim2)<0)
    {
        mad(x,x,x,p,p,x);

        premult(a,2,a);
        if (compare(a,order)>=0) subtract(a,order,a);

        premult(b,2,b);
        if (compare(b,order)>=0) subtract(b,order,b);
    }
    else
    {
        mad(x,r,r,p,p,x);
        incr(b,1,b);
        if (compare(b,order)==0) zero(b);
    }
}
/// e raised to the power `_a`, via range reduction and a polynomial.
///
/// Inputs with |a| <= kNearZero return `1 + a` directly. Otherwise the
/// nearest integer multiple k of ln(2) is removed (ln(2) is kept as a hi/lo
/// pair), the exponential of the remainder is approximated using
/// kExpC0..kExpC4, and the result is scaled by 2^k with ldexp().
BX_CONST_FUNC float exp(float _a)
{
	if (abs(_a) <= kNearZero)
	{
		return _a + 1.0f;
	}

	const float multiple  = round(_a*kInvLogNat2);
	const float reducedHi = _a - multiple*kLogNat2Hi;
	const float reducedLo = multiple*kLogNat2Lo;
	const float reduced   = reducedHi - reducedLo;
	const float reducedSq = square(reduced);

	// Horner evaluation in reduced^2.
	float poly = mad(kExpC4, reducedSq, kExpC3);
	poly = mad(poly, reducedSq, kExpC2);
	poly = mad(poly, reducedSq, kExpC1);
	poly = mad(poly, reducedSq, kExpC0);

	const float corr     = reduced - reducedSq * poly;
	const float quotient = reduced*corr/(2.0f-corr);
	const float mantissa = 1.0f - ( (reducedLo - quotient) - reducedHi);

	return ldexp(mantissa, int32_t(multiple) );
}
// Hash of p built from a repeated linear-congruential style step.
// Starting from frac(p * 0.01), the step
//     state = frac(state * scale + offset)
// is applied LCG_REPS times with fixed irrational-looking constants.
ShGeneric<N, T> hashlcg(const ShGeneric<N, T>& p)
{
  ShAttrib<N, SH_TEMP, T> state = frac(p * 0.01);

  // TODO fix this for long tuples
  ShGeneric<N, T> scale = fillcast<N>(
    ShConstAttrib4f(M_PI * M_PI * M_PI * M_PI,
                    std::exp(4.0),
                    std::pow(13.0, M_PI / 2.0),
                    std::sqrt(1997.0)));
  ShGeneric<N, T> offset = fillcast<N>(
    ShConstAttrib4f(std::sqrt(2.0),
                    1.0 / M_PI,
                    std::sqrt(3.0),
                    std::exp(-1.0)));

  for (int rep = 0; rep < LCG_REPS; ++rep) {
    state = frac(mad(state, scale, offset));
  }

  return state;
}
void glv(const Big &e,Big &r,Big *W[2],Big *B[2][2],Big u[2]) { int i,j; Big v[2],w; for (i=0;i<2;i++) { v[i]=mad(*W[i],e,(Big)0,r,w); u[i]=0; } u[0]=e; for (i=0;i<2;i++) for (j=0;j<2;j++) u[i]-=v[j]*(*B[j][i]); return; }
BOOL double_inverse(_MIPD_ big n,big x,big y,big w,big z) { #ifdef MR_OS_THREADS miracl *mr_mip=get_mip(); #endif MR_IN(146) mad(_MIPP_ x,w,w,n,n,mr_mip->w6); if (size(mr_mip->w6)==0) { mr_berror(_MIPP_ MR_ERR_DIV_BY_ZERO); MR_OUT return FALSE; }
void galscott(const Big &e,Big &r,Big *WB[4],Big *B[4][4],Big u[4]) { int i,j; Big v[4],w; for (i=0;i<4;i++) { v[i]=mad(*WB[i],e,(Big)0,r,w); u[i]=0; } u[0]=e; for (i=0;i<4;i++) for (j=0;j<4;j++) u[i]-=v[j]*(*B[j][i]); return; }
void galscott(const Big &e,Big &r,Big *WB[6],Big *B[6][6],Big u[6]) { int i,j; Big v[6],w; for (i=0;i<6;i++) { v[i]=mad(*WB[i],e,(Big)0,r,w); u[i]=0; } u[0]=e; for (i=0;i<6;i++) { for (j=0;j<6;j++) u[i]-=v[j]*(*B[j][i]); } return; }
void ChrRegionCluster::removeOutliersWithMad(std::vector<const ChrRegion*>& cleanRegions) { if (elts.size() == 0) return; if (elts.size() == 1) { cleanRegions.push_back(elts[0]); return; } if (elts.size() == 2 && isInconsistent(elts[0]->length(), elts[1]->length())) { cleanRegions.push_back(elts[0]); cleanRegions.push_back(elts[1]); return; } std::vector<int> lens; toLengthList(lens); int m1 = median(lens); int m2 = mad(lens); for (auto itr = elts.begin(); itr != elts.end(); ++itr) { int l = (*itr)->length(); if (l >= m1 - 3 * m2 && l <= m1 + 3 * m2) cleanRegions.push_back(*itr); } }
/* Solve for dl given id by handling each entry of pp[] separately and
 * combining the partial results with the Chinese Remainder Theorem.
 *
 * Relies on file-level state visible in the calls below: np, pp[], p, p1,
 * PROOT, order and rem[].  Temporaries are allocated with mirvar() and
 * released with mirkill() before returning. */
void pollard(big id,big dl)
{
    int idx;
    long steps;
    big scratch,tgt,gen,ea,eb,idc;
    big_chinese crt_ctx;

    scratch=mirvar(0);
    tgt=mirvar(0);
    gen=mirvar(0);
    ea=mirvar(0);
    eb=mirvar(0);
    idc=mirvar(0);

    copy(id,idc);
    crt_init(&crt_ctx,np,pp);

    for (idx=0;idx<np;idx++)
    { /* accumulate solutions for each pp */
        copy(p1,scratch);
        divide(scratch,pp[idx],scratch);
        powmod(idc,scratch,p,tgt);
        powltr(PROOT,scratch,p,gen);
        copy(pp[idx],order);

        steps=rho(tgt,gen,ea,eb);

        /* all three outputs alias scratch; presumably this leaves the
           inverse of ea modulo order in scratch - verify against xgcd */
        xgcd(ea,order,scratch,scratch,scratch);
        mad(scratch,eb,eb,order,order,rem[idx]);

        printf("%9ld iterations needed\n",steps);
    }

    crt(&crt_ctx,rem,dl); /* apply chinese remainder theorem */
    crt_end(&crt_ctx);

    /* release in reverse order of allocation */
    mirkill(idc);
    mirkill(eb);
    mirkill(ea);
    mirkill(gen);
    mirkill(tgt);
    mirkill(scratch);
}
/*
 * Interactive factoring program: reads a number n from stdin and prints its
 * factors as they are found.
 *
 * The walk is y -> mad(y,y,3,n) and the differences (y - x) are accumulated
 * into the running product q.  A gcd against n is taken only once per batch
 * of up to m steps.  Per the MIRACL manual, mad(x,y,z,w,q,r) forms x*y+z and
 * divides by w, and the addition is skipped when z is the same variable as
 * a factor - confirm against the MIRACL version in use.
 */
int main()
{ /* factoring program using Brents method */
    long k,r,i,m,iter;
    big x,y,z,n,q,ys,c3;
    miracl *mip=mirsys(50,0);
    x=mirvar(mip,0);
    y=mirvar(mip,0);
    ys=mirvar(mip,0);
    z=mirvar(mip,0);
    n=mirvar(mip,0);
    q=mirvar(mip,0);
    c3=mirvar(mip,3);   /* the additive constant 3 of the iteration */
    printf("input number to be factored\n");
    cinnum(mip,n,stdin);
    if (isprime(mip,n))
    {
        printf("this number is prime!\n");
        return 0;
    }
    m=10L;     /* number of steps between gcd computations */
    r=1L;      /* current cycle length; doubled each round */
    iter=0L;   /* batch counter, only used for progress output */
    do
    { /* one pass per factor found */
        printf("iterations=%5ld",iter);
        convert(mip,1,q);   /* reset the accumulated product to 1 */
        do
        {
            copy(y,x);      /* x is the saved point differences are taken against */
            for (i=1L;i<=r;i++)
                mad(mip,y,y,c3,n,n,y);   /* advance y by r steps */
            k=0;
            do
            {
                iter++;
                /* overwrite the previous 5-character counter in place */
                if (iter%10==0) printf("\b\b\b\b\b%5ld",iter);
                fflush(stdout);
                copy(y,ys);   /* remember the batch start for back-tracking */
                for (i=1L;i<=mr_min(m,r-k);i++)
                {
                    mad(mip,y,y,c3,n,n,y);
                    subtract(mip,y,x,z);
                    mad(mip,z,q,q,n,n,q);   /* fold (y-x) into the product q */
                }
                egcd(mip,q,n,z);
                k+=m;
            } while (k<r && size(z)==1);   /* continue while the gcd is still 1 */
            r*=2;
        } while (size(z)==1);
        /* gcd equal to n: the batch overshot, so redo it one step at a time */
        if (compare(z,n)==0) do
        { /* back-track */
            mad(mip,ys,ys,c3,n,n,ys);
            subtract(mip,ys,x,z);
        } while (egcd(mip,z,n,z)==1);
        if (!isprime(mip,z))
            printf("\ncomposite factor ");
        else printf("\nprime factor ");
        cotnum(mip,z,stdout);
        /* the back-track also ended on n itself: give up */
        if (compare(z,n)==0) return 0;
        /* presumably n = n/z followed by y = y mod n, per MIRACL's divide()
           aliasing rules - confirm */
        divide(mip,n,z,n);
        divide(mip,y,n,n);
    } while (!isprime(mip,n));
    printf("prime factor ");
    cotnum(mip,n,stdout);
    return 0;
}
// Two back-to-back 12x12 matrix products per work-group:
//   1. C = A12 * invA22          (stored into the invA12 slot of d_dinvA)
//   2. C = -(invA11 * C)         (overwrites the same slot)
//
// Parameters:
//   Ain     - the non-inverted matrix; its size is lda * na
//   offAin  - offset applied to Ain
//   d_dinvA - the inverted matrix storage (blocks with leading dimension NB)
//   blk     - sub-block size, which is 12 here
//   lda     - leading dimension of Ain; column major
//   npages  - number of pages; original note: "(na-1)/12*2 + 1; for 96 this
//             is 4, for 192 this is 8"
//   na      - not read in this kernel body
//
// Launch notes from the original author: work-group size is [12]; global
// work size is [96 * number of blocks]; each work-item handles one row
// (idx) of the product; each work-group handles one gemm.
__kernel void TRIPLE_DGEMM_UPDATE_192_12_R(__global const double *Ain, uint offAin, __global double *d_dinvA, int blk, uint lda, int npages, int na)
{
    ////////////// A12*invA22
    const uint gidx = get_group_id(0);
    const uint idx = get_local_id(0);
    const uint page = gidx % npages;
    const uint page_block = page / 8;//8 pages per page block
    const uint page_index_in_block = page % 8;

    __global double *B, *C;
    // 12x12 tiles of both operands in local memory
    __local double lA[12][12];
    __local double lB[12][12];
    // one output row per work-item, accumulated in private memory
    double privateC[12] = { (double)0 };

    //decide A12 location for each page
    Ain = Ain + offAin;
    Ain += (page*blk * 2 + blk) * lda + page * 2 * blk;

    //decide invA22 (B) location for each page
    B = d_dinvA + page_block*NB*NB + (page_index_in_block*blk * 2 + blk) * NB + page_index_in_block * 2 * blk + blk;

    //decide invA12 location for each page
    C = d_dinvA + page_block*NB*NB + (page_index_in_block*blk * 2 + blk) * NB + page_index_in_block * 2 * blk;

    //read A and B into LDS no transpose operated here
    // each work-item loads row idx of both tiles (column j lives at + ld*j)
    lA[idx][0] = Ain[idx];
    lA[idx][1] = Ain[idx + lda];
    lA[idx][2] = Ain[idx + lda * 2];
    lA[idx][3] = Ain[idx + lda * 3];
    lA[idx][4] = Ain[idx + lda * 4];
    lA[idx][5] = Ain[idx + lda * 5];
    lA[idx][6] = Ain[idx + lda * 6];
    lA[idx][7] = Ain[idx + lda * 7];
    lA[idx][8] = Ain[idx + lda * 8];
    lA[idx][9] = Ain[idx + lda * 9];
    lA[idx][10] = Ain[idx + lda * 10];
    lA[idx][11] = Ain[idx + lda * 11];

    lB[idx][0] = B[idx];
    lB[idx][1] = B[idx + NB];
    lB[idx][2] = B[idx + NB * 2];
    lB[idx][3] = B[idx + NB * 3];
    lB[idx][4] = B[idx + NB * 4];
    lB[idx][5] = B[idx + NB * 5];
    lB[idx][6] = B[idx + NB * 6];
    lB[idx][7] = B[idx + NB * 7];
    lB[idx][8] = B[idx + NB * 8];
    lB[idx][9] = B[idx + NB * 9];
    lB[idx][10] = B[idx + NB * 10];
    lB[idx][11] = B[idx + NB * 11];
    // all rows of lA/lB must be loaded before any work-item reads them
    barrier(CLK_LOCAL_MEM_FENCE);

    //do math
    // privateC[j] = sum over i of lA[idx][i] * lB[i][j]
    uint i = 0;

    do{
        privateC[0] = mad(lA[idx][i], lB[i][0], privateC[0]);
        privateC[1] = mad(lA[idx][i], lB[i][1], privateC[1]);
        privateC[2] = mad(lA[idx][i], lB[i][2], privateC[2]);
        privateC[3] = mad(lA[idx][i], lB[i][3], privateC[3]);
        privateC[4] = mad(lA[idx][i], lB[i][4], privateC[4]);
        privateC[5] = mad(lA[idx][i], lB[i][5], privateC[5]);
        privateC[6] = mad(lA[idx][i], lB[i][6], privateC[6]);
        privateC[7] = mad(lA[idx][i], lB[i][7], privateC[7]);
        privateC[8] = mad(lA[idx][i], lB[i][8], privateC[8]);
        privateC[9] = mad(lA[idx][i], lB[i][9], privateC[9]);
        privateC[10] = mad(lA[idx][i], lB[i][10], privateC[10]);
        privateC[11] = mad(lA[idx][i], lB[i][11], privateC[11]);
        //mem_fence(CLK_LOCAL_MEM_FENCE);
        i = i + 1;
    } while (i < 12);

    // store row idx of the product into C (leading dimension NB)
    i = 0;
    do{
        C[NB*i + idx] = privateC[i];
        i = i + 1;
    } while (i < 12);

    ////////////// -invA11*invA12
    // the C values written above are re-read below by other work-items of
    // this work-group
    barrier(CLK_GLOBAL_MEM_FENCE);

    //A is moving to invA11
    // NOTE(review): this address uses (page % 4), while B and C above use
    // page_index_in_block (page % 8) with "8 pages per page block".  Looks
    // like it may be a leftover from a 4-pages-per-block variant - confirm
    // against the intended d_dinvA layout.
    __global double *A;
    A = d_dinvA + page_block*NB*NB + ((page % 4)*blk * 2) * NB + (page % 4) * 2 * blk;

    //both B and C are pointing at invA12
    B = C;

    //read A and B into LDS no transpose operated here
    lA[idx][0] = A[idx];
    lA[idx][1] = A[idx + NB];
    lA[idx][2] = A[idx + NB * 2];
    lA[idx][3] = A[idx + NB * 3];
    lA[idx][4] = A[idx + NB * 4];
    lA[idx][5] = A[idx + NB * 5];
    lA[idx][6] = A[idx + NB * 6];
    lA[idx][7] = A[idx + NB * 7];
    lA[idx][8] = A[idx + NB * 8];
    lA[idx][9] = A[idx + NB * 9];
    lA[idx][10] = A[idx + NB * 10];
    lA[idx][11] = A[idx + NB * 11];

    lB[idx][0] = B[idx];
    lB[idx][1] = B[idx + NB];
    lB[idx][2] = B[idx + NB * 2];
    lB[idx][3] = B[idx + NB * 3];
    lB[idx][4] = B[idx + NB * 4];
    lB[idx][5] = B[idx + NB * 5];
    lB[idx][6] = B[idx + NB * 6];
    lB[idx][7] = B[idx + NB * 7];
    lB[idx][8] = B[idx + NB * 8];
    lB[idx][9] = B[idx + NB * 9];
    lB[idx][10] = B[idx + NB * 10];
    lB[idx][11] = B[idx + NB * 11];
    barrier(CLK_LOCAL_MEM_FENCE);

    //do math
    // reset the accumulators before the second product
    i = 0;
    privateC[0] = 0;
    privateC[1] = 0;
    privateC[2] = 0;
    privateC[3] = 0;
    privateC[4] = 0;
    privateC[5] = 0;
    privateC[6] = 0;
    privateC[7] = 0;
    privateC[8] = 0;
    privateC[9] = 0;
    privateC[10] = 0;
    privateC[11] = 0;
    do{
        privateC[0] = mad(lA[idx][i], lB[i][0], privateC[0]);
        privateC[1] = mad(lA[idx][i], lB[i][1], privateC[1]);
        privateC[2] = mad(lA[idx][i], lB[i][2], privateC[2]);
        privateC[3] = mad(lA[idx][i], lB[i][3], privateC[3]);
        privateC[4] = mad(lA[idx][i], lB[i][4], privateC[4]);
        privateC[5] = mad(lA[idx][i], lB[i][5], privateC[5]);
        privateC[6] = mad(lA[idx][i], lB[i][6], privateC[6]);
        privateC[7] = mad(lA[idx][i], lB[i][7], privateC[7]);
        privateC[8] = mad(lA[idx][i], lB[i][8], privateC[8]);
        privateC[9] = mad(lA[idx][i], lB[i][9], privateC[9]);
        privateC[10] = mad(lA[idx][i], lB[i][10], privateC[10]);
        privateC[11] = mad(lA[idx][i], lB[i][11], privateC[11]);
        //mem_fence(CLK_LOCAL_MEM_FENCE);
        i = i + 1;
    } while (i < 12);

    // store the negated product back into C
    i = 0;
    do{
        C[NB*i + idx] = -1 * privateC[i];
        i = i + 1;
    } while (i < 12);
}
int main() { FILE *fp; char ifname[50],ofname[50]; big a,b,p,q,x,y,d,r,s,k,hash; epoint *g; long seed; int bits; miracl instance; miracl *mip=&instance; char mem[MR_BIG_RESERVE(11)]; /* reserve space on the stack for 11 bigs */ char mem1[MR_ECP_RESERVE(1)]; /* and one elliptic curve points */ memset(mem,0,MR_BIG_RESERVE(11)); memset(mem1,0,MR_ECP_RESERVE(1)); /* get public data */ #ifndef MR_EDWARDS fp=fopen("common.ecs","rt"); if (fp==NULL) { printf("file common.ecs does not exist\n"); return 0; } fscanf(fp,"%d\n",&bits); #else fp=fopen("edwards.ecs","rt"); if (fp==NULL) { printf("file edwards.ecs does not exist\n"); return 0; } fscanf(fp,"%d\n",&bits); #endif mirsys(mip,bits/4,16); /* Use Hex internally */ a=mirvar_mem(mip,mem,0); b=mirvar_mem(mip,mem,1); p=mirvar_mem(mip,mem,2); q=mirvar_mem(mip,mem,3); x=mirvar_mem(mip,mem,4); y=mirvar_mem(mip,mem,5); d=mirvar_mem(mip,mem,6); r=mirvar_mem(mip,mem,7); s=mirvar_mem(mip,mem,8); k=mirvar_mem(mip,mem,9); hash=mirvar_mem(mip,mem,10); innum(mip,p,fp); /* modulus */ innum(mip,a,fp); /* curve parameters */ innum(mip,b,fp); innum(mip,q,fp); /* order of (x,y) */ innum(mip,x,fp); /* (x,y) point on curve of order q */ innum(mip,y,fp); fclose(fp); /* randomise */ printf("Enter 9 digit random number seed = "); scanf("%ld",&seed); getchar(); irand(mip,seed); ecurve_init(mip,a,b,p,MR_PROJECTIVE); /* initialise curve */ g=epoint_init_mem(mip,mem1,0); epoint_set(mip,x,y,0,g); /* initialise point of order q */ /* calculate r - this can be done offline, and hence amortized to almost nothing */ bigrand(mip,q,k); ecurve_mult(mip,k,g,g); /* see ebrick.c for method to speed this up */ epoint_get(mip,g,r,r); divide(mip,r,q,q); /* get private key of signer */ fp=fopen("private.ecs","rt"); if (fp==NULL) { printf("file private.ecs does not exist\n"); return 0; } innum(mip,d,fp); fclose(fp); /* calculate message digest */ printf("file to be signed = "); gets(ifname); strcpy(ofname,ifname); strip(ofname); strcat(ofname,".ecs"); if 
((fp=fopen(ifname,"rb"))==NULL) { printf("Unable to open file %s\n",ifname); return 0; } hashing(mip,fp,hash); fclose(fp); /* calculate s */ xgcd(mip,k,q,k,k,k); mad(mip,d,r,hash,q,q,s); mad(mip,s,k,k,q,q,s); fp=fopen(ofname,"wt"); otnum(mip,r,fp); otnum(mip,s,fp); fclose(fp); memset(mem,0,MR_BIG_RESERVE(11)); memset(mem1,0,MR_ECP_RESERVE(1)); return 0; }
/*
 * Extended gcd on big numbers.  All arithmetic is done in the workspace
 * variables mr_mip->w0..w5; x and y themselves are only read.
 *
 * Workspace roles, as used below:
 *   w1, w2 - the current pair being reduced (start as |x|, |y|)
 *   w3, w4 - the cofactors being accumulated (start as 1, 0)
 *   w0, w5 - scratch
 *
 * The inner "forever" loop runs Euclid steps on u and v, which are built
 * from the leading one to three words of w1 and w2, and collects the steps
 * in the single-word multipliers a, b, c, d.  These are then applied to the
 * full numbers in one go at the top of the next outer iteration.  This
 * appears to be Lehmer's variant of Euclid's algorithm - confirm.
 *
 * Returns size(w1), the size of the gcd, or 0 if mr_mip->ERNUM is already
 * set on entry.
 */
int xgcd(_MIPD_ big x,big y,big xd,big yd,big z)
{ /* greatest common divisor by Euclids method  *
   * extended to also calculate xd and yd where  *
   *      z = x.xd + y.yd = gcd(x,y)             *
   * if xd, yd not distinct, only xd calculated  *
   * z only returned if distinct from xd and yd  *
   * xd will always be positive, yd negative     */

    int s,n,iter;
    mr_small r,a,b,c,d;
    mr_small q,m,sr;
#ifdef MR_FP
    mr_small dres;
#endif

#ifdef mr_dltype
    union doubleword uu,vv;
    mr_large u,v,lr;
#else
    mr_small u,v,lr;
#endif

    BOOL last,dplus=TRUE;
    big t;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif

    if (mr_mip->ERNUM) return 0;

    MR_IN(30)

#ifdef MR_COUNT_OPS
    fpx++;
#endif

    /* work on copies, made positive; remember the sign of x in s */
    copy(x,mr_mip->w1);
    copy(y,mr_mip->w2);
    s=exsign(mr_mip->w1);
    insign(PLUS,mr_mip->w1);
    insign(PLUS,mr_mip->w2);
    convert(_MIPP_ 1,mr_mip->w3);
    zero(mr_mip->w4);
    last=FALSE;
    a=b=c=d=0;
    iter=0;

    while (size(mr_mip->w2)!=0)
    {
        if (b==0)
        { /* update mr_mip->w1 and mr_mip->w2 */
          /* no single-word steps were collected: do one full-precision
             division step instead */
            divide(_MIPP_ mr_mip->w1,mr_mip->w2,mr_mip->w5);
            t=mr_mip->w1,mr_mip->w1=mr_mip->w2,mr_mip->w2=t;    /* swap(mr_mip->w1,mr_mip->w2) */
            multiply(_MIPP_ mr_mip->w4,mr_mip->w5,mr_mip->w0);
            add(_MIPP_ mr_mip->w3,mr_mip->w0,mr_mip->w3);
            t=mr_mip->w3,mr_mip->w3=mr_mip->w4,mr_mip->w4=t;    /* swap(xd,yd) */
            iter++;
        }
        else
        {
            /* apply the collected multipliers a,b,c,d to w1,w2 ... */

 /* printf("a= %I64u b= %I64u c= %I64u  d= %I64u \n",a,b,c,d);   */

            mr_pmul(_MIPP_ mr_mip->w1,c,mr_mip->w5);   /* c*w1 */
            mr_pmul(_MIPP_ mr_mip->w1,a,mr_mip->w1);   /* a*w1 */
            mr_pmul(_MIPP_ mr_mip->w2,b,mr_mip->w0);   /* b*w2 */
            mr_pmul(_MIPP_ mr_mip->w2,d,mr_mip->w2);   /* d*w2 */

            /* dplus selects the order of subtraction */
            if (!dplus)
            {
                mr_psub(_MIPP_ mr_mip->w0,mr_mip->w1,mr_mip->w1); /* b*w2-a*w1 */
                mr_psub(_MIPP_ mr_mip->w5,mr_mip->w2,mr_mip->w2); /* c*w1-d*w2 */
            }
            else
            {
                mr_psub(_MIPP_ mr_mip->w1,mr_mip->w0,mr_mip->w1); /* a*w1-b*w2 */
                mr_psub(_MIPP_ mr_mip->w2,mr_mip->w5,mr_mip->w2); /* d*w2-c*w1 */
            }

            /* ... and to the cofactors w3,w4 (sums here, not differences) */
            mr_pmul(_MIPP_ mr_mip->w3,c,mr_mip->w5);
            mr_pmul(_MIPP_ mr_mip->w3,a,mr_mip->w3);
            mr_pmul(_MIPP_ mr_mip->w4,b,mr_mip->w0);
            mr_pmul(_MIPP_ mr_mip->w4,d,mr_mip->w4);

            if (a==0) copy(mr_mip->w0,mr_mip->w3);
            else      mr_padd(_MIPP_ mr_mip->w3,mr_mip->w0,mr_mip->w3);
            mr_padd(_MIPP_ mr_mip->w4,mr_mip->w5,mr_mip->w4);
        }
        if (mr_mip->ERNUM || size(mr_mip->w2)==0) break;

        /* build u and v from the leading words of w1 and w2 */
        n=(int)mr_mip->w1->len;
        if (n==1)
        { /* single-word numbers: u and v are the complete values */
            last=TRUE;
            u=mr_mip->w1->w[0];
            v=mr_mip->w2->w[0];
        }
        else
        {
            /* m is the top word of w1 plus one; it becomes 0 when that
               addition wraps, which the full-width branches test for */
            m=mr_mip->w1->w[n-1]+1;
#ifndef MR_SIMPLE_BASE
            if (mr_mip->base==0)
            {
#endif
#ifndef MR_NOFULLWIDTH
#ifdef mr_dltype
 /* use double length type if available */
                if (n>2 && m!=0)
                { /* squeeze out as much significance as possible */
                    uu.h[MR_TOP]=muldvm(mr_mip->w1->w[n-1],mr_mip->w1->w[n-2],m,&sr);
                    uu.h[MR_BOT]=muldvm(sr,mr_mip->w1->w[n-3],m,&sr);
                    vv.h[MR_TOP]=muldvm(mr_mip->w2->w[n-1],mr_mip->w2->w[n-2],m,&sr);
                    vv.h[MR_BOT]=muldvm(sr,mr_mip->w2->w[n-3],m,&sr);
                }
                else
                {
                    uu.h[MR_TOP]=mr_mip->w1->w[n-1];
                    uu.h[MR_BOT]=mr_mip->w1->w[n-2];
                    vv.h[MR_TOP]=mr_mip->w2->w[n-1];
                    vv.h[MR_BOT]=mr_mip->w2->w[n-2];
                    /* two words fit completely into the double-length type */
                    if (n==2) last=TRUE;
                }

                u=uu.d;
                v=vv.d;
#else
                if (m==0)
                {
                    u=mr_mip->w1->w[n-1];
                    v=mr_mip->w2->w[n-1];
                }
                else
                {
                    u=muldvm(mr_mip->w1->w[n-1],mr_mip->w1->w[n-2],m,&sr);
                    v=muldvm(mr_mip->w2->w[n-1],mr_mip->w2->w[n-2],m,&sr);
                }
#endif
#endif
#ifndef MR_SIMPLE_BASE
            }
            else
            {
                /* non-zero base: same idea, using muldiv with the base */
#ifdef mr_dltype
                if (n>2)
                { /* squeeze out as much significance as possible */
                    u=muldiv(mr_mip->w1->w[n-1],mr_mip->base,mr_mip->w1->w[n-2],m,&sr);
                    u=u*mr_mip->base+muldiv(sr,mr_mip->base,mr_mip->w1->w[n-3],m,&sr);
                    v=muldiv(mr_mip->w2->w[n-1],mr_mip->base,mr_mip->w2->w[n-2],m,&sr);
                    v=v*mr_mip->base+muldiv(sr,mr_mip->base,mr_mip->w2->w[n-3],m,&sr);
                }
                else
                {
                    u=(mr_large)mr_mip->base*mr_mip->w1->w[n-1]+mr_mip->w1->w[n-2];
                    v=(mr_large)mr_mip->base*mr_mip->w2->w[n-1]+mr_mip->w2->w[n-2];
                    last=TRUE;
                }
#else
                u=muldiv(mr_mip->w1->w[n-1],mr_mip->base,mr_mip->w1->w[n-2],m,&sr);
                v=muldiv(mr_mip->w2->w[n-1],mr_mip->base,mr_mip->w2->w[n-2],m,&sr);
#endif
            }
#endif
        }

        /* start a new run of single-word steps from the identity */
        dplus=TRUE;
        a=1; b=0; c=0; d=1;

        forever
        { /* work only with most significant piece */
            if (last)
            { /* u and v are exact: plain Euclid step */
                if (v==0) break;
                q=qdiv(u,v);
                if (q==0) break;
            }
            else
            {
                /* u and v are approximations: accept the quotient only if
                   both perturbed divisions agree on it */
                if (dplus)
                {
                    if ((mr_small)(v-c)==0 || (mr_small)(v+d)==0) break;

                    q=qdiv(u+a,v-c);

                    if (q==0) break;

                    if (q!=qdiv(u-b,v+d)) break;
                }
                else
                {
                    if ((mr_small)(v+c)==0 || (mr_small)(v-d)==0) break;
                    q=qdiv(u-a,v+c);
                    if (q==0) break;
                    if (q!=qdiv(u+b,v-d)) break;
                }
            }

            /* update the multipliers, stopping before they could exceed
               MAXBASE */
            if (q==1)
            {
                if ((mr_small)(b+d) >= MAXBASE) break;
                r=a+c;  a=c; c=r;
                r=b+d;  b=d; d=r;
                lr=u-v; u=v; v=lr;
            }
            else
            {
                if (q>=MR_DIV(MAXBASE-b,d)) break;
                r=a+q*c;  a=c; c=r;
                r=b+q*d;  b=d; d=r;
                lr=u-q*v; u=v; v=lr;
            }
            iter++;
            dplus=!dplus;
        }
        /* only the parity of the step count matters from here on */
        iter%=2;
    }

    /* fix up the cofactor from the step parity and the original sign of x */
    if (s==MINUS) iter++;
    if (iter%2==1) subtract(_MIPP_ y,mr_mip->w3,mr_mip->w3);

    if (xd!=yd)
    {
        /* derive yd from xd: w4 = ((-x)*w3 + w1) / y, per MIRACL's mad()
           (quotient only, because the last two arguments are the same) */
        negify(x,mr_mip->w2);
        mad(_MIPP_ mr_mip->w2,mr_mip->w3,mr_mip->w1,y,mr_mip->w4,mr_mip->w4);
        copy(mr_mip->w4,yd);
    }
    copy(mr_mip->w3,xd);
    if (z!=xd && z!=yd) copy(mr_mip->w1,z);

    MR_OUT
    return (size(mr_mip->w1));
}