Пример #1
0
	/// Natural logarithm of `_a`.
	///
	/// The argument is split with frexp() into a fraction and a binary exponent.
	/// The fraction is rebalanced around 1, a polynomial in s = f/(2+f) is evaluated
	/// (f = fraction - 1), and the exponent contribution is added as a
	/// kLogNat2Hi/kLogNat2Lo pair. Zero, negative and non-finite inputs get no
	/// special treatment in this function.
	BX_CONST_FUNC float log(float _a)
	{
		int32_t exponent;
		float fraction = frexp(_a, &exponent);

		// Fractions below sqrt(2)/2 are doubled, and the exponent is decremented
		// to compensate.
		if (fraction < kSqrt2*0.5f)
		{
			fraction *= 2.0f;
			--exponent;
		}

		const float ff = fraction - 1.0f;
		const float kk = float(exponent);
		const float ss = ff / (2.0f + ff);
		const float s2 = square(ss);
		const float s4 = square(s2);

		// Odd-indexed coefficients, Horner evaluation in s^4.
		float odd = mad(kLogC5, s4, kLogC3);
		odd       = mad(odd,    s4, kLogC1);
		const float t2 = s4*odd;

		// Even-indexed coefficients, Horner evaluation in s^4.
		float even = mad(kLogC6, s4, kLogC4);
		even       = mad(even,   s4, kLogC2);
		even       = mad(even,   s4, kLogC0);
		const float t1 = s2*even;

		const float t12  = t1 + t2;
		const float hfsq = 0.5f*square(ff);

		// Exponent contribution, carried as a high and a low part.
		const float hi = kk*kLogNat2Hi;
		const float lo = kk*kLogNat2Lo;

		const float tail = ss*(hfsq+t12) + lo;
		return hi - ( (hfsq - tail) - ff);
	}
Пример #2
0
	/// Two-argument arctangent of `_y` / `_x`.
	///
	/// A polynomial is evaluated on the ratio min(|x|,|y|) / max(|x|,|y|); the
	/// result is then mirrored according to which magnitude is larger, the sign
	/// of `_x`, and finally the sign of `_y`. When both inputs are zero the
	/// function returns `0.0f*sign(_y)`.
	BX_CONST_FUNC float atan2(float _y, float _x)
	{
		const float absX    = abs(_x);
		const float absY    = abs(_y);
		const float larger  = max(absX, absY);
		const float smaller = min(absX, absY);

		// Both arguments are zero: avoid the 0/0 division below.
		if (larger == 0.0f)
		{
			return 0.0f*sign(_y);
		}

		const float ratio   = smaller / larger;
		const float ratioSq = square(ratio);

		// Horner evaluation of the polynomial in ratio^2.
		float poly = mad(kAtan2C0, ratioSq, kAtan2C1);
		poly       = mad(poly,     ratioSq, kAtan2C2);
		poly       = mad(poly,     ratioSq, kAtan2C3);
		poly       = mad(poly,     ratioSq, kAtan2C4);
		poly       = mad(poly,     ratioSq, kAtan2C5);

		float angle = poly * ratio;

		// The ratio was formed as smaller/larger; undo the swap when |y| > |x|.
		if (absY > absX)
		{
			angle = kPiHalf - angle;
		}

		// Negative x: reflect into the other half plane.
		if (_x < 0.0f)
		{
			angle = kPi - angle;
		}

		return sign(_y)*angle;
	}
Пример #3
0
// Approximate square root of the value produced by expression `a`.
// Starts from native_rsqrt() and applies two refinement steps built on mad().
// The second (unnamed) parameter is only a tag selecting this double2 overload.
double2 fast_sqrt( const expression<E1>& a, double2_type  )
{
    double2 value, estimate, root;

    value = a();

    // Initial reciprocal-square-root estimate and its first refinement.
    estimate = native_rsqrt(value);
    estimate = estimate*mad(estimate*estimate,-value,3);

    // Turn the refined estimate into the root and refine once more.
    root = estimate*value;
    root = root*mad(-0.0625,root*estimate,0.75);

    return root;
}
Пример #4
0
	/// Arc cosine of `_a`.
	///
	/// A cubic polynomial in |_a| is multiplied by sqrt(1 - |_a|). For negative
	/// input the value is negated and kPi is added; this is done arithmetically
	/// through a 0/1 flag rather than a branch.
	BX_CONST_FUNC float acos(float _a)
	{
		const float magnitude = abs(_a);

		// Horner evaluation of the cubic in |_a|.
		float poly = mad(kAcosC3, magnitude, kAcosC2);
		poly       = mad(poly,    magnitude, kAcosC1);
		poly       = mad(poly,    magnitude, kAcosC0);

		const float positive = poly * sqrt(1.0f - magnitude);

		// 1.0f for negative input, 0.0f otherwise.
		const float isNegative = float(_a < 0.0f);
		const float mirrored   = positive - 2.0f*isNegative*positive;

		return isNegative*kPi + mirrored;
	}
Пример #5
0
/**\brief SureShrink threshold selection, the procedure described in Donoho et al., 1995.

 The coefficient magnitudes are scaled by sigma = mad(data, n)/0.6745 and
 sorted with abscmp (presumably ascending magnitude -- the risk formula below
 relies on that). Every scaled magnitude is then tried as a candidate
 threshold and the one with the smallest risk estimate
 \f[ n - 2(k+1) + (n-k)\,t_k^2 + \sum_{i<k} t_i^2 \f]
 is returned, scaled back by sigma.

 \param data input coefficients (not modified)
 \param n    number of elements in data
 \return the selected threshold; 0.0 when n is not positive or the scratch
         buffer cannot be allocated

 \ingroup grpwavelet
*/
double sureshrink(const double *data, int n){
  int i,k;
  double lambda, sigma, sure, suremin;
  double sq;      /* square of the current candidate           */
  double below;   /* running sum of tmp[i]^2 for all i < k     */
  double *tmp;
  dprintf("Db: sureshrink\n");

  if(n<=0)
    return 0.0;

  tmp = (double*)malloc((size_t)n*sizeof(double));
  if(tmp==NULL)
    return 0.0;

  /* NOTE(review): a zero sigma makes the scaled values inf/NaN -- confirm
     whether callers can pass data with mad()==0. */
  sigma  = mad(data, n)/0.6745;

  for(i=0; i<n; i++)
    tmp[i] = fabs(data[i])/sigma;

  /* compute the SURESHRINK threshold */
  suremin = DBL_MAX;
  qsort(tmp, n, sizeof(double), abscmp);

  lambda=0.0;
  below=0.0;
  for(k=0; k<n; k++){
    sq = tmp[k]*tmp[k];
    /* the prefix sum is carried in `below` instead of being re-added from
       scratch for every k */
    sure = n - 2*(k+1)+(n-k)*sq + below;
    if(sure<suremin){
      suremin = sure;
      lambda = fabs(tmp[k]);
    }
    below += sq;
  }
  lambda = sigma * lambda;

  free(tmp);
  return lambda;
}
Пример #6
0
bool
ImageBufAlgo::invert (ImageBuf &dst, const ImageBuf &A,
                      ROI roi, int nthreads)
{
    // The inverse 1-A is expressed as the multiply-add A*(-1) + 1, so the
    // whole job is delegated to mad() and its status is passed through.
    const bool ok = mad (dst, A, -1.0, 1.0, roi, nthreads);
    return ok;
}
// Fills dst ((rows-1) x cols, CV_32F) from the differences between vertically
// adjacent rows of the channel planes in srcCn:
//   dst(y,x) = exp( lnAlpha * sqrt( 1 + (sigma_s/sigma_r)^2 * sum_c diff_c^2 ) )
// where lnAlpha = -sqrt(2)/sigma_s. The per-row helpers (sqr_dif, add_sqr_dif,
// mad, sqrt_, mul) are assumed to be element-wise over `cols` floats, with
// diff_c presumably the difference between rows y and y+1 of channel c --
// confirm against their definitions.
// srcCn must hold at least one plane; all planes are read as float rows.
void AdaptiveManifoldFilterN::computeDTVer(vector<Mat>& srcCn, Mat& dst, float sigma_s, float sigma_r)
{
    const int channels = (int)srcCn.size();
    const int rows     = srcCn[0].rows;
    const int cols     = srcCn[0].cols;

    dst.create(rows-1, cols, CV_32F);

    const float ratioSqr = (float) SQR(sigma_s / sigma_r);
    const float lnAlpha  = (float) (-sqrt(2.0) / sigma_s);

    for (int y = 0; y < rows-1; y++)
    {
        float *out = dst.ptr<float>(y);

        // The first channel initialises the row, the remaining ones accumulate.
        sqr_dif(out, srcCn[0].ptr<float>(y), srcCn[0].ptr<float>(y+1), cols);
        for (int c = 1; c < channels; c++)
        {
            add_sqr_dif(out, srcCn[c].ptr<float>(y), srcCn[c].ptr<float>(y+1), cols);
        }

        mad(out, out, ratioSqr, 1.0f, cols);
        sqrt_(out, out, cols);
        mul(out, out, lnAlpha, cols);
    }

    // The exponential is applied once to the whole matrix instead of per row.
    cv::exp(dst, dst);
}
Пример #8
0
/* Solves for dl by handling each entry of pp[] separately and combining the
 * per-entry results with the Chinese remainder theorem.
 * For every pp[i] the inputs are raised to the power p1/pp[i], rho() is run
 * on the reduced problem, and the outcome is stored in rem[i].
 * All temporaries live in a stack buffer reserved for six bigs.
 * Uses the file-level variables np, pp, p1, p, PROOT, order and rem. */
void pollard(big id,big dl)
{
    int idx;
    long steps;
    big_chinese crt_ctx;
    big tmp,target,base,m,n,elem;
    char big_store[mr_big_reserve(6,50)];
    memset(big_store,0,mr_big_reserve(6,50));

    tmp   =mirvar_mem(big_store,0);
    target=mirvar_mem(big_store,1);
    base  =mirvar_mem(big_store,2);
    m     =mirvar_mem(big_store,3);
    n     =mirvar_mem(big_store,4);
    elem  =mirvar_mem(big_store,5);

    copy(id,elem);
    crt_init(&crt_ctx,np,pp);
    for (idx=0;idx<np;idx++)
    { /* accumulate solutions for each pp */
        /* tmp = p1/pp[idx] is the exponent applied to both inputs */
        copy(p1,tmp);
        divide(tmp,pp[idx],tmp);
        powmod(elem,tmp,p,target);
        powltr(PROOT,tmp,p,base);

        copy(pp[idx],order);
        steps=rho(target,base,m,n);

        /* presumably: tmp = 1/m mod order, then rem[idx] = tmp*n mod order
           (MIRACL xgcd/mad conventions -- confirm) */
        xgcd(m,order,tmp,tmp,tmp);
        mad(tmp,n,n,order,order,rem[idx]);

        printf("%9ld iterations needed\n",steps);
    }
    crt(&crt_ctx,rem,dl);  /* apply the Chinese remainder theorem */
    crt_end(&crt_ctx);
}
Пример #9
0
/* Squelch decision on the current signal buffer.
 * mad() is evaluated twice over fm->signal with a step of 2, once starting at
 * element 0 and once at element 1, and each result is compared with
 * fm->squelch_level.
 * Returns 1 for active signal (and resets squelch_hits), 0 for no signal
 * (and increments squelch_hits).
 * Only for small samples, big samples need chunk processing. */
int post_squelch(struct fm_state *fm)
{
	int count = fm->signal_len;
	int threshold = fm->squelch_level;
	int dev_even = mad(&(fm->signal[0]), count, 2);
	int dev_odd = mad(&(fm->signal[1]), count, 2);

	/* quiet: neither deviation exceeds the threshold */
	if ((dev_even <= threshold) && (dev_odd <= threshold)) {
		fm->squelch_hits++;
		return 0;
	}

	fm->squelch_hits = 0;
	return 1;
}
Пример #10
0
// Smooth interpolation of x between the edges a and b:
//   t = clamp((x - a) / (b - a), 0, 1),  result = t*t*(3 - 2*t).
// There is no guard for a == b, in which case the division is by zero.
// NOTE(review): x is taken by value while a and b are references -- confirm
// whether that is intentional.
ShGeneric<N, T> smoothstep(const ShGeneric<N, T>& a, const ShGeneric<N, T>& b,
    const ShGeneric<N, T> x) {
  // Position of x relative to the [a, b] interval.
  ShGeneric<N, T> t = (x - a) / (b - a);
  // TODO fix this for other types
  t = clamp(t, 0.0f, 1.0f); 
  // mad(-2, t, 3) is presumably -2*t + 3, giving the cubic t*t*(3 - 2*t).
  return t * t * mad(-2.0f, t, ShConstAttrib1f(3.0f));
}
/* Restores the program state from a binary file, one section after another.
 * Each section starts by reading a single item into rr (never used afterwards,
 * so presumably a section marker -- confirm), followed by a count and then
 * that many entries of each table belonging to the section.
 * read_fm(dst, count) is assumed to read `count` items into `dst`; vad(), vtt(),
 * mtt() and mad() are defined elsewhere and are used here as given.
 * The order of the reads must match the order in which the file was written.
 * Section numbers 5 and 6 do not occur in this routine. */
restore_bplatform_bin_file ()
{
    int rr;
    /* 0: vnbs/ttms counts, the vad and vtt tables and the mmts/tmbs storage */
    read_fm( rr, 1 );
    read_fm(vnbs,1);
    read_fm(ttms,1);
    read_fm(vad(1),vnbs+1);
    /* storage is re-allocated from vad(vnbs+1) before mmts is filled */
    restore_alloc ( vad(vnbs+1) );
    read_fm(mmts[1],vad(vnbs+1));
    read_fm(vtt(1),vnbs);
    read_fm(tmbs[1],ttms);
    mtt(vnbs+1,0);            /* !!! */
    mad(vnbs+2,max_mem);      /* !!! */
    /* 1: sym count followed by the per-symbol tables */
    read_fm( rr, 1 );
    read_fm(sym,1);
    read_fm(stri[1],sym);
    read_fm(lexi[1],sym);
    read_fm(edit[1],sym);
    read_fm(prio[1],sym);
    read_fm(righ[1],sym);
    read_fm(clos[1],sym);
    read_fm(arity[1],sym);
    /* 2: hcod table, always max_sym entries */
    read_fm( rr, 1 );
    read_fm(hcod[1],max_sym);
    /* 3: rul count followed by the per-rule tables */
    read_fm( rr, 1 );
    read_fm(rul,1);
    read_fm(ant[1],rul);
    read_fm(cns[1],rul);
    read_fm(rth[1],rul);
    read_fm(num[1],rul);
    read_fm(trl[1],rul);
    read_fm(lsb[1],rul);
    read_fm(pvd[1],rul);
    read_fm(sts[1],rul);
    /* 4: rttm count and the rtmb table */
    read_fm( rr, 1 );
    read_fm(rttm,1);
    read_fm(rtmb[1],rttm);
    /* 7: the count followed by the tru/tna/tnm/ttc/tft tables */
    read_fm( rr, 1 );
    read_fm(the,1);
    read_fm(tru[1],the);
    read_fm(tna[1],the);
    read_fm(tnm[1],the);
    read_fm(ttc[1],the);
    read_fm(tft[1],the);
    /* 8: tttm count and the ttmb table */
    read_fm( rr, 1 );
    read_fm(tttm,1);
    read_fm(ttmb[1],tttm);
    /* 9: ttdi count and the tdir table */
    read_fm( rr, 1 );
    read_fm(ttdi,1);
    read_fm(tdir[1],ttdi);
}
Пример #12
0
/**\brief Conventional (universal) threshold for wavelet estimation of single trial ERP's.
 *
	 \ingroup grpwavelet
 * Formula \f[ \lambda = \sigma \sqrt{2\log_e{n}} \f]
 * with \f$ \sigma \f$ estimated as mad(data, n)/0.6745.
 *
 * \param data input coefficients
 * \param n    number of elements in data
 * \return the threshold \f$ \lambda \f$
 */
double conventional_thresholding(const double *data, int n){
  dprintf("Db: conventional_thresholding\n");

  /* estimate of the population sd; 0.6745 is presumably the usual constant
     relating the MAD to the sd of a normal distribution -- confirm */
  const double sigma = mad(data,n)/0.6745;

  return sigma * sqrt(2*log(n));
}
Пример #13
0
/** \brief Translation-invariant threshold for wavelet estimation of single trial
	 ERP's, following Wang et al.'s (2007) technique.

	 \ingroup grpwavelet
	 Formula \f[ \lambda = \sigma \sqrt{2\log_e{n\log_2{n}}} \f]
	 with \f$ \sigma \f$ estimated as mad(data, n)/0.6745.

	 \param data input coefficients
	 \param n    number of elements in data
	 \return the threshold \f$ \lambda \f$
*/
double translation_invariant_thresholding(const double *data, int n){
  dprintf("Db: translation_invariant_thresholding\n");

  /* estimate of the population sd; 0.6745 is presumably the usual constant
     relating the MAD to the sd of a normal distribution -- confirm */
  const double sigma = mad(data, n)/0.6745;

  /* glog(n, 2) supplies the base-2 logarithm of the formula above */
  return sigma * sqrt(2*log(n*glog(n, 2)));
}
Пример #14
0
	/// Cosine of `_a` (radians).
	///
	/// The argument is reduced to a quadrant index (`bits`, 0..3) and a remainder
	/// `xx` inside that quadrant. Even quadrants evaluate the kCos* polynomial,
	/// odd quadrants the kSin* polynomial multiplied by `xx`; quadrants 1 and 2
	/// negate the result.
	///
	/// NOTE(review): `int32_t(real)` is only meaningful while `real` fits into an
	/// int32_t, so very large |_a| is not handled here.
	BX_CONST_FUNC float cos(float _a)
	{
		const float scaled = _a * 2.0f*kInvPi;
		const float real   = floor(scaled);
		const float xx     = _a - real * kPiHalf;
		const int32_t bits = int32_t(real) & 3;

		float c0, c2, c4, c6, c8, c10;

		if (bits == 0
		||  bits == 2)
		{
			// Even quadrant: cosine series.
			c0  = 1.0f;
			c2  = kCosC2;
			c4  = kCosC4;
			c6  = kCosC6;
			c8  = kCosC8;
			c10 = kCosC10;
		}
		else
		{
			// Odd quadrant: sine series, scaled by xx through c0.
			c0  = xx;
			c2  = kSinC2;
			c4  = kSinC4;
			c6  = kSinC6;
			c8  = kSinC8;
			c10 = kSinC10;
		}

		// Horner evaluation in xx^2. The constant term is written as a float
		// literal so the whole chain stays in single precision.
		const float xsq    = square(xx);
		const float tmp0   = mad(c10,  xsq, c8 );
		const float tmp1   = mad(tmp0, xsq, c6 );
		const float tmp2   = mad(tmp1, xsq, c4 );
		const float tmp3   = mad(tmp2, xsq, c2 );
		const float tmp4   = mad(tmp3, xsq, 1.0f);
		const float result = tmp4 * c0;

		return bits == 1 || bits == 2
			? -result
			:  result
			;
	}
Пример #15
0
/* Squelch decision on the current signal buffer.
 * mad() is evaluated twice over fm->signal with a step of 2, once starting at
 * element 0 and once at element 1, and each result is compared with
 * fm->squelch_level.
 * Returns 1 for active signal (squelch_hits is reset), 0 for no signal
 * (squelch_hits is incremented). On a weak signal the first signal_len
 * entries of fm->signal2 are zeroed, unless term_squelch_hits is set.
 * Only for small samples, big samples need chunk processing. */
int post_squelch(struct fm_state *fm)
{
	int i, dev_r, dev_j, len, sq_l;
	len = fm->signal_len;
	sq_l = fm->squelch_level;
	dev_r = mad(&(fm->signal[0]), len, 2);
	dev_j = mad(&(fm->signal[1]), len, 2);
	if ((dev_r > sq_l) || (dev_j > sq_l)) {
		fm->squelch_hits = 0;
		return 1;
	}
	fm->squelch_hits++;
	/* with term_squelch_hits set the buffer is left untouched */
	if (fm->term_squelch_hits) {
		return 0;
	}
	/* weak signal, kill it entirely */
	for (i=0; i<len; i++) {
		fm->signal2[i] = 0;
	}
	return 0;
}
Пример #16
0
  void	Explode::dispatch(double x, double y) {
    std::cout << "ecplode on (" << x << ")(" << y << ")" << std::endl;
    std::function<void(Effects::type, Effects::level)> mad =
      [this, x, y] (Effects::type t, Effects::level lvl) {
      if (t == Effects::Fire)
	dispatchAll(new Event::Type::FireExplosion(x, y, lvl));
      if (t == Effects::Ice)
	dispatchAll(new Event::Type::IceExplosion(x, y, lvl));
      if (t == Effects::Life)
	dispatchAll(new Event::Type::LifeExplosion(x, y, lvl));
      if (t == Effects::Electricity)
	dispatchAll(new Event::Type::ElectricityExplosion(x, y, lvl));
      if (t == Effects::Glyph)
	dispatchAll(new Event::Type::GlyphExplosion(x, y, lvl));
    };
    std::array<bool, 3> it = {true, false, false};
    Effects::level	lvl = Effects::low;

    if (elements[0] == elements[1]) {
      lvl = Effects::med;
      it[1] = true;
    }
    if (elements[0] == elements[2]) {
      lvl = (lvl == Effects::med) ? (Effects::high) : (Effects::med);
      it[2] = true;
    }
    mad(elements[0], lvl);
    if (!it[1]) {
      lvl = Effects::low;
      if (!it[2] && elements[1] == elements[2]) {
	lvl = Effects::med;
	it[2] = true;
      }
      mad(elements[1], lvl);
    }
    if (!it[2])
      mad(elements[2], Effects::low);
    dispatchAll(new Event::Type::Explosion(x, y));
  }
Пример #17
0
/* Builds a summary of the vector v: mean, variance, the quantiles at
 * 0, 0.25, 0.5, 0.75 and 1, and the result of mad(v).
 * The summary keeps a reference to v in its data field (v is not copied).
 * v must not be NULL (checked with assert).
 * Returns a newly malloc'ed summary, or NULL when that allocation fails. */
struct summary * summarise_vec( VEC v){
	assert(NULL!=v);

	/* Allocate the result first so that a failure leaves nothing else behind. */
	struct summary * s = malloc(sizeof(struct summary));
	if (NULL==s){
		return NULL;
	}

	/* Probabilities at which the quantiles are taken. */
	/* NOTE(review): quant is not released here -- confirm whether quantiles()
	   takes ownership of it. */
	VEC quant = create_vec(5);
	vset(quant,0,0.); vset(quant,1,0.25); vset(quant,2,0.5); vset(quant,3,0.75); vset(quant,4,1.);

	s->mean = mean(v);
	s->var = variance(v);
	s->quantiles = quantiles(v,quant);
	s->mad = mad(v);
	s->data = v;

	return s;
}
Пример #18
0
/* One step of Pollard's random mapping on x, with the exponent counters a and b
 * kept reduced against `order`. The region x falls into (relative to lim1 and
 * lim2) selects the update:
 *   x <  lim1         : x is combined with q, a is incremented
 *   lim1 <= x < lim2  : x is combined with itself, a and b are doubled
 *   otherwise         : x is combined with r, b is incremented
 * Each combination is a mad() call with modulus p (presumably a modular
 * multiplication -- see the MIRACL mad documentation). */
void iterate(big x,big q,big r,big a,big b)
{
    if (compare(x,lim1)<0)
    {
        mad(x,q,q,p,p,x);
        incr(a,1,a);
        if (compare(a,order)==0) zero(a);
    }
    else if (compare(x,lim2)<0)
    {
        mad(x,x,x,p,p,x);
        premult(a,2,a);
        if (compare(a,order)>=0) subtract(a,order,a);
        premult(b,2,b);
        if (compare(b,order)>=0) subtract(b,order,b);
    }
    else
    {
        mad(x,r,r,p,p,x);
        incr(b,1,b);
        if (compare(b,order)==0) zero(b);
    }
}
Пример #19
0
	/// Exponential function e^_a.
	///
	/// Arguments with |_a| <= kNearZero return `_a + 1`. Otherwise the argument
	/// is reduced by kk = round(_a / ln 2) multiples of ln 2 (taken as a
	/// kLogNat2Hi/kLogNat2Lo pair), a polynomial correction is evaluated on the
	/// remainder, and the result is scaled by 2^kk with ldexp().
	BX_CONST_FUNC float exp(float _a)
	{
		if (abs(_a) <= kNearZero)
		{
			return _a + 1.0f;
		}

		// Argument reduction: remainder = hi - lo.
		const float kk    = round(_a*kInvLogNat2);
		const float hi    = _a - kk*kLogNat2Hi;
		const float lo    =      kk*kLogNat2Lo;
		const float rem   = hi - lo;
		const float remSq = square(rem);

		// Horner evaluation in remainder^2.
		float poly = mad(kExpC4, remSq, kExpC3);
		poly       = mad(poly,   remSq, kExpC2);
		poly       = mad(poly,   remSq, kExpC1);
		poly       = mad(poly,   remSq, kExpC0);

		const float corr     = rem - remSq * poly;
		const float frac     = rem*corr/(2.0f-corr);
		const float mantissa = 1.0f - ( (lo - frac) - hi);

		return ldexp(mantissa, int32_t(kk) );
	}
Пример #20
0
// Hash of p built from LCG_REPS rounds of result = frac(result * a + m),
// starting from frac(p * 0.01). Every component therefore ends up in the range
// produced by frac(). The multiplier a and the increment m are fixed
// 4-component constants adapted to N components with fillcast.
// (mad(result, a, m) is presumably result*a + m.)
ShGeneric<N, T> hashlcg(const ShGeneric<N, T>& p) {
  // Seed: fractional part of the scaled input.
  ShAttrib<N, SH_TEMP, T> result = frac(p * 0.01);

  // TODO fix this for long tuples
  // Per-component multipliers: pi^4, e^4, 13^(pi/2), sqrt(1997).
  ShGeneric<N, T> a = fillcast<N>(
      ShConstAttrib4f(M_PI * M_PI * M_PI * M_PI, std::exp(4.0), 
          std::pow(13.0, M_PI / 2.0), std::sqrt(1997.0)));
  // Per-component increments: sqrt(2), 1/pi, sqrt(3), 1/e.
  ShGeneric<N, T> m = fillcast<N>(
      ShConstAttrib4f(std::sqrt(2.0), 1.0 / M_PI, std::sqrt(3.0), 
          std::exp(-1.0)));

  for(int i = 0; i < LCG_REPS; ++i) result = frac(mad(result, a, m)); 
  return result;
}
Пример #21
0
void glv(const Big &e,Big &r,Big *W[2],Big *B[2][2],Big u[2])
{
	int i,j;
	Big v[2],w;
	for (i=0;i<2;i++)
	{
		v[i]=mad(*W[i],e,(Big)0,r,w);
		u[i]=0;
	}
	u[0]=e;
	for (i=0;i<2;i++)
		for (j=0;j<2;j++)
			u[i]-=v[j]*(*B[j][i]);
	return;
}
Пример #22
0
BOOL double_inverse(_MIPD_ big n,big x,big y,big w,big z)
{
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif

    MR_IN(146)

    mad(_MIPP_ x,w,w,n,n,mr_mip->w6);
    if (size(mr_mip->w6)==0)
    {
        mr_berror(_MIPP_ MR_ERR_DIV_BY_ZERO);
        MR_OUT
        return FALSE;
    }
Пример #23
0
void galscott(const Big &e,Big &r,Big *WB[4],Big *B[4][4],Big u[4])
{
	int i,j;
	Big v[4],w;

	for (i=0;i<4;i++)
	{
		v[i]=mad(*WB[i],e,(Big)0,r,w);
		u[i]=0;
	}

	u[0]=e;
	for (i=0;i<4;i++)
		for (j=0;j<4;j++)
			u[i]-=v[j]*(*B[j][i]);
	return;
}
Пример #24
0
void galscott(const Big &e,Big &r,Big *WB[6],Big *B[6][6],Big u[6])
{
	int i,j;
	Big v[6],w;

	for (i=0;i<6;i++)
	{
		v[i]=mad(*WB[i],e,(Big)0,r,w);
		u[i]=0;
	}

	u[0]=e;
	for (i=0;i<6;i++)
	{
		for (j=0;j<6;j++)
			u[i]-=v[j]*(*B[j][i]);
	}
	return;
}
Пример #25
0
// Appends to cleanRegions every element of elts whose length lies within
// median +/- 3*mad of the cluster's lengths (bounds inclusive).
// Small clusters are special-cased: an empty cluster adds nothing, a single
// element is always kept, and a pair flagged by isInconsistent() is kept whole.
void ChrRegionCluster::removeOutliersWithMad(std::vector<const ChrRegion*>& cleanRegions) {
    const auto count = elts.size();

    if (count == 0) return;

    if (count == 1) {
        cleanRegions.push_back(elts[0]);
        return;
    }

    if (count == 2 && isInconsistent(elts[0]->length(), elts[1]->length())) {
        cleanRegions.push_back(elts[0]);
        cleanRegions.push_back(elts[1]);
        return;
    }

    std::vector<int> lengths;
    toLengthList(lengths);

    const int center = median(lengths);
    const int spread = mad(lengths);
    const int lowest = center - 3 * spread;
    const int highest = center + 3 * spread;

    for (const auto& region : elts) {
        const int len = region->length();
        if (len >= lowest && len <= highest) cleanRegions.push_back(region);
    }
}
Пример #26
0
/* Solves for dl by handling each entry of pp[] separately and combining the
 * per-entry results with the Chinese remainder theorem.
 * For every pp[i] the inputs are raised to the power p1/pp[i], rho() is run
 * on the reduced problem, and the outcome is stored in rem[i].
 * The six temporaries are created with mirvar() and released with mirkill().
 * Uses the file-level variables np, pp, p1, p, PROOT, order and rem. */
void pollard(big id,big dl)
{
    int idx;
    long steps;
    big tmp,target,base,m,n,elem;
    big_chinese crt_ctx;

    tmp=mirvar(0);
    target=mirvar(0);
    base=mirvar(0);
    m=mirvar(0);
    n=mirvar(0);
    elem=mirvar(0);

    copy(id,elem);
    crt_init(&crt_ctx,np,pp);
    for (idx=0;idx<np;idx++)
    { /* accumulate solutions for each pp */
        /* tmp = p1/pp[idx] is the exponent applied to both inputs */
        copy(p1,tmp);
        divide(tmp,pp[idx],tmp);
        powmod(elem,tmp,p,target);
        powltr(PROOT,tmp,p,base);

        copy(pp[idx],order);
        steps=rho(target,base,m,n);

        /* presumably: tmp = 1/m mod order, then rem[idx] = tmp*n mod order
           (MIRACL xgcd/mad conventions -- confirm) */
        xgcd(m,order,tmp,tmp,tmp);
        mad(tmp,n,n,order,order,rem[idx]);

        printf("%9ld iterations needed\n",steps);
    }
    crt(&crt_ctx,rem,dl);  /* apply the Chinese remainder theorem */
    crt_end(&crt_ctx);

    /* release the temporaries in reverse order of creation */
    mirkill(elem);
    mirkill(n);
    mirkill(m);
    mirkill(base);
    mirkill(target);
    mirkill(tmp);
}
Пример #27
0
/* Interactive factoring program using Brent's method.
 * Reads a number n from stdin and prints its factors one at a time, dividing
 * each found factor out of n until the remaining cofactor is prime.
 * The iteration applied to y is mad(y,y,c3,n,n,y) with c3 = 3, presumably
 * y <- (y*y + 3) mod n. Differences (y - x) are multiplied into q modulo n
 * and a gcd with n is taken once per batch of up to m steps.
 * NOTE(review): some calls here pass `mip` (mad, subtract, divide, ...) while
 * others do not (copy, compare, size) -- confirm this matches the MIRACL build
 * in use. */
int main()
{  /*  factoring program using Brents method */
    long k,r,i,m,iter;
    big x,y,z,n,q,ys,c3;
    miracl *mip=mirsys(50,0);
    x=mirvar(mip,0);
    y=mirvar(mip,0);
    ys=mirvar(mip,0);
    z=mirvar(mip,0);
    n=mirvar(mip,0);
    q=mirvar(mip,0);
    c3=mirvar(mip,3);
    printf("input number to be factored\n");
    cinnum(mip,n,stdin);
    if (isprime(mip,n))
    {
        printf("this number is prime!\n");
        return 0;
    }
    /* m: number of steps accumulated between two gcd evaluations;
       r: current cycle length, doubled after every unsuccessful round */
    m=10L;
    r=1L;
    iter=0L;
    do
    {
        printf("iterations=%5ld",iter);
        convert(mip,1,q);
        do
        {
            /* remember the current point in x, then advance y by r steps */
            copy(y,x);
            for (i=1L;i<=r;i++)
                mad(mip,y,y,c3,n,n,y);
            k=0;
            do
            {
                iter++;
                if (iter%10==0) printf("\b\b\b\b\b%5ld",iter);
                fflush(stdout);  
                /* ys is the restart point for the back-track below */
                copy(y,ys);
                for (i=1L;i<=mr_min(m,r-k);i++)
                {
                    mad(mip,y,y,c3,n,n,y);
                    subtract(mip,y,x,z);
                    mad(mip,z,q,q,n,n,q);
                }
                egcd(mip,q,n,z);
                k+=m;
            } while (k<r && size(z)==1);
            r*=2;
        } while (size(z)==1);
        /* the batched gcd collapsed to n itself: redo the last batch one
           step at a time, starting from ys */
        if (compare(z,n)==0) do 
        { /* back-track */
            mad(mip,ys,ys,c3,n,n,ys);
            subtract(mip,ys,x,z);
        } while (egcd(mip,z,n,z)==1);
        if (!isprime(mip,z))
             printf("\ncomposite factor ");
        else printf("\nprime factor     ");
        cotnum(mip,z,stdout);
        if (compare(z,n)==0) return 0;
        /* divide the factor out of n and reduce y modulo the new n */
        divide(mip,n,z,n);
        divide(mip,y,n,n);
    } while (!isprime(mip,n));
    printf("prime factor     ");
    cotnum(mip,n,stdout);
    return 0;
}
Пример #28
0
	// Two chained 12x12 products per work-group, both written into d_dinvA:
	//   1) C = A12 * invA22   (A12 read from Ain, invA22 read from d_dinvA)
	//   2) C = -(invA11 * C)  (invA11 read from d_dinvA, C from step 1)
	// Each work-item (idx = local id) owns one row of the 12x12 result and keeps
	// it in privateC; the operands are staged in local memory (lA, lB).
	// NB is a compile-time constant defined outside this kernel.
	__kernel void TRIPLE_DGEMM_UPDATE_192_12_R(__global const double *Ain, uint offAin, __global double *d_dinvA, int blk, uint lda, int npages, int na)
{
	// Ain is the non inverse matrix; the size of Ain is lda * na
	// offAin is the offset of Ain
	// d_dinvA is the inversed matrix. the size of d_invA is NB * (na-1)/NB + 1
	// blk is subblock size, which is 12 here.
	// lda in leading dimension. Column major here
	// npages = (na-1)/12*2 + 1; for 96 this is 4 for 192 this is 8

	//Work group size is [12]
	//global work size is [96*number of blocks]
	//each work item in each work group is responsible for every element in that row
	//each work group is responsible for one gemm;


	////////////// A12*invA22
	const uint gidx = get_group_id(0);
	const uint idx = get_local_id(0);

	const uint page = gidx % npages;
	const uint page_block = page / 8;//8 pages per page block
	const uint page_index_in_block = page % 8;


	__global double *B, *C;
	__local double lA[12][12];
	__local double lB[12][12];
	double privateC[12] = { (double)0 };

	//decide A12 location for each page
	Ain = Ain + offAin;
	Ain += (page*blk * 2 + blk) * lda + page * 2 * blk;

	//decide invA22 (B) location for each page
	B = d_dinvA + page_block*NB*NB + (page_index_in_block*blk * 2 + blk) * NB + page_index_in_block * 2 * blk + blk;

	//decide invA12 location for each page
	C = d_dinvA + page_block*NB*NB + (page_index_in_block*blk * 2 + blk) * NB + page_index_in_block * 2 * blk;

	//read A and B into LDS no transpose operated here
	// work-item idx loads row idx of both tiles, one element per column
	lA[idx][0] = Ain[idx];
	lA[idx][1] = Ain[idx + lda];
	lA[idx][2] = Ain[idx + lda * 2];
	lA[idx][3] = Ain[idx + lda * 3];
	lA[idx][4] = Ain[idx + lda * 4];
	lA[idx][5] = Ain[idx + lda * 5];
	lA[idx][6] = Ain[idx + lda * 6];
	lA[idx][7] = Ain[idx + lda * 7];
	lA[idx][8] = Ain[idx + lda * 8];
	lA[idx][9] = Ain[idx + lda * 9];
	lA[idx][10] = Ain[idx + lda * 10];
	lA[idx][11] = Ain[idx + lda * 11];

	lB[idx][0] = B[idx];
	lB[idx][1] = B[idx + NB];
	lB[idx][2] = B[idx + NB * 2];
	lB[idx][3] = B[idx + NB * 3];
	lB[idx][4] = B[idx + NB * 4];
	lB[idx][5] = B[idx + NB * 5];
	lB[idx][6] = B[idx + NB * 6];
	lB[idx][7] = B[idx + NB * 7];
	lB[idx][8] = B[idx + NB * 8];
	lB[idx][9] = B[idx + NB * 9];
	lB[idx][10] = B[idx + NB * 10];
	lB[idx][11] = B[idx + NB * 11];
	// every row of lB is read by every work-item below, so wait for all loads
	barrier(CLK_LOCAL_MEM_FENCE);

	//do math

	uint i = 0;

	// privateC[j] accumulates sum over i of lA[idx][i] * lB[i][j]
	do{
		privateC[0] = mad(lA[idx][i], lB[i][0], privateC[0]);
		privateC[1] = mad(lA[idx][i], lB[i][1], privateC[1]);
		privateC[2] = mad(lA[idx][i], lB[i][2], privateC[2]);
		privateC[3] = mad(lA[idx][i], lB[i][3], privateC[3]);
		privateC[4] = mad(lA[idx][i], lB[i][4], privateC[4]);
		privateC[5] = mad(lA[idx][i], lB[i][5], privateC[5]);
		privateC[6] = mad(lA[idx][i], lB[i][6], privateC[6]);
		privateC[7] = mad(lA[idx][i], lB[i][7], privateC[7]);
		privateC[8] = mad(lA[idx][i], lB[i][8], privateC[8]);
		privateC[9] = mad(lA[idx][i], lB[i][9], privateC[9]);
		privateC[10] = mad(lA[idx][i], lB[i][10], privateC[10]);
		privateC[11] = mad(lA[idx][i], lB[i][11], privateC[11]);
		//mem_fence(CLK_LOCAL_MEM_FENCE);
		i = i + 1;
	} while (i < 12);

	// store row idx of the product into C (row stride NB)
	i = 0;
	do{
		C[NB*i + idx] = privateC[i];
		i = i + 1;
	} while (i < 12);

	////////////// -invA11*invA12
	// The rows of C written above by the other work-items of this group are
	// read back below; a barrier only orders work-items of the same work-group.
	barrier(CLK_GLOBAL_MEM_FENCE);
	//A is moving to invA11
	__global double *A;
	// NOTE(review): this address uses (page % 4) while B and C above use
	// page_index_in_block (page % 8) -- confirm this is intended for the
	// 8-pages-per-block layout.
	A = d_dinvA + page_block*NB*NB + ((page % 4)*blk * 2) * NB + (page % 4) * 2 * blk;
	//both B and C are pointing at invA12
	B = C;

	//read A and B into LDS no transpose operated here
	lA[idx][0] = A[idx];
	lA[idx][1] = A[idx + NB];
	lA[idx][2] = A[idx + NB * 2];
	lA[idx][3] = A[idx + NB * 3];
	lA[idx][4] = A[idx + NB * 4];
	lA[idx][5] = A[idx + NB * 5];
	lA[idx][6] = A[idx + NB * 6];
	lA[idx][7] = A[idx + NB * 7];
	lA[idx][8] = A[idx + NB * 8];
	lA[idx][9] = A[idx + NB * 9];
	lA[idx][10] = A[idx + NB * 10];
	lA[idx][11] = A[idx + NB * 11];

	lB[idx][0] = B[idx];
	lB[idx][1] = B[idx + NB];
	lB[idx][2] = B[idx + NB * 2];
	lB[idx][3] = B[idx + NB * 3];
	lB[idx][4] = B[idx + NB * 4];
	lB[idx][5] = B[idx + NB * 5];
	lB[idx][6] = B[idx + NB * 6];
	lB[idx][7] = B[idx + NB * 7];
	lB[idx][8] = B[idx + NB * 8];
	lB[idx][9] = B[idx + NB * 9];
	lB[idx][10] = B[idx + NB * 10];
	lB[idx][11] = B[idx + NB * 11];
	barrier(CLK_LOCAL_MEM_FENCE);

	//do math

	// reset the accumulators for the second product
	i = 0;
	privateC[0] = 0;
	privateC[1] = 0;
	privateC[2] = 0;
	privateC[3] = 0;
	privateC[4] = 0;
	privateC[5] = 0;
	privateC[6] = 0;
	privateC[7] = 0;
	privateC[8] = 0;
	privateC[9] = 0;
	privateC[10] = 0;
	privateC[11] = 0;
	do{
		privateC[0] = mad(lA[idx][i], lB[i][0], privateC[0]);
		privateC[1] = mad(lA[idx][i], lB[i][1], privateC[1]);
		privateC[2] = mad(lA[idx][i], lB[i][2], privateC[2]);
		privateC[3] = mad(lA[idx][i], lB[i][3], privateC[3]);
		privateC[4] = mad(lA[idx][i], lB[i][4], privateC[4]);
		privateC[5] = mad(lA[idx][i], lB[i][5], privateC[5]);
		privateC[6] = mad(lA[idx][i], lB[i][6], privateC[6]);
		privateC[7] = mad(lA[idx][i], lB[i][7], privateC[7]);
		privateC[8] = mad(lA[idx][i], lB[i][8], privateC[8]);
		privateC[9] = mad(lA[idx][i], lB[i][9], privateC[9]);
		privateC[10] = mad(lA[idx][i], lB[i][10], privateC[10]);
		privateC[11] = mad(lA[idx][i], lB[i][11], privateC[11]);
		//mem_fence(CLK_LOCAL_MEM_FENCE);
		i = i + 1;
	} while (i < 12);

	// store the negated product back into C
	i = 0;
	do{
		C[NB*i + idx] = -1 * privateC[i];
		i = i + 1;
	} while (i < 12);

}
Пример #29
0
/* Signs a file with ECDSA.
 * Curve parameters are read from common.ecs (or edwards.ecs when MR_EDWARDS is
 * defined) and the private key from private.ecs. The user is prompted for a
 * random seed and for the name of the file to sign. The signature (r, s) is
 * written to a file with the same base name and the extension .ecs.
 * All bigs and the curve point live in stack buffers that are zeroed before
 * use and again before leaving once the private key has been loaded.
 * Always returns 0; problems are reported on stdout. */
int main()
{
    FILE *fp;
    char ifname[50];
    char ofname[sizeof(ifname)+4];           /* room for the appended ".ecs" */
    big a,b,p,q,x,y,d,r,s,k,hash;
    epoint *g;
    long seed;
    int bits;
    miracl instance;
    miracl *mip=&instance;
    char mem[MR_BIG_RESERVE(11)];            /* reserve space on the stack for 11 bigs */
    char mem1[MR_ECP_RESERVE(1)];            /* and one elliptic curve points         */
    memset(mem,0,MR_BIG_RESERVE(11));
    memset(mem1,0,MR_ECP_RESERVE(1));
 

/* get public data */

#ifndef MR_EDWARDS	
    fp=fopen("common.ecs","rt");
    if (fp==NULL)
    {
        printf("file common.ecs does not exist\n");
        return 0;
    }
    fscanf(fp,"%d\n",&bits); 
#else
    fp=fopen("edwards.ecs","rt");
    if (fp==NULL)
    {
        printf("file edwards.ecs does not exist\n");
        return 0;
    }
    fscanf(fp,"%d\n",&bits); 
#endif


    mirsys(mip,bits/4,16);   /* Use Hex internally */

    a=mirvar_mem(mip,mem,0);
    b=mirvar_mem(mip,mem,1);
    p=mirvar_mem(mip,mem,2);
    q=mirvar_mem(mip,mem,3);
    x=mirvar_mem(mip,mem,4);
    y=mirvar_mem(mip,mem,5);
    d=mirvar_mem(mip,mem,6);
    r=mirvar_mem(mip,mem,7);
    s=mirvar_mem(mip,mem,8);
    k=mirvar_mem(mip,mem,9);
    hash=mirvar_mem(mip,mem,10);

    innum(mip,p,fp);     /* modulus        */
    innum(mip,a,fp);     /* curve parameters */
    innum(mip,b,fp);     
    innum(mip,q,fp);     /* order of (x,y) */
    innum(mip,x,fp);     /* (x,y) point on curve of order q */
    innum(mip,y,fp);
    fclose(fp);

/* randomise */
    printf("Enter 9 digit random number seed  = ");
    scanf("%ld",&seed);
    getchar();
    irand(mip,seed);

    ecurve_init(mip,a,b,p,MR_PROJECTIVE);  /* initialise curve */
    g=epoint_init_mem(mip,mem1,0);
    epoint_set(mip,x,y,0,g); /* initialise point of order q */

/* calculate r - this can be done offline, 
   and hence amortized to almost nothing   */
    bigrand(mip,q,k);
    ecurve_mult(mip,k,g,g);      /* see ebrick.c for method to speed this up */
    epoint_get(mip,g,r,r);
    divide(mip,r,q,q);

/* get private key of signer */
    fp=fopen("private.ecs","rt");
    if (fp==NULL)
    {
        printf("file private.ecs does not exist\n");
        return 0;
    }
    innum(mip,d,fp);
    fclose(fp);

/* calculate message digest */
    printf("file to be signed = ");
    /* bounded read: the name can never overrun ifname */
    if (fgets(ifname,sizeof(ifname),stdin)==NULL)
    {
        printf("no file name given\n");
        memset(mem,0,MR_BIG_RESERVE(11));
        memset(mem1,0,MR_ECP_RESERVE(1));
        return 0;
    }
    ifname[strcspn(ifname,"\r\n")]='\0';     /* drop the line terminator kept by fgets */
    strcpy(ofname,ifname);
    strip(ofname);
    strcat(ofname,".ecs");
    if ((fp=fopen(ifname,"rb"))==NULL)
    {
        printf("Unable to open file %s\n",ifname);
        memset(mem,0,MR_BIG_RESERVE(11));
        memset(mem1,0,MR_ECP_RESERVE(1));
        return 0;
    }

    hashing(mip,fp,hash);
    fclose(fp);
/* calculate s */
    /* presumably k = 1/k mod q, then s = (d*r + hash)*k mod q
       (MIRACL xgcd/mad conventions -- confirm) */
    xgcd(mip,k,q,k,k,k);

    mad(mip,d,r,hash,q,q,s);
    mad(mip,s,k,k,q,q,s);
    fp=fopen(ofname,"wt");
    if (fp==NULL)
    {
        printf("Unable to open file %s\n",ofname);
        memset(mem,0,MR_BIG_RESERVE(11));
        memset(mem1,0,MR_ECP_RESERVE(1));
        return 0;
    }
    otnum(mip,r,fp);
    otnum(mip,s,fp);
    fclose(fp);

    /* wipe key material from the stack buffers */
    memset(mem,0,MR_BIG_RESERVE(11));
    memset(mem1,0,MR_ECP_RESERVE(1));
 
    return 0;
}
Пример #30
0
/* Extended greatest common divisor.
 *
 * Working registers: w1/w2 hold the running remainder pair (initialised to |x|
 * and |y|), w3/w4 the running cofactor pair (initialised to 1 and 0), w5/w0
 * are scratch.
 * The main loop alternates between
 *   - a full multi-precision division step (when b==0), and
 *   - applying a 2x2 set of single-word multipliers a,b,c,d that was
 *     accumulated by the inner `forever` loop from the leading digits only
 *     (this appears to follow Lehmer's approach to Euclid's algorithm).
 *
 * Results: xd receives w3; yd (only when distinct from xd) is derived from it
 * with a final mad(); z (only when distinct from xd and yd) receives the gcd
 * held in w1.
 * Returns size(w1), or 0 immediately when an error is already pending. */
int xgcd(_MIPD_ big x,big y,big xd,big yd,big z)
{ /* greatest common divisor by Euclids method  *
   * extended to also calculate xd and yd where *
   *      z = x.xd + y.yd = gcd(x,y)            *
   * if xd, yd not distinct, only xd calculated *
   * z only returned if distinct from xd and yd *
   * xd will always be positive, yd negative    */

    int s,n,iter;
    mr_small r,a,b,c,d;
    mr_small q,m,sr;
#ifdef MR_FP
    mr_small dres;
#endif

#ifdef mr_dltype
    union doubleword uu,vv;
    mr_large u,v,lr;
#else
    mr_small u,v,lr;
#endif

    BOOL last,dplus=TRUE;
    big t;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif

    if (mr_mip->ERNUM) return 0;

    MR_IN(30)

#ifdef MR_COUNT_OPS
    fpx++; 
#endif
  
    /* work on |x| and |y|; the sign of x is remembered in s */
    copy(x,mr_mip->w1);
    copy(y,mr_mip->w2);
    s=exsign(mr_mip->w1);
    insign(PLUS,mr_mip->w1);
    insign(PLUS,mr_mip->w2);
    convert(_MIPP_ 1,mr_mip->w3);
    zero(mr_mip->w4);
    last=FALSE;
    a=b=c=d=0;
    iter=0;

    while (size(mr_mip->w2)!=0)
    {
        /* b==0: no usable multipliers were accumulated, do one full step */
        if (b==0)
        { /* update mr_mip->w1 and mr_mip->w2 */

            divide(_MIPP_ mr_mip->w1,mr_mip->w2,mr_mip->w5);
            t=mr_mip->w1,mr_mip->w1=mr_mip->w2,mr_mip->w2=t;    /* swap(mr_mip->w1,mr_mip->w2) */
            multiply(_MIPP_ mr_mip->w4,mr_mip->w5,mr_mip->w0);
            add(_MIPP_ mr_mip->w3,mr_mip->w0,mr_mip->w3);
            t=mr_mip->w3,mr_mip->w3=mr_mip->w4,mr_mip->w4=t;    /* swap(xd,yd) */
            iter++;

        }
        else
        {
            /* apply the accumulated multipliers a,b,c,d to both register pairs */

 /* printf("a= %I64u b= %I64u c= %I64u  d= %I64u \n",a,b,c,d);   */

            mr_pmul(_MIPP_ mr_mip->w1,c,mr_mip->w5);   /* c*w1 */
            mr_pmul(_MIPP_ mr_mip->w1,a,mr_mip->w1);   /* a*w1 */
            mr_pmul(_MIPP_ mr_mip->w2,b,mr_mip->w0);   /* b*w2 */
            mr_pmul(_MIPP_ mr_mip->w2,d,mr_mip->w2);   /* d*w2 */

            /* dplus selects the order of subtraction for the two differences */
            if (!dplus)
            {
                mr_psub(_MIPP_ mr_mip->w0,mr_mip->w1,mr_mip->w1); /* b*w2-a*w1 */
                mr_psub(_MIPP_ mr_mip->w5,mr_mip->w2,mr_mip->w2); /* c*w1-d*w2 */
            }
            else
            {
                mr_psub(_MIPP_ mr_mip->w1,mr_mip->w0,mr_mip->w1); /* a*w1-b*w2 */
                mr_psub(_MIPP_ mr_mip->w2,mr_mip->w5,mr_mip->w2); /* d*w2-c*w1 */
            }
            mr_pmul(_MIPP_ mr_mip->w3,c,mr_mip->w5);
            mr_pmul(_MIPP_ mr_mip->w3,a,mr_mip->w3);
            mr_pmul(_MIPP_ mr_mip->w4,b,mr_mip->w0);
            mr_pmul(_MIPP_ mr_mip->w4,d,mr_mip->w4);
    
            if (a==0) copy(mr_mip->w0,mr_mip->w3);
            else      mr_padd(_MIPP_ mr_mip->w3,mr_mip->w0,mr_mip->w3);
            mr_padd(_MIPP_ mr_mip->w4,mr_mip->w5,mr_mip->w4);
        }
        if (mr_mip->ERNUM || size(mr_mip->w2)==0) break;


        /* extract leading digits of w1 and w2 into u and v */
        n=(int)mr_mip->w1->len;
        if (n==1)
        {
            /* single-word operands: u and v are exact */
            last=TRUE;
            u=mr_mip->w1->w[0];
            v=mr_mip->w2->w[0];
        }
        else
        {
            m=mr_mip->w1->w[n-1]+1;
#ifndef MR_SIMPLE_BASE
            if (mr_mip->base==0)
            {
#endif
#ifndef MR_NOFULLWIDTH
#ifdef mr_dltype
 /* use double length type if available */
                if (n>2 && m!=0)
                { /* squeeze out as much significance as possible */
                    uu.h[MR_TOP]=muldvm(mr_mip->w1->w[n-1],mr_mip->w1->w[n-2],m,&sr);
                    uu.h[MR_BOT]=muldvm(sr,mr_mip->w1->w[n-3],m,&sr);
                    vv.h[MR_TOP]=muldvm(mr_mip->w2->w[n-1],mr_mip->w2->w[n-2],m,&sr);
                    vv.h[MR_BOT]=muldvm(sr,mr_mip->w2->w[n-3],m,&sr);
                }
                else
                {
                    uu.h[MR_TOP]=mr_mip->w1->w[n-1];
                    uu.h[MR_BOT]=mr_mip->w1->w[n-2];
                    vv.h[MR_TOP]=mr_mip->w2->w[n-1];
                    vv.h[MR_BOT]=mr_mip->w2->w[n-2];
                    if (n==2) last=TRUE;
                }

                u=uu.d;
                v=vv.d;
#else
                if (m==0)
                {
                    u=mr_mip->w1->w[n-1];
                    v=mr_mip->w2->w[n-1];   
                }
                else
                {
                    u=muldvm(mr_mip->w1->w[n-1],mr_mip->w1->w[n-2],m,&sr);
                    v=muldvm(mr_mip->w2->w[n-1],mr_mip->w2->w[n-2],m,&sr);
                }
#endif
#endif
#ifndef MR_SIMPLE_BASE
            }
            else
            {
#ifdef mr_dltype
                if (n>2)
                { /* squeeze out as much significance as possible */
                    u=muldiv(mr_mip->w1->w[n-1],mr_mip->base,mr_mip->w1->w[n-2],m,&sr);
                    u=u*mr_mip->base+muldiv(sr,mr_mip->base,mr_mip->w1->w[n-3],m,&sr);
                    v=muldiv(mr_mip->w2->w[n-1],mr_mip->base,mr_mip->w2->w[n-2],m,&sr);
                    v=v*mr_mip->base+muldiv(sr,mr_mip->base,mr_mip->w2->w[n-3],m,&sr);
                }
                else
                {
                    u=(mr_large)mr_mip->base*mr_mip->w1->w[n-1]+mr_mip->w1->w[n-2];
                    v=(mr_large)mr_mip->base*mr_mip->w2->w[n-1]+mr_mip->w2->w[n-2];
                    last=TRUE;
                }
#else
                u=muldiv(mr_mip->w1->w[n-1],mr_mip->base,mr_mip->w1->w[n-2],m,&sr);
                v=muldiv(mr_mip->w2->w[n-1],mr_mip->base,mr_mip->w2->w[n-2],m,&sr);
#endif
            }
#endif
        }

        /* start a fresh set of multipliers (identity) */
        dplus=TRUE;
        a=1; b=0; c=0; d=1;

        forever
        { /* work only with most significant piece */
            if (last)
            {
                if (v==0) break;
                q=qdiv(u,v);
                if (q==0) break;
            }
            else
            {
                /* u and v are approximations: a quotient is accepted only
                   when two differently adjusted divisions agree on it */
                if (dplus)
                { 
                    if ((mr_small)(v-c)==0 || (mr_small)(v+d)==0) break;

                    q=qdiv(u+a,v-c);

                    if (q==0) break;

                    if (q!=qdiv(u-b,v+d)) break;
                }
                else 
                {
                    if ((mr_small)(v+c)==0 || (mr_small)(v-d)==0) break;
                    q=qdiv(u-a,v+c);
                    if (q==0) break;
                    if (q!=qdiv(u+b,v-d)) break;
                }
            }

            /* stop before the multipliers would reach MAXBASE */
            if (q==1)
            {
                if ((mr_small)(b+d) >= MAXBASE) break; 
                r=a+c;  a=c; c=r;
                r=b+d;  b=d; d=r;
                lr=u-v; u=v; v=lr;      
            }
            else
            { 
                if (q>=MR_DIV(MAXBASE-b,d)) break;
                r=a+q*c;  a=c; c=r;
                r=b+q*d;  b=d; d=r;
                lr=u-q*v; u=v; v=lr;
            }
            iter++;
            dplus=!dplus;
        }
        /* only the parity of the step count is needed */
        iter%=2;

    }

    /* the parity of the step count, adjusted for a negative x, decides
       whether w3 is replaced by y - w3 */
    if (s==MINUS) iter++;
    if (iter%2==1) subtract(_MIPP_ y,mr_mip->w3,mr_mip->w3);

    if (xd!=yd)
    {
        /* yd is derived from x, w3, w1 and y with one mad() call; presumably
           yd = (w1 - x*w3)/y -- confirm against the mad() documentation */
        negify(x,mr_mip->w2);
        mad(_MIPP_ mr_mip->w2,mr_mip->w3,mr_mip->w1,y,mr_mip->w4,mr_mip->w4);
        copy(mr_mip->w4,yd);
    }
    copy(mr_mip->w3,xd);
    if (z!=xd && z!=yd) copy(mr_mip->w1,z);

    MR_OUT
    return (size(mr_mip->w1));
}