示例#1
0
/*
 * Reference implementation of iterative (Jacobi) smearing of a spinor field.
 *
 * res:         output buffer of V*spinorSiteSize elements; holds the smeared
 *              field on return.
 * gaugeFull:   per-direction gauge arrays; for each of the 4 directions the
 *              even-parity part starts at gaugeFull[dir] and the odd-parity
 *              part Vh*gaugeSiteSize elements later.
 * spinorField: input field of V*spinorSiteSize elements.
 * r:           smearing parameter; each step uses the weights 1/(1+6r) for
 *              the site itself and r/(1+6r) for each of the 6 neighbor terms.
 * steps:       number of smearing iterations; with steps <= 0 the input is
 *              simply copied to res.
 *
 * The routine ping-pongs between res and a heap-allocated scratch field, so
 * the number of steps decides which buffer holds the final result; a closing
 * copy makes sure it always ends up in res.
 *
 * Aborts the process if the scratch field cannot be allocated.
 */
void SmearJacobireference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, double r, int steps) {
  // Size computed in size_t so the byte count cannot overflow int arithmetic.
  sFloat *tmpSpinor = (sFloat *)malloc((size_t)V*spinorSiteSize*sizeof(sFloat));
  if (!tmpSpinor) {
    // The function returns void, so there is no way to report the failure;
    // stop here rather than hand a null pointer to the helpers below.
    abort();
  }
  sFloat *spinorIn, *spinorOut, *tmp;

  // Copy the contents of the original spinor into tmpSpinor
  xeqy(tmpSpinor, spinorField, V*spinorSiteSize);
  spinorIn = tmpSpinor;
  spinorOut = res;

  // Split every direction of the full gauge field into its even and odd halves
  gFloat *gaugeEven[4], *gaugeOdd[4];
  for (int dir = 0; dir < 4; dir++) {
    gaugeEven[dir] = gaugeFull[dir];
    gaugeOdd[dir]  = gaugeFull[dir]+Vh*gaugeSiteSize;
  }

  // Both weights depend only on r, so compute them once for all iterations
  const double selfWeight = 1/(1+6*r);
  const double hopWeight  = r/(1+6*r);

  for (int iter = 0; iter < steps; iter++) {

    // Start the new field from the rescaled current field
    // (presumably xeqay(x, a, y, n) sets x = a*y -- confirm against its definition)
    xeqay(spinorOut, selfWeight, spinorIn, V*spinorSiteSize);

    for (int oddBit = 0; oddBit < 2; oddBit++) {
      for (int i = 0; i < Vh; i++) {
        int fullindex = oddBit*Vh + i;

        // Spatial smearing only: direction indices 0..5
        for (int dir = 0; dir < 6; dir++) {
          gFloat *gauge = gaugeLink(i, dir, oddBit, gaugeEven, gaugeOdd, 1);
          sFloat *spinor = spinorNeighborFullLattice(fullindex, dir, spinorIn, 1);
          // One site worth of data: 4 blocks of 3*2 values
          // (presumably 4 spins x 3 colors x (re, im) -- confirm)
          sFloat gaugedSpinor[4*3*2];

          // Even direction indices use su3Mul, odd ones su3Tmul
          for (int s = 0; s < 4; s++) {
            if (dir % 2 == 0) su3Mul(&gaugedSpinor[s*(3*2)], gauge, &spinor[s*(3*2)]);
            else su3Tmul(&gaugedSpinor[s*(3*2)], gauge, &spinor[s*(3*2)]);
          }

          // Accumulate result from gaugedSpinor into spinorOut
          xpeqay(&spinorOut[fullindex*(4*3*2)], hopWeight, gaugedSpinor, 4*3*2);
        }
      }
    }

    // Swap the pointers: this step's output is the next step's input
    tmp = spinorIn;
    spinorIn = spinorOut;
    spinorOut = tmp;
  }

  // After the last swap spinorIn refers to the newest field; copy it into
  // res unless res already is that buffer
  if (spinorIn != res) {
    xeqy(res, spinorIn, V*spinorSiteSize);
  }
  free(tmpSpinor);
}
示例#2
0
文件: laplb.c 项目: g-koutsou/CoS-2
/*
 * Solves lapl(u) x = b, for x, given b, using Conjugate Gradient.
 *
 * lp: lattice parameters; only lp.L is read here directly, the struct is
 *     otherwise forwarded to the helper routines.
 * x:  on entry the starting guess (it is used to form the initial
 *     residual); updated by the iteration.
 * b:  right-hand side.
 * g:  link field forwarded to lapl().
 *
 * Runs at most max_iter (100) iterations and stops early as soon as
 * sqrt(rr/bb) < tol, where rr and bb are the xdotx() values of the residual
 * and of b. Per-iteration progress and a timing summary for lapl() are
 * printed to stdout; the summary states whether the tolerance was reached.
 *
 * NOTE(review): bb == 0 or pAp == 0 is not handled and leads to non-finite
 * values -- confirm callers never pass such input.
 */
void
cg(latparams lp, field **x, field **b, link **g)
{
  size_t L = lp.L;
  int max_iter = 100;
  float tol = 1e-9;

  /* Temporary fields needed for CG */
  field **r = new_field(lp);
  field **p = new_field(lp);
  field **Ap = new_field(lp);

  /* Initial residual and p-vector */
  lapl(lp, r, x, g);
  xmy(lp, b, r);
  xeqy(lp, p, r);

  /* Initial r-norm and b-norm */
  float rr = xdotx(lp, r);
  float bb = xdotx(lp, b);
  double t_lapl = 0;
  int iter = 0;
  for(iter=0; iter<max_iter; iter++) {
    printf(" %6d, res = %+e\n", iter, rr/bb);
    if(sqrt(rr/bb) < tol)
      break;
    /* Only the operator application is timed */
    double t = stop_watch(0);
    lapl(lp, Ap, p, g);
    t_lapl += stop_watch(t);
    float pAp = xdoty(lp, p, Ap);
    float alpha = rr/pAp;
    /* Step along p and update the residual accordingly */
    axpy(lp, alpha, p, x);
    axpy(lp, -alpha, Ap, r);
    float r1r1 = xdotx(lp, r);
    float beta = r1r1/rr;
    /* New search direction built from the residual and the old direction */
    xpay(lp, r, beta, p);
    rr = r1r1;
  }

  /*
   * The loop also ends when max_iter is exhausted, so test the tolerance
   * once more here (before rr is overwritten below) instead of assuming
   * convergence.
   */
  int converged = sqrt(rr/bb) < tol;

  /* Recompute the residual from x for the final report */
  lapl(lp, r, x, g);
  xmy(lp, b, r);
  rr = xdotx(lp, r);

  /*
   * Average time per lapl() call. When the starting guess already meets
   * the tolerance no call is timed (iter == 0); report zeros instead of
   * dividing by zero.
   */
  double t_call = (iter > 0) ? t_lapl/(double)iter : 0.0;
  double sites = (double)L*L*L;
  /* 50 and 40 are the per-site factors behind the Gflop/s and GB/s figures */
  double beta_fp = (t_call > 0) ? 50*sites/t_call*1e-9 : 0.0;
  double beta_io = (t_call > 0) ? 40*sites/t_call*1e-9 : 0.0;
  printf(" %s after %6d iterations, res = %+e\n",
	 converged ? "Converged" : "NOT converged", iter, rr/bb);
  printf(" Time in lapl(): %+6.3e sec/call, %4.2e Gflop/s, %4.2e GB/s\n",
	 t_call, beta_fp, beta_io);

  del_field(r);
  del_field(p);
  del_field(Ap);
  return;
}
示例#3
0
文件: lapl.c 项目: g-koutsou/LAP2015
/*
 * Solves lapl(u) x = b, for x, given b, using Conjugate Gradient.
 *
 * L: lattice extent; forwarded to all helpers and used as L*L in the
 *    performance figures.
 * x: on entry the starting guess (it is used to form the initial
 *    residual); updated by the iteration.
 * b: right-hand side.
 * u: field forwarded to lapl().
 *
 * Runs at most max_iter (100) iterations and stops early as soon as
 * sqrt(rr/bb) < tol, where rr and bb are the xdotx() values of the residual
 * and of b. Per-iteration progress and a timing summary for lapl() are
 * printed to stdout; the summary states whether the tolerance was reached.
 *
 * NOTE(review): bb == 0 or pAp == 0 is not handled and leads to non-finite
 * values -- confirm callers never pass such input.
 */
void
cg(size_t L, _Complex float *x, _Complex float *b, _Complex float *u)
{
  int max_iter = 100;
  float tol = 1e-6;

  /* Temporary fields needed for CG */
  _Complex float *r = new_field(L);
  _Complex float *p = new_field(L);
  _Complex float *Ap = new_field(L);

  /* Initial residual and p-vector */
  lapl(L, r, x, u);
  xmy(L, b, r);
  xeqy(L, p, r);

  /* Initial r-norm and b-norm */
  float rr = xdotx(L, r);
  float bb = xdotx(L, b);
  double t_lapl = 0;
  int iter = 0;
  for(iter=0; iter<max_iter; iter++) {
    printf(" %6d, res = %+e\n", iter, rr/bb);
    if(sqrt(rr/bb) < tol)
      break;
    /* Only the operator application is timed */
    double t = stop_watch(0);
    lapl(L, Ap, p, u);
    t_lapl += stop_watch(t);
    float pAp = xdoty(L, p, Ap);
    float alpha = rr/pAp;
    /* Step along p and update the residual accordingly */
    axpy(L, alpha, p, x);
    axpy(L, -alpha, Ap, r);
    float r1r1 = xdotx(L, r);
    float beta = r1r1/rr;
    /* New search direction built from the residual and the old direction */
    xpay(L, r, beta, p);
    rr = r1r1;
  }

  /*
   * The loop also ends when max_iter is exhausted, so test the tolerance
   * once more here (before rr is overwritten below) instead of assuming
   * convergence.
   */
  int converged = sqrt(rr/bb) < tol;

  /* Recompute the residual from x for the final report */
  lapl(L, r, x, u);
  xmy(L, b, r);
  rr = xdotx(L, r);

  /*
   * Average time per lapl() call. When the starting guess already meets
   * the tolerance no call is timed (iter == 0); report zeros instead of
   * dividing by zero.
   */
  double t_call = (iter > 0) ? t_lapl/(double)iter : 0.0;
  /* Site count kept in double so the product cannot wrap in size_t */
  double sites = (double)L*L;
  /* 34 and 32 are the per-site factors behind the Gflop/s and GB/s figures */
  double beta_fp = (t_call > 0) ? 34*sites/t_call*1e-9 : 0.0;
  double beta_io = (t_call > 0) ? 32*sites/t_call*1e-9 : 0.0;
  printf(" %s after %6d iterations, res = %+e\n",
	 converged ? "Converged" : "NOT converged", iter, rr/bb);
  printf(" Time in lapl(): %+6.3e sec/call, %4.2e Gflop/s, %4.2e GB/s\n",
	 t_call, beta_fp, beta_io);

  free(r);
  free(p);
  free(Ap);
  return;
}