/*
 * Reference (host) Jacobi smearing of a spinor field.
 *
 * Each of the `steps` iterations computes, for every lattice site,
 *
 *     out = 1/(1+6r) * in  +  r/(1+6r) * sum_{dir=0..5} (link applied to neighbor(in, dir))
 *
 * and then uses the result as the input of the next iteration.
 *
 *   res         - output field, V*spinorSiteSize elements; receives the
 *                 smeared spinor (a plain copy of spinorField if steps == 0)
 *   gaugeFull   - four per-direction gauge arrays; the part starting
 *                 Vh*gaugeSiteSize elements in is used as the odd-parity links
 *   spinorField - input field, V*spinorSiteSize elements; copied up front and
 *                 not written through this pointer
 *   r           - smearing weight
 *   steps       - number of Jacobi iterations
 *
 * Aborts the program if the scratch field cannot be allocated.
 */
void SmearJacobireference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, double r, int steps)
{
  // sizeof comes first so that the byte count is evaluated in size_t
  sFloat *tmpSpinor = (sFloat *)malloc(sizeof(sFloat) * V * spinorSiteSize);
  if (!tmpSpinor) {
    // Without the scratch field nothing below can run; fail loudly instead of
    // dereferencing a null pointer inside xeqy().
    abort();
  }

  sFloat *spinorIn, *spinorOut, *tmp;

  //Copy the contents of original spinor into tmpSpinor
  xeqy(tmpSpinor, spinorField, V*spinorSiteSize);
  spinorIn = tmpSpinor;
  spinorOut = res;

  // Split each direction's gauge array into its even and odd halves
  gFloat *gaugeEven[4], *gaugeOdd[4];
  for (int dir = 0; dir < 4; dir++) {
    gaugeEven[dir] = gaugeFull[dir];
    gaugeOdd[dir] = gaugeFull[dir]+Vh*gaugeSiteSize;
  }

  // Loop-invariant weights of the on-site term and of each neighbor term
  const double selfCoeff = 1/(1+6*r);
  const double hopCoeff = r/(1+6*r);

  for (int iter = 0; iter < steps; iter++) {
    // Start from the rescaled on-site contribution
    xeqay(spinorOut, selfCoeff, spinorIn, V*spinorSiteSize);

    for (int oddBit = 0; oddBit < 2; oddBit++) {
      for (int i = 0; i < Vh; i++) {
        int fullindex = oddBit*Vh + i;

        //Spatial smearing only: directions 0..5 are visited
        for (int dir = 0; dir < 6; dir++) {
          gFloat *gauge = gaugeLink(i, dir, oddBit, gaugeEven, gaugeOdd, 1);
          sFloat *spinor = spinorNeighborFullLattice(fullindex, dir, spinorIn, 1);

          // One site: 4 blocks of 3*2 reals (presumably spin x color x re/im)
          sFloat gaugedSpinor[4*3*2];
          for (int s = 0; s < 4; s++) {
            // Even directions use su3Mul, odd directions su3Tmul (presumably
            // the forward link and the adjoint of the backward link)
            if (dir % 2 == 0)
              su3Mul(&gaugedSpinor[s*(3*2)], gauge, &spinor[s*(3*2)]);
            else
              su3Tmul(&gaugedSpinor[s*(3*2)], gauge, &spinor[s*(3*2)]);
          }

          //Accumulate result from gaugedSpinor into spinorOut
          xpeqay(&spinorOut[fullindex*(4*3*2)], hopCoeff, gaugedSpinor, 4*3*2);
        }
      }
    }

    //Swap the pointers: this iteration's output feeds the next one
    tmp = spinorIn;
    spinorIn = spinorOut;
    spinorOut = tmp;
  }

  //Copy the contents of spinorIn into res, unless res already points to
  //spinorIn (after the final swap spinorIn holds the latest result)
  if (spinorIn != res) {
    xeqy(res, spinorIn, V*spinorSiteSize);
  }

  free(tmpSpinor);
}
/*
 * Solves lapl(u) x = b, for x, given b, using Conjugate Gradient
 *
 *   lp - lattice parameters; lp.L is the linear extent used for the
 *        performance estimate
 *   x  - on entry the initial guess, on exit the approximate solution
 *        (updated in place)
 *   b  - right-hand side
 *   g  - gauge links passed through to lapl()
 *
 * Iterates at most 100 times, or until sqrt(rr/bb) drops below 1e-9.  The
 * residual history and a timing summary of lapl() are printed to stdout.
 */
void
cg(latparams lp, field **x, field **b, link **g)
{
  size_t L = lp.L;
  int max_iter = 100;
  float tol = 1e-9;

  /* Temporary fields needed for CG */
  field **r = new_field(lp);
  field **p = new_field(lp);
  field **Ap = new_field(lp);

  /* Initial residual and p-vector (presumably r = b - lapl(x), p = r) */
  lapl(lp, r, x, g);
  xmy(lp, b, r);
  xeqy(lp, p, r);

  /* Initial r-norm and b-norm */
  float rr = xdotx(lp, r);
  float bb = xdotx(lp, b);
  double t_lapl = 0;
  int iter = 0;
  for(iter=0; iter<max_iter; iter++) {
    printf(" %6d, res = %+e\n", iter, rr/bb);
    if(sqrt(rr/bb) < tol)
      break;

    /* Only the operator application is timed */
    double t = stop_watch(0);
    lapl(lp, Ap, p, g);
    t_lapl += stop_watch(t);

    float pAp = xdoty(lp, p, Ap);
    float alpha = rr/pAp;
    axpy(lp, alpha, p, x);
    axpy(lp, -alpha, Ap, r);
    float r1r1 = xdotx(lp, r);
    float beta = r1r1/rr;
    xpay(lp, r, beta, p);
    rr = r1r1;
  }

  /* Recompute residual after convergence */
  lapl(lp, r, x, g);
  xmy(lp, b, r);
  rr = xdotx(lp, r);

  /*
   * The loop body ran `iter` times, so lapl() was timed `iter` times.  If the
   * initial guess already satisfied the tolerance, iter == 0 and the average
   * would be 0/0; report zeros instead of NaN in that case (and whenever the
   * measured time is zero).
   */
  double t_call = (iter > 0) ? t_lapl/(double)iter : 0.0;
  double vol = (double)L*L*L;
  double beta_fp = (t_call > 0) ? 50*vol/t_call*1e-9 : 0.0;
  double beta_io = (t_call > 0) ? 40*vol/t_call*1e-9 : 0.0;
  printf(" Converged after %6d iterations, res = %+e\n", iter, rr/bb);
  printf(" Time in lapl(): %+6.3e sec/call, %4.2e Gflop/s, %4.2e GB/s\n",
         t_call, beta_fp, beta_io);

  del_field(r);
  del_field(p);
  del_field(Ap);
  return;
}
/*
 * Solves lapl(u) x = b, for x, given b, using Conjugate Gradient
 *
 *   L - linear lattice extent, forwarded to every field helper
 *   x - on entry the initial guess, on exit the approximate solution
 *       (updated in place)
 *   b - right-hand side
 *   u - gauge field passed through to lapl()
 *
 * Iterates at most 100 times, or until sqrt(rr/bb) drops below 1e-6.  The
 * residual history and a timing summary of lapl() are printed to stdout.
 */
void
cg(size_t L, _Complex float *x, _Complex float *b, _Complex float *u)
{
  int max_iter = 100;
  float tol = 1e-6;

  /* Temporary fields needed for CG */
  _Complex float *r = new_field(L);
  _Complex float *p = new_field(L);
  _Complex float *Ap = new_field(L);

  /* Initial residual and p-vector (presumably r = b - lapl(x), p = r) */
  lapl(L, r, x, u);
  xmy(L, b, r);
  xeqy(L, p, r);

  /* Initial r-norm and b-norm */
  float rr = xdotx(L, r);
  float bb = xdotx(L, b);
  double t_lapl = 0;
  int iter = 0;
  for(iter=0; iter<max_iter; iter++) {
    printf(" %6d, res = %+e\n", iter, rr/bb);
    if(sqrt(rr/bb) < tol)
      break;

    /* Only the operator application is timed */
    double t = stop_watch(0);
    lapl(L, Ap, p, u);
    t_lapl += stop_watch(t);

    float pAp = xdoty(L, p, Ap);
    float alpha = rr/pAp;
    axpy(L, alpha, p, x);
    axpy(L, -alpha, Ap, r);
    float r1r1 = xdotx(L, r);
    float beta = r1r1/rr;
    xpay(L, r, beta, p);
    rr = r1r1;
  }

  /* Recompute residual after convergence */
  lapl(L, r, x, u);
  xmy(L, b, r);
  rr = xdotx(L, r);

  /*
   * The loop body ran `iter` times, so lapl() was timed `iter` times.  If the
   * initial guess already satisfied the tolerance, iter == 0 and the average
   * would be 0/0; report zeros instead of NaN in that case (and whenever the
   * measured time is zero).  The site count is formed in double so that the
   * product cannot wrap in unsigned integer arithmetic for large L.
   */
  double t_call = (iter > 0) ? t_lapl/(double)iter : 0.0;
  double sites = (double)L*(double)L;
  double beta_fp = (t_call > 0) ? 34*sites/t_call*1e-9 : 0.0;
  double beta_io = (t_call > 0) ? 32*sites/t_call*1e-9 : 0.0;
  printf(" Converged after %6d iterations, res = %+e\n", iter, rr/bb);
  printf(" Time in lapl(): %+6.3e sec/call, %4.2e Gflop/s, %4.2e GB/s\n",
         t_call, beta_fp, beta_io);

  free(r);
  free(p);
  free(Ap);
  return;
}