void test_staggered() { mdp << "START TESTING STAGGERED ACTIONS\n"; int box[]={64,6,6,6}, nc=3; generic_lattice lattice(4,box,default_partitioning<0>, torus_topology, 0, 3); gauge_field U(lattice,nc); gauge_field V(lattice,nc); staggered_field psi(lattice, nc); staggered_field chi1(lattice, nc); staggered_field chi2(lattice, nc); coefficients coeff; coeff["mass"]=1.0; double t0, t1; inversion_stats stats; set_hot(U); set_random(psi); mdp << "ATTENTION: need to adjust asqtad coefficnets\n"; default_staggered_action=StaggeredAsqtadActionFast::mul_Q; default_staggered_inverter=MinimumResidueInverter<staggered_field,gauge_field>; t0=mpi.time(); stats=mul_invQ(chi2,psi,U,coeff); t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps; cout << "Staggered Min Res TIME=" << t1 << endl; default_staggered_inverter=BiConjugateGradientStabilizedInverter<staggered_field,gauge_field>; t0=mpi.time(); stats=mul_invQ(chi2,psi,U,coeff); t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps; cout << "Staggered BiCGStab TIME=" << t1 << endl; default_staggered_inverter=StaggeredBiCGUML::inverter; t0=mpi.time(); stats=mul_invQ(chi2,psi,U,coeff); t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps; cout << "Staggered SSE BiCGStabUML TIME=" << t1 << endl; default_staggered_action=StaggeredAsqtadActionSSE2::mul_Q; default_staggered_inverter=MinimumResidueInverter<staggered_field,gauge_field>; t0=mpi.time(); stats=mul_invQ(chi2,psi,U,coeff); t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps; cout << "Staggered SSE Min Res TIME=" << t1 << endl; default_staggered_inverter=BiConjugateGradientStabilizedInverter<staggered_field,gauge_field>; t0=mpi.time(); stats=mul_invQ(chi2,psi,U,coeff); t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps; cout << "Staggered SSE BiCGStab TIME=" << t1 << endl; default_staggered_inverter=StaggeredBiCGUML::inverter; t0=mpi.time(); stats=mul_invQ(chi2,psi,U,coeff); t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps; cout << "Staggered SSE BiCGStabUML TIME=" << t1 << endl; }
void LUTUnb ( bool conjugate, const Matrix<Real>& U, const Matrix<Complex<Real>>& shifts, Matrix<Real>& XReal, Matrix<Real>& XImag ) { DEBUG_CSE typedef Complex<Real> C; const Int m = XReal.Height(); const Int n = XReal.Width(); if( conjugate ) XImag *= -1; const Real* UBuf = U.LockedBuffer(); Real* XRealBuf = XReal.Buffer(); Real* XImagBuf = XImag.Buffer(); const Int ldU = U.LDim(); const Int ldXReal = XReal.LDim(); const Int ldXImag = XImag.LDim(); Int k=0; while( k < m ) { const bool in2x2 = ( k+1<m && UBuf[(k+1)+k*ldU] != Real(0) ); if( in2x2 ) { // Solve the 2x2 linear systems via 2x2 QR decompositions produced // by the Givens rotation // | c s | | U(k, k)-shift | = | gamma11 | // | -conj(s) c | | U(k+1,k) | | 0 | // // and by also forming the right two entries of the 2x2 resulting // upper-triangular matrix, say gamma12 and gamma22 // // Extract the constant part of the 2x2 diagonal block, D const Real delta12 = UBuf[ k +(k+1)*ldU]; const Real delta21 = UBuf[(k+1)+ k *ldU]; for( Int j=0; j<n; ++j ) { const C delta11 = UBuf[ k + k *ldU] - shifts.Get(j,0); const C delta22 = UBuf[(k+1)+(k+1)*ldU] - shifts.Get(j,0); // Decompose D = Q R Real c; C s; const C gamma11 = Givens( delta11, C(delta21), c, s ); const C gamma12 = c*delta12 + s*delta22; const C gamma22 = -Conj(s)*delta12 + c*delta22; Real* xRealBuf = &XRealBuf[j*ldXReal]; Real* xImagBuf = &XImagBuf[j*ldXImag]; // Solve against R^T C chi1(xRealBuf[k ],xImagBuf[k ]); C chi2(xRealBuf[k+1],xImagBuf[k+1]); chi1 /= gamma11; chi2 -= gamma12*chi1; chi2 /= gamma22; // Solve against Q^T const C eta1 = c*chi1 - Conj(s)*chi2; const C eta2 = s*chi1 + c*chi2; xRealBuf[k ] = eta1.real(); xImagBuf[k ] = eta1.imag(); xRealBuf[k+1] = eta2.real(); xImagBuf[k+1] = eta2.imag(); // Update x2 := x2 - U12^T x1 blas::Axpy ( m-(k+2), -xRealBuf[k ], &UBuf[ k +(k+2)*ldU], ldU, &xRealBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xImagBuf[k ], &UBuf[ k +(k+2)*ldU], ldU, &xImagBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xRealBuf[k+1], &UBuf[(k+1)+(k+2)*ldU], ldU, &xRealBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xImagBuf[k+1], &UBuf[(k+1)+(k+2)*ldU], ldU, &xImagBuf[k+2], 1 ); } k += 2; } else { for( Int j=0; j<n; ++j ) { Real* xRealBuf = &XRealBuf[j*ldXReal]; Real* xImagBuf = &XImagBuf[j*ldXImag]; C eta1( xRealBuf[k], xImagBuf[k] ); eta1 /= UBuf[k+k*ldU] - shifts.Get(j,0); xRealBuf[k] = eta1.real(); xImagBuf[k] = eta1.imag(); blas::Axpy ( m-(k+1), -xRealBuf[k], &UBuf[k+(k+1)*ldU], ldU, &xRealBuf[k+1], 1 ); blas::Axpy ( m-(k+1), -xImagBuf[k], &UBuf[k+(k+1)*ldU], ldU, &xImagBuf[k+1], 1 ); } k += 1; } } if( conjugate ) XImag *= -1; }
void test1(double N) { Uniform U; double sum = 0.0, sumsq = 0.0, ar1 = 0.0, last = 0.0; double j; Array<double> chi0(0,15); Array<double> chi1(0,255); Array<double> chi1x(0,255); Array<double> chi2(0,65535); Array<double> chi2x(0,65535); chi0 = 0; chi1 = 0; chi1x = 0; chi2 = 0; chi2x = 0; Array<double> crawl7(0,127); Array<double> crawl8(0,255); Array<double> crawl15(0,32767); Array<double> crawl16(0,65535); crawl7 = 0; crawl8 = 0; crawl15 = 0; crawl16 = 0; unsigned long crawler = 0; int m_bits = (int)(log(N) / 0.693 - 0.471); // number of bits in sparse monkey test unsigned long M = 1; M <<= (m_bits - 3); // 2**m_bits / 8 String Seen(M, (char)0); // to accumulate results unsigned long mask1 = (M - 1); for (j = 0; j < N; ++j) { double u = U.Next(); if (u == 1.0) { cout << "Reject value == 1" << endl; continue; } double v = u - 0.5; sum += v; sumsq += v * v; ar1 += v * (last - 0.5); int k = (int)floor(u * 256); ++chi1(k); int m = (int)floor(u * 65536); ++chi2(m); int a = (int)floor(u * 16); ++chi0(a); if (j > 0) { int b = (int)floor(last * 16); ++chi1x(a + 16 * b); int l = (int)floor(last * 256); ++chi2x(k + 256 * l); } last = u; crawler <<= 1; if (v >= 0) ++crawler; if (j >= 6) ++crawl7(crawler & 0x7F); if (j >= 7) ++crawl8(crawler & 0xFF); if (j >= 14) ++crawl15(crawler & 0x7FFF); if (j >= 15) ++crawl16(crawler & 0xFFFF); if ( j >= (unsigned int)(m_bits-1) ) { unsigned char mask2 = 1; mask2 <<= crawler & 7; Seen[(crawler >> 3) & mask1] |= mask2; } }