コード例 #1
0
ファイル: benchmark.cpp プロジェクト: duuucccan23/fermiqcd
// Benchmarks mul_invQ for staggered fermions on a 64x6x6x6 lattice (nc=3).
//
// Two action kernels (StaggeredAsqtadActionFast, StaggeredAsqtadActionSSE2)
// are each timed with three inverters (minimum residue, BiCGStab, and
// StaggeredBiCGUML).  For every combination the elapsed wall time of one
// mul_invQ call is divided by lattice.nvol_gl and by the number of inverter
// steps reported in inversion_stats, and the result is printed.
//
// Each measurement line carries a distinct label; labels of the SSE2 group
// are prefixed with "SSE", labels of the Fast group are not.
void test_staggered() {
  mdp << "START TESTING STAGGERED ACTIONS\n";

  int box[]={64,6,6,6}, nc=3;
  generic_lattice lattice(4,box,default_partitioning<0>,
                          torus_topology, 0, 3);
  gauge_field U(lattice,nc);
  // NOTE(review): V and chi1 are never read below; they are kept because
  // the effects of their constructors are not visible from this function.
  gauge_field V(lattice,nc);
  staggered_field psi(lattice, nc);
  staggered_field chi1(lattice, nc);
  staggered_field chi2(lattice, nc);
  coefficients coeff;
  coeff["mass"]=1.0;
  double t0, t1;
  inversion_stats stats;
  set_hot(U);
  set_random(psi);

  mdp << "ATTENTION: need to adjust asqtad coefficnets\n";

  // ---- Group 1: StaggeredAsqtadActionFast kernel -------------------------
  default_staggered_action=StaggeredAsqtadActionFast::mul_Q;

  default_staggered_inverter=MinimumResidueInverter<staggered_field,gauge_field>;
  t0=mpi.time();
  stats=mul_invQ(chi2,psi,U,coeff);
  // Normalise: time per lattice.nvol_gl unit per inverter step.
  t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps;
  cout << "Staggered Min Res TIME=" << t1 << endl;

  default_staggered_inverter=BiConjugateGradientStabilizedInverter<staggered_field,gauge_field>;
  t0=mpi.time();
  stats=mul_invQ(chi2,psi,U,coeff);
  t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps;
  cout << "Staggered BiCGStab TIME=" << t1 << endl;

  default_staggered_inverter=StaggeredBiCGUML::inverter;
  t0=mpi.time();
  stats=mul_invQ(chi2,psi,U,coeff);
  t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps;
  // This run uses the Fast action, so it must not carry the "SSE" prefix;
  // otherwise it is indistinguishable from the last line of group 2.
  cout << "Staggered BiCGStabUML TIME=" << t1 << endl;

  // ---- Group 2: StaggeredAsqtadActionSSE2 kernel -------------------------
  default_staggered_action=StaggeredAsqtadActionSSE2::mul_Q;

  default_staggered_inverter=MinimumResidueInverter<staggered_field,gauge_field>;
  t0=mpi.time();
  stats=mul_invQ(chi2,psi,U,coeff);
  t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps;
  cout << "Staggered SSE Min Res TIME=" << t1 << endl;

  default_staggered_inverter=BiConjugateGradientStabilizedInverter<staggered_field,gauge_field>;
  t0=mpi.time();
  stats=mul_invQ(chi2,psi,U,coeff);
  t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps;
  cout << "Staggered SSE BiCGStab TIME=" << t1 << endl;

  default_staggered_inverter=StaggeredBiCGUML::inverter;
  t0=mpi.time();
  stats=mul_invQ(chi2,psi,U,coeff);
  t1=(mpi.time()-t0)/lattice.nvol_gl/stats.steps;
  cout << "Staggered SSE BiCGStabUML TIME=" << t1 << endl;
}
コード例 #2
0
ファイル: LUT.hpp プロジェクト: YingzhouLi/Elemental
void LUTUnb
( bool conjugate, 
  const Matrix<Real>& U, 
  const Matrix<Complex<Real>>& shifts, 
        Matrix<Real>& XReal,
        Matrix<Real>& XImag )
{
    DEBUG_CSE
    typedef Complex<Real> C;
    const Int m = XReal.Height();
    const Int n = XReal.Width();
  
    if( conjugate )
        XImag *= -1;

    const Real* UBuf = U.LockedBuffer();
          Real* XRealBuf = XReal.Buffer();
          Real* XImagBuf = XImag.Buffer();
    const Int ldU = U.LDim();
    const Int ldXReal = XReal.LDim();
    const Int ldXImag = XImag.LDim();

    Int k=0;
    while( k < m )
    {
        const bool in2x2 = ( k+1<m && UBuf[(k+1)+k*ldU] != Real(0) );
        if( in2x2 )
        {
            // Solve the 2x2 linear systems via 2x2 QR decompositions produced
            // by the Givens rotation
            //    | c        s | | U(k,  k)-shift | = | gamma11 | 
            //    | -conj(s) c | | U(k+1,k)       |   | 0       |
            //
            // and by also forming the right two entries of the 2x2 resulting
            // upper-triangular matrix, say gamma12 and gamma22
            //
            // Extract the constant part of the 2x2 diagonal block, D
            const Real delta12 = UBuf[ k   +(k+1)*ldU];
            const Real delta21 = UBuf[(k+1)+ k   *ldU];
            for( Int j=0; j<n; ++j )
            {
                const C delta11 = UBuf[ k   + k   *ldU] - shifts.Get(j,0);
                const C delta22 = UBuf[(k+1)+(k+1)*ldU] - shifts.Get(j,0);
                // Decompose D = Q R
                Real c; C s;
                const C gamma11 = Givens( delta11, C(delta21), c, s );
                const C gamma12 =        c*delta12 + s*delta22;
                const C gamma22 = -Conj(s)*delta12 + c*delta22;

                Real* xRealBuf = &XRealBuf[j*ldXReal];
                Real* xImagBuf = &XImagBuf[j*ldXImag]; 

                // Solve against R^T
                C chi1(xRealBuf[k  ],xImagBuf[k  ]);
                C chi2(xRealBuf[k+1],xImagBuf[k+1]);
                chi1 /= gamma11;
                chi2 -= gamma12*chi1;
                chi2 /= gamma22;

                // Solve against Q^T
                const C eta1 = c*chi1 - Conj(s)*chi2;
                const C eta2 = s*chi1 +       c*chi2;
                xRealBuf[k  ] = eta1.real();
                xImagBuf[k  ] = eta1.imag();
                xRealBuf[k+1] = eta2.real();
                xImagBuf[k+1] = eta2.imag();

                // Update x2 := x2 - U12^T x1
                blas::Axpy
                ( m-(k+2), -xRealBuf[k  ],
                  &UBuf[ k   +(k+2)*ldU], ldU, &xRealBuf[k+2], 1 );
                blas::Axpy
                ( m-(k+2), -xImagBuf[k  ],
                  &UBuf[ k   +(k+2)*ldU], ldU, &xImagBuf[k+2], 1 );
                blas::Axpy
                ( m-(k+2), -xRealBuf[k+1],
                  &UBuf[(k+1)+(k+2)*ldU], ldU, &xRealBuf[k+2], 1 );
                blas::Axpy
                ( m-(k+2), -xImagBuf[k+1],
                  &UBuf[(k+1)+(k+2)*ldU], ldU, &xImagBuf[k+2], 1 );
            }
            k += 2;
        }
        else
        {
            for( Int j=0; j<n; ++j )
            {
                Real* xRealBuf = &XRealBuf[j*ldXReal];
                Real* xImagBuf = &XImagBuf[j*ldXImag];
                C eta1( xRealBuf[k], xImagBuf[k] );
                eta1 /= UBuf[k+k*ldU] - shifts.Get(j,0);
                xRealBuf[k] = eta1.real();
                xImagBuf[k] = eta1.imag();
                blas::Axpy
                ( m-(k+1), -xRealBuf[k], 
                  &UBuf[k+(k+1)*ldU], ldU, &xRealBuf[k+1], 1 );
                blas::Axpy
                ( m-(k+1), -xImagBuf[k], 
                  &UBuf[k+(k+1)*ldU], ldU, &xImagBuf[k+1], 1 );
            }
            k += 1;
        }
    }
    if( conjugate )
        XImag *= -1;
}
コード例 #3
0
ファイル: tryurng1.cpp プロジェクト: RuiVarela/Smokin
void test1(double N)
{
   Uniform U;
   double sum = 0.0, sumsq = 0.0, ar1 = 0.0, last = 0.0;
   double j;
   Array<double> chi0(0,15);
   Array<double> chi1(0,255);
   Array<double> chi1x(0,255);
   Array<double> chi2(0,65535);
   Array<double> chi2x(0,65535);
   chi0 = 0; chi1 = 0; chi1x = 0; chi2 = 0; chi2x = 0;

   Array<double> crawl7(0,127);
   Array<double> crawl8(0,255);
   Array<double> crawl15(0,32767);
   Array<double> crawl16(0,65535);
   crawl7 = 0;
   crawl8 = 0;
   crawl15 = 0;
   crawl16 = 0;
   unsigned long crawler = 0;


   int m_bits = (int)(log(N) / 0.693 - 0.471);  // number of bits in sparse monkey test
   unsigned long M = 1; M <<= (m_bits - 3);     // 2**m_bits / 8
   String Seen(M, (char)0);                     // to accumulate results
   unsigned long mask1 = (M - 1);

   for (j = 0; j < N; ++j)
   {
      double u = U.Next();
      if (u == 1.0) { cout << "Reject value == 1" << endl; continue; }
      double v = u - 0.5;
      sum += v;
      sumsq += v * v;
      ar1 += v * (last - 0.5);
      int k = (int)floor(u * 256); ++chi1(k);
      int m = (int)floor(u * 65536); ++chi2(m);
      int a = (int)floor(u * 16); ++chi0(a);
      if (j > 0)
      {
         int b = (int)floor(last * 16);
         ++chi1x(a + 16 * b);
         int l = (int)floor(last * 256); ++chi2x(k + 256 * l);
      }
      last = u;

      crawler <<= 1; if (v >= 0) ++crawler;
      if (j >= 6)  ++crawl7(crawler & 0x7F);
      if (j >= 7)  ++crawl8(crawler & 0xFF);
      if (j >= 14) ++crawl15(crawler & 0x7FFF);
      if (j >= 15) ++crawl16(crawler & 0xFFFF);

      
      if ( j >= (unsigned int)(m_bits-1) )
      {
         unsigned char mask2 = 1; mask2 <<= crawler & 7;
         Seen[(crawler >> 3) & mask1] |= mask2;
      }

   }