Beispiel #1
0
int main()
{
  BenchTimer t;
  int tries = 10;
  int rep = 400000;
  typedef Matrix3f Mat;
  typedef Vector3f Vec;
  Mat A = Mat::Random(3,3);
  A = A.adjoint() * A;

  SelfAdjointEigenSolver<Mat> eig(A);
  BENCH(t, tries, rep, eig.compute(A));
  std::cout << "Eigen:  " << t.best() << "s\n";

  Mat evecs;
  Vec evals;
  BENCH(t, tries, rep, eigen33(A,evecs,evals));
  std::cout << "Direct: " << t.best() << "s\n\n";

  std::cerr << "Eigenvalue/eigenvector diffs:\n";
  std::cerr << (evals - eig.eigenvalues()).transpose() << "\n";
  for(int k=0;k<3;++k)
    if(evecs.col(k).dot(eig.eigenvectors().col(k))<0)
      evecs.col(k) = -evecs.col(k);
  std::cerr << evecs - eig.eigenvectors() << "\n\n";
}
Beispiel #2
0
template<typename Quat> void bench(const std::string& label)
{
  int tries = 10;
  int rep = 1000000;
  BenchTimer t;
  
  Quat a(4, 1, 2, 3);
  Quat b(2, 3, 4, 5);
  Quat c;
  
  std::cout.precision(3);
  
  BENCH(t, tries, rep, quatmul_default(a,b,c));
  std::cout << label << " default " << 1e3*t.best(CPU_TIMER) << "ms  \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n";
  
  BENCH(t, tries, rep, quatmul_novec(a,b,c));
  std::cout << label << " novec   " << 1e3*t.best(CPU_TIMER) << "ms  \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n";
}
/// Benchmarks prod<Mode>() on random fixed-size operands (M x K times K x N)
/// and prints the mode followed by an integer rate and a tab.
///
/// The repetition count is first calibrated with prod<CoeffBasedProductMode>()
/// and then reused unchanged for the measured mode.
EIGEN_DONT_INLINE void bench_prod()
{
  typedef Matrix<Scalar,M,K> Lhs;
  typedef Matrix<Scalar,K,N> Rhs;
  typedef Matrix<Scalar,M,N> Res;

  Lhs a; a.setRandom();
  Rhs b; b.setRandom();
  Res c; c.setRandom();

  BenchTimer timer;

  // 2*M*N*K: one multiply and one add per term of each result coefficient.
  const double opsPerProduct = 2.*double(M)*double(N)*double(K);

  // Starting guess for the repetition count; it is halved here because the
  // calibration loop below doubles it before the first measurement.
  int rep = 100000./opsPerProduct;
  rep /= 2;
  rep = (rep<1) ? 1 : rep;

  // Calibration: keep doubling until a single try lasts at least 0.1.
  for(;;)
  {
    rep *= 2;
    timer.reset();
    BENCH(timer,1,rep,prod<CoeffBasedProductMode>(a,b,c));
    if(!(timer.best()<0.1))
      break;
  }

  // Actual measurement: best of 5 tries with the calibrated count.
  timer.reset();
  BENCH(timer,5,rep,prod<Mode>(a,b,c));

  print_mode(Mode);
  const double rate = 1e-6*opsPerProduct*rep/timer.best();
  std::cout << int(rate) << "\t";
}
// Benchmarks matrix * vector and transposed-matrix * vector products for an
// Eigen sparse matrix (and, when DENSEMATRIX is defined, for its dense copy),
// printing the timings to std::cout.
// NOTE(review): this definition is truncated in this view — the density loop
// and main() itself are not closed before the next snippet starts.
int main(int argc, char *argv[])
{
  int rows = SIZE;
  int cols = SIZE;
  // NOTE(review): this variable is shadowed by the loop variable of the same
  // name below and is not read anywhere in the visible code.
  float density = DENSITY;

  EigenSparseMatrix sm1(rows,cols);
  DenseVector v1(cols), v2(cols);
  v1.setRandom();

  BenchTimer timer;
  // Halve the density on every pass until it drops below MINDENSITY.
  for (float density = DENSITY; density>=MINDENSITY; density*=0.5)
  {
    // NOTE(review): the density-driven fill is commented out; fillMatrix2 is
    // called with the constant 7 instead, so the loop's density is not passed
    // to the fill routine — confirm this is intended.
    //fillMatrix(density, rows, cols, sm1);
    fillMatrix2(7, rows, cols, sm1);

    // dense matrices
    #ifdef DENSEMATRIX
    {
      std::cout << "Eigen Dense\t" << density*100 << "%\n";
      DenseMatrix m1(rows,cols);
      eiToDense(sm1, m1);

      // One start/stop pair around the whole REPEAT loop; the printed time is
      // timer.best() as is, while the rate is REPEAT divided by that time.
      timer.reset();
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        v2 = m1 * v1;
      timer.stop();
      std::cout << "   a * v:\t" << timer.best() << "  " << double(REPEAT)/timer.best() << " * / sec " << endl;

      // Same measurement for the transposed product.
      timer.reset();
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        v2 = m1.transpose() * v1;
      timer.stop();
      std::cout << "   a' * v:\t" << timer.best() << endl;
    }
    #endif

    // eigen sparse matrices
    {
      // Fill ratio actually present in sm1, as a percentage.
      std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n";

      // BENCH is used here with a single code argument; it presumably drives
      // `timer` and repeats the code REPEAT times — TODO confirm against the
      // macro definition, which is not visible here. The asm("#...") strings
      // are assembler comments, apparently markers for locating the product
      // in the generated assembly.
      BENCH(asm("#myc"); v2 = sm1 * v1; asm("#myd");)
      std::cout << "   a * v:\t" << timer.best()/REPEAT << "  " << double(REPEAT)/timer.best(REAL_TIMER) << " * / sec " << endl;


      // Same measurement for the transposed product.
      BENCH( { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); })

      std::cout << "   a' * v:\t" << timer.best()/REPEAT << endl;
    }
Beispiel #5
0
/// Benchmarks the matrix product gemm(a,b,c) on square s x s operands and
/// prints timings and GFLOPS figures to std::cout.
///
/// Command line (each argument is a letter immediately followed by a number):
///   s<matrix size>  c<cache size>  t<nb tries>  p<nb repeats>
///
/// Depending on the build configuration the product is first cross-checked
/// (EIGEN_HAS_OPENMP / HAVE_BLAS), then benchmarked against BLAS (HAVE_BLAS),
/// against a single-threaded run (EIGEN_HAS_OPENMP) and against the
/// split real/imaginary "matlab" style products (DECOUPLED).
///
/// @return 1 after printing the usage line when an argument is not recognised
///         or when the matrix size, the number of tries or the number of
///         repeats is not a positive number; 0 otherwise.
int main(int argc, char ** argv)
{
  std::ptrdiff_t l1 = internal::queryL1CacheSize();
  std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
  std::cout << "L1 cache size     = " << (l1>0 ? l1/1024 : -1) << " KB\n";
  std::cout << "L2/L3 cache size  = " << (l2>0 ? l2/1024 : -1) << " KB\n";
  typedef internal::gebp_traits<Scalar,Scalar> Traits;
  std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n";

  int rep = 1;    // number of repetitions per try
  int tries = 2;  // number of tries, we keep the best

  int s = 2048;
  int cache_size = -1;

  // The first character of each argument selects the option, the remainder is
  // parsed as its integer value.
  bool need_help = false;
  for (int i=1; i<argc; ++i)
  {
    if(argv[i][0]=='s')
      s = atoi(argv[i]+1);
    else if(argv[i][0]=='c')
      cache_size = atoi(argv[i]+1);
    else if(argv[i][0]=='t')
      tries = atoi(argv[i]+1);
    else if(argv[i][0]=='p')
      rep = atoi(argv[i]+1);
    else
      need_help = true;
  }

  // atoi() yields 0 for text it cannot parse. A non-positive size, try count
  // or repeat count would give a meaningless run (negative matrix size, no
  // measurement at all, or a division by a zero repeat count in the report),
  // so treat it like any other bad argument.
  if(s<=0 || tries<=0 || rep<=0)
    need_help = true;

  if(need_help)
  {
    std::cout << argv[0] << " s<matrix size> c<cache size> t<nb tries> p<nb repeats>\n";
    return 1;
  }

  // Only override the cache sizes when a positive value was given.
  if(cache_size>0)
    setCpuCacheSizes(cache_size,96*cache_size);

  int m = s;
  int n = s;
  int p = s;
  A a(m,p); a.setRandom();
  B b(p,n); b.setRandom();
  C c(m,n); c.setOnes();
  // Pristine copy of c, used to reset the accumulator before each benchmark.
  C rc = c;

  std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
  std::ptrdiff_t mc(m), nc(n), kc(p);
  internal::computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
  std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << "\n";

  // Receives the reference result that c is compared against below.
  C r = c;

  // check the parallel product is correct
  #if defined EIGEN_HAS_OPENMP
  int procs = omp_get_max_threads();
  if(procs>1)
  {
    #ifdef HAVE_BLAS
    blas_gemm(a,b,r);
    #else
    // Reference result computed with a single thread.
    omp_set_num_threads(1);
    r.noalias() += a * b;
    omp_set_num_threads(procs);
    #endif
    c.noalias() += a * b;
    if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
  }
  #elif defined HAVE_BLAS
    blas_gemm(a,b,r);
    c.noalias() += a * b;
    if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
  #else
    gemm(a,b,c);
    r.noalias() += a.cast<Scalar>() * b.cast<Scalar>();
    if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
  #endif

  // Each report line prints: best time divided by rep, GFLOPS computed from
  // 2*m*n*p operations per product, and the total time in parentheses.
  #ifdef HAVE_BLAS
  BenchTimer tblas;
  c = rc;
  BENCH(tblas, tries, rep, blas_gemm(a,b,c));
  std::cout << "blas  cpu         " << tblas.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tblas.total(CPU_TIMER)  << "s)\n";
  std::cout << "blas  real        " << tblas.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n";
  #endif

  BenchTimer tmt;
  c = rc;
  BENCH(tmt, tries, rep, gemm(a,b,c));
  std::cout << "eigen cpu         " << tmt.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmt.total(CPU_TIMER)  << "s)\n";
  std::cout << "eigen real        " << tmt.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";

  #ifdef EIGEN_HAS_OPENMP
  if(procs>1)
  {
    // Single-threaded run used as the baseline for the speed-up figure.
    // NOTE(review): the thread count is not restored afterwards, so anything
    // that runs after this block (the DECOUPLED section) keeps this setting.
    BenchTimer tmono;
    omp_set_num_threads(1);
    Eigen::internal::setNbThreads(1);
    c = rc;
    BENCH(tmono, tries, rep, gemm(a,b,c));
    std::cout << "eigen mono cpu    " << tmono.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmono.total(CPU_TIMER)  << "s)\n";
    std::cout << "eigen mono real   " << tmono.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n";
    // Speed-up = mono CPU time / multi-threaded wall-clock time; the
    // percentage is that ratio relative to the number of threads.
    std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)  << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n";
  }
  #endif
  
  #ifdef DECOUPLED
  // Products with real and imaginary parts stored in separate matrices; which
  // variant runs depends on whether A's and B's scalar types are complex.
  if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
  {
    M ar(m,p); ar.setRandom();
    M ai(m,p); ai.setRandom();
    M br(p,n); br.setRandom();
    M bi(p,n); bi.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    
    BenchTimer t;
    BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci));
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
  if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
  {
    // This local `a` intentionally hides the outer operand of the same name.
    M a(m,p);  a.setRandom();
    M br(p,n); br.setRandom();
    M bi(p,n); bi.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    
    BenchTimer t;
    BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci));
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
  if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex))
  {
    // This local `b` intentionally hides the outer operand of the same name.
    M ar(m,p); ar.setRandom();
    M ai(m,p); ai.setRandom();
    M b(p,n);  b.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    
    BenchTimer t;
    BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci));
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
  #endif

  return 0;
}