int main() { BenchTimer t; int tries = 10; int rep = 400000; typedef Matrix3f Mat; typedef Vector3f Vec; Mat A = Mat::Random(3,3); A = A.adjoint() * A; SelfAdjointEigenSolver<Mat> eig(A); BENCH(t, tries, rep, eig.compute(A)); std::cout << "Eigen: " << t.best() << "s\n"; Mat evecs; Vec evals; BENCH(t, tries, rep, eigen33(A,evecs,evals)); std::cout << "Direct: " << t.best() << "s\n\n"; std::cerr << "Eigenvalue/eigenvector diffs:\n"; std::cerr << (evals - eig.eigenvalues()).transpose() << "\n"; for(int k=0;k<3;++k) if(evecs.col(k).dot(eig.eigenvectors().col(k))<0) evecs.col(k) = -evecs.col(k); std::cerr << evecs - eig.eigenvectors() << "\n\n"; }
template<typename Quat> void bench(const std::string& label) { int tries = 10; int rep = 1000000; BenchTimer t; Quat a(4, 1, 2, 3); Quat b(2, 3, 4, 5); Quat c; std::cout.precision(3); BENCH(t, tries, rep, quatmul_default(a,b,c)); std::cout << label << " default " << 1e3*t.best(CPU_TIMER) << "ms \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n"; BENCH(t, tries, rep, quatmul_novec(a,b,c)); std::cout << label << " novec " << 1e3*t.best(CPU_TIMER) << "ms \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n"; }
EIGEN_DONT_INLINE void bench_prod()
{
  // M, N, K, Scalar and Mode are not declared in this block; presumably they
  // are template parameters introduced just above it.
  Matrix<Scalar,M,K> lhs; lhs.setRandom();
  Matrix<Scalar,K,N> rhs; rhs.setRandom();
  Matrix<Scalar,M,N> res; res.setRandom();

  BenchTimer timer;

  // Operation count of a single MxK * KxN product.
  const double flops = 2.*double(M)*double(N)*double(K);

  // Calibration: start from an estimate and keep doubling the repetition
  // count until one try of the coefficient-based product lasts at least 0.1s.
  int reps = int(100000./flops);
  reps /= 2;
  if (reps < 1)
  {
    reps = 1;
  }
  for (;;)
  {
    reps *= 2;
    timer.reset();
    BENCH(timer, 1, reps, prod<CoeffBasedProductMode>(lhs, rhs, res));
    if (!(timer.best() < 0.1))
    {
      break;
    }
  }

  // Actual measurement of the requested product mode, best of 5 tries.
  timer.reset();
  BENCH(timer, 5, reps, prod<Mode>(lhs, rhs, res));

  print_mode(Mode);
  std::cout << int(1e-6*flops*reps/timer.best()) << "\t";
}
// Benchmark driver for matrix * vector and transposed-matrix * vector
// products on a rows x cols matrix (rows = cols = SIZE), repeated while the
// loop variable `density` is halved from DENSITY down to MINDENSITY.
//
// NOTE(review): only the beginning of this function is visible here; the
// closing braces of the density loop and of main() lie outside this excerpt.
//
// - The outer `float density` is shadowed by the loop variable of the same
//   name and is not otherwise used in the visible code.
// - The fillMatrix(density, ...) call is commented out; the matrix is filled
//   by fillMatrix2(7, ...), so in the visible code `density` only feeds the
//   "Eigen Dense" header. The percentage printed for the sparse case is
//   computed from sm1.nonZeros() instead.
// - The dense section is compiled only when DENSEMATRIX is defined. It times
//   REPEAT products with explicit timer.start()/stop() calls and prints
//   timer.best() without dividing by REPEAT, unlike the sparse section.
// - BENCH is invoked here with a single code argument; presumably it is a
//   file-local macro that drives `timer` and REPEAT -- TODO confirm against
//   its definition.
// - The asm("#my?") statements only wrap the products being timed; their
//   strings are assembler comments, presumably markers to locate the loops in
//   the generated assembly.
int main(int argc, char *argv[]) { int rows = SIZE; int cols = SIZE; float density = DENSITY; EigenSparseMatrix sm1(rows,cols); DenseVector v1(cols), v2(cols); v1.setRandom(); BenchTimer timer; for (float density = DENSITY; density>=MINDENSITY; density*=0.5) { //fillMatrix(density, rows, cols, sm1); fillMatrix2(7, rows, cols, sm1); // dense matrices #ifdef DENSEMATRIX { std::cout << "Eigen Dense\t" << density*100 << "%\n"; DenseMatrix m1(rows,cols); eiToDense(sm1, m1); timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) v2 = m1 * v1; timer.stop(); std::cout << " a * v:\t" << timer.best() << " " << double(REPEAT)/timer.best() << " * / sec " << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) v2 = m1.transpose() * v1; timer.stop(); std::cout << " a' * v:\t" << timer.best() << endl; } #endif // eigen sparse matrices { std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n"; BENCH(asm("#myc"); v2 = sm1 * v1; asm("#myd");) std::cout << " a * v:\t" << timer.best()/REPEAT << " " << double(REPEAT)/timer.best(REAL_TIMER) << " * / sec " << endl; BENCH( { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); }) std::cout << " a' * v:\t" << timer.best()/REPEAT << endl; }
int main(int argc, char ** argv) { std::ptrdiff_t l1 = internal::queryL1CacheSize(); std::ptrdiff_t l2 = internal::queryTopLevelCacheSize(); std::cout << "L1 cache size = " << (l1>0 ? l1/1024 : -1) << " KB\n"; std::cout << "L2/L3 cache size = " << (l2>0 ? l2/1024 : -1) << " KB\n"; typedef internal::gebp_traits<Scalar,Scalar> Traits; std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n"; int rep = 1; // number of repetitions per try int tries = 2; // number of tries, we keep the best int s = 2048; int cache_size = -1; bool need_help = false; for (int i=1; i<argc; ++i) { if(argv[i][0]=='s') s = atoi(argv[i]+1); else if(argv[i][0]=='c') cache_size = atoi(argv[i]+1); else if(argv[i][0]=='t') tries = atoi(argv[i]+1); else if(argv[i][0]=='p') rep = atoi(argv[i]+1); else need_help = true; } if(need_help) { std::cout << argv[0] << " s<matrix size> c<cache size> t<nb tries> p<nb repeats>\n"; return 1; } if(cache_size>0) setCpuCacheSizes(cache_size,96*cache_size); int m = s; int n = s; int p = s; A a(m,p); a.setRandom(); B b(p,n); b.setRandom(); C c(m,n); c.setOnes(); C rc = c; std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n"; std::ptrdiff_t mc(m), nc(n), kc(p); internal::computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc); std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << "\n"; C r = c; // check the parallel product is correct #if defined EIGEN_HAS_OPENMP int procs = omp_get_max_threads(); if(procs>1) { #ifdef HAVE_BLAS blas_gemm(a,b,r); #else omp_set_num_threads(1); r.noalias() += a * b; omp_set_num_threads(procs); #endif c.noalias() += a * b; if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n"; } #elif defined HAVE_BLAS blas_gemm(a,b,r); c.noalias() += a * b; if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n"; #else gemm(a,b,c); r.noalias() += a.cast<Scalar>() * b.cast<Scalar>(); if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n"; 
#endif #ifdef HAVE_BLAS BenchTimer tblas; c = rc; BENCH(tblas, tries, rep, blas_gemm(a,b,c)); std::cout << "blas cpu " << tblas.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) << "s)\n"; std::cout << "blas real " << tblas.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n"; #endif BenchTimer tmt; c = rc; BENCH(tmt, tries, rep, gemm(a,b,c)); std::cout << "eigen cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n"; std::cout << "eigen real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n"; #ifdef EIGEN_HAS_OPENMP if(procs>1) { BenchTimer tmono; omp_set_num_threads(1); Eigen::internal::setNbThreads(1); c = rc; BENCH(tmono, tries, rep, gemm(a,b,c)); std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n"; std::cout << "eigen mono real " << tmono.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n"; std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n"; } #endif #ifdef DECOUPLED if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) { M ar(m,p); ar.setRandom(); M ai(m,p); ai.setRandom(); M br(p,n); br.setRandom(); M bi(p,n); bi.setRandom(); M cr(m,n); cr.setRandom(); M ci(m,n); ci.setRandom(); BenchTimer t; BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci)); std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << 
t.total(CPU_TIMER) << "s)\n"; std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; } if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) { M a(m,p); a.setRandom(); M br(p,n); br.setRandom(); M bi(p,n); bi.setRandom(); M cr(m,n); cr.setRandom(); M ci(m,n); ci.setRandom(); BenchTimer t; BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci)); std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; } if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex)) { M ar(m,p); ar.setRandom(); M ai(m,p); ai.setRandom(); M b(p,n); b.setRandom(); M cr(m,n); cr.setRandom(); M ci(m,n); ci.setRandom(); BenchTimer t; BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci)); std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; } #endif return 0; }