int main(int argc, char* argv[]) { int size = SIZE * 8; int size2 = size * size; Scalar* a = internal::aligned_new<Scalar>(size2); Scalar* b = internal::aligned_new<Scalar>(size2+4)+1; Scalar* c = internal::aligned_new<Scalar>(size2); for (int i=0; i<size; ++i) { a[i] = b[i] = c[i] = 0; } BenchTimer timer; timer.reset(); for (int k=0; k<10; ++k) { timer.start(); benchVec(a, b, c, size2); timer.stop(); } std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; return 0; for (int innersize = size; innersize>2 ; --innersize) { if (size2%innersize==0) { int outersize = size2/innersize; MatrixXf ma = Map<MatrixXf>(a, innersize, outersize ); MatrixXf mb = Map<MatrixXf>(b, innersize, outersize ); MatrixXf mc = Map<MatrixXf>(c, innersize, outersize ); timer.reset(); for (int k=0; k<3; ++k) { timer.start(); benchVec(ma, mb, mc); timer.stop(); } std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; } } VectorXf va = Map<VectorXf>(a, size2); VectorXf vb = Map<VectorXf>(b, size2); VectorXf vc = Map<VectorXf>(c, size2); timer.reset(); for (int k=0; k<3; ++k) { timer.start(); benchVec(va, vb, vc); timer.stop(); } std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; return 0; }
// Sparse matrix * dense vector benchmark driver.
// NOTE: this definition is truncated in this view; the density loop and main()
// are not closed within the span below.
//
// For each density, starting at DENSITY and halving while >= MINDENSITY:
//   - fills sm1 through fillMatrix2(7, rows, cols, sm1);
//   - when DENSEMATRIX is defined, converts sm1 to a dense matrix and times
//     REPEAT products m1 * v1 and m1.transpose() * v1;
//   - times sm1 * v1 and sm1.transpose() * v1 through the BENCH macro and
//     prints the results.
//
// NOTE(review): the density-driven fillMatrix() call is commented out, so the
// fill does not appear to depend on `density`, yet the dense section still
// prints density*100 as a percentage. Confirm this is intended.
// NOTE(review): the outer `float density` is shadowed by the loop variable of
// the same name.
int main(int argc, char *argv[]) { int rows = SIZE; int cols = SIZE; float density = DENSITY; EigenSparseMatrix sm1(rows,cols); DenseVector v1(cols), v2(cols); v1.setRandom(); BenchTimer timer; for (float density = DENSITY; density>=MINDENSITY; density*=0.5) { //fillMatrix(density, rows, cols, sm1); fillMatrix2(7, rows, cols, sm1); // dense matrices #ifdef DENSEMATRIX { std::cout << "Eigen Dense\t" << density*100 << "%\n"; DenseMatrix m1(rows,cols); eiToDense(sm1, m1); timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) v2 = m1 * v1; timer.stop(); std::cout << " a * v:\t" << timer.best() << " " << double(REPEAT)/timer.best() << " * / sec " << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) v2 = m1.transpose() * v1; timer.stop(); std::cout << " a' * v:\t" << timer.best() << endl; } #endif // eigen sparse matrices { std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n"; BENCH(asm("#myc"); v2 = sm1 * v1; asm("#myd");) std::cout << " a * v:\t" << timer.best()/REPEAT << " " << double(REPEAT)/timer.best(REAL_TIMER) << " * / sec " << endl; BENCH( { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); }) std::cout << " a' * v:\t" << timer.best()/REPEAT << endl; }
void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false) { typedef typename NumTraits<T>::Real Scalar; typedef typename std::complex<Scalar> Complex; int nits = NDATA/nfft; vector<T> inbuf(nfft); vector<Complex > outbuf(nfft); FFT< Scalar > fft; if (unscaled) { fft.SetFlag(fft.Unscaled); cout << "unscaled "; } if (halfspec) { fft.SetFlag(fft.HalfSpectrum); cout << "halfspec "; } std::fill(inbuf.begin(),inbuf.end(),0); fft.fwd( outbuf , inbuf); BenchTimer timer; timer.reset(); for (int k=0;k<8;++k) { timer.start(); if (fwd) for(int i = 0; i < nits; i++) fft.fwd( outbuf , inbuf); else for(int i = 0; i < nits; i++) fft.inv(inbuf,outbuf); timer.stop(); } cout << nameof<Scalar>() << " "; double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); if ( NumTraits<T>::IsComplex ) { cout << "complex"; }else{ cout << "real "; mflops /= 2; } if (fwd) cout << " fwd"; else cout << " inv"; cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; }
// Benchmarks prod<Mode>(a, b, c) on randomly filled fixed-size matrices
// (M x K) * (K x N) -> (M x N) and prints the mode followed by an integer
// rate computed as 1e-6 * 2*M*N*K * repetitions / best time.
//
// The repetition count is calibrated first: it is doubled until one BENCH
// pass of prod<CoeffBasedProductMode> has a best time of at least 0.1.
EIGEN_DONT_INLINE void bench_prod()
{
  typedef Matrix<Scalar,M,K> Lhs;
  typedef Matrix<Scalar,K,N> Rhs;
  typedef Matrix<Scalar,M,N> Res;

  Lhs a;
  a.setRandom();
  Rhs b;
  b.setRandom();
  Res c;
  c.setRandom();

  BenchTimer timer;
  const double opsPerProduct = 2.*double(M)*double(N)*double(K);

  // Initial guess, halved up front because the calibration loop doubles it
  // before its first measurement; never below 1.
  int repetitions = static_cast<int>(100000./opsPerProduct) / 2;
  if (repetitions < 1)
    repetitions = 1;

  for (;;)
  {
    repetitions *= 2;
    timer.reset();
    BENCH(timer,1,repetitions,prod<CoeffBasedProductMode>(a,b,c));
    if (!(timer.best()<0.1))
      break;
  }

  // Actual measurement with the calibrated repetition count.
  timer.reset();
  BENCH(timer,5,repetitions,prod<Mode>(a,b,c));

  print_mode(Mode);
  std::cout << int(1e-6*opsPerProduct*repetitions/timer.best()) << "\t";
}
// Times REPEAT applications of func::run(a1, a2), feeding each result back
// into a2, over 10 timed passes. Prints the timer value in fixed notation
// with 4 digits of precision, followed by "s ".
static void run()
{
    arg1 a1;
    a1.setIdentity();
    arg2 a2;
    a2.setIdentity();

    BenchTimer timer;
    timer.reset();
    for (int pass = 0; pass < 10; ++pass)
    {
        timer.start();
        for (int rep = 0; rep < REPEAT; ++rep)
        {
            a2 = func::run( a1, a2 );
        }
        timer.stop();
    }

    cout << setprecision(4) << fixed;
    cout << timer.value() << "s " << endl;
}
int main(int argc, char *argv[]) { // bench_sort(); int rows = SIZE; int cols = SIZE; float density = DENSITY; EigenSparseMatrix sm1(rows,cols), sm2(rows,cols), sm3(rows,cols), sm4(rows,cols); BenchTimer timer; for (int nnzPerCol = NNZPERCOL; nnzPerCol>1; nnzPerCol/=1.1) { sm1.setZero(); sm2.setZero(); fillMatrix2(nnzPerCol, rows, cols, sm1); fillMatrix2(nnzPerCol, rows, cols, sm2); // std::cerr << "filling OK\n"; // dense matrices #ifdef DENSEMATRIX { std::cout << "Eigen Dense\t" << nnzPerCol << "%\n"; DenseMatrix m1(rows,cols), m2(rows,cols), m3(rows,cols); eiToDense(sm1, m1); eiToDense(sm2, m2); timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1 * m2; timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2; timer.stop(); std::cout << " a' * b:\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2.transpose(); timer.stop(); std::cout << " a' * b':\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1 * m2.transpose(); timer.stop(); std::cout << " a * b':\t" << timer.value() << endl; } #endif // eigen sparse matrices { std::cout << "Eigen sparse\t" << sm1.nonZeros()/(float(sm1.rows())*float(sm1.cols()))*100 << "% * " << sm2.nonZeros()/(float(sm2.rows())*float(sm2.cols()))*100 << "%\n"; BENCH(sm3 = sm1 * sm2; ) std::cout << " a * b:\t" << timer.value() << endl; // BENCH(sm3 = sm1.transpose() * sm2; ) // std::cout << " a' * b:\t" << timer.value() << endl; // // // BENCH(sm3 = sm1.transpose() * sm2.transpose(); ) // std::cout << " a' * b':\t" << timer.value() << endl; // // // BENCH(sm3 = sm1 * sm2.transpose(); ) // std::cout << " a * b' :\t" << timer.value() << endl; // std::cout << "\n"; // // BENCH( sm3._experimentalNewProduct(sm1, sm2); ) // std::cout << " a * b:\t" << timer.value() << endl; // // BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2); ) // 
std::cout << " a' * b:\t" << timer.value() << endl; // // // BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2.transpose()); ) // std::cout << " a' * b':\t" << timer.value() << endl; // // // BENCH(sm3._experimentalNewProduct(sm1, sm2.transpose());) // std::cout << " a * b' :\t" << timer.value() << endl; } // eigen dyn-sparse matrices /*{ DynamicSparseMatrix<Scalar> m1(sm1), m2(sm2), m3(sm3); std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/(float(m1.rows())*float(m1.cols()))*100 << "% * " << m2.nonZeros()/(float(m2.rows())*float(m2.cols()))*100 << "%\n"; // timer.reset(); // timer.start(); BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1 * m2;) // timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; // std::cout << sm3 << "\n"; timer.reset(); timer.start(); // std::cerr << "transpose...\n"; // EigenSparseMatrix sm4 = sm1.transpose(); // std::cout << sm4.nonZeros() << " == " << sm1.nonZeros() << "\n"; // exit(1); // std::cerr << "transpose OK\n"; // std::cout << sm1 << "\n\n" << sm1.transpose() << "\n\n" << sm4.transpose() << "\n\n"; BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2;) // timer.stop(); std::cout << " a' * b:\t" << timer.value() << endl; // timer.reset(); // timer.start(); BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2.transpose(); ) // timer.stop(); std::cout << " a' * b':\t" << timer.value() << endl; // timer.reset(); // timer.start(); BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1 * m2.transpose(); ) // timer.stop(); std::cout << " a * b' :\t" << timer.value() << endl; }*/ // CSparse #ifdef CSPARSE { std::cout << "CSparse \t" << nnzPerCol << "%\n"; cs *m1, *m2, *m3; eiToCSparse(sm1, m1); eiToCSparse(sm2, m2); // timer.reset(); // timer.start(); // for (int k=0; k<REPEAT; ++k) BENCH( { m3 = cs_sorted_multiply(m1, m2); if (!m3) { std::cerr << "cs_multiply failed\n"; // break; } // cs_print(m3, 0); cs_spfree(m3); } ); // timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; // BENCH( { m3 = 
cs_sorted_multiply2(m1, m2); cs_spfree(m3); } ); // std::cout << " a * b:\t" << timer.value() << endl; }