int main(int argc, char *argv[]) { int rows = SIZE; int cols = SIZE; float density = DENSITY; BenchTimer timer; #if 1 EigenSparseTriMatrix sm1(rows,cols); typedef Matrix<Scalar,Dynamic,1> DenseVector; DenseVector b = DenseVector::Random(cols); DenseVector x = DenseVector::Random(cols); bool densedone = false; for (float density = DENSITY; density>=MINDENSITY; density*=0.5) { EigenSparseTriMatrix sm1(rows, cols); fillMatrix(density, rows, cols, sm1); // dense matrices #ifdef DENSEMATRIX if (!densedone) { densedone = true; std::cout << "Eigen Dense\t" << density*100 << "%\n"; DenseMatrix m1(rows,cols); Matrix<Scalar,Dynamic,Dynamic,Dynamic,Dynamic,RowMajorBit> m2(rows,cols); eiToDense(sm1, m1); m2 = m1; BENCH(x = m1.marked<UpperTriangular>().solveTriangular(b);) std::cout << " colmajor^-1 * b:\t" << timer.value() << endl; // std::cerr << x.transpose() << "\n"; BENCH(x = m2.marked<UpperTriangular>().solveTriangular(b);)
static void bench_record(SkPicture* src, const char* name, SkBBHFactory* bbhFactory) { BenchTimer timer; timer.start(); const int width = src ? src->width() : FLAGS_nullSize; const int height = src ? src->height() : FLAGS_nullSize; for (int i = 0; i < FLAGS_loops; i++) { if (FLAGS_skr) { EXPERIMENTAL::SkRecording recording(width, height); if (NULL != src) { src->draw(recording.canvas()); } // Release and delete the SkPlayback so that recording optimizes its SkRecord. SkDELETE(recording.releasePlayback()); } else { SkPictureRecorder recorder; SkCanvas* canvas = recorder.beginRecording(width, height, bbhFactory, FLAGS_flags); if (NULL != src) { src->draw(canvas); } if (FLAGS_endRecording) { SkAutoTUnref<SkPicture> dst(recorder.endRecording()); } } } timer.end(); const double msPerLoop = timer.fCpu / (double)FLAGS_loops; printf("%f\t%s\n", scale_time(msPerLoop), name); }
int main() { BenchTimer t; int tries = 10; int rep = 400000; typedef Matrix3f Mat; typedef Vector3f Vec; Mat A = Mat::Random(3,3); A = A.adjoint() * A; SelfAdjointEigenSolver<Mat> eig(A); BENCH(t, tries, rep, eig.compute(A)); std::cout << "Eigen: " << t.best() << "s\n"; Mat evecs; Vec evals; BENCH(t, tries, rep, eigen33(A,evecs,evals)); std::cout << "Direct: " << t.best() << "s\n\n"; std::cerr << "Eigenvalue/eigenvector diffs:\n"; std::cerr << (evals - eig.eigenvalues()).transpose() << "\n"; for(int k=0;k<3;++k) if(evecs.col(k).dot(eig.eigenvectors().col(k))<0) evecs.col(k) = -evecs.col(k); std::cerr << evecs - eig.eigenvectors() << "\n\n"; }
int main(int argc, char *argv[]) { int rows = SIZE; int cols = SIZE; float density = DENSITY; EigenSparseMatrix sm1(rows,cols), sm3(rows,cols); BenchTimer timer; for (float density = DENSITY; density>=MINDENSITY; density*=0.5) { fillMatrix(density, rows, cols, sm1); // dense matrices #ifdef DENSEMATRIX { DenseMatrix m1(rows,cols), m3(rows,cols); eiToDense(sm1, m1); BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1.transpose();) std::cout << " Eigen dense:\t" << timer.value() << endl; } #endif std::cout << "Non zeros: " << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n"; // eigen sparse matrices { BENCH(for (int k=0; k<REPEAT; ++k) sm3 = sm1.transpose();) std::cout << " Eigen:\t" << timer.value() << endl; }
int main(int argc, char *argv[]) { int rows = SIZE; int cols = SIZE; bool fullyrand = true; BenchTimer timer; Coordinates coords; Values values; if(fullyrand) { Coordinates pool; pool.reserve(cols*NBPERROW); std::cerr << "fill pool" << "\n"; for (int i=0; i<cols*NBPERROW; ) { // DynamicSparseMatrix<int> stencil(SIZE,SIZE); Vector2i ij(ei_random<int>(0,rows-1),ei_random<int>(0,cols-1)); // if(stencil.coeffRef(ij.x(), ij.y())==0) { // stencil.coeffRef(ij.x(), ij.y()) = 1; pool.push_back(ij); } ++i; } std::cerr << "pool ok" << "\n"; int n = cols*NBPERROW*KK; coords.reserve(n); values.reserve(n); for (int i=0; i<n; ++i) { int i = ei_random<int>(0,pool.size()); coords.push_back(pool[i]); values.push_back(ei_random<Scalar>()); } } else { for (int j=0; j<cols; ++j) for (int i=0; i<NBPERROW; ++i) { coords.push_back(Vector2i(ei_random<int>(0,rows-1),j)); values.push_back(ei_random<Scalar>()); } } std::cout << "nnz = " << coords.size() << "\n"; CHECK_MEM // dense matrices #ifdef DENSEMATRIX { BENCH(setrand_eigen_dense(coords,values);) std::cout << "Eigen Dense\t" << timer.value() << "\n"; }
// Copy samples from archive with index_name // to new index copy_name. // Uses all samples in source archive or [start ... end[ . void copy(const stdString &index_name, const stdString ©_name, int RTreeM, const epicsTime *start, const epicsTime *end, const stdString &single_name) { IndexFile index(RTreeM), new_index(RTreeM); IndexFile::NameIterator names; size_t channel_count = 0, value_count = 0, back_count = 0; BenchTimer timer; stdString dir1, dir2; Filename::getDirname(index_name, dir1); Filename::getDirname(copy_name, dir2); if (dir1 == dir2) { printf("You have to assert that the new index (%s)\n" "is in a directory different from the old index\n" "(%s)\n", copy_name.c_str(), index_name.c_str()); return; } index.open(index_name, true); new_index.open(copy_name, false); if (verbose) printf("Copying values from '%s' to '%s'\n", index_name.c_str(), copy_name.c_str()); RawDataReader reader(index); if (single_name.empty()) { bool ok = index.getFirstChannel(names); while (ok) { copy_channel(names.getName(), start, end, index, reader, new_index, channel_count, value_count, back_count); ok = index.getNextChannel(names); } } else copy_channel(single_name, start, end, index, reader, new_index, channel_count, value_count, back_count); new_index.close(); index.close(); timer.stop(); if (verbose) { printf("Total: %lu channels, %lu values\n", (unsigned long) channel_count, (unsigned long) value_count); printf("Skipped %lu back-in-time values\n", (unsigned long) back_count); printf("Runtime: %s\n", timer.toString().c_str()); } }
template<typename Quat> void bench(const std::string& label) { int tries = 10; int rep = 1000000; BenchTimer t; Quat a(4, 1, 2, 3); Quat b(2, 3, 4, 5); Quat c; std::cout.precision(3); BENCH(t, tries, rep, quatmul_default(a,b,c)); std::cout << label << " default " << 1e3*t.best(CPU_TIMER) << "ms \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n"; BENCH(t, tries, rep, quatmul_novec(a,b,c)); std::cout << label << " novec " << 1e3*t.best(CPU_TIMER) << "ms \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n"; }
static void run() { arg1 a1; a1.setIdentity(); arg2 a2; a2.setIdentity(); BenchTimer timer; timer.reset(); for (int k=0; k<10; ++k) { timer.start(); for (int k=0; k<REPEAT; ++k) a2 = func::run( a1, a2 ); timer.stop(); } cout << setprecision(4) << fixed << timer.value() << "s " << endl;; }
EIGEN_DONT_INLINE void bench_prod() { typedef Matrix<Scalar,M,K> Lhs; Lhs a; a.setRandom(); typedef Matrix<Scalar,K,N> Rhs; Rhs b; b.setRandom(); typedef Matrix<Scalar,M,N> Res; Res c; c.setRandom(); BenchTimer t; double n = 2.*double(M)*double(N)*double(K); int rep = 100000./n; rep /= 2; if(rep<1) rep = 1; do { rep *= 2; t.reset(); BENCH(t,1,rep,prod<CoeffBasedProductMode>(a,b,c)); } while(t.best()<0.1); t.reset(); BENCH(t,5,rep,prod<Mode>(a,b,c)); print_mode(Mode); std::cout << int(1e-6*n*rep/t.best()) << "\t"; }
void test(Test &t) { build_list(&t.noobs, 100); std::vector<std::thread> producers; std::vector<std::thread> consumers; // XXX: we should probably synchronize thread starting work and // just time the actual work. BenchTimer timer; for (int i = 0; i < t.producers; i++) { producers.push_back(std::thread(producer, &t, i)); } for (int i = 0; i < t.consumers; i++) { consumers.push_back(std::thread(consumer, &t, i)); } joinAll(consumers); timer.stop(); t.consumersDone = true; joinAll(producers); timer.report(t.count * t.consumers, !BenchMode); }
void test(Test &t) { std::vector<std::thread> producers; std::vector<std::thread> consumers; // XXX: we should probably synchronize thread starting work and // just time the actual work. BenchTimer timer; for (int i = 0; i < t.producers; i++) { producers.push_back(std::thread(producer, &t)); } for (int i = 0; i < t.consumers; i++) { consumers.push_back(std::thread(consumer, &t, i)); } joinAll(consumers); timer.stop(); t.consumersDone = true; joinAll(producers); //printf("Max thing: %ld\n", t.totalSum.load()); timer.report(t.count * t.consumers, !BenchMode); }
void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false) { typedef typename NumTraits<T>::Real Scalar; typedef typename std::complex<Scalar> Complex; int nits = NDATA/nfft; vector<T> inbuf(nfft); vector<Complex > outbuf(nfft); FFT< Scalar > fft; if (unscaled) { fft.SetFlag(fft.Unscaled); cout << "unscaled "; } if (halfspec) { fft.SetFlag(fft.HalfSpectrum); cout << "halfspec "; } std::fill(inbuf.begin(),inbuf.end(),0); fft.fwd( outbuf , inbuf); BenchTimer timer; timer.reset(); for (int k=0;k<8;++k) { timer.start(); if (fwd) for(int i = 0; i < nits; i++) fft.fwd( outbuf , inbuf); else for(int i = 0; i < nits; i++) fft.inv(inbuf,outbuf); timer.stop(); } cout << nameof<Scalar>() << " "; double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); if ( NumTraits<T>::IsComplex ) { cout << "complex"; }else{ cout << "real "; mflops /= 2; } if (fwd) cout << " fwd"; else cout << " inv"; cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; }
int main(int argc, char *argv[]) { // bench_sort(); int rows = SIZE; int cols = SIZE; float density = DENSITY; EigenSparseMatrix sm1(rows,cols), sm2(rows,cols), sm3(rows,cols), sm4(rows,cols); BenchTimer timer; for (int nnzPerCol = NNZPERCOL; nnzPerCol>1; nnzPerCol/=1.1) { sm1.setZero(); sm2.setZero(); fillMatrix2(nnzPerCol, rows, cols, sm1); fillMatrix2(nnzPerCol, rows, cols, sm2); // std::cerr << "filling OK\n"; // dense matrices #ifdef DENSEMATRIX { std::cout << "Eigen Dense\t" << nnzPerCol << "%\n"; DenseMatrix m1(rows,cols), m2(rows,cols), m3(rows,cols); eiToDense(sm1, m1); eiToDense(sm2, m2); timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1 * m2; timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2; timer.stop(); std::cout << " a' * b:\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2.transpose(); timer.stop(); std::cout << " a' * b':\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1 * m2.transpose(); timer.stop(); std::cout << " a * b':\t" << timer.value() << endl; } #endif // eigen sparse matrices { std::cout << "Eigen sparse\t" << sm1.nonZeros()/(float(sm1.rows())*float(sm1.cols()))*100 << "% * " << sm2.nonZeros()/(float(sm2.rows())*float(sm2.cols()))*100 << "%\n"; BENCH(sm3 = sm1 * sm2; ) std::cout << " a * b:\t" << timer.value() << endl; // BENCH(sm3 = sm1.transpose() * sm2; ) // std::cout << " a' * b:\t" << timer.value() << endl; // // // BENCH(sm3 = sm1.transpose() * sm2.transpose(); ) // std::cout << " a' * b':\t" << timer.value() << endl; // // // BENCH(sm3 = sm1 * sm2.transpose(); ) // std::cout << " a * b' :\t" << timer.value() << endl; // std::cout << "\n"; // // BENCH( sm3._experimentalNewProduct(sm1, sm2); ) // std::cout << " a * b:\t" << timer.value() << endl; // // BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2); ) // std::cout << " a' * b:\t" << timer.value() << endl; // // // BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2.transpose()); ) // std::cout << " a' * b':\t" << timer.value() << endl; // // // BENCH(sm3._experimentalNewProduct(sm1, sm2.transpose());) // std::cout << " a * b' :\t" << timer.value() << endl; } // eigen dyn-sparse matrices /*{ DynamicSparseMatrix<Scalar> m1(sm1), m2(sm2), m3(sm3); std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/(float(m1.rows())*float(m1.cols()))*100 << "% * " << m2.nonZeros()/(float(m2.rows())*float(m2.cols()))*100 << "%\n"; // timer.reset(); // timer.start(); BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1 * m2;) // timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; // std::cout << sm3 << "\n"; timer.reset(); timer.start(); // std::cerr << "transpose...\n"; // EigenSparseMatrix sm4 = sm1.transpose(); // std::cout << sm4.nonZeros() << " == " << sm1.nonZeros() << "\n"; // exit(1); // std::cerr << "transpose OK\n"; // std::cout << sm1 << "\n\n" << sm1.transpose() << "\n\n" << sm4.transpose() << "\n\n"; BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2;) // timer.stop(); std::cout << " a' * b:\t" << timer.value() << endl; // timer.reset(); // timer.start(); BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2.transpose(); ) // timer.stop(); std::cout << " a' * b':\t" << timer.value() << endl; // timer.reset(); // timer.start(); BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1 * m2.transpose(); ) // timer.stop(); std::cout << " a * b' :\t" << timer.value() << endl; }*/ // CSparse #ifdef CSPARSE { std::cout << "CSparse \t" << nnzPerCol << "%\n"; cs *m1, *m2, *m3; eiToCSparse(sm1, m1); eiToCSparse(sm2, m2); // timer.reset(); // timer.start(); // for (int k=0; k<REPEAT; ++k) BENCH( { m3 = cs_sorted_multiply(m1, m2); if (!m3) { std::cerr << "cs_multiply failed\n"; // break; } // cs_print(m3, 0); cs_spfree(m3); } ); // timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; // BENCH( { m3 = cs_sorted_multiply2(m1, m2); cs_spfree(m3); } ); // std::cout << " a * b:\t" << timer.value() << endl; }
int main(int argc, char *argv[]) { int rows = SIZE; int cols = SIZE; float density = DENSITY; EigenSparseMatrix sm1(rows,cols); DenseVector v1(cols), v2(cols); v1.setRandom(); BenchTimer timer; for (float density = DENSITY; density>=MINDENSITY; density*=0.5) { fillMatrix(density, rows, cols, sm1); // dense matrices #ifdef DENSEMATRIX { std::cout << "Eigen Dense\t" << density*100 << "%\n"; DenseMatrix m1(rows,cols); eiToDense(sm1, m1); timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) v2 = m1 * v1; timer.stop(); std::cout << " a * v:\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) v2 = m1.transpose() * v1; timer.stop(); std::cout << " a' * v:\t" << timer.value() << endl; } #endif // eigen sparse matrices { std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n"; BENCH(for (int k=0; k<REPEAT; ++k) v2 = sm1 * v1;) std::cout << " a * v:\t" << timer.value() << endl; BENCH(for (int k=0; k<REPEAT; ++k) { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); }) std::cout << " a' * v:\t" << timer.value() << endl; }
int tool_main(int argc, char** argv) { SetupCrashHandler(); SkCommandLineFlags::Parse(argc, argv); #if SK_ENABLE_INST_COUNT if (FLAGS_leaks) { gPrintInstCount = true; } #endif SkAutoGraphics ag; // First, parse some flags. BenchLogger logger; if (FLAGS_logFile.count()) { logger.SetLogFile(FLAGS_logFile[0]); } LoggerResultsWriter logWriter(logger, FLAGS_timeFormat[0]); MultiResultsWriter writer; writer.add(&logWriter); SkAutoTDelete<JSONResultsWriter> jsonWriter; if (FLAGS_outResultsFile.count()) { jsonWriter.reset(SkNEW(JSONResultsWriter(FLAGS_outResultsFile[0]))); writer.add(jsonWriter.get()); } // Instantiate after all the writers have been added to writer so that we // call close() before their destructors are called on the way out. CallEnd<MultiResultsWriter> ender(writer); const uint8_t alpha = FLAGS_forceBlend ? 0x80 : 0xFF; SkTriState::State dither = SkTriState::kDefault; for (size_t i = 0; i < 3; i++) { if (strcmp(SkTriState::Name[i], FLAGS_forceDither[0]) == 0) { dither = static_cast<SkTriState::State>(i); } } BenchMode benchMode = kNormal_BenchMode; for (size_t i = 0; i < SK_ARRAY_COUNT(BenchMode_Name); i++) { if (strcmp(FLAGS_mode[0], BenchMode_Name[i]) == 0) { benchMode = static_cast<BenchMode>(i); } } SkTDArray<int> configs; bool runDefaultConfigs = false; // Try user-given configs first. for (int i = 0; i < FLAGS_config.count(); i++) { for (int j = 0; j < static_cast<int>(SK_ARRAY_COUNT(gConfigs)); ++j) { if (0 == strcmp(FLAGS_config[i], gConfigs[j].name)) { *configs.append() = j; } else if (0 == strcmp(FLAGS_config[i], kDefaultsConfigStr)) { runDefaultConfigs = true; } } } // If there weren't any, fill in with defaults. if (runDefaultConfigs) { for (int i = 0; i < static_cast<int>(SK_ARRAY_COUNT(gConfigs)); ++i) { if (gConfigs[i].runByDefault) { *configs.append() = i; } } } // Filter out things we can't run. if (kNormal_BenchMode != benchMode) { // Non-rendering configs only run in normal mode for (int i = 0; i < configs.count(); ++i) { const Config& config = gConfigs[configs[i]]; if (Benchmark::kNonRendering_Backend == config.backend) { configs.remove(i, 1); --i; } } } #if SK_SUPPORT_GPU for (int i = 0; i < configs.count(); ++i) { const Config& config = gConfigs[configs[i]]; if (Benchmark::kGPU_Backend == config.backend) { GrContext* context = gContextFactory.get(config.contextType); if (NULL == context) { SkDebugf("GrContext could not be created for config %s. Config will be skipped.\n", config.name); configs.remove(i); --i; continue; } if (config.sampleCount > context->getMaxSampleCount()){ SkDebugf( "Sample count (%d) for config %s is not supported. Config will be skipped.\n", config.sampleCount, config.name); configs.remove(i); --i; continue; } } } #endif // All flags should be parsed now. Report our settings. if (FLAGS_runOnce) { logger.logError("bench was run with --runOnce, so we're going to hide the times." " It's for your own good!\n"); } writer.option("mode", FLAGS_mode[0]); writer.option("alpha", SkStringPrintf("0x%02X", alpha).c_str()); writer.option("antialias", SkStringPrintf("%d", FLAGS_forceAA).c_str()); writer.option("filter", SkStringPrintf("%d", FLAGS_forceFilter).c_str()); writer.option("dither", SkTriState::Name[dither]); writer.option("rotate", SkStringPrintf("%d", FLAGS_rotate).c_str()); writer.option("scale", SkStringPrintf("%d", FLAGS_scale).c_str()); writer.option("clip", SkStringPrintf("%d", FLAGS_clip).c_str()); #if defined(SK_BUILD_FOR_WIN32) writer.option("system", "WIN32"); #elif defined(SK_BUILD_FOR_MAC) writer.option("system", "MAC"); #elif defined(SK_BUILD_FOR_ANDROID) writer.option("system", "ANDROID"); #elif defined(SK_BUILD_FOR_UNIX) writer.option("system", "UNIX"); #else writer.option("system", "other"); #endif #if defined(SK_DEBUG) writer.option("build", "DEBUG"); #else writer.option("build", "RELEASE"); #endif // Set texture cache limits if non-default. for (size_t i = 0; i < SK_ARRAY_COUNT(gConfigs); ++i) { #if SK_SUPPORT_GPU const Config& config = gConfigs[i]; if (Benchmark::kGPU_Backend != config.backend) { continue; } GrContext* context = gContextFactory.get(config.contextType); if (NULL == context) { continue; } size_t bytes; int count; context->getResourceCacheLimits(&count, &bytes); if (-1 != FLAGS_gpuCacheBytes) { bytes = static_cast<size_t>(FLAGS_gpuCacheBytes); } if (-1 != FLAGS_gpuCacheCount) { count = FLAGS_gpuCacheCount; } context->setResourceCacheLimits(count, bytes); #endif } // Run each bench in each configuration it supports and we asked for. Iter iter; Benchmark* bench; while ((bench = iter.next()) != NULL) { SkAutoTUnref<Benchmark> benchUnref(bench); if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getName())) { continue; } bench->setForceAlpha(alpha); bench->setForceAA(FLAGS_forceAA); bench->setForceFilter(FLAGS_forceFilter); bench->setDither(dither); bench->preDraw(); bool loggedBenchName = false; for (int i = 0; i < configs.count(); ++i) { const int configIndex = configs[i]; const Config& config = gConfigs[configIndex]; if (!bench->isSuitableFor(config.backend)) { continue; } GrContext* context = NULL; #if SK_SUPPORT_GPU SkGLContextHelper* glContext = NULL; if (Benchmark::kGPU_Backend == config.backend) { context = gContextFactory.get(config.contextType); if (NULL == context) { continue; } glContext = gContextFactory.getGLContext(config.contextType); } #endif SkAutoTUnref<SkCanvas> canvas; SkAutoTUnref<SkPicture> recordFrom; SkPictureRecorder recorderTo; const SkIPoint dim = bench->getSize(); SkAutoTUnref<SkSurface> surface; if (Benchmark::kNonRendering_Backend != config.backend) { surface.reset(make_surface(config.fColorType, dim, config.backend, config.sampleCount, context)); if (!surface.get()) { logger.logError(SkStringPrintf( "Device creation failure for config %s. Will skip.\n", config.name)); continue; } switch(benchMode) { case kDeferredSilent_BenchMode: case kDeferred_BenchMode: canvas.reset(SkDeferredCanvas::Create(surface.get())); break; case kRecord_BenchMode: canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY))); break; case kPictureRecord_BenchMode: { SkPictureRecorder recorderFrom; bench->draw(1, recorderFrom.beginRecording(dim.fX, dim.fY)); recordFrom.reset(recorderFrom.endRecording()); canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY))); break; } case kNormal_BenchMode: canvas.reset(SkRef(surface->getCanvas())); break; default: SkASSERT(false); } } if (NULL != canvas) { canvas->clear(SK_ColorWHITE); if (FLAGS_clip) { perform_clip(canvas, dim.fX, dim.fY); } if (FLAGS_scale) { perform_scale(canvas, dim.fX, dim.fY); } if (FLAGS_rotate) { perform_rotate(canvas, dim.fX, dim.fY); } } if (!loggedBenchName) { loggedBenchName = true; writer.bench(bench->getName(), dim.fX, dim.fY); } #if SK_SUPPORT_GPU SkGLContextHelper* contextHelper = NULL; if (Benchmark::kGPU_Backend == config.backend) { contextHelper = gContextFactory.getGLContext(config.contextType); } BenchTimer timer(contextHelper); #else BenchTimer timer; #endif double previous = std::numeric_limits<double>::infinity(); bool converged = false; // variables used to compute loopsPerFrame double frameIntervalTime = 0.0f; int frameIntervalTotalLoops = 0; bool frameIntervalComputed = false; int loopsPerFrame = 0; int loopsPerIter = 0; if (FLAGS_verbose) { SkDebugf("%s %s: ", bench->getName(), config.name); } if (!FLAGS_dryRun) { do { // Ramp up 1 -> 2 -> 4 -> 8 -> 16 -> ... -> ~1 billion. loopsPerIter = (loopsPerIter == 0) ? 1 : loopsPerIter * 2; if (loopsPerIter >= (1<<30) || timer.fWall > FLAGS_maxMs) { // If you find it takes more than a billion loops to get up to 20ms of runtime, // you've got a computer clocked at several THz or have a broken benchmark. ;) // "1B ought to be enough for anybody." logger.logError(SkStringPrintf( "\nCan't get %s %s to converge in %dms (%d loops)", bench->getName(), config.name, FLAGS_maxMs, loopsPerIter)); break; } if ((benchMode == kRecord_BenchMode || benchMode == kPictureRecord_BenchMode)) { // Clear the recorded commands so that they do not accumulate. canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY))); } timer.start(); // Inner loop that allows us to break the run into smaller // chunks (e.g. frames). This is especially useful for the GPU // as we can flush and/or swap buffers to keep the GPU from // queuing up too much work. for (int loopCount = loopsPerIter; loopCount > 0; ) { // Save and restore around each call to draw() to guarantee a pristine canvas. SkAutoCanvasRestore saveRestore(canvas, true/*also save*/); int loops; if (frameIntervalComputed && loopCount > loopsPerFrame) { loops = loopsPerFrame; loopCount -= loopsPerFrame; } else { loops = loopCount; loopCount = 0; } if (benchMode == kPictureRecord_BenchMode) { recordFrom->draw(canvas); } else { bench->draw(loops, canvas); } if (kDeferredSilent_BenchMode == benchMode) { static_cast<SkDeferredCanvas*>(canvas.get())->silentFlush(); } else if (NULL != canvas) { canvas->flush(); } #if SK_SUPPORT_GPU // swap drawing buffers on each frame to prevent the GPU // from queuing up too much work if (NULL != glContext) { glContext->swapBuffers(); } #endif } // Stop truncated timers before GL calls complete, and stop the full timers after. timer.truncatedEnd(); #if SK_SUPPORT_GPU if (NULL != glContext) { context->flush(); SK_GL(*glContext, Finish()); } #endif timer.end(); // setup the frame interval for subsequent iterations if (!frameIntervalComputed) { frameIntervalTime += timer.fWall; frameIntervalTotalLoops += loopsPerIter; if (frameIntervalTime >= FLAGS_minMs) { frameIntervalComputed = true; loopsPerFrame = (int)(((double)frameIntervalTotalLoops / frameIntervalTime) * FLAGS_minMs); if (loopsPerFrame < 1) { loopsPerFrame = 1; } // SkDebugf(" %s has %d loops in %f ms (normalized to %d)\n", // bench->getName(), frameIntervalTotalLoops, // timer.fWall, loopsPerFrame); } } const double current = timer.fWall / loopsPerIter; if (FLAGS_verbose && current > previous) { SkDebugf("↑"); } if (FLAGS_verbose) { SkDebugf("%.3g ", current); } converged = HasConverged(previous, current, timer.fWall); previous = current; } while (!FLAGS_runOnce && !converged); } if (FLAGS_verbose) { SkDebugf("\n"); } if (!FLAGS_dryRun && FLAGS_outDir.count() && Benchmark::kNonRendering_Backend != config.backend) { SkAutoTUnref<SkImage> image(surface->newImageSnapshot()); if (image.get()) { saveFile(bench->getName(), config.name, FLAGS_outDir[0], image); } } if (FLAGS_runOnce) { // Let's not mislead ourselves by looking at Debug build or single iteration bench times! continue; } // Normalize to ms per 1000 iterations. const double normalize = 1000.0 / loopsPerIter; const struct { char shortName; const char* longName; double ms; } times[] = { {'w', "msecs", normalize * timer.fWall}, {'W', "Wmsecs", normalize * timer.fTruncatedWall}, {'c', "cmsecs", normalize * timer.fCpu}, {'C', "Cmsecs", normalize * timer.fTruncatedCpu}, {'g', "gmsecs", normalize * timer.fGpu}, }; writer.config(config.name); for (size_t i = 0; i < SK_ARRAY_COUNT(times); i++) { if (strchr(FLAGS_timers[0], times[i].shortName) && times[i].ms > 0) { writer.timer(times[i].longName, times[i].ms); } } } } #if SK_SUPPORT_GPU gContextFactory.destroyContexts(); #endif return 0; }
int main(int argc, char* argv[]) { int size = SIZE * 8; int size2 = size * size; Scalar* a = internal::aligned_new<Scalar>(size2); Scalar* b = internal::aligned_new<Scalar>(size2+4)+1; Scalar* c = internal::aligned_new<Scalar>(size2); for (int i=0; i<size; ++i) { a[i] = b[i] = c[i] = 0; } BenchTimer timer; timer.reset(); for (int k=0; k<10; ++k) { timer.start(); benchVec(a, b, c, size2); timer.stop(); } std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; return 0; for (int innersize = size; innersize>2 ; --innersize) { if (size2%innersize==0) { int outersize = size2/innersize; MatrixXf ma = Map<MatrixXf>(a, innersize, outersize ); MatrixXf mb = Map<MatrixXf>(b, innersize, outersize ); MatrixXf mc = Map<MatrixXf>(c, innersize, outersize ); timer.reset(); for (int k=0; k<3; ++k) { timer.start(); benchVec(ma, mb, mc); timer.stop(); } std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; } } VectorXf va = Map<VectorXf>(a, size2); VectorXf vb = Map<VectorXf>(b, size2); VectorXf vc = Map<VectorXf>(c, size2); timer.reset(); for (int k=0; k<3; ++k) { timer.start(); benchVec(va, vb, vc); timer.stop(); } std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; return 0; }
int main(int argc, char ** argv) { std::ptrdiff_t l1 = internal::queryL1CacheSize(); std::ptrdiff_t l2 = internal::queryTopLevelCacheSize(); std::cout << "L1 cache size = " << (l1>0 ? l1/1024 : -1) << " KB\n"; std::cout << "L2/L3 cache size = " << (l2>0 ? l2/1024 : -1) << " KB\n"; typedef internal::gebp_traits<Scalar,Scalar> Traits; std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n"; int rep = 1; // number of repetitions per try int tries = 2; // number of tries, we keep the best int s = 2048; int cache_size = -1; bool need_help = false; for (int i=1; i<argc; ++i) { if(argv[i][0]=='s') s = atoi(argv[i]+1); else if(argv[i][0]=='c') cache_size = atoi(argv[i]+1); else if(argv[i][0]=='t') tries = atoi(argv[i]+1); else if(argv[i][0]=='p') rep = atoi(argv[i]+1); else need_help = true; } if(need_help) { std::cout << argv[0] << " s<matrix size> c<cache size> t<nb tries> p<nb repeats>\n"; return 1; } if(cache_size>0) setCpuCacheSizes(cache_size,96*cache_size); int m = s; int n = s; int p = s; A a(m,p); a.setRandom(); B b(p,n); b.setRandom(); C c(m,n); c.setOnes(); C rc = c; std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n"; std::ptrdiff_t mc(m), nc(n), kc(p); internal::computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc); std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << "\n"; C r = c; // check the parallel product is correct #if defined EIGEN_HAS_OPENMP int procs = omp_get_max_threads(); if(procs>1) { #ifdef HAVE_BLAS blas_gemm(a,b,r); #else omp_set_num_threads(1); r.noalias() += a * b; omp_set_num_threads(procs); #endif c.noalias() += a * b; if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n"; } #elif defined HAVE_BLAS blas_gemm(a,b,r); c.noalias() += a * b; if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n"; #else gemm(a,b,c); r.noalias() += a.cast<Scalar>() * b.cast<Scalar>(); if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n"; #endif #ifdef HAVE_BLAS BenchTimer tblas; c = rc; BENCH(tblas, tries, rep, blas_gemm(a,b,c)); std::cout << "blas cpu " << tblas.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) << "s)\n"; std::cout << "blas real " << tblas.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n"; #endif BenchTimer tmt; c = rc; BENCH(tmt, tries, rep, gemm(a,b,c)); std::cout << "eigen cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n"; std::cout << "eigen real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n"; #ifdef EIGEN_HAS_OPENMP if(procs>1) { BenchTimer tmono; omp_set_num_threads(1); Eigen::internal::setNbThreads(1); c = rc; BENCH(tmono, tries, rep, gemm(a,b,c)); std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n"; std::cout << "eigen mono real " << tmono.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n"; std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n"; } #endif #ifdef DECOUPLED if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) { M ar(m,p); ar.setRandom(); M ai(m,p); ai.setRandom(); M br(p,n); br.setRandom(); M bi(p,n); bi.setRandom(); M cr(m,n); cr.setRandom(); M ci(m,n); ci.setRandom(); BenchTimer t; BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci)); std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; } if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) { M a(m,p); a.setRandom(); M br(p,n); br.setRandom(); M bi(p,n); bi.setRandom(); M cr(m,n); cr.setRandom(); M ci(m,n); ci.setRandom(); BenchTimer t; BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci)); std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; } if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex)) { M ar(m,p); ar.setRandom(); M ai(m,p); ai.setRandom(); M b(p,n); b.setRandom(); M cr(m,n); cr.setRandom(); M ci(m,n); ci.setRandom(); BenchTimer t; BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci)); std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; } #endif return 0; }