int main(int argc, char* argv[]) { int size = SIZE * 8; int size2 = size * size; Scalar* a = internal::aligned_new<Scalar>(size2); Scalar* b = internal::aligned_new<Scalar>(size2+4)+1; Scalar* c = internal::aligned_new<Scalar>(size2); for (int i=0; i<size; ++i) { a[i] = b[i] = c[i] = 0; } BenchTimer timer; timer.reset(); for (int k=0; k<10; ++k) { timer.start(); benchVec(a, b, c, size2); timer.stop(); } std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; return 0; for (int innersize = size; innersize>2 ; --innersize) { if (size2%innersize==0) { int outersize = size2/innersize; MatrixXf ma = Map<MatrixXf>(a, innersize, outersize ); MatrixXf mb = Map<MatrixXf>(b, innersize, outersize ); MatrixXf mc = Map<MatrixXf>(c, innersize, outersize ); timer.reset(); for (int k=0; k<3; ++k) { timer.start(); benchVec(ma, mb, mc); timer.stop(); } std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; } } VectorXf va = Map<VectorXf>(a, size2); VectorXf vb = Map<VectorXf>(b, size2); VectorXf vc = Map<VectorXf>(c, size2); timer.reset(); for (int k=0; k<3; ++k) { timer.start(); benchVec(va, vb, vc); timer.stop(); } std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; return 0; }
int main(int argc, char *argv[]) { int rows = SIZE; int cols = SIZE; float density = DENSITY; EigenSparseMatrix sm1(rows,cols); DenseVector v1(cols), v2(cols); v1.setRandom(); BenchTimer timer; for (float density = DENSITY; density>=MINDENSITY; density*=0.5) { //fillMatrix(density, rows, cols, sm1); fillMatrix2(7, rows, cols, sm1); // dense matrices #ifdef DENSEMATRIX { std::cout << "Eigen Dense\t" << density*100 << "%\n"; DenseMatrix m1(rows,cols); eiToDense(sm1, m1); timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) v2 = m1 * v1; timer.stop(); std::cout << " a * v:\t" << timer.best() << " " << double(REPEAT)/timer.best() << " * / sec " << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) v2 = m1.transpose() * v1; timer.stop(); std::cout << " a' * v:\t" << timer.best() << endl; } #endif // eigen sparse matrices { std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n"; BENCH(asm("#myc"); v2 = sm1 * v1; asm("#myd");) std::cout << " a * v:\t" << timer.best()/REPEAT << " " << double(REPEAT)/timer.best(REAL_TIMER) << " * / sec " << endl; BENCH( { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); }) std::cout << " a' * v:\t" << timer.best()/REPEAT << endl; }
// Times FLAGS_loops recordings of `src` and prints one tab-separated line:
// scale_time(timer.fCpu / FLAGS_loops) followed by `name`.
//
// src        - picture replayed into the recorder; may be NULL, in which case
//              nothing is drawn and FLAGS_nullSize is used for both dimensions.
// name       - label printed next to the result.
// bbhFactory - forwarded to SkPictureRecorder::beginRecording (not used on the
//              FLAGS_skr path).
static void bench_record(SkPicture* src, const char* name, SkBBHFactory* bbhFactory) {
    BenchTimer timer;
    timer.start();

    const int width  = src ? src->width()  : FLAGS_nullSize;
    const int height = src ? src->height() : FLAGS_nullSize;

    for (int loop = 0; loop < FLAGS_loops; loop++) {
        if (!FLAGS_skr) {
            // SkPictureRecorder path.
            SkPictureRecorder recorder;
            SkCanvas* target = recorder.beginRecording(width, height, bbhFactory, FLAGS_flags);
            if (NULL != src) {
                src->draw(target);
            }
            if (FLAGS_endRecording) {
                // The finished picture is dropped again as soon as it exists.
                SkAutoTUnref<SkPicture> dst(recorder.endRecording());
            }
            continue;
        }

        // Experimental SkRecording path.
        EXPERIMENTAL::SkRecording recording(width, height);
        if (NULL != src) {
            src->draw(recording.canvas());
        }
        // Release and delete the SkPlayback so that recording optimizes its SkRecord.
        SkDELETE(recording.releasePlayback());
    }

    timer.end();

    const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
    printf("%f\t%s\n", scale_time(msPerLoop), name);
}
void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false) { typedef typename NumTraits<T>::Real Scalar; typedef typename std::complex<Scalar> Complex; int nits = NDATA/nfft; vector<T> inbuf(nfft); vector<Complex > outbuf(nfft); FFT< Scalar > fft; if (unscaled) { fft.SetFlag(fft.Unscaled); cout << "unscaled "; } if (halfspec) { fft.SetFlag(fft.HalfSpectrum); cout << "halfspec "; } std::fill(inbuf.begin(),inbuf.end(),0); fft.fwd( outbuf , inbuf); BenchTimer timer; timer.reset(); for (int k=0;k<8;++k) { timer.start(); if (fwd) for(int i = 0; i < nits; i++) fft.fwd( outbuf , inbuf); else for(int i = 0; i < nits; i++) fft.inv(inbuf,outbuf); timer.stop(); } cout << nameof<Scalar>() << " "; double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); if ( NumTraits<T>::IsComplex ) { cout << "complex"; }else{ cout << "real "; mflops /= 2; } if (fwd) cout << " fwd"; else cout << " inv"; cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; }
// Runs func::run(a1, a2) REPEAT times inside each of ten timed passes, starting
// from identity operands, and prints the timer value with four fixed decimals.
static void run()
{
    arg1 a1;
    arg2 a2;
    a1.setIdentity();
    a2.setIdentity();

    BenchTimer timer;
    timer.reset();
    for (int pass = 0; pass < 10; ++pass)
    {
        timer.start();
        // The result is fed back into a2 on every repetition.
        for (int rep = 0; rep < REPEAT; ++rep)
        {
            a2 = func::run( a1, a2 );
        }
        timer.stop();
    }

    cout << setprecision(4) << fixed << timer.value() << "s " << endl;
}
int tool_main(int argc, char** argv) { SetupCrashHandler(); SkCommandLineFlags::Parse(argc, argv); #if SK_ENABLE_INST_COUNT if (FLAGS_leaks) { gPrintInstCount = true; } #endif SkAutoGraphics ag; // First, parse some flags. BenchLogger logger; if (FLAGS_logFile.count()) { logger.SetLogFile(FLAGS_logFile[0]); } LoggerResultsWriter logWriter(logger, FLAGS_timeFormat[0]); MultiResultsWriter writer; writer.add(&logWriter); SkAutoTDelete<JSONResultsWriter> jsonWriter; if (FLAGS_outResultsFile.count()) { jsonWriter.reset(SkNEW(JSONResultsWriter(FLAGS_outResultsFile[0]))); writer.add(jsonWriter.get()); } // Instantiate after all the writers have been added to writer so that we // call close() before their destructors are called on the way out. CallEnd<MultiResultsWriter> ender(writer); const uint8_t alpha = FLAGS_forceBlend ? 0x80 : 0xFF; SkTriState::State dither = SkTriState::kDefault; for (size_t i = 0; i < 3; i++) { if (strcmp(SkTriState::Name[i], FLAGS_forceDither[0]) == 0) { dither = static_cast<SkTriState::State>(i); } } BenchMode benchMode = kNormal_BenchMode; for (size_t i = 0; i < SK_ARRAY_COUNT(BenchMode_Name); i++) { if (strcmp(FLAGS_mode[0], BenchMode_Name[i]) == 0) { benchMode = static_cast<BenchMode>(i); } } SkTDArray<int> configs; bool runDefaultConfigs = false; // Try user-given configs first. for (int i = 0; i < FLAGS_config.count(); i++) { for (int j = 0; j < static_cast<int>(SK_ARRAY_COUNT(gConfigs)); ++j) { if (0 == strcmp(FLAGS_config[i], gConfigs[j].name)) { *configs.append() = j; } else if (0 == strcmp(FLAGS_config[i], kDefaultsConfigStr)) { runDefaultConfigs = true; } } } // If there weren't any, fill in with defaults. if (runDefaultConfigs) { for (int i = 0; i < static_cast<int>(SK_ARRAY_COUNT(gConfigs)); ++i) { if (gConfigs[i].runByDefault) { *configs.append() = i; } } } // Filter out things we can't run. 
if (kNormal_BenchMode != benchMode) { // Non-rendering configs only run in normal mode for (int i = 0; i < configs.count(); ++i) { const Config& config = gConfigs[configs[i]]; if (Benchmark::kNonRendering_Backend == config.backend) { configs.remove(i, 1); --i; } } } #if SK_SUPPORT_GPU for (int i = 0; i < configs.count(); ++i) { const Config& config = gConfigs[configs[i]]; if (Benchmark::kGPU_Backend == config.backend) { GrContext* context = gContextFactory.get(config.contextType); if (NULL == context) { SkDebugf("GrContext could not be created for config %s. Config will be skipped.\n", config.name); configs.remove(i); --i; continue; } if (config.sampleCount > context->getMaxSampleCount()){ SkDebugf( "Sample count (%d) for config %s is not supported. Config will be skipped.\n", config.sampleCount, config.name); configs.remove(i); --i; continue; } } } #endif // All flags should be parsed now. Report our settings. if (FLAGS_runOnce) { logger.logError("bench was run with --runOnce, so we're going to hide the times." 
" It's for your own good!\n"); } writer.option("mode", FLAGS_mode[0]); writer.option("alpha", SkStringPrintf("0x%02X", alpha).c_str()); writer.option("antialias", SkStringPrintf("%d", FLAGS_forceAA).c_str()); writer.option("filter", SkStringPrintf("%d", FLAGS_forceFilter).c_str()); writer.option("dither", SkTriState::Name[dither]); writer.option("rotate", SkStringPrintf("%d", FLAGS_rotate).c_str()); writer.option("scale", SkStringPrintf("%d", FLAGS_scale).c_str()); writer.option("clip", SkStringPrintf("%d", FLAGS_clip).c_str()); #if defined(SK_BUILD_FOR_WIN32) writer.option("system", "WIN32"); #elif defined(SK_BUILD_FOR_MAC) writer.option("system", "MAC"); #elif defined(SK_BUILD_FOR_ANDROID) writer.option("system", "ANDROID"); #elif defined(SK_BUILD_FOR_UNIX) writer.option("system", "UNIX"); #else writer.option("system", "other"); #endif #if defined(SK_DEBUG) writer.option("build", "DEBUG"); #else writer.option("build", "RELEASE"); #endif // Set texture cache limits if non-default. for (size_t i = 0; i < SK_ARRAY_COUNT(gConfigs); ++i) { #if SK_SUPPORT_GPU const Config& config = gConfigs[i]; if (Benchmark::kGPU_Backend != config.backend) { continue; } GrContext* context = gContextFactory.get(config.contextType); if (NULL == context) { continue; } size_t bytes; int count; context->getResourceCacheLimits(&count, &bytes); if (-1 != FLAGS_gpuCacheBytes) { bytes = static_cast<size_t>(FLAGS_gpuCacheBytes); } if (-1 != FLAGS_gpuCacheCount) { count = FLAGS_gpuCacheCount; } context->setResourceCacheLimits(count, bytes); #endif } // Run each bench in each configuration it supports and we asked for. 
Iter iter; Benchmark* bench; while ((bench = iter.next()) != NULL) { SkAutoTUnref<Benchmark> benchUnref(bench); if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getName())) { continue; } bench->setForceAlpha(alpha); bench->setForceAA(FLAGS_forceAA); bench->setForceFilter(FLAGS_forceFilter); bench->setDither(dither); bench->preDraw(); bool loggedBenchName = false; for (int i = 0; i < configs.count(); ++i) { const int configIndex = configs[i]; const Config& config = gConfigs[configIndex]; if (!bench->isSuitableFor(config.backend)) { continue; } GrContext* context = NULL; #if SK_SUPPORT_GPU SkGLContextHelper* glContext = NULL; if (Benchmark::kGPU_Backend == config.backend) { context = gContextFactory.get(config.contextType); if (NULL == context) { continue; } glContext = gContextFactory.getGLContext(config.contextType); } #endif SkAutoTUnref<SkCanvas> canvas; SkAutoTUnref<SkPicture> recordFrom; SkPictureRecorder recorderTo; const SkIPoint dim = bench->getSize(); SkAutoTUnref<SkSurface> surface; if (Benchmark::kNonRendering_Backend != config.backend) { surface.reset(make_surface(config.fColorType, dim, config.backend, config.sampleCount, context)); if (!surface.get()) { logger.logError(SkStringPrintf( "Device creation failure for config %s. 
Will skip.\n", config.name)); continue; } switch(benchMode) { case kDeferredSilent_BenchMode: case kDeferred_BenchMode: canvas.reset(SkDeferredCanvas::Create(surface.get())); break; case kRecord_BenchMode: canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY))); break; case kPictureRecord_BenchMode: { SkPictureRecorder recorderFrom; bench->draw(1, recorderFrom.beginRecording(dim.fX, dim.fY)); recordFrom.reset(recorderFrom.endRecording()); canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY))); break; } case kNormal_BenchMode: canvas.reset(SkRef(surface->getCanvas())); break; default: SkASSERT(false); } } if (NULL != canvas) { canvas->clear(SK_ColorWHITE); if (FLAGS_clip) { perform_clip(canvas, dim.fX, dim.fY); } if (FLAGS_scale) { perform_scale(canvas, dim.fX, dim.fY); } if (FLAGS_rotate) { perform_rotate(canvas, dim.fX, dim.fY); } } if (!loggedBenchName) { loggedBenchName = true; writer.bench(bench->getName(), dim.fX, dim.fY); } #if SK_SUPPORT_GPU SkGLContextHelper* contextHelper = NULL; if (Benchmark::kGPU_Backend == config.backend) { contextHelper = gContextFactory.getGLContext(config.contextType); } BenchTimer timer(contextHelper); #else BenchTimer timer; #endif double previous = std::numeric_limits<double>::infinity(); bool converged = false; // variables used to compute loopsPerFrame double frameIntervalTime = 0.0f; int frameIntervalTotalLoops = 0; bool frameIntervalComputed = false; int loopsPerFrame = 0; int loopsPerIter = 0; if (FLAGS_verbose) { SkDebugf("%s %s: ", bench->getName(), config.name); } if (!FLAGS_dryRun) { do { // Ramp up 1 -> 2 -> 4 -> 8 -> 16 -> ... -> ~1 billion. loopsPerIter = (loopsPerIter == 0) ? 1 : loopsPerIter * 2; if (loopsPerIter >= (1<<30) || timer.fWall > FLAGS_maxMs) { // If you find it takes more than a billion loops to get up to 20ms of runtime, // you've got a computer clocked at several THz or have a broken benchmark. ;) // "1B ought to be enough for anybody." 
logger.logError(SkStringPrintf( "\nCan't get %s %s to converge in %dms (%d loops)", bench->getName(), config.name, FLAGS_maxMs, loopsPerIter)); break; } if ((benchMode == kRecord_BenchMode || benchMode == kPictureRecord_BenchMode)) { // Clear the recorded commands so that they do not accumulate. canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY))); } timer.start(); // Inner loop that allows us to break the run into smaller // chunks (e.g. frames). This is especially useful for the GPU // as we can flush and/or swap buffers to keep the GPU from // queuing up too much work. for (int loopCount = loopsPerIter; loopCount > 0; ) { // Save and restore around each call to draw() to guarantee a pristine canvas. SkAutoCanvasRestore saveRestore(canvas, true/*also save*/); int loops; if (frameIntervalComputed && loopCount > loopsPerFrame) { loops = loopsPerFrame; loopCount -= loopsPerFrame; } else { loops = loopCount; loopCount = 0; } if (benchMode == kPictureRecord_BenchMode) { recordFrom->draw(canvas); } else { bench->draw(loops, canvas); } if (kDeferredSilent_BenchMode == benchMode) { static_cast<SkDeferredCanvas*>(canvas.get())->silentFlush(); } else if (NULL != canvas) { canvas->flush(); } #if SK_SUPPORT_GPU // swap drawing buffers on each frame to prevent the GPU // from queuing up too much work if (NULL != glContext) { glContext->swapBuffers(); } #endif } // Stop truncated timers before GL calls complete, and stop the full timers after. 
timer.truncatedEnd(); #if SK_SUPPORT_GPU if (NULL != glContext) { context->flush(); SK_GL(*glContext, Finish()); } #endif timer.end(); // setup the frame interval for subsequent iterations if (!frameIntervalComputed) { frameIntervalTime += timer.fWall; frameIntervalTotalLoops += loopsPerIter; if (frameIntervalTime >= FLAGS_minMs) { frameIntervalComputed = true; loopsPerFrame = (int)(((double)frameIntervalTotalLoops / frameIntervalTime) * FLAGS_minMs); if (loopsPerFrame < 1) { loopsPerFrame = 1; } // SkDebugf(" %s has %d loops in %f ms (normalized to %d)\n", // bench->getName(), frameIntervalTotalLoops, // timer.fWall, loopsPerFrame); } } const double current = timer.fWall / loopsPerIter; if (FLAGS_verbose && current > previous) { SkDebugf("↑"); } if (FLAGS_verbose) { SkDebugf("%.3g ", current); } converged = HasConverged(previous, current, timer.fWall); previous = current; } while (!FLAGS_runOnce && !converged); } if (FLAGS_verbose) { SkDebugf("\n"); } if (!FLAGS_dryRun && FLAGS_outDir.count() && Benchmark::kNonRendering_Backend != config.backend) { SkAutoTUnref<SkImage> image(surface->newImageSnapshot()); if (image.get()) { saveFile(bench->getName(), config.name, FLAGS_outDir[0], image); } } if (FLAGS_runOnce) { // Let's not mislead ourselves by looking at Debug build or single iteration bench times! continue; } // Normalize to ms per 1000 iterations. 
const double normalize = 1000.0 / loopsPerIter; const struct { char shortName; const char* longName; double ms; } times[] = { {'w', "msecs", normalize * timer.fWall}, {'W', "Wmsecs", normalize * timer.fTruncatedWall}, {'c', "cmsecs", normalize * timer.fCpu}, {'C', "Cmsecs", normalize * timer.fTruncatedCpu}, {'g', "gmsecs", normalize * timer.fGpu}, }; writer.config(config.name); for (size_t i = 0; i < SK_ARRAY_COUNT(times); i++) { if (strchr(FLAGS_timers[0], times[i].shortName) && times[i].ms > 0) { writer.timer(times[i].longName, times[i].ms); } } } } #if SK_SUPPORT_GPU gContextFactory.destroyContexts(); #endif return 0; }
int main(int argc, char *argv[]) { // bench_sort(); int rows = SIZE; int cols = SIZE; float density = DENSITY; EigenSparseMatrix sm1(rows,cols), sm2(rows,cols), sm3(rows,cols), sm4(rows,cols); BenchTimer timer; for (int nnzPerCol = NNZPERCOL; nnzPerCol>1; nnzPerCol/=1.1) { sm1.setZero(); sm2.setZero(); fillMatrix2(nnzPerCol, rows, cols, sm1); fillMatrix2(nnzPerCol, rows, cols, sm2); // std::cerr << "filling OK\n"; // dense matrices #ifdef DENSEMATRIX { std::cout << "Eigen Dense\t" << nnzPerCol << "%\n"; DenseMatrix m1(rows,cols), m2(rows,cols), m3(rows,cols); eiToDense(sm1, m1); eiToDense(sm2, m2); timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1 * m2; timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2; timer.stop(); std::cout << " a' * b:\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2.transpose(); timer.stop(); std::cout << " a' * b':\t" << timer.value() << endl; timer.reset(); timer.start(); for (int k=0; k<REPEAT; ++k) m3 = m1 * m2.transpose(); timer.stop(); std::cout << " a * b':\t" << timer.value() << endl; } #endif // eigen sparse matrices { std::cout << "Eigen sparse\t" << sm1.nonZeros()/(float(sm1.rows())*float(sm1.cols()))*100 << "% * " << sm2.nonZeros()/(float(sm2.rows())*float(sm2.cols()))*100 << "%\n"; BENCH(sm3 = sm1 * sm2; ) std::cout << " a * b:\t" << timer.value() << endl; // BENCH(sm3 = sm1.transpose() * sm2; ) // std::cout << " a' * b:\t" << timer.value() << endl; // // // BENCH(sm3 = sm1.transpose() * sm2.transpose(); ) // std::cout << " a' * b':\t" << timer.value() << endl; // // // BENCH(sm3 = sm1 * sm2.transpose(); ) // std::cout << " a * b' :\t" << timer.value() << endl; // std::cout << "\n"; // // BENCH( sm3._experimentalNewProduct(sm1, sm2); ) // std::cout << " a * b:\t" << timer.value() << endl; // // BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2); ) // 
std::cout << " a' * b:\t" << timer.value() << endl; // // // BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2.transpose()); ) // std::cout << " a' * b':\t" << timer.value() << endl; // // // BENCH(sm3._experimentalNewProduct(sm1, sm2.transpose());) // std::cout << " a * b' :\t" << timer.value() << endl; } // eigen dyn-sparse matrices /*{ DynamicSparseMatrix<Scalar> m1(sm1), m2(sm2), m3(sm3); std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/(float(m1.rows())*float(m1.cols()))*100 << "% * " << m2.nonZeros()/(float(m2.rows())*float(m2.cols()))*100 << "%\n"; // timer.reset(); // timer.start(); BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1 * m2;) // timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; // std::cout << sm3 << "\n"; timer.reset(); timer.start(); // std::cerr << "transpose...\n"; // EigenSparseMatrix sm4 = sm1.transpose(); // std::cout << sm4.nonZeros() << " == " << sm1.nonZeros() << "\n"; // exit(1); // std::cerr << "transpose OK\n"; // std::cout << sm1 << "\n\n" << sm1.transpose() << "\n\n" << sm4.transpose() << "\n\n"; BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2;) // timer.stop(); std::cout << " a' * b:\t" << timer.value() << endl; // timer.reset(); // timer.start(); BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2.transpose(); ) // timer.stop(); std::cout << " a' * b':\t" << timer.value() << endl; // timer.reset(); // timer.start(); BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1 * m2.transpose(); ) // timer.stop(); std::cout << " a * b' :\t" << timer.value() << endl; }*/ // CSparse #ifdef CSPARSE { std::cout << "CSparse \t" << nnzPerCol << "%\n"; cs *m1, *m2, *m3; eiToCSparse(sm1, m1); eiToCSparse(sm2, m2); // timer.reset(); // timer.start(); // for (int k=0; k<REPEAT; ++k) BENCH( { m3 = cs_sorted_multiply(m1, m2); if (!m3) { std::cerr << "cs_multiply failed\n"; // break; } // cs_print(m3, 0); cs_spfree(m3); } ); // timer.stop(); std::cout << " a * b:\t" << timer.value() << endl; // BENCH( { m3 = 
cs_sorted_multiply2(m1, m2); cs_spfree(m3); } ); // std::cout << " a * b:\t" << timer.value() << endl; }