Exemplo n.º 1
0
int main(int argc, char *argv[])
{
  int rows = SIZE;
  int cols = SIZE;
  float density = DENSITY;
  BenchTimer timer;
  #if 1
  EigenSparseTriMatrix sm1(rows,cols);
  typedef Matrix<Scalar,Dynamic,1> DenseVector;
  DenseVector b = DenseVector::Random(cols);
  DenseVector x = DenseVector::Random(cols);

  bool densedone = false;

  for (float density = DENSITY; density>=MINDENSITY; density*=0.5)
  {
    EigenSparseTriMatrix sm1(rows, cols);
    fillMatrix(density, rows, cols, sm1);

    // dense matrices
    #ifdef DENSEMATRIX
    if (!densedone)
    {
      densedone = true;
      std::cout << "Eigen Dense\t" << density*100 << "%\n";
      DenseMatrix m1(rows,cols);
      Matrix<Scalar,Dynamic,Dynamic,Dynamic,Dynamic,RowMajorBit> m2(rows,cols);
      eiToDense(sm1, m1);
      m2 = m1;

      BENCH(x = m1.marked<UpperTriangular>().solveTriangular(b);)
      std::cout << "   colmajor^-1 * b:\t" << timer.value() << endl;
//       std::cerr << x.transpose() << "\n";

      BENCH(x = m2.marked<UpperTriangular>().solveTriangular(b);)
Exemplo n.º 2
0
static void bench_record(SkPicture* src, const char* name, SkBBHFactory* bbhFactory) {
    BenchTimer timer;
    timer.start();
    const int width  = src ? src->width()  : FLAGS_nullSize;
    const int height = src ? src->height() : FLAGS_nullSize;

    for (int i = 0; i < FLAGS_loops; i++) {
        if (FLAGS_skr) {
            EXPERIMENTAL::SkRecording recording(width, height);
            if (NULL != src) {
                src->draw(recording.canvas());
            }
            // Release and delete the SkPlayback so that recording optimizes its SkRecord.
            SkDELETE(recording.releasePlayback());
        } else {
            SkPictureRecorder recorder;
            SkCanvas* canvas = recorder.beginRecording(width, height, bbhFactory, FLAGS_flags);
            if (NULL != src) {
                src->draw(canvas);
            }
            if (FLAGS_endRecording) {
                SkAutoTUnref<SkPicture> dst(recorder.endRecording());
            }
        }
    }
    timer.end();

    const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
    printf("%f\t%s\n", scale_time(msPerLoop), name);
}
Exemplo n.º 3
0
int main()
{
  BenchTimer t;
  int tries = 10;
  int rep = 400000;
  typedef Matrix3f Mat;
  typedef Vector3f Vec;
  Mat A = Mat::Random(3,3);
  A = A.adjoint() * A;

  SelfAdjointEigenSolver<Mat> eig(A);
  BENCH(t, tries, rep, eig.compute(A));
  std::cout << "Eigen:  " << t.best() << "s\n";

  Mat evecs;
  Vec evals;
  BENCH(t, tries, rep, eigen33(A,evecs,evals));
  std::cout << "Direct: " << t.best() << "s\n\n";

  std::cerr << "Eigenvalue/eigenvector diffs:\n";
  std::cerr << (evals - eig.eigenvalues()).transpose() << "\n";
  for(int k=0;k<3;++k)
    if(evecs.col(k).dot(eig.eigenvectors().col(k))<0)
      evecs.col(k) = -evecs.col(k);
  std::cerr << evecs - eig.eigenvectors() << "\n\n";
}
Exemplo n.º 4
0
int main(int argc, char *argv[])
{
  int rows = SIZE;
  int cols = SIZE;
  float density = DENSITY;

  EigenSparseMatrix sm1(rows,cols), sm3(rows,cols);

  BenchTimer timer;
  for (float density = DENSITY; density>=MINDENSITY; density*=0.5)
  {
    fillMatrix(density, rows, cols, sm1);

    // dense matrices
    #ifdef DENSEMATRIX
    {
      DenseMatrix m1(rows,cols), m3(rows,cols);
      eiToDense(sm1, m1);
      BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1.transpose();)
      std::cout << "  Eigen dense:\t" << timer.value() << endl;
    }
    #endif

    std::cout << "Non zeros: " << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n";

    // eigen sparse matrices
    {
      BENCH(for (int k=0; k<REPEAT; ++k) sm3 = sm1.transpose();)
      std::cout << "  Eigen:\t" << timer.value() << endl;
    }
Exemplo n.º 5
0
int main(int argc, char *argv[])
{
  int rows = SIZE;
  int cols = SIZE;
  bool fullyrand = true;

  BenchTimer timer;
  Coordinates coords;
  Values values;
  if(fullyrand)
  {
    Coordinates pool;
    pool.reserve(cols*NBPERROW);
    std::cerr << "fill pool" << "\n";
    for (int i=0; i<cols*NBPERROW; )
    {
//       DynamicSparseMatrix<int> stencil(SIZE,SIZE);
      Vector2i ij(ei_random<int>(0,rows-1),ei_random<int>(0,cols-1));
//       if(stencil.coeffRef(ij.x(), ij.y())==0)
      {
//         stencil.coeffRef(ij.x(), ij.y()) = 1;
        pool.push_back(ij);

      }
      ++i;
    }
    std::cerr << "pool ok" << "\n";
    int n = cols*NBPERROW*KK;
    coords.reserve(n);
    values.reserve(n);
    for (int i=0; i<n; ++i)
    {
      int i = ei_random<int>(0,pool.size());
      coords.push_back(pool[i]);
      values.push_back(ei_random<Scalar>());
    }
  }
  else
  {
    for (int j=0; j<cols; ++j)
    for (int i=0; i<NBPERROW; ++i)
    {
      coords.push_back(Vector2i(ei_random<int>(0,rows-1),j));
      values.push_back(ei_random<Scalar>());
    }
  }
  std::cout << "nnz = " << coords.size()  << "\n";
  CHECK_MEM

    // dense matrices
    #ifdef DENSEMATRIX
    {
      BENCH(setrand_eigen_dense(coords,values);)
      std::cout << "Eigen Dense\t" << timer.value() << "\n";
    }
Exemplo n.º 6
0
// Copy samples from archive with index_name
// to new index copy_name.
// Uses all samples in source archive or  [start ... end[ .
void copy(const stdString &index_name, const stdString &copy_name,
          int RTreeM, const epicsTime *start, const epicsTime *end,
          const stdString &single_name)
{
    IndexFile               index(RTreeM), new_index(RTreeM);
    IndexFile::NameIterator names;
    size_t                  channel_count = 0, value_count = 0, back_count = 0;
    BenchTimer              timer;
    stdString               dir1, dir2;
    Filename::getDirname(index_name, dir1);
    Filename::getDirname(copy_name, dir2);
    if (dir1 == dir2)
    {
        printf("You have to assert that the new index (%s)\n"
               "is in a  directory different from the old index\n"
               "(%s)\n", copy_name.c_str(), index_name.c_str());
        return;
    }
    index.open(index_name, true);
    new_index.open(copy_name, false);
    if (verbose)
        printf("Copying values from '%s' to '%s'\n",
               index_name.c_str(), copy_name.c_str());
    RawDataReader reader(index);
    if (single_name.empty())
    {
        bool ok = index.getFirstChannel(names);
        while (ok)
        {
            copy_channel(names.getName(), start, end, index, reader,
                         new_index, channel_count, value_count, back_count);
            ok = index.getNextChannel(names);
        }    
    }
    else
        copy_channel(single_name, start, end, index, reader,
                     new_index, channel_count, value_count, back_count);
    new_index.close();
    index.close();
    timer.stop();
    if (verbose)
    {
        printf("Total: %lu channels, %lu values\n",
               (unsigned long) channel_count, (unsigned long) value_count);
        printf("Skipped %lu back-in-time values\n",
               (unsigned long) back_count);
        printf("Runtime: %s\n", timer.toString().c_str());
    }
}
Exemplo n.º 7
0
template<typename Quat> void bench(const std::string& label)
{
  int tries = 10;
  int rep = 1000000;
  BenchTimer t;
  
  Quat a(4, 1, 2, 3);
  Quat b(2, 3, 4, 5);
  Quat c;
  
  std::cout.precision(3);
  
  BENCH(t, tries, rep, quatmul_default(a,b,c));
  std::cout << label << " default " << 1e3*t.best(CPU_TIMER) << "ms  \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n";
  
  BENCH(t, tries, rep, quatmul_novec(a,b,c));
  std::cout << label << " novec   " << 1e3*t.best(CPU_TIMER) << "ms  \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n";
}
Exemplo n.º 8
0
    static void run()
    {
	arg1 a1;
	a1.setIdentity();
	arg2 a2;
	a2.setIdentity();

	BenchTimer timer;
	timer.reset();
	for (int k=0; k<10; ++k)
	{
	    timer.start();
	    for (int k=0; k<REPEAT; ++k)
		a2 = func::run( a1, a2 );
	    timer.stop();
	}
	cout << setprecision(4) << fixed << timer.value() << "s  " << endl;;
    }
Exemplo n.º 9
0
EIGEN_DONT_INLINE void bench_prod()
{
  typedef Matrix<Scalar,M,K> Lhs; Lhs a; a.setRandom();
  typedef Matrix<Scalar,K,N> Rhs; Rhs b; b.setRandom();
  typedef Matrix<Scalar,M,N> Res; Res c; c.setRandom();

  BenchTimer t;
  double n = 2.*double(M)*double(N)*double(K);
  int rep = 100000./n;
  rep /= 2;
  if(rep<1) rep = 1;
  do {
    rep *= 2;
    t.reset();
    BENCH(t,1,rep,prod<CoeffBasedProductMode>(a,b,c));
  } while(t.best()<0.1);
  
  t.reset();
  BENCH(t,5,rep,prod<Mode>(a,b,c));

  print_mode(Mode);
  std::cout << int(1e-6*n*rep/t.best()) << "\t";
}
Exemplo n.º 10
0
void test(Test &t) {
    build_list(&t.noobs, 100);

    std::vector<std::thread> producers;
    std::vector<std::thread> consumers;

    // XXX: we should probably synchronize thread starting work and
    // just time the actual work.
    BenchTimer timer;

    for (int i = 0; i < t.producers; i++) {
        producers.push_back(std::thread(producer, &t, i));
    }
    for (int i = 0; i < t.consumers; i++) {
        consumers.push_back(std::thread(consumer, &t, i));
    }

    joinAll(consumers);
    timer.stop();
    t.consumersDone = true;
    joinAll(producers);

    timer.report(t.count * t.consumers, !BenchMode);
}
Exemplo n.º 11
0
void test(Test &t) {
    std::vector<std::thread> producers;
    std::vector<std::thread> consumers;

    // XXX: we should probably synchronize thread starting work and
    // just time the actual work.
    BenchTimer timer;

    for (int i = 0; i < t.producers; i++) {
        producers.push_back(std::thread(producer, &t));
    }
    for (int i = 0; i < t.consumers; i++) {
        consumers.push_back(std::thread(consumer, &t, i));
    }

    joinAll(consumers);
    timer.stop();
    t.consumersDone = true;
    joinAll(producers);

    //printf("Max thing: %ld\n", t.totalSum.load());

    timer.report(t.count * t.consumers, !BenchMode);
}
Exemplo n.º 12
0
void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
{
    typedef typename NumTraits<T>::Real Scalar;
    typedef typename std::complex<Scalar> Complex;
    int nits = NDATA/nfft;
    vector<T> inbuf(nfft);
    vector<Complex > outbuf(nfft);
    FFT< Scalar > fft;

    if (unscaled) {
        fft.SetFlag(fft.Unscaled);
        cout << "unscaled ";
    }
    if (halfspec) {
        fft.SetFlag(fft.HalfSpectrum);
        cout << "halfspec ";
    }


    std::fill(inbuf.begin(),inbuf.end(),0);
    fft.fwd( outbuf , inbuf);

    BenchTimer timer;
    timer.reset();
    for (int k=0;k<8;++k) {
        timer.start();
        if (fwd)
            for(int i = 0; i < nits; i++)
                fft.fwd( outbuf , inbuf);
        else
            for(int i = 0; i < nits; i++)
                fft.inv(inbuf,outbuf);
        timer.stop();
    }

    cout << nameof<Scalar>() << " ";
    double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits );
    if ( NumTraits<T>::IsComplex ) {
        cout << "complex";
    }else{
        cout << "real   ";
        mflops /= 2;
    }


    if (fwd)
        cout << " fwd";
    else
        cout << " inv";

    cout << " NFFT=" << nfft << "  " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
}
Exemplo n.º 13
0
int main(int argc, char *argv[])
{
//   bench_sort();

  int rows = SIZE;
  int cols = SIZE;
  float density = DENSITY;

  EigenSparseMatrix sm1(rows,cols), sm2(rows,cols), sm3(rows,cols), sm4(rows,cols);

  BenchTimer timer;
  for (int nnzPerCol = NNZPERCOL; nnzPerCol>1; nnzPerCol/=1.1)
  {
    sm1.setZero();
    sm2.setZero();
    fillMatrix2(nnzPerCol, rows, cols, sm1);
    fillMatrix2(nnzPerCol, rows, cols, sm2);
//     std::cerr << "filling OK\n";

    // dense matrices
    #ifdef DENSEMATRIX
    {
      std::cout << "Eigen Dense\t" << nnzPerCol << "%\n";
      DenseMatrix m1(rows,cols), m2(rows,cols), m3(rows,cols);
      eiToDense(sm1, m1);
      eiToDense(sm2, m2);

      timer.reset();
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        m3 = m1 * m2;
      timer.stop();
      std::cout << "   a * b:\t" << timer.value() << endl;

      timer.reset();
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        m3 = m1.transpose() * m2;
      timer.stop();
      std::cout << "   a' * b:\t" << timer.value() << endl;

      timer.reset();
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        m3 = m1.transpose() * m2.transpose();
      timer.stop();
      std::cout << "   a' * b':\t" << timer.value() << endl;

      timer.reset();
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        m3 = m1 * m2.transpose();
      timer.stop();
      std::cout << "   a * b':\t" << timer.value() << endl;
    }
    #endif

    // eigen sparse matrices
    {
      std::cout << "Eigen sparse\t" << sm1.nonZeros()/(float(sm1.rows())*float(sm1.cols()))*100 << "% * "
                << sm2.nonZeros()/(float(sm2.rows())*float(sm2.cols()))*100 << "%\n";

      BENCH(sm3 = sm1 * sm2; )
      std::cout << "   a * b:\t" << timer.value() << endl;

//       BENCH(sm3 = sm1.transpose() * sm2; )
//       std::cout << "   a' * b:\t" << timer.value() << endl;
// //
//       BENCH(sm3 = sm1.transpose() * sm2.transpose(); )
//       std::cout << "   a' * b':\t" << timer.value() << endl;
// //
//       BENCH(sm3 = sm1 * sm2.transpose(); )
//       std::cout << "   a * b' :\t" << timer.value() << endl;


//       std::cout << "\n";
//
//       BENCH( sm3._experimentalNewProduct(sm1, sm2); )
//       std::cout << "   a * b:\t" << timer.value() << endl;
//
//       BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2); )
//       std::cout << "   a' * b:\t" << timer.value() << endl;
// //
//       BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2.transpose()); )
//       std::cout << "   a' * b':\t" << timer.value() << endl;
// //
//       BENCH(sm3._experimentalNewProduct(sm1, sm2.transpose());)
//       std::cout << "   a * b' :\t" << timer.value() << endl;
    }

    // eigen dyn-sparse matrices
    /*{
      DynamicSparseMatrix<Scalar> m1(sm1), m2(sm2), m3(sm3);
      std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/(float(m1.rows())*float(m1.cols()))*100 << "% * "
                << m2.nonZeros()/(float(m2.rows())*float(m2.cols()))*100 << "%\n";

//       timer.reset();
//       timer.start();
      BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1 * m2;)
//       timer.stop();
      std::cout << "   a * b:\t" << timer.value() << endl;
//       std::cout << sm3 << "\n";

      timer.reset();
      timer.start();
//       std::cerr << "transpose...\n";
//       EigenSparseMatrix sm4 = sm1.transpose();
//       std::cout << sm4.nonZeros() << " == " << sm1.nonZeros() << "\n";
//       exit(1);
//       std::cerr << "transpose OK\n";
//       std::cout << sm1 << "\n\n" << sm1.transpose() << "\n\n" << sm4.transpose() << "\n\n";
      BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2;)
//       timer.stop();
      std::cout << "   a' * b:\t" << timer.value() << endl;

//       timer.reset();
//       timer.start();
      BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2.transpose(); )
//       timer.stop();
      std::cout << "   a' * b':\t" << timer.value() << endl;

//       timer.reset();
//       timer.start();
      BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1 * m2.transpose(); )
//       timer.stop();
      std::cout << "   a * b' :\t" << timer.value() << endl;
    }*/

    // CSparse
    #ifdef CSPARSE
    {
      std::cout << "CSparse \t" << nnzPerCol << "%\n";
      cs *m1, *m2, *m3;
      eiToCSparse(sm1, m1);
      eiToCSparse(sm2, m2);

//       timer.reset();
//       timer.start();
//       for (int k=0; k<REPEAT; ++k)
      BENCH(
      {
        m3 = cs_sorted_multiply(m1, m2);
        if (!m3)
        {
          std::cerr << "cs_multiply failed\n";
//           break;
        }
//         cs_print(m3, 0);
        cs_spfree(m3);
      }
      );
//       timer.stop();
      std::cout << "   a * b:\t" << timer.value() << endl;

//       BENCH( { m3 = cs_sorted_multiply2(m1, m2); cs_spfree(m3); } );
//       std::cout << "   a * b:\t" << timer.value() << endl;
    }
Exemplo n.º 14
0
int main(int argc, char *argv[])
{
  int rows = SIZE;
  int cols = SIZE;
  float density = DENSITY;

  EigenSparseMatrix sm1(rows,cols);
  DenseVector v1(cols), v2(cols);
  v1.setRandom();

  BenchTimer timer;
  for (float density = DENSITY; density>=MINDENSITY; density*=0.5)
  {
    fillMatrix(density, rows, cols, sm1);

    // dense matrices
    #ifdef DENSEMATRIX
    {
      std::cout << "Eigen Dense\t" << density*100 << "%\n";
      DenseMatrix m1(rows,cols);
      eiToDense(sm1, m1);

      timer.reset();
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        v2 = m1 * v1;
      timer.stop();
      std::cout << "   a * v:\t" << timer.value() << endl;

      timer.reset();
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        v2 = m1.transpose() * v1;
      timer.stop();
      std::cout << "   a' * v:\t" << timer.value() << endl;
    }
    #endif

    // eigen sparse matrices
    {
      std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n";

      BENCH(for (int k=0; k<REPEAT; ++k) v2 = sm1 * v1;)
      std::cout << "   a * v:\t" << timer.value() << endl;

      
      BENCH(for (int k=0; k<REPEAT; ++k) { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); })
      
      std::cout << "   a' * v:\t" << timer.value() << endl;
    }
Exemplo n.º 15
0
int tool_main(int argc, char** argv) {
    SetupCrashHandler();
    SkCommandLineFlags::Parse(argc, argv);
#if SK_ENABLE_INST_COUNT
    if (FLAGS_leaks) {
        gPrintInstCount = true;
    }
#endif
    SkAutoGraphics ag;

    // First, parse some flags.
    BenchLogger logger;
    if (FLAGS_logFile.count()) {
        logger.SetLogFile(FLAGS_logFile[0]);
    }

    LoggerResultsWriter logWriter(logger, FLAGS_timeFormat[0]);
    MultiResultsWriter writer;
    writer.add(&logWriter);

    SkAutoTDelete<JSONResultsWriter> jsonWriter;
    if (FLAGS_outResultsFile.count()) {
        jsonWriter.reset(SkNEW(JSONResultsWriter(FLAGS_outResultsFile[0])));
        writer.add(jsonWriter.get());
    }

    // Instantiate after all the writers have been added to writer so that we
    // call close() before their destructors are called on the way out.
    CallEnd<MultiResultsWriter> ender(writer);

    const uint8_t alpha = FLAGS_forceBlend ? 0x80 : 0xFF;
    SkTriState::State dither = SkTriState::kDefault;
    for (size_t i = 0; i < 3; i++) {
        if (strcmp(SkTriState::Name[i], FLAGS_forceDither[0]) == 0) {
            dither = static_cast<SkTriState::State>(i);
        }
    }

    BenchMode benchMode = kNormal_BenchMode;
    for (size_t i = 0; i < SK_ARRAY_COUNT(BenchMode_Name); i++) {
        if (strcmp(FLAGS_mode[0], BenchMode_Name[i]) == 0) {
            benchMode = static_cast<BenchMode>(i);
        }
    }

    SkTDArray<int> configs;
    bool runDefaultConfigs = false;
    // Try user-given configs first.
    for (int i = 0; i < FLAGS_config.count(); i++) {
        for (int j = 0; j < static_cast<int>(SK_ARRAY_COUNT(gConfigs)); ++j) {
            if (0 == strcmp(FLAGS_config[i], gConfigs[j].name)) {
                *configs.append() = j;
            } else if (0 == strcmp(FLAGS_config[i], kDefaultsConfigStr)) {
                runDefaultConfigs = true;
            }
        }
    }
    // If there weren't any, fill in with defaults.
    if (runDefaultConfigs) {
        for (int i = 0; i < static_cast<int>(SK_ARRAY_COUNT(gConfigs)); ++i) {
            if (gConfigs[i].runByDefault) {
                *configs.append() = i;
            }
        }
    }
    // Filter out things we can't run.
    if (kNormal_BenchMode != benchMode) {
        // Non-rendering configs only run in normal mode
        for (int i = 0; i < configs.count(); ++i) {
            const Config& config = gConfigs[configs[i]];
            if (Benchmark::kNonRendering_Backend == config.backend) {
                configs.remove(i, 1);
                --i;
            }
        }
    }

#if SK_SUPPORT_GPU
    for (int i = 0; i < configs.count(); ++i) {
        const Config& config = gConfigs[configs[i]];

        if (Benchmark::kGPU_Backend == config.backend) {
            GrContext* context = gContextFactory.get(config.contextType);
            if (NULL == context) {
                SkDebugf("GrContext could not be created for config %s. Config will be skipped.\n",
                    config.name);
                configs.remove(i);
                --i;
                continue;
            }
            if (config.sampleCount > context->getMaxSampleCount()){
                SkDebugf(
                    "Sample count (%d) for config %s is not supported. Config will be skipped.\n",
                    config.sampleCount, config.name);
                configs.remove(i);
                --i;
                continue;
            }
        }
    }
#endif

    // All flags should be parsed now.  Report our settings.
    if (FLAGS_runOnce) {
        logger.logError("bench was run with --runOnce, so we're going to hide the times."
                        " It's for your own good!\n");
    }
    writer.option("mode", FLAGS_mode[0]);
    writer.option("alpha", SkStringPrintf("0x%02X", alpha).c_str());
    writer.option("antialias", SkStringPrintf("%d", FLAGS_forceAA).c_str());
    writer.option("filter", SkStringPrintf("%d", FLAGS_forceFilter).c_str());
    writer.option("dither",  SkTriState::Name[dither]);

    writer.option("rotate", SkStringPrintf("%d", FLAGS_rotate).c_str());
    writer.option("scale", SkStringPrintf("%d", FLAGS_scale).c_str());
    writer.option("clip", SkStringPrintf("%d", FLAGS_clip).c_str());

#if defined(SK_BUILD_FOR_WIN32)
    writer.option("system", "WIN32");
#elif defined(SK_BUILD_FOR_MAC)
    writer.option("system", "MAC");
#elif defined(SK_BUILD_FOR_ANDROID)
    writer.option("system", "ANDROID");
#elif defined(SK_BUILD_FOR_UNIX)
    writer.option("system", "UNIX");
#else
    writer.option("system", "other");
#endif

#if defined(SK_DEBUG)
    writer.option("build", "DEBUG");
#else
    writer.option("build", "RELEASE");
#endif

    // Set texture cache limits if non-default.
    for (size_t i = 0; i < SK_ARRAY_COUNT(gConfigs); ++i) {
#if SK_SUPPORT_GPU
        const Config& config = gConfigs[i];
        if (Benchmark::kGPU_Backend != config.backend) {
            continue;
        }
        GrContext* context = gContextFactory.get(config.contextType);
        if (NULL == context) {
            continue;
        }

        size_t bytes;
        int count;
        context->getResourceCacheLimits(&count, &bytes);
        if (-1 != FLAGS_gpuCacheBytes) {
            bytes = static_cast<size_t>(FLAGS_gpuCacheBytes);
        }
        if (-1 != FLAGS_gpuCacheCount) {
            count = FLAGS_gpuCacheCount;
        }
        context->setResourceCacheLimits(count, bytes);
#endif
    }

    // Run each bench in each configuration it supports and we asked for.
    Iter iter;
    Benchmark* bench;
    while ((bench = iter.next()) != NULL) {
        SkAutoTUnref<Benchmark> benchUnref(bench);
        if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getName())) {
            continue;
        }

        bench->setForceAlpha(alpha);
        bench->setForceAA(FLAGS_forceAA);
        bench->setForceFilter(FLAGS_forceFilter);
        bench->setDither(dither);
        bench->preDraw();

        bool loggedBenchName = false;
        for (int i = 0; i < configs.count(); ++i) {
            const int configIndex = configs[i];
            const Config& config = gConfigs[configIndex];

            if (!bench->isSuitableFor(config.backend)) {
                continue;
            }

            GrContext* context = NULL;
#if SK_SUPPORT_GPU
            SkGLContextHelper* glContext = NULL;
            if (Benchmark::kGPU_Backend == config.backend) {
                context = gContextFactory.get(config.contextType);
                if (NULL == context) {
                    continue;
                }
                glContext = gContextFactory.getGLContext(config.contextType);
            }
#endif

            SkAutoTUnref<SkCanvas> canvas;
            SkAutoTUnref<SkPicture> recordFrom;
            SkPictureRecorder recorderTo;
            const SkIPoint dim = bench->getSize();

            SkAutoTUnref<SkSurface> surface;
            if (Benchmark::kNonRendering_Backend != config.backend) {
                surface.reset(make_surface(config.fColorType,
                                           dim,
                                           config.backend,
                                           config.sampleCount,
                                           context));
                if (!surface.get()) {
                    logger.logError(SkStringPrintf(
                        "Device creation failure for config %s. Will skip.\n", config.name));
                    continue;
                }

                switch(benchMode) {
                    case kDeferredSilent_BenchMode:
                    case kDeferred_BenchMode:
                        canvas.reset(SkDeferredCanvas::Create(surface.get()));
                        break;
                    case kRecord_BenchMode:
                        canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY)));
                        break;
                    case kPictureRecord_BenchMode: {
                        SkPictureRecorder recorderFrom;
                        bench->draw(1, recorderFrom.beginRecording(dim.fX, dim.fY));
                        recordFrom.reset(recorderFrom.endRecording());
                        canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY)));
                        break;
                    }
                    case kNormal_BenchMode:
                        canvas.reset(SkRef(surface->getCanvas()));
                        break;
                    default:
                        SkASSERT(false);
                }
            }

            if (NULL != canvas) {
                canvas->clear(SK_ColorWHITE);
                if (FLAGS_clip)   {
                    perform_clip(canvas, dim.fX, dim.fY);
                }
                if (FLAGS_scale)  {
                    perform_scale(canvas, dim.fX, dim.fY);
                }
                if (FLAGS_rotate) {
                    perform_rotate(canvas, dim.fX, dim.fY);
                }
            }

            if (!loggedBenchName) {
                loggedBenchName = true;
                writer.bench(bench->getName(), dim.fX, dim.fY);
            }

#if SK_SUPPORT_GPU
            SkGLContextHelper* contextHelper = NULL;
            if (Benchmark::kGPU_Backend == config.backend) {
                contextHelper = gContextFactory.getGLContext(config.contextType);
            }
            BenchTimer timer(contextHelper);
#else
            BenchTimer timer;
#endif

            double previous = std::numeric_limits<double>::infinity();
            bool converged = false;

            // variables used to compute loopsPerFrame
            double frameIntervalTime = 0.0f;
            int frameIntervalTotalLoops = 0;

            bool frameIntervalComputed = false;
            int loopsPerFrame = 0;
            int loopsPerIter = 0;
            if (FLAGS_verbose) { SkDebugf("%s %s: ", bench->getName(), config.name); }
            if (!FLAGS_dryRun) {
                do {
                    // Ramp up 1 -> 2 -> 4 -> 8 -> 16 -> ... -> ~1 billion.
                    loopsPerIter = (loopsPerIter == 0) ? 1 : loopsPerIter * 2;
                    if (loopsPerIter >= (1<<30) || timer.fWall > FLAGS_maxMs) {
                        // If you find it takes more than a billion loops to get up to 20ms of runtime,
                        // you've got a computer clocked at several THz or have a broken benchmark.  ;)
                        //     "1B ought to be enough for anybody."
                        logger.logError(SkStringPrintf(
                            "\nCan't get %s %s to converge in %dms (%d loops)",
                             bench->getName(), config.name, FLAGS_maxMs, loopsPerIter));
                        break;
                    }

                    if ((benchMode == kRecord_BenchMode || benchMode == kPictureRecord_BenchMode)) {
                        // Clear the recorded commands so that they do not accumulate.
                        canvas.reset(SkRef(recorderTo.beginRecording(dim.fX, dim.fY)));
                    }

                    timer.start();
                    // Inner loop that allows us to break the run into smaller
                    // chunks (e.g. frames). This is especially useful for the GPU
                    // as we can flush and/or swap buffers to keep the GPU from
                    // queuing up too much work.
                    for (int loopCount = loopsPerIter; loopCount > 0; ) {
                        // Save and restore around each call to draw() to guarantee a pristine canvas.
                        SkAutoCanvasRestore saveRestore(canvas, true/*also save*/);

                        int loops;
                        if (frameIntervalComputed && loopCount > loopsPerFrame) {
                            loops = loopsPerFrame;
                            loopCount -= loopsPerFrame;
                        } else {
                            loops = loopCount;
                            loopCount = 0;
                        }

                        if (benchMode == kPictureRecord_BenchMode) {
                            recordFrom->draw(canvas);
                        } else {
                            bench->draw(loops, canvas);
                        }

                        if (kDeferredSilent_BenchMode == benchMode) {
                            static_cast<SkDeferredCanvas*>(canvas.get())->silentFlush();
                        } else if (NULL != canvas) {
                            canvas->flush();
                        }

    #if SK_SUPPORT_GPU
                        // swap drawing buffers on each frame to prevent the GPU
                        // from queuing up too much work
                        if (NULL != glContext) {
                            glContext->swapBuffers();
                        }
    #endif
                    }



                    // Stop truncated timers before GL calls complete, and stop the full timers after.
                    timer.truncatedEnd();
    #if SK_SUPPORT_GPU
                    if (NULL != glContext) {
                        context->flush();
                        SK_GL(*glContext, Finish());
                    }
    #endif
                    timer.end();

                    // setup the frame interval for subsequent iterations
                    if (!frameIntervalComputed) {
                        frameIntervalTime += timer.fWall;
                        frameIntervalTotalLoops += loopsPerIter;
                        if (frameIntervalTime >= FLAGS_minMs) {
                            frameIntervalComputed = true;
                            loopsPerFrame =
                              (int)(((double)frameIntervalTotalLoops / frameIntervalTime) * FLAGS_minMs);
                            if (loopsPerFrame < 1) {
                                loopsPerFrame = 1;
                            }
    //                        SkDebugf("  %s has %d loops in %f ms (normalized to %d)\n",
    //                                 bench->getName(), frameIntervalTotalLoops,
    //                                 timer.fWall, loopsPerFrame);
                        }
                    }

                    const double current = timer.fWall / loopsPerIter;
                    if (FLAGS_verbose && current > previous) { SkDebugf("↑"); }
                    if (FLAGS_verbose) { SkDebugf("%.3g ", current); }
                    converged = HasConverged(previous, current, timer.fWall);
                    previous = current;
                } while (!FLAGS_runOnce && !converged);
            }
            if (FLAGS_verbose) { SkDebugf("\n"); }

            if (!FLAGS_dryRun && FLAGS_outDir.count() && Benchmark::kNonRendering_Backend != config.backend) {
                SkAutoTUnref<SkImage> image(surface->newImageSnapshot());
                if (image.get()) {
                    saveFile(bench->getName(), config.name, FLAGS_outDir[0],
                             image);
                }
            }

            if (FLAGS_runOnce) {
                // Let's not mislead ourselves by looking at Debug build or single iteration bench times!
                continue;
            }

            // Normalize to ms per 1000 iterations.
            const double normalize = 1000.0 / loopsPerIter;
            const struct { char shortName; const char* longName; double ms; } times[] = {
                {'w', "msecs",  normalize * timer.fWall},
                {'W', "Wmsecs", normalize * timer.fTruncatedWall},
                {'c', "cmsecs", normalize * timer.fCpu},
                {'C', "Cmsecs", normalize * timer.fTruncatedCpu},
                {'g', "gmsecs", normalize * timer.fGpu},
            };

            writer.config(config.name);
            for (size_t i = 0; i < SK_ARRAY_COUNT(times); i++) {
                if (strchr(FLAGS_timers[0], times[i].shortName) && times[i].ms > 0) {
                    writer.timer(times[i].longName, times[i].ms);
                }
            }
        }
    }
#if SK_SUPPORT_GPU
    gContextFactory.destroyContexts();
#endif
    return 0;
}
Exemplo n.º 16
0
int main(int argc, char* argv[])
{
    int size = SIZE * 8;
    int size2 = size * size;
    Scalar* a = internal::aligned_new<Scalar>(size2);
    Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
    Scalar* c = internal::aligned_new<Scalar>(size2);

    for (int i=0; i<size; ++i)
    {
        a[i] = b[i] = c[i] = 0;
    }

    BenchTimer timer;

    timer.reset();
    for (int k=0; k<10; ++k)
    {
        timer.start();
        benchVec(a, b, c, size2);
        timer.stop();
    }
    std::cout << timer.value() << "s  " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
    return 0;
    for (int innersize = size; innersize>2 ; --innersize)
    {
        if (size2%innersize==0)
        {
            int outersize = size2/innersize;
            MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
            MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
            MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
            timer.reset();
            for (int k=0; k<3; ++k)
            {
                timer.start();
                benchVec(ma, mb, mc);
                timer.stop();
            }
            std::cout << innersize << " x " << outersize << "  " << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
        }
    }

    VectorXf va = Map<VectorXf>(a, size2);
    VectorXf vb = Map<VectorXf>(b, size2);
    VectorXf vc = Map<VectorXf>(c, size2);
    timer.reset();
    for (int k=0; k<3; ++k)
    {
        timer.start();
        benchVec(va, vb, vc);
        timer.stop();
    }
    std::cout << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";

    return 0;
}
Exemplo n.º 17
0
int main(int argc, char ** argv)
{
  std::ptrdiff_t l1 = internal::queryL1CacheSize();
  std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
  std::cout << "L1 cache size     = " << (l1>0 ? l1/1024 : -1) << " KB\n";
  std::cout << "L2/L3 cache size  = " << (l2>0 ? l2/1024 : -1) << " KB\n";
  typedef internal::gebp_traits<Scalar,Scalar> Traits;
  std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n";

  int rep = 1;    // number of repetitions per try
  int tries = 2;  // number of tries, we keep the best

  int s = 2048;
  int cache_size = -1;

  bool need_help = false;
  for (int i=1; i<argc; ++i)
  {
    if(argv[i][0]=='s')
      s = atoi(argv[i]+1);
    else if(argv[i][0]=='c')
      cache_size = atoi(argv[i]+1);
    else if(argv[i][0]=='t')
      tries = atoi(argv[i]+1);
    else if(argv[i][0]=='p')
      rep = atoi(argv[i]+1);
    else
      need_help = true;
  }

  if(need_help)
  {
    std::cout << argv[0] << " s<matrix size> c<cache size> t<nb tries> p<nb repeats>\n";
    return 1;
  }

  if(cache_size>0)
    setCpuCacheSizes(cache_size,96*cache_size);

  int m = s;
  int n = s;
  int p = s;
  A a(m,p); a.setRandom();
  B b(p,n); b.setRandom();
  C c(m,n); c.setOnes();
  C rc = c;

  std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
  std::ptrdiff_t mc(m), nc(n), kc(p);
  internal::computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
  std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << "\n";

  C r = c;

  // check the parallel product is correct
  #if defined EIGEN_HAS_OPENMP
  int procs = omp_get_max_threads();
  if(procs>1)
  {
    #ifdef HAVE_BLAS
    blas_gemm(a,b,r);
    #else
    omp_set_num_threads(1);
    r.noalias() += a * b;
    omp_set_num_threads(procs);
    #endif
    c.noalias() += a * b;
    if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
  }
  #elif defined HAVE_BLAS
    blas_gemm(a,b,r);
    c.noalias() += a * b;
    if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
  #else
    gemm(a,b,c);
    r.noalias() += a.cast<Scalar>() * b.cast<Scalar>();
    if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
  #endif

  #ifdef HAVE_BLAS
  BenchTimer tblas;
  c = rc;
  BENCH(tblas, tries, rep, blas_gemm(a,b,c));
  std::cout << "blas  cpu         " << tblas.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tblas.total(CPU_TIMER)  << "s)\n";
  std::cout << "blas  real        " << tblas.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n";
  #endif

  BenchTimer tmt;
  c = rc;
  BENCH(tmt, tries, rep, gemm(a,b,c));
  std::cout << "eigen cpu         " << tmt.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmt.total(CPU_TIMER)  << "s)\n";
  std::cout << "eigen real        " << tmt.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";

  #ifdef EIGEN_HAS_OPENMP
  if(procs>1)
  {
    BenchTimer tmono;
    omp_set_num_threads(1);
    Eigen::internal::setNbThreads(1);
    c = rc;
    BENCH(tmono, tries, rep, gemm(a,b,c));
    std::cout << "eigen mono cpu    " << tmono.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmono.total(CPU_TIMER)  << "s)\n";
    std::cout << "eigen mono real   " << tmono.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n";
    std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)  << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n";
  }
  #endif
  
  #ifdef DECOUPLED
  if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
  {
    M ar(m,p); ar.setRandom();
    M ai(m,p); ai.setRandom();
    M br(p,n); br.setRandom();
    M bi(p,n); bi.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    
    BenchTimer t;
    BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci));
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
  if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
  {
    M a(m,p);  a.setRandom();
    M br(p,n); br.setRandom();
    M bi(p,n); bi.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    
    BenchTimer t;
    BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci));
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
  if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex))
  {
    M ar(m,p); ar.setRandom();
    M ai(m,p); ai.setRandom();
    M b(p,n);  b.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    
    BenchTimer t;
    BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci));
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
  #endif

  return 0;
}