C++ (Cpp) PerfTimer::start Beispiele

Programmiersprache: C++ (Cpp)

Klasse / Typ: PerfTimer

Methode / Funktion: start

Beispiele auf hotexamples.com: 5

C++ (Cpp) PerfTimer::start - 5 Beispiele gefunden. Dies sind die am besten bewerteten C++ (Cpp) Beispiele für die PerfTimer::start, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

elapsed(6)

start(5)

Start(3)

stop(3)

Stop(2)

tick(2)

ElapsedMilliseconds(1)

GetStats(1)

begin(1)

getElapsed(1)

ms(1)

reset(1)

restart(1)

Beispiel #1

Datei anzeigen

Datei: Tests.cpp Projekt: fenbf/PointerAccessTest

void TestVectorOfObjects::run(size_t count, size_t updates)
{
	PerfTimer perf;

	perf.start();
	std::vector<Particle> particles(count);
	perf.stop(&_creationTime);

	// randomize: no sense in this case...
	/*for (size_t i = 0; i < count / 2; ++i)
	{
		int a = rand() % count;
		int b = rand() % count;
		std::swap(particles[a], particles[b]);
	}*/

	_memoryKb = (particles.capacity()*sizeof(Particle)) / 1024.0;

	for (auto p = particles.begin(); p != particles.end(); ++p)
		p->generate();

	perf.start();
	for (size_t u = 0; u < updates; ++u)
	{
		for (auto p = particles.begin(); p != particles.end(); ++p)
			p->update(DELTA_TIME);
	}
	perf.stop(&_updatesTime);
}

Beispiel #2

Datei anzeigen

Datei: Tests.cpp Projekt: fenbf/PointerAccessTest

void TestVectorOfPointers::run(size_t count, size_t updates)
{
	PerfTimer perf;

	perf.start();
	std::vector<std::shared_ptr<Particle>> particles(count);
	for (auto p = particles.begin(); p != particles.end(); ++p)
	{
		*p = std::make_shared<Particle>();
	}

	perf.stop(&_creationTime);

	// randomize to simulate 
	for (size_t i = 0; i < count / 2; ++i)
	{
		int a = rand() % count;
		int b = rand() % count;
		if (a != b)
			std::swap(particles[a], particles[b]);
	}

	/*for (int i = 0; i < 10; ++i)
	{
		std::cout << (unsigned long)particles[i].get() << std::endl;
	}*/

	_memoryKb = (particles.capacity()*sizeof(Particle)) / 1024.0;

	for (auto p = particles.begin(); p != particles.end(); ++p)
		(*p)->generate();

	perf.start();
	for (size_t u = 0; u < updates; ++u)
	{
		for (auto p = particles.begin(); p != particles.end(); ++p)
			(*p)->update(DELTA_TIME);
	}
	perf.stop(&_updatesTime);
}

Beispiel #3

Datei anzeigen

Datei: CL_Electrostatics.cpp Projekt: jmainpri/electromag-with-cuda

CLerror CLElectrosFunctor<T>::LoadKernels ( size_t deviceID )
{
    PerfTimer timer;
    timer.start();
    FunctorData &data = m_functors[deviceID];

    cout<<" Reading kernel source"<<endl;
    using std::ifstream;
    ifstream reader("Electrostatics.cl.c", ifstream::in);
    if (!reader.good())
    {
        cout<<"Cannot open program source"<<endl;
        return -1;
    }
    reader.seekg (0, std::ios::end);
    size_t length = reader.tellg();
    reader.seekg (0, std::ios::beg);
    char *source = new char[length];
    reader.read(source, length);
    reader.close();

    /*
     * Different devices require different work group sizes to operate
     * optimally. The amount of __local memory on some kernels depends on these
     * work-group sizes. This causes a problem as explained below:
     * There are two ways to use group-local memory
     * 1) Allocate it as a parameter with clSetKernelArg()
     * 2) Declare it as a constant __local array within the cl kernel
     * Option (1) has the advantage of flexibility, but the extra indexing
     * overhead is a performance killer (20-25% easily lost on nvidia GPUs)
     * Option (2) has the advantage that the compiler knows the arrays are of
     * constant size, and is free to do extreme optimizations.
     * Of course, then both host and kernel have to agree on the size of the
     * work group.
     * We abuse the fact that the source code is compiled at runtime, decide
     * those sizes in the host code, then #define them in the kernel code,
     * before it is compiled.
     */

    // BLOCK size
    data.local = {BLOCK_X, 1, 1};
    size_t local_MT[3] = {BLOCK_X_MT, BLOCK_Y_MT, 1};
    // GRID size
    data.global = {((this->m_nLines + BLOCK_X - 1)/BLOCK_X)
                   * BLOCK_X, 1, 1
                  };
    data.global[0] /= data.vecWidth;
    data.local[0] /= data.vecWidth;
    cout<<"Local   : "<<data.local[0]<<" "<<data.local[1]<<" "
        <<data.local[2]<<endl;
    cout<<"Local_MT: "<<local_MT[0]<<" "<<local_MT[1]<<" "<<local_MT[2]<<endl;
    cout<<"Global  : "<<data.global[0]<<" "<<data.global[1]<<" "
        <<data.global[2]<<endl;

    char defines[1024];
    const size_t kernelSteps = this->m_pFieldLinesData->GetSize()
                               / this->m_nLines;
    snprintf(defines, sizeof(defines),
             "#define BLOCK_X %u\n"
             "#define BLOCK_X_MT %u\n"
             "#define BLOCK_Y_MT %u\n"
             "#define KERNEL_STEPS %u\n"
             "#define Tprec %s\n"
             "#define Tvec %s\n",
             (unsigned int) data.local[0],
             (unsigned int) local_MT[0], (unsigned int)local_MT[1],
             (unsigned int) kernelSteps,
             FindPrecType(),
             FindVecType(data.vecWidth)
            );

    cout<<" Calc'ed kern steps "<<kernelSteps<<endl;
    char *srcs[2] = {defines, source};
    CLerror err;
    cl_program prog = clCreateProgramWithSource(data.context, 2,
                      (const char**) srcs,
                      NULL, &err);
    if (err)cout<<"clCreateProgramWithSource returns: "<<err<<endl;

    char options[] = "-cl-fast-relaxed-math";
    err = clBuildProgram(prog, 0, NULL, options, NULL, NULL);
    if (err)cout<<"clBuildProgram returns: "<<err<<endl;

    size_t logSize;
    clGetProgramBuildInfo(prog, data.device->deviceID,
                          CL_PROGRAM_BUILD_LOG,
                          0, NULL, &logSize);
    char * log = (char*)malloc(logSize);
    clGetProgramBuildInfo(prog, data.device->deviceID,
                          CL_PROGRAM_BUILD_LOG,
                          logSize, log, 0);
    cout<<"Program Build Log:"<<endl<<log<<endl;
    CL_ASSERTE(err, "clBuildProgram failed");
    data.perfData.add(TimingInfo("Program compilation", timer.tick()));



    //==========================================================================
    cout<<" Preparing kernel"<<endl;
    data.kernel = clCreateKernel(prog, "CalcField_curvature", &err);
    CL_ASSERTE(err, "clCreateKernel");
    return CL_SUCCESS;
}

Beispiel #4

Datei anzeigen

Datei: CL_Electrostatics.cpp Projekt: jmainpri/electromag-with-cuda

unsigned long CLElectrosFunctor<T>::MainFunctor (
    size_t functorIndex,    ///< Functor whose data to process
    size_t deviceIndex      ///< Device on which to process data
)
{
    if(functorIndex != deviceIndex)
        cerr<<"WARNING: Different functor and device"<<endl;
    PerfTimer timer;
    FunctorData &funData = m_functors[functorIndex];
    FunctorData &devData = m_functors[deviceIndex];
    perfPacket &profiler = devData.perfData;
    timer.start();
    CLerror err;
    cl_context ctx = devData.context;

    cout<<" Preparing buffers"<<endl;
    Vector3<cl_mem> &arrdata = devData.devFieldMem;
    cl_mem &charges = devData.chargeMem;
    cl_kernel &kernel = devData.kernel;

    err = CL_SUCCESS;
    // __global float *x,
    err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &arrdata.x);
    // __global float *y,
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &arrdata.y);
    // __global float *z,
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &arrdata.z);
    // __global pointCharge *Charges,
    err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &charges);
    // const unsigned int linePitch,
    cl_uint param = this->m_nLines;
    err |= clSetKernelArg(kernel, 4, sizeof(param), &param);
    // const unsigned int p,
    param = (cl_uint)this->m_pPointChargeData->GetSize();
    err |= clSetKernelArg(kernel, 5, sizeof(param), &param);
    // const unsigned int fieldIndex,
    param = 1;
    err |= clSetKernelArg(kernel, 6, sizeof(param), &param);

    // const float resolution
    T res = this->m_resolution;
    err |= clSetKernelArg(kernel, 7, sizeof(res), &res);
    if (err)cout<<"clSetKernelArg cummulates: "<<err<<endl;

    //==========================================================================
    cl_command_queue queue = clCreateCommandQueue(ctx,
                             devData.device->deviceID,
                             0, &err);
    if (err)cout<<"clCreateCommandQueue returns: "<<err<<endl;

    timer.tick();
    Vector3<T*> hostArr = this->m_pFieldLinesData->GetDataPointers();
    const size_t start = funData.startIndex;
    const size_t size = funData.elements * sizeof(T) * funData.steps;

    err = CL_SUCCESS;
    err |= clEnqueueWriteBuffer(queue, arrdata.x, CL_FALSE, 0, size,
                                &hostArr.x[start], 0, NULL, NULL);
    if (err)cout<<"Write 1 returns: "<<err<<endl;
    err |= clEnqueueWriteBuffer(queue, arrdata.y, CL_FALSE, 0, size,
                                &hostArr.y[start], 0, NULL, NULL);
    if (err)cout<<"Write 2 returns: "<<err<<endl;
    err |= clEnqueueWriteBuffer(queue, arrdata.z, CL_FALSE, 0, size,
                                &hostArr.z[start], 0, NULL, NULL);
    if (err)cout<<"Write 3 returns: "<<err<<endl;
    const size_t qSize = this->m_pPointChargeData->GetSizeBytes();
    err |= clEnqueueWriteBuffer(queue, charges, CL_FALSE, 0, qSize,
                                this->m_pPointChargeData->GetDataPointer(),
                                0, NULL, NULL);
    if (err)cout<<"Write 4 returns: "<<err<<endl;
    CL_ASSERTE(err, "Sending data to device failed");

    // Finish memory copies before starting the kernel
    CL_ASSERTE(clFinish(queue), "Pre-kernel sync");

    profiler.add(TimingInfo("Host to device transfer", timer.tick(),
                            3*size + qSize ));

    //==========================================================================

    cout<<" Executing kernel"<<endl;

    timer.tick();
    err |= clEnqueueNDRangeKernel(queue, kernel, 3, NULL,
                                  funData.global, funData.local,
                                  0, NULL, NULL);
    if (err)cout<<"clEnqueueNDRangeKernel returns: "<<err<<endl;
    // Let kernel finish before continuing
    CL_ASSERTE(clFinish(queue), "Post-kernel sync");
    double time = timer.tick();
    this->m_pPerfData->time = time;
    this->m_pPerfData->performance =
        ( this->m_nLines * ( ( 2500-1 ) * ( this->m_pPointChargeData->GetSize()
                                            * ( electroPartFieldFLOP + 3 ) + 13 ) ) / time ) / 1E9;
    profiler.add(TimingInfo("Kernel execution time", time));
    //==========================================================================
    cout<<" Recovering results"<<endl;

    timer.tick();
    err = CL_SUCCESS;
    err |= clEnqueueReadBuffer ( queue, arrdata.x, CL_FALSE, 0, size,
                                 hostArr.x, 0, NULL, NULL );
    if (err)cout<<" Read 1 returns: "<<err<<endl;
    err |= clEnqueueReadBuffer ( queue, arrdata.y, CL_FALSE, 0, size,
                                 hostArr.y, 0, NULL, NULL );
    if (err)cout<<" Read 2 returns: "<<err<<endl;
    err |= clEnqueueReadBuffer ( queue, arrdata.z, CL_FALSE, 0, size,
                                 hostArr.z, 0, NULL, NULL );
    if (err)cout<<" Read 3 returns: "<<err<<endl;
    if (err)cout<<"clEnqueueReadBuffer cummulates: "<<err<<endl;

    clFinish(queue);

    profiler.add(TimingInfo("Device to host transfer", timer.tick(),
                            3 * size));
    return CL_SUCCESS;
}

Beispiel #5

Datei anzeigen

Datei: main.cpp Projekt: ajturner/lerc

int _tmain(int argc, _TCHAR* argv[])
{
  // Sample 1: float image, 1 band, with some pixels set to invalid / void, maxZError = 0.1

  int h = 512;
  int w = 512;

  float* zImg = new float[w * h];
  memset(zImg, 0, w * h * sizeof(float));

  LercNS::BitMask bitMask(w, h);
  bitMask.SetAllValid();

  for (int k = 0, i = 0; i < h; i++)
  {
    for (int j = 0; j < w; j++, k++)
    {
      zImg[k] = sqrt((float)(i * i + j * j));    // smooth surface
      zImg[k] += rand() % 20;    // add some small amplitude noise

      if (j % 100 == 0 || i % 100 == 0)    // set some void points
        bitMask.SetInvalid(k);
    }
  }


  // compress into byte arr

  double maxZErrorWanted = 0.1;
  double eps = 0.0001;    // safety margin (optional), to account for finite floating point accuracy
  double maxZError = maxZErrorWanted - eps;

  size_t numBytesNeeded = 0;
  size_t numBytesWritten = 0;
  Lerc lerc;
  PerfTimer pt;

  if (!lerc.ComputeBufferSize((void*)zImg,    // raw image data, row by row, band by band
    Lerc::DT_Float,
    w, h, 1,
    &bitMask,                  // set 0 if all pixels are valid
    maxZError,                 // max coding error per pixel, or precision
    numBytesNeeded))           // size of outgoing Lerc blob
  {
    cout << "ComputeBufferSize failed" << endl;
  }

  size_t numBytesBlob = numBytesNeeded;
  Byte* pLercBlob = new Byte[numBytesBlob];

  pt.start();

  if (!lerc.Encode((void*)zImg,    // raw image data, row by row, band by band
    Lerc::DT_Float,
    w, h, 1,
    &bitMask,           // 0 if all pixels are valid
    maxZError,           // max coding error per pixel, or precision
    pLercBlob,           // buffer to write to, function will fail if buffer too small
    numBytesBlob,        // buffer size
    numBytesWritten))    // num bytes written to buffer
  {
    cout << "Encode failed" << endl;
  }

  pt.stop();

  double ratio = w * h * (0.125 + sizeof(float)) / numBytesBlob;
  cout << "sample 1 compression ratio = " << ratio << ", encode time = " << pt.ms() << " ms" << endl;

  // new data storage
  float* zImg3 = new float[w * h];
  memset(zImg3, 0, w * h * sizeof(float));

  BitMask bitMask3(w, h);
  bitMask3.SetAllValid();


  // decompress

  Lerc::LercInfo lercInfo;
  if (!lerc.GetLercInfo(pLercBlob, numBytesBlob, lercInfo))
    cout << "get header info failed" << endl;

  if (lercInfo.nCols != w || lercInfo.nRows != h || lercInfo.nBands != 1 || lercInfo.dt != Lerc::DT_Float)
    cout << "got wrong lerc info" << endl;

  pt.start();

  if (!lerc.Decode(pLercBlob, numBytesBlob, &bitMask3, w, h, 1, Lerc::DT_Float, (void*)zImg3))
    cout << "decode failed" << endl;

  pt.stop();


  // compare to orig

  double maxDelta = 0;
  for (int k = 0, i = 0; i < h; i++)
  {
    for (int j = 0; j < w; j++, k++)
    {
      if (bitMask3.IsValid(k) != bitMask.IsValid(k))
        cout << "Error in main: decoded bit mask differs from encoded bit mask" << endl;

      if (bitMask3.IsValid(k))
      {
        double delta = fabs(zImg3[k] - zImg[k]);
        if (delta > maxDelta)
          maxDelta = delta;
      }
    }
  }

  cout << "max z error per pixel = " << maxDelta << ", decode time = " << pt.ms() << " ms" << endl;

  delete[] zImg;
  delete[] zImg3;
  delete[] pLercBlob;
  pLercBlob = 0;


  // Sample 2: random byte image, 3 bands, all pixels valid, maxZError = 0 (lossless)

  h = 713;
  w = 257;

  Byte* byteImg = new Byte[w * h * 3];
  memset(byteImg, 0, w * h * 3);

  for (int iBand = 0; iBand < 3; iBand++)
  {
    Byte* arr = byteImg + iBand * w * h;
    for (int k = 0, i = 0; i < h; i++)
      for (int j = 0; j < w; j++, k++)
        arr[k] = rand() % 30;
  }

  // encode 

  if (!lerc.ComputeBufferSize((void*)byteImg, Lerc::DT_Byte, w, h, 3, 0, 0, numBytesNeeded))
    cout << "ComputeBufferSize failed" << endl;

  numBytesBlob = numBytesNeeded;
  pLercBlob = new Byte[numBytesBlob];

  pt.start();

  if (!lerc.Encode((void*)byteImg,    // raw image data, row by row, band by band
    Lerc::DT_Byte,
    w, h, 3,
    0,                   // 0 if all pixels are valid
    0,                   // max coding error per pixel, or precision
    pLercBlob,           // buffer to write to, function will fail if buffer too small
    numBytesBlob,        // buffer size
    numBytesWritten))    // num bytes written to buffer
  {
    cout << "Encode failed" << endl;
  }

  pt.stop();

  ratio = w * h * 3 / (double)numBytesBlob;
  cout << "sample 2 compression ratio = " << ratio << ", encode time = " << pt.ms() << " ms" << endl;

  // new data storage
  Byte* byteImg3 = new Byte[w * h * 3];
  memset(byteImg3, 0, w * h * 3);

  // decompress

  if (!lerc.GetLercInfo(pLercBlob, numBytesBlob, lercInfo))
    cout << "get header info failed" << endl;

  if (lercInfo.nCols != w || lercInfo.nRows != h || lercInfo.nBands != 3 || lercInfo.dt != Lerc::DT_Byte)
    cout << "got wrong lerc info" << endl;

  pt.start();

  if (!lerc.Decode(pLercBlob, numBytesBlob, 0, w, h, 3, Lerc::DT_Byte, (void*)byteImg3))
    cout << "decode failed" << endl;

  pt.stop();

  // compare to orig

  maxDelta = 0;
  for (int k = 0, i = 0; i < h; i++)
    for (int j = 0; j < w; j++, k++)
    {
      double delta = abs(byteImg3[k] - byteImg[k]);
      if (delta > maxDelta)
        maxDelta = delta;
    }

  cout << "max z error per pixel = " << maxDelta << ", decode time = " << pt.ms() << " ms" << endl;

  delete[] byteImg;
  delete[] byteImg3;
  delete[] pLercBlob;
  pLercBlob = 0;


#ifdef TestLegacyData

  Byte* pLercBuffer = new Byte[4 * 2048 * 2048];
  Byte* pDstArr     = new Byte[4 * 2048 * 2048];

  vector<string> fnVec;
  string path = "D:/GitHub/LercOpenSource/testData/";

  fnVec.push_back("amazon3.lerc1");
  fnVec.push_back("tuna.lerc1");
  fnVec.push_back("tuna_0_to_1_w1920_h925.lerc1");

  fnVec.push_back("testbytes.lerc2");
  fnVec.push_back("testHuffman_w30_h20_uchar0.lerc2");
  fnVec.push_back("testHuffman_w30_h20_ucharx.lerc2");
  fnVec.push_back("testHuffman_w1922_h1083_uchar.lerc2");

  fnVec.push_back("testall_w30_h20_char.lerc2");
  fnVec.push_back("testall_w30_h20_byte.lerc2");
  fnVec.push_back("testall_w30_h20_short.lerc2");
  fnVec.push_back("testall_w30_h20_ushort.lerc2");
  fnVec.push_back("testall_w30_h20_long.lerc2");
  fnVec.push_back("testall_w30_h20_ulong.lerc2");
  fnVec.push_back("testall_w30_h20_float.lerc2");

  fnVec.push_back("testall_w1922_h1083_char.lerc2");
  fnVec.push_back("testall_w1922_h1083_byte.lerc2");
  fnVec.push_back("testall_w1922_h1083_short.lerc2");
  fnVec.push_back("testall_w1922_h1083_ushort.lerc2");
  fnVec.push_back("testall_w1922_h1083_long.lerc2");
  fnVec.push_back("testall_w1922_h1083_ulong.lerc2");
  fnVec.push_back("testall_w1922_h1083_float.lerc2");

  fnVec.push_back("testuv_w30_h20_char.lerc2");
  fnVec.push_back("testuv_w30_h20_byte.lerc2");
  fnVec.push_back("testuv_w30_h20_short.lerc2");
  fnVec.push_back("testuv_w30_h20_ushort.lerc2");
  fnVec.push_back("testuv_w30_h20_long.lerc2");
  fnVec.push_back("testuv_w30_h20_ulong.lerc2");
  fnVec.push_back("testuv_w30_h20_float.lerc2");

  fnVec.push_back("testuv_w1922_h1083_char.lerc2");
  fnVec.push_back("testuv_w1922_h1083_byte.lerc2");
  fnVec.push_back("testuv_w1922_h1083_short.lerc2");
  fnVec.push_back("testuv_w1922_h1083_ushort.lerc2");
  fnVec.push_back("testuv_w1922_h1083_long.lerc2");
  fnVec.push_back("testuv_w1922_h1083_ulong.lerc2");
  fnVec.push_back("testuv_w1922_h1083_float.lerc2");

  for (size_t n = 0; n < fnVec.size(); n++)
  {
    string fn = path;
    fn += fnVec[n];

    FILE* fp = 0;
    fopen_s(&fp, fn.c_str(), "rb");
    fseek(fp, 0, SEEK_END);
    size_t fileSize = ftell(fp);    // get the file size
    fclose(fp);
    fp = 0;

    fopen_s(&fp, fn.c_str(), "rb");
    fread(pLercBuffer, 1, fileSize, fp);    // read Lerc blob into buffer
    fclose(fp);
    fp = 0;

    if (!lerc.GetLercInfo(pLercBuffer, fileSize, lercInfo))
      cout << "get header info failed" << endl;
    else
    {
      int w = lercInfo.nCols;
      int h = lercInfo.nRows;
      int nBands = lercInfo.nBands;
      Lerc::DataType dt = lercInfo.dt;

      pt.start();
      
      std::string resultMsg = "ok";
      BitMask bitMask;
      if (!lerc.Decode(pLercBuffer, fileSize, &bitMask, w, h, nBands, dt, (void*)pDstArr))
        resultMsg = "FAILED";

      pt.stop();
      printf("w = %4d, h = %4d, nBands = %2d, dt = %2d, time = %4d ms,  %s :  %s\n", w, h, nBands, (int)dt, pt.ms(), resultMsg.c_str(), fnVec[n].c_str());
    }
  }

#endif

  printf("\npress ENTER\n");
  getchar();
  
	return 0;
}