void benchmark(int iterations) { // allocate memory for result unsigned int *d_result; unsigned int size = width * height * sizeof(unsigned int); checkCudaErrors(cudaMalloc((void **) &d_result, size)); // warm-up gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); checkCudaErrors(cudaDeviceSynchronize()); sdkStartTimer(&timer); // execute the kernel for (int i = 0; i < iterations; i++) { gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); } checkCudaErrors(cudaDeviceSynchronize()); sdkStopTimer(&timer); // check if kernel execution generated an error getLastCudaError("Kernel execution failed"); printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer)); printf("%.2f Mpixels/sec\n", (width*height*iterations / (sdkGetTimerValue(&timer) / 1000.0f)) / 1e6); checkCudaErrors(cudaFree(d_result)); }
bool runSingleTest(const char *ref_file, const char *exec_path) { // allocate memory for result int nTotalErrors = 0; unsigned int *d_result; unsigned int size = width * height * sizeof(unsigned int); checkCudaErrors(cudaMalloc((void **) &d_result, size)); // warm-up gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); checkCudaErrors(cudaDeviceSynchronize()); sdkStartTimer(&timer); gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); checkCudaErrors(cudaDeviceSynchronize()); getLastCudaError("Kernel execution failed"); sdkStopTimer(&timer); unsigned char *h_result = (unsigned char *)malloc(width*height*4); checkCudaErrors(cudaMemcpy(h_result, d_result, width*height*4, cudaMemcpyDeviceToHost)); char dump_file[1024]; sprintf(dump_file, "lena_%02d.ppm", (int)sigma); sdkSavePPM4ub(dump_file, h_result, width, height); if (!sdkComparePPM(dump_file, sdkFindFilePath(ref_file, exec_path), MAX_EPSILON_ERROR, THRESHOLD, false)) { nTotalErrors++; } printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer)); printf("%.2f Mpixels/sec\n", (width*height / (sdkGetTimerValue(&timer) / 1000.0f)) / 1e6); checkCudaErrors(cudaFree(d_result)); free(h_result); printf("Summary: %d errors!\n", nTotalErrors); printf(nTotalErrors == 0 ? "Test passed\n": "Test failed!\n"); return (nTotalErrors == 0); }
void runAutoTest(int argc, char **argv) { // allocate memory for result unsigned int *d_result; unsigned int size = width * height * sizeof(unsigned int); cutilSafeCall( cudaMalloc( (void**) &d_result, size)); // warm-up gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutStartTimer( timer)); while (sigma <= 22) { gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); cutilSafeCall( cudaThreadSynchronize() ); // check if kernel execution generated an error cutilCheckMsg("Kernel execution failed"); cudaMemcpy(g_CheckRender->imageData(), d_result, width*height*4, cudaMemcpyDeviceToHost); g_CheckRender->savePPM(sOriginal[g_Index], false, NULL); if (!g_CheckRender->PPMvsPPM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, 0.50f)) { g_TotalErrors++; } g_Index++; sigma += 4; } cutilCheckError( cutStopTimer( timer)); printf("Processing time: %f (ms)\n", cutGetTimerValue( timer)); printf("%.2f Mpixels/sec\n", (width*height*g_Index / (cutGetTimerValue( timer) / 1000.0f)) / 1e6); printf("Summary: %d errors!\n", g_TotalErrors); printf("Test %s!\n", (g_TotalErrors==0) ? "PASSED" : "FAILED"); cutilSafeCall(cudaFree(d_result)); }
// display results using OpenGL void display() { cutilCheckError(cutStartTimer(timer)); // execute filter, writing results to pbo unsigned int *d_result; cutilSafeCall(cudaGLMapBufferObject((void**)&d_result, pbo)); gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); cutilSafeCall(cudaGLUnmapBufferObject(pbo)); // load texture from pbo glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBindTexture(GL_TEXTURE_2D, texid); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); // display results glClear(GL_COLOR_BUFFER_BIT); glEnable(GL_TEXTURE_2D); glDisable(GL_DEPTH_TEST); glBegin(GL_QUADS); glTexCoord2f(0, 1); glVertex2f(0, 0); glTexCoord2f(1, 1); glVertex2f(1, 0); glTexCoord2f(1, 0); glVertex2f(1, 1); glTexCoord2f(0, 0); glVertex2f(0, 1); glEnd(); glDisable(GL_TEXTURE_2D); if (g_CheckRender && g_CheckRender->IsQAReadback() && g_Verify) { // readback for QA testing printf("> (Frame %d) Readback BackBuffer\n", frameCount); g_CheckRender->readback( width, height ); g_CheckRender->savePPM(sOriginal[g_Index], true, NULL); if (!g_CheckRender->PPMvsPPM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, 0.50f )) { g_TotalErrors++; } g_Verify = false; } glutSwapBuffers(); cutilCheckError(cutStopTimer(timer)); computeFPS(); }
// display results using OpenGL void display() { sdkStartTimer(&timer); // execute filter, writing results to pbo unsigned int *d_result; checkCudaErrors(cudaGLMapBufferObject((void **)&d_result, pbo)); gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); checkCudaErrors(cudaGLUnmapBufferObject(pbo)); // load texture from pbo glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBindTexture(GL_TEXTURE_2D, texid); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); // display results glClear(GL_COLOR_BUFFER_BIT); glEnable(GL_TEXTURE_2D); glDisable(GL_DEPTH_TEST); glBegin(GL_QUADS); glTexCoord2f(0, 1); glVertex2f(0, 0); glTexCoord2f(1, 1); glVertex2f(1, 0); glTexCoord2f(1, 0); glVertex2f(1, 1); glTexCoord2f(0, 0); glVertex2f(0, 1); glEnd(); glDisable(GL_TEXTURE_2D); glutSwapBuffers(); sdkStopTimer(&timer); computeFPS(); }