//////////////////////////////////////////////////////////////////////////////// //! Run a simple benchmark test for CUDA //////////////////////////////////////////////////////////////////////////////// void runBenchmark( int argc, char **argv ) { int devID = 0; shrLog("[runBenchmark]: [%s]\n", sSDKsample); devID = cutilChooseCudaDevice(argc, argv); loadImageData(argc, argv); initCuda(); g_CheckRender = new CheckBackBuffer(width, height, 4, false); g_CheckRender->setExecPath(argv[0]); unsigned int *d_result; cutilSafeCall( cudaMalloc( (void **)&d_result, width*height*sizeof(unsigned int)) ); // warm-up boxFilterRGBA(d_img, d_temp, d_temp, width, height, filter_radius, iterations, nthreads); cutilSafeCall( cutilDeviceSynchronize() ); // Start round-trip timer and process iCycles loops on the GPU iterations = 1; // standard 1-pass filtering const int iCycles = 150; double dProcessingTime = 0.0; shrLog("\nRunning BoxFilterGPU for %d cycles...\n\n", iCycles); shrDeltaT(2); for (int i = 0; i < iCycles; i++) { dProcessingTime += boxFilterRGBA(d_img, d_temp, d_img, width, height, filter_radius, iterations, nthreads); } // check if kernel execution generated an error and sync host cutilCheckMsg("Error: boxFilterRGBA Kernel execution FAILED"); cutilSafeCall(cutilDeviceSynchronize()); // Get average computation time dProcessingTime /= (double)iCycles; // log testname, throughput, timing and config info to sample and master logs shrLogEx(LOGBOTH | MASTER, 0, "boxFilter-texture, Throughput = %.4f M RGBA Pixels/s, Time = %.5f s, Size = %u RGBA Pixels, NumDevsUsed = %u, Workgroup = %u\n", (1.0e-6 * width * height)/dProcessingTime, dProcessingTime, (width * height), 1, nthreads); shrLog("\n"); }
//////////////////////////////////////////////////////////////////////////////// //! Run a simple benchmark test for CUDA //////////////////////////////////////////////////////////////////////////////// int runBenchmark() { printf("[runBenchmark]: [%s]\n", sSDKsample); initCuda(true); unsigned int *d_result; checkCudaErrors(cudaMalloc((void **)&d_result, width*height*sizeof(unsigned int))); // warm-up boxFilterRGBA(d_img, d_temp, d_temp, width, height, filter_radius, iterations, nthreads, kernel_timer); checkCudaErrors(cudaDeviceSynchronize()); sdkStartTimer(&kernel_timer); // Start round-trip timer and process iCycles loops on the GPU iterations = 1; // standard 1-pass filtering const int iCycles = 150; double dProcessingTime = 0.0; printf("\nRunning BoxFilterGPU for %d cycles...\n\n", iCycles); for (int i = 0; i < iCycles; i++) { dProcessingTime += boxFilterRGBA(d_img, d_temp, d_img, width, height, filter_radius, iterations, nthreads, kernel_timer); } // check if kernel execution generated an error and sync host getLastCudaError("Error: boxFilterRGBA Kernel execution FAILED"); checkCudaErrors(cudaDeviceSynchronize()); sdkStopTimer(&kernel_timer); // Get average computation time dProcessingTime /= (double)iCycles; // log testname, throughput, timing and config info to sample and master logs printf("boxFilter-texture, Throughput = %.4f M RGBA Pixels/s, Time = %.5f s, Size = %u RGBA Pixels, NumDevsUsed = %u, Workgroup = %u\n", (1.0e-6 * width * height)/dProcessingTime, dProcessingTime, (width * height), 1, nthreads); printf("\n"); return 0; }
// display results using OpenGL void display() { sdkStartTimer(&timer); // execute filter, writing results to pbo unsigned int *d_result; checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0)); size_t num_bytes; checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&d_result, &num_bytes, cuda_pbo_resource)); boxFilterRGBA(d_img, d_temp, d_result, width, height, filter_radius, iterations, nthreads, kernel_timer); checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0)); // OpenGL display code path { glClear(GL_COLOR_BUFFER_BIT); // load texture from pbo glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBindTexture(GL_TEXTURE_2D, texid); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); // fragment program is required to display floating point texture glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader); glEnable(GL_FRAGMENT_PROGRAM_ARB); glDisable(GL_DEPTH_TEST); glBegin(GL_QUADS); { glTexCoord2f(0.0f, 0.0f); glVertex2f(0.0f, 0.0f); glTexCoord2f(1.0f, 0.0f); glVertex2f(1.0f, 0.0f); glTexCoord2f(1.0f, 1.0f); glVertex2f(1.0f, 1.0f); glTexCoord2f(0.0f, 1.0f); glVertex2f(0.0f, 1.0f); } glEnd(); glBindTexture(GL_TEXTURE_2D, 0); glDisable(GL_FRAGMENT_PROGRAM_ARB); } glutSwapBuffers(); glutReportErrors(); sdkStopTimer(&timer); computeFPS(); }
// This test specifies a single test (where you specify radius and/or iterations) int runSingleTest(char *ref_file, char *exec_path) { int nTotalErrors = 0; char dump_file[256]; printf("[runSingleTest]: [%s]\n", sSDKsample); initCuda(true); unsigned int *d_result; unsigned int *h_result = (unsigned int *)malloc(width * height * sizeof(unsigned int)); checkCudaErrors(cudaMalloc((void **)&d_result, width*height*sizeof(unsigned int))); // run the sample radius { printf("%s (radius=%d) (passes=%d) ", sSDKsample, filter_radius, iterations); boxFilterRGBA(d_img, d_temp, d_result, width, height, filter_radius, iterations, nthreads, kernel_timer); // check if kernel execution generated an error getLastCudaError("Error: boxFilterRGBA Kernel execution FAILED"); checkCudaErrors(cudaDeviceSynchronize()); // readback the results to system memory cudaMemcpy((unsigned char *)h_result, (unsigned char *)d_result, width*height*sizeof(unsigned int), cudaMemcpyDeviceToHost); sprintf(dump_file, "lenaRGB_%02d.ppm", filter_radius); sdkSavePPM4ub((const char *)dump_file, (unsigned char *)h_result, width, height); if (!sdkComparePPM(dump_file, sdkFindFilePath(ref_file, exec_path), MAX_EPSILON_ERROR, 0.15f, false)) { printf("Image is Different "); nTotalErrors++; } else { printf("Image is Matching "); } printf(" <%s>\n", ref_file); } printf("\n"); free(h_result); checkCudaErrors(cudaFree(d_result)); return nTotalErrors; }
void runAutoTest(int argc, char **argv) { int devID = 0; shrLog("[runAutoTest]: [%s] (automated testing w/ readback)\n", sSDKsample); devID = cutilChooseCudaDevice(argc, argv); loadImageData(argc, argv); initCuda(); g_CheckRender = new CheckBackBuffer(width, height, 4, false); g_CheckRender->setExecPath(argv[0]); unsigned int *d_result; cutilSafeCall( cudaMalloc( (void **)&d_result, width*height*sizeof(unsigned int)) ); while (filter_radius <= 22) { shrLog("[AutoTest]: %s (radius=%d)", sSDKsample, filter_radius ); boxFilterRGBA(d_img, d_temp, d_result, width, height, filter_radius, iterations, nthreads); // check if kernel execution generated an error cutilCheckMsg("Error: boxFilterRGBA Kernel execution FAILED"); cutilSafeCall( cutilDeviceSynchronize() ); cudaMemcpy(g_CheckRender->imageData(), d_result, width*height*sizeof(unsigned int), cudaMemcpyDeviceToHost); g_CheckRender->savePPM(sOriginal[g_Index], false, NULL); if (!g_CheckRender->PPMvsPPM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, 0.15f)) { g_TotalErrors++; } g_Index++; filter_radius += 4; } cutilSafeCall( cudaFree( d_result ) ); delete g_CheckRender; }
// display results using OpenGL void display() { cutilCheckError(cutStartTimer(timer)); // execute filter, writing results to pbo unsigned int *d_result; //DEPRECATED: cutilSafeCall( cudaGLMapBufferObject((void**)&d_result, pbo) ); cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0)); size_t num_bytes; cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&d_result, &num_bytes, cuda_pbo_resource)); boxFilterRGBA(d_img, d_temp, d_result, width, height, filter_radius, iterations, nthreads); // DEPRECATED: cutilSafeCall(cudaGLUnmapBufferObject(pbo)); cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0)); if (g_bFBODisplay) { g_FrameBufferObject->bindRenderPath(); } // Common display code path { glClear(GL_COLOR_BUFFER_BIT); // load texture from pbo glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBindTexture(GL_TEXTURE_2D, texid); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); // fragment program is required to display floating point texture glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader); glEnable(GL_FRAGMENT_PROGRAM_ARB); glDisable(GL_DEPTH_TEST); glBegin(GL_QUADS); if (GL_TEXTURE_TYPE == GL_TEXTURE_2D) { glTexCoord2f(0.0f, 0.0f); glVertex2f(0.0f, 0.0f); glTexCoord2f(1.0f, 0.0f); glVertex2f(1.0f, 0.0f); glTexCoord2f(1.0f, 1.0f); glVertex2f(1.0f, 1.0f); glTexCoord2f(0.0f, 1.0f); glVertex2f(0.0f, 1.0f); } else { glTexCoord2f(0.0f, 0.0f); glVertex2f(0.0f, 0.0f); glTexCoord2f((float)width, 0.0f); glVertex2f(1.0f, 0.0f); glTexCoord2f((float)width, (float)height); glVertex2f(1.0f, 1.0f); glTexCoord2f(0.0f, (float)height); glVertex2f(0.0f, 1.0f); } glEnd(); glBindTexture(GL_TEXTURE_TYPE, 0); glDisable(GL_FRAGMENT_PROGRAM_ARB); } if (g_bFBODisplay) { g_FrameBufferObject->unbindRenderPath(); } if (g_CheckRender && g_CheckRender->IsQAReadback() && g_Verify) { // readback for QA testing if (g_bFBODisplay) { shrLog("> (Frame %d) Readback FBO\n", frameCount); g_CheckRender->readback( width, height, g_FrameBufferObject->getFbo() ); } else { shrLog("> (Frame %d) Readback BackBuffer\n", frameCount); g_CheckRender->readback( width, height ); } g_CheckRender->savePPM(sOriginal[g_Index], true, NULL); if (!g_CheckRender->PPMvsPPM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, 0.15f)) { g_TotalErrors++; } g_Verify = false; } glutSwapBuffers(); glutReportErrors(); cutilCheckError(cutStopTimer(timer)); computeFPS(); }