////////////////////////////////////////////////////////////////////////////////
//! Run the automated (non-interactive) test with host readback.
//!
//! Loads "portrait_noise.bmp", then, starting from the current value of
//! g_Kernel and continuing while g_Kernel <= 3, runs the image filters into a
//! device buffer, copies the result to the host, saves it as a PPM and compares
//! it against the matching reference image. Every failed comparison increments
//! g_TotalErrors. The function ends by calling shrQAFinishExit() with
//! QA_PASSED when no errors were counted and QA_FAILED otherwise.
//!
//! @param argc  command-line argument count (forwarded to the cutil/shrQA helpers)
//! @param argv  command-line arguments; argv[0] is used to locate data files
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int argc, char **argv)
{
    printf("[%s] - (automated testing w/ readback)\n", sSDKsample);

    // The returned device ID is not needed by the rest of this function.
    cutilChooseCudaDevice(argc, argv);

    // First load the image, so we know the size of the image (imageW and imageH)
    printf("Allocating host and CUDA memory and loading image file...\n");
    const char *image_path = cutFindFilePath("portrait_noise.bmp", argv[0]);

    if (image_path == NULL)
    {
        printf("imageDenoisingGL was unable to find and load image file <portrait_noise.bmp>.\nExiting...\n");
        // NOTE(review): presumably shrQAFinishExit() does not return; otherwise
        // LoadBMPFile() below would be called with a NULL path - confirm.
        shrQAFinishExit(argc, (const char **)argv, QA_FAILED);
    }

    LoadBMPFile(&h_Src, &imageW, &imageH, image_path);
    printf("Data init done.\n");

    cutilSafeCall( CUDA_MallocArray(&h_Src, imageW, imageH) );

    g_CheckRender = new CheckBackBuffer(imageW, imageH, sizeof(TColor), false);
    g_CheckRender->setExecPath(argv[0]);

    // Size in bytes of one full image of TColor pixels (device buffer and readback).
    const size_t imageBytes = imageW*imageH*sizeof(TColor);

    TColor *d_dst = NULL;
    cutilSafeCall( cudaMalloc( (void **)&d_dst, imageBytes) );

    while (g_Kernel <= 3)
    {
        printf("[AutoTest]: %s <%s>\n", sSDKsample, filterMode[g_Kernel]);

        cutilSafeCall( CUDA_Bind2TextureArray() );
        runImageFilters(d_dst);
        cutilSafeCall( CUDA_UnbindTexture() );
        cutilSafeCall( cutilDeviceSynchronize() );

        // Check the readback: if the copy failed, the host buffer would hold
        // stale data and the comparison below would be meaningless.
        cutilSafeCall( cudaMemcpy(g_CheckRender->imageData(), d_dst, imageBytes,
                                  cudaMemcpyDeviceToHost) );

        g_CheckRender->savePPM(sOriginal[g_Kernel], true, NULL);

        if (!g_CheckRender->PPMvsPPM(sOriginal[g_Kernel], sReference[g_Kernel], MAX_EPSILON_ERROR, 0.15f))
        {
            g_TotalErrors++;
        }

        g_Kernel++;
    }

    // Release everything acquired above before reporting the result.
    cutilSafeCall( CUDA_FreeArray() );
    free(h_Src);

    cutilSafeCall( cudaFree( d_dst ) );
    delete g_CheckRender;

    printf("\n[%s] -> Test Results: %d errors\n", sSDKsample, g_TotalErrors);

    cutilDeviceReset();
    shrQAFinishExit(argc, (const char **)argv, (!g_TotalErrors ? QA_PASSED : QA_FAILED));
}
////////////////////////////////////////////////////////////////////////////////
//! Run test
//!
//! Selects a CUDA device, sets g_hasDouble from its compute capability,
//! creates the 2D C2R FFT plan, allocates and initialises the host/device
//! buffers (d_h0, d_ht, h_h0, d_slope), starts the timer, creates the
//! CheckBackBuffer used for validation and finally calls runCudaTest().
//!
//! @param argc  command-line argument count (forwarded to cutilChooseCudaDevice)
//! @param argv  command-line arguments; argv[0] is passed to setExecPath()
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int argc, char** argv)
{
    printf("[%s]\n", sSDKsample);

    // Cuda init
    int dev = cutilChooseCudaDevice(argc, argv);

    cudaDeviceProp deviceProp;
    cutilSafeCall(cudaGetDeviceProperties(&deviceProp, dev));
    printf("Compute capability %d.%d\n", deviceProp.major, deviceProp.minor);

    // Encode major.minor as a two-digit number (e.g. 1.3 -> 13).
    int version = deviceProp.major*10 + deviceProp.minor;
    g_hasDouble = (version >= 13);

    if (inEmulationMode())
    {
        // workaround since SM13 kernel doesn't produce correct output in emulation mode
        g_hasDouble = false;
    }

    // create FFT plan
    CUFFT_SAFE_CALL(cufftPlan2d(&fftPlan, meshW, meshH, CUFFT_C2R) );

    // allocate memory
    fftInputW = (meshW / 2)+1;
    fftInputH = meshH;
    fftInputSize = (fftInputW*fftInputH)*sizeof(float2);

    cutilSafeCall(cudaMalloc((void **)&d_h0, fftInputSize) );
    cutilSafeCall(cudaMalloc((void **)&d_ht, fftInputSize) );

    h_h0 = (float2 *) malloc(fftInputSize);

    if (h_h0 == NULL)
    {
        // generate_h0() and the upload below both use h_h0; stop here instead
        // of dereferencing a null pointer.
        fprintf(stderr, "[%s]: unable to allocate host buffer h_h0\n", sSDKsample);
        cudaThreadExit();
        exit(EXIT_FAILURE);
    }

    generate_h0();
    cutilSafeCall(cudaMemcpy(d_h0, h_h0, fftInputSize, cudaMemcpyHostToDevice) );

    cutilSafeCall(cudaMalloc((void **)&d_slope, meshW*meshH*sizeof(float2)) );

    cutCreateTimer(&timer);
    cutStartTimer(timer);
    prevTime = cutGetTimerValue(timer);

    // Creating the Auto-Validation Code
    // NOTE(review): windowH is passed for both of the first two arguments; if
    // these are width and height, the first was probably meant to be the
    // window width - confirm before changing.
    g_CheckRender = new CheckBackBuffer(windowH, windowH, 4, false);
    g_CheckRender->setPixelFormat(GL_RGBA);
    g_CheckRender->setExecPath(argv[0]);
    g_CheckRender->EnableQAReadback(true);

    runCudaTest(g_hasDouble);

    cudaThreadExit();
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple benchmark test for CUDA
//!
//! Loads the image data, initialises CUDA, performs one warm-up call to
//! boxFilterRGBA() and then calls it iCycles (150) times with iterations = 1.
//! The values returned by boxFilterRGBA() are summed, averaged over the cycle
//! count and logged together with the derived throughput.
//!
//! @param argc  command-line argument count (forwarded to the helpers)
//! @param argv  command-line arguments; argv[0] is passed to setExecPath()
////////////////////////////////////////////////////////////////////////////////
void runBenchmark( int argc, char **argv )
{
    shrLog("[runBenchmark]: [%s]\n", sSDKsample);

    // The returned device ID is not needed by the rest of this function.
    cutilChooseCudaDevice(argc, argv);

    loadImageData(argc, argv);

    initCuda();

    g_CheckRender = new CheckBackBuffer(width, height, 4, false);
    g_CheckRender->setExecPath(argv[0]);

    // NOTE(review): this buffer is not passed to any call in this function;
    // the allocation is kept to preserve behaviour and is released at the end.
    unsigned int *d_result;
    cutilSafeCall( cudaMalloc( (void **)&d_result, width*height*sizeof(unsigned int)) );

    // warm-up
    boxFilterRGBA(d_img, d_temp, d_temp, width, height, filter_radius, iterations, nthreads);
    cutilSafeCall( cutilDeviceSynchronize() );

    // Start round-trip timer and process iCycles loops on the GPU
    iterations = 1;     // standard 1-pass filtering
    const int iCycles = 150;
    double dProcessingTime = 0.0;
    shrLog("\nRunning BoxFilterGPU for %d cycles...\n\n", iCycles);
    shrDeltaT(2);

    for (int i = 0; i < iCycles; i++)
    {
        dProcessingTime += boxFilterRGBA(d_img, d_temp, d_img, width, height, filter_radius, iterations, nthreads);
    }

    // check if kernel execution generated an error and sync host
    cutilCheckMsg("Error: boxFilterRGBA Kernel execution FAILED");
    cutilSafeCall(cutilDeviceSynchronize());

    // Get average computation time
    dProcessingTime /= (double)iCycles;

    // log testname, throughput, timing and config info to sample and master logs
    shrLogEx(LOGBOTH | MASTER, 0, "boxFilter-texture, Throughput = %.4f M RGBA Pixels/s, Time = %.5f s, Size = %u RGBA Pixels, NumDevsUsed = %u, Workgroup = %u\n",
             (1.0e-6 * width * height)/dProcessingTime, dProcessingTime,
             (width * height), 1, nthreads);
    shrLog("\n");

    // Release the device buffer allocated above; it was previously leaked.
    cutilSafeCall( cudaFree( d_result ) );
}
////////////////////////////////////////////////////////////////////////////////
//! Run the automated test with host readback.
//!
//! Loads the image data, initialises CUDA and runs bilateralFilterRGBA() four
//! times. After each run the result is copied to the host, saved as a PPM and
//! compared against the matching reference image; a failed comparison
//! increments g_TotalErrors. Between runs gaussian_delta is increased by 1.0,
//! euclidean_delta is scaled by 1.25 and updateGaussian() is called.
//!
//! @param argc  command-line argument count (forwarded to the helpers)
//! @param argv  command-line arguments; argv[0] is passed to setExecPath()
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int argc, char **argv)
{
    shrLog("[runAutoTest]: [%s] (automated testing w/ readback)\n", sSDKsample);

    // The returned device ID is not needed by the rest of this function.
    cutilChooseCudaDevice(argc, argv);

    loadImageData(argc, argv);

    initCuda();

    g_CheckRender = new CheckBackBuffer(width, height, 4, false);
    g_CheckRender->setExecPath(argv[0]);

    unsigned int *d_result;
    cutilSafeCall( cudaMalloc( (void **)&d_result, width*height*sizeof(unsigned int)) );

    for (int i = 0; i < 4; i++)
    {
        shrLog("[AutoTest]: %s (radius=%d)", sSDKsample, filter_radius );
        bilateralFilterRGBA(d_result, width, height, euclidean_delta, filter_radius, iterations, nthreads);

        // check if kernel execution generated an error
        cutilCheckMsg("Error: bilateralFilterRGBA Kernel execution FAILED");
        cutilSafeCall( cutilDeviceSynchronize() );

        // Check the readback: if the copy failed, the host buffer would hold
        // stale data and the comparison below would be meaningless.
        cutilSafeCall( cudaMemcpy(g_CheckRender->imageData(), d_result,
                                  width*height*sizeof(unsigned int), cudaMemcpyDeviceToHost) );

        g_CheckRender->savePPM(sOriginal[i], false, NULL);

        if (!g_CheckRender->PPMvsPPM(sOriginal[i], sReference[i], MAX_EPSILON_ERROR, 0.15f))
        {
            g_TotalErrors++;
        }

        // Change the filter parameters for the next pass.
        gaussian_delta += 1.0f;
        euclidean_delta *= 1.25f;

        updateGaussian(gaussian_delta, filter_radius);
    }

    cutilSafeCall( cudaFree( d_result ) );

    // g_CheckRender is not local to this function: clear it after deleting so
    // that no later code sees (or deletes again) a dangling pointer.
    delete g_CheckRender;
    g_CheckRender = NULL;
}
////////////////////////////////////////////////////////////////////////////////
//! Run the automated test.
//!
//! Selects a CUDA device, initialises the Marching Cubes buffers, creates the
//! CheckBackBuffer used for validation and computes the isosurface. When
//! g_bQAReadback is set, the position, normal and compacted-voxel arrays are
//! dumped to files and each dump is compared against its reference file; every
//! failed comparison increments g_TotalErrors and PASSED/FAILED is printed.
//!
//! @param argc  command-line argument count (forwarded to the helpers)
//! @param argv  command-line arguments; argv[0] is passed to setExecPath()
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int argc, char **argv)
{
    // The returned device ID is not used afterwards.
    (void) cutilChooseCudaDevice(argc, argv);

    // Set up the CUDA buffers needed by Marching Cubes
    initMC(argc, argv);

    g_CheckRender = new CheckBackBuffer(maxVerts*sizeof(float)*4, 1, 1, false);
    g_CheckRender->setPixelFormat(GL_RGBA);
    g_CheckRender->setExecPath(argv[0]);
    g_CheckRender->EnableQAReadback(true);

    computeIsosurface();

    if (g_bQAReadback)
    {
        // Write all three result arrays to disk first ...
        dumpFile<float4>(d_pos, maxVerts, sizeof(float4), "marchCube_posArray.bin");
        dumpFile<float4>(d_normal, maxVerts, sizeof(float4), "marchCube_normalArray.bin");
        dumpFile<uint>(d_compVoxelArray, numVoxels, sizeof(uint), "marchCube_compVoxelArray.bin");

        // ... then compare each dump with its reference file.
        const bool positionsMatch =
            g_CheckRender->compareBin2BinFloat("marchCube_posArray.bin", "posArray.bin",
                                               maxVerts*sizeof(float)*4, EPSILON, THRESHOLD);

        if (!positionsMatch)
        {
            g_TotalErrors++;
        }

        const bool normalsMatch =
            g_CheckRender->compareBin2BinFloat("marchCube_normalArray.bin", "normalArray.bin",
                                               maxVerts*sizeof(float)*4, EPSILON, THRESHOLD);

        if (!normalsMatch)
        {
            g_TotalErrors++;
        }

        // NOTE(review): the voxel array is dumped as uint but compared with the
        // float comparison routine - confirm that this is intended.
        const bool voxelsMatch =
            g_CheckRender->compareBin2BinFloat("marchCube_compVoxelArray.bin", "compVoxelArray.bin",
                                               numVoxels*sizeof(uint), EPSILON, THRESHOLD);

        if (!voxelsMatch)
        {
            g_TotalErrors++;
        }

        const char *verdict = (g_TotalErrors > 0) ? "FAILED" : "PASSED";
        printf("%s\n", verdict);
    }

    cleanup();

    cudaThreadExit();
}
////////////////////////////////////////////////////////////////////////////////
//! Test driver: runs the bilateral filter on one image with four parameter sets.
//!
//! Selects a CUDA device using a fixed "-device=1" argument (the real command
//! line is not used for device selection), loads the input image, flips it
//! vertically, copies its red channel into a float array and then calls
//! testBilateralFilter() / saveArrayAsImage() for each parameter pair.
//!
//! @param argc  command-line argument count (only forwarded to cutilExit)
//! @param argv  command-line arguments (only forwarded to cutilExit)
//! @return 0 on success; the process exits with EXIT_FAILURE when no device
//!         could be selected
////////////////////////////////////////////////////////////////////////////////
int main( int argc, char* argv[] )
{
	// Writable buffers: binding string literals to char* is ill-formed since
	// C++11, and the device chooser takes a non-const argument vector.
	char arg0[] = "";
	char arg1[] = "-device=1";
	int argc2 = 2;
	char* argv2[2] = { arg0, arg1 };

	cudaDeviceProp deviceProp;
	int devID = cutilChooseCudaDevice( argc2, argv2 );
	if( devID < 0 )
	{
		printf( "exiting...\n" );
		cutilExit( argc, argv );
		// Report the failure through the exit status instead of returning 0.
		exit( EXIT_FAILURE );
	}
	cutilSafeCall( cudaGetDeviceProperties( &deviceProp, devID ) );

	//Image4f im( "c:/tmp/tulip.png" );
	//Image4f im( "c:/tmp/tulip_1080.png" );
	// Jiawen version
	Image4f im( "../../apps/bilateral_grid/input.png" );
	//Image4f im( "c:/tmp/church_panorama_5097x2889.pfm" );
	im = im.flipUD();

	Array2D< float > data( im.width(), im.height() );
	Array2D< float > output( im.width(), im.height() );

	for( int y = 0; y < im.height(); ++y )
	{
		for( int x = 0; x < im.width(); ++x )
		{
			Vector3f rgb = im.pixel( x, y ).xyz();
			// float lum = ColorUtils::rgb2luminance( rgb );
			// data( x, y ) = lum;
			// jrk: just use red
			data( x, y ) = rgb[0];
		}
	}

	// Each run is followed by a save that is given the same two parameters.
	testBilateralFilter( data, 8, 0.1f, output );
	saveArrayAsImage( output, "bf", 8, 0.1f );
	testBilateralFilter( data, 16, 0.1f, output );
	saveArrayAsImage( output, "bf", 16, 0.1f );
	testBilateralFilter( data, 32, 0.2f, output );
	saveArrayAsImage( output, "bf", 32, 0.2f );
	testBilateralFilter( data, 64, 0.4f, output );
	saveArrayAsImage( output, "bf", 64, 0.4f );

#if 0
	// Disabled cross-bilateral test.
	Image4f edgeImage( "/tmp/step.png" );
	// flipUD() is used above as returning the flipped image, so keep its
	// result here as well instead of discarding it.
	edgeImage = edgeImage.flipUD();
	// NOTE(review): edge is sized from im, not edgeImage - presumably both
	// images are expected to have the same dimensions; confirm.
	Array2D< float > edge( im.width(), im.height() );

	for( int y = 0; y < im.height(); ++y )
	{
		for( int x = 0; x < im.width(); ++x )
		{
			edge( x, y ) = edgeImage.pixel( x, y ).x;
		}
	}

	testCrossBilateralFilter( data, edge, 16, 0.1f, output );
	saveArrayAsImage( output, "cbf", 16, 0.1f );
	testCrossBilateralFilter( data, edge, 32, 0.2f, output );
	saveArrayAsImage( output, "cbf", 32, 0.2f );
	testCrossBilateralFilter( data, edge, 64, 0.4f, output );
	saveArrayAsImage( output, "cbf", 64, 0.4f );
#endif

	return 0;
}