void initGL(int *argc, char** argv) { glutInit( argc, argv ); glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); glutInitWindowSize(wWidth, wHeight); glutCreateWindow("Cuda Edge Detection"); glewInit(); if (g_bFBODisplay) { if (!glewIsSupported( "GL_VERSION_2_0 GL_ARB_fragment_program GL_EXT_framebuffer_object" )) { fprintf(stderr, "Error: failed to get minimal extensions for demo\n"); fprintf(stderr, "This sample requires:\n"); fprintf(stderr, " OpenGL version 2.0\n"); fprintf(stderr, " GL_ARB_fragment_program\n"); fprintf(stderr, " GL_EXT_framebuffer_object\n"); cudaThreadExit(); cutilExit(*argc, argv); } } else { if (!glewIsSupported( "GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object" )) { fprintf(stderr, "Error: failed to get minimal extensions for demo\n"); fprintf(stderr, "This sample requires:\n"); fprintf(stderr, " OpenGL version 1.5\n"); fprintf(stderr, " GL_ARB_vertex_buffer_object\n"); fprintf(stderr, " GL_ARB_pixel_buffer_object\n"); cudaThreadExit(); cutilExit(*argc, argv); } } }
void initializeData(char *file, int argc, char **argv) { GLint bsize; unsigned int w, h; size_t file_length= strlen(file); if (!strcmp(&file[file_length-3], "pgm")) { if (cutLoadPGMub(file, &pixels, &w, &h) != CUTTrue) { printf("Failed to load image file: %s\n", file); exit(-1); } g_Bpp = 1; } else if (!strcmp(&file[file_length-3], "ppm")) { if (cutLoadPPM4ub(file, &pixels, &w, &h) != CUTTrue) { printf("Failed to load image file: %s\n", file); exit(-1); } g_Bpp = 4; } else { cudaThreadExit(); cutilExit(argc, argv); } imWidth = (int)w; imHeight = (int)h; setupTexture(imWidth, imHeight, pixels, g_Bpp); // copy function pointer tables to host side for later use setupFunctionTables(); memset(pixels, 0x0, g_Bpp * sizeof(Pixel) * imWidth * imHeight); if (!g_bQAReadback) { // use OpenGL Path glGenBuffers(1, &pbo_buffer); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer); glBufferData(GL_PIXEL_UNPACK_BUFFER, g_Bpp * sizeof(Pixel) * imWidth * imHeight, pixels, GL_STREAM_DRAW); glGetBufferParameteriv(GL_PIXEL_UNPACK_BUFFER, GL_BUFFER_SIZE, &bsize); if ((GLuint)bsize != (g_Bpp * sizeof(Pixel) * imWidth * imHeight)) { printf("Buffer object (%d) has incorrect size (%d).\n", (unsigned)pbo_buffer, (unsigned)bsize); cudaThreadExit(); cutilExit(argc, argv); } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); // register this buffer object with CUDA cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo_buffer, cudaGraphicsMapFlagsWriteDiscard)); glGenTextures(1, &texid); glBindTexture(GL_TEXTURE_2D, texid); glTexImage2D(GL_TEXTURE_2D, 0, ((g_Bpp==1) ? GL_LUMINANCE : GL_BGRA), imWidth, imHeight, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL); glBindTexture(GL_TEXTURE_2D, 0); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glPixelStorei(GL_PACK_ALIGNMENT, 1); } }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; g_bFBODisplay = false; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bOpenGLQA = true; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "fbo")) { g_bFBODisplay = true; } if (g_bQAReadback) { runAutoTest(argc, argv); } else { runGraphicsTest(argc, argv); } cutilExit(argc, argv); }
// Main program int main(int argc, char** argv) { // Create the CUTIL timer cutilCheckError( cutCreateTimer( &timer)); if (CUTFalse == initGL(argc, argv)) { return CUTFalse; } initCuda(argc, argv); CUT_CHECK_ERROR_GL(); // register callbacks glutDisplayFunc(fpsDisplay); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); // start rendering mainloop glutMainLoop(); // clean up cudaThreadExit(); cutilExit(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { runTest(argc, argv); cudaThreadExit(); cutilExit(argc, argv); }
// initialize OpenGL void initGL(int *argc, char **argv) { glutInit(argc, argv); glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE); glutInitWindowSize(winWidth, winHeight); glutCreateWindow("CUDA Smoke Particles"); glewInit(); if (!glewIsSupported("GL_VERSION_2_0 GL_VERSION_1_5")) { fprintf(stderr, "The following required OpenGL extensions missing:\n\tGL_VERSION_2_0\n\tGL_VERSION_1_5\n"); fprintf(stderr, " PASSED\n"); cutilExit(*argc, argv); exit(-1); } if (!glewIsSupported("GL_ARB_multitexture GL_ARB_vertex_buffer_object GL_EXT_geometry_shader4")) { fprintf(stderr, "The following required OpenGL extensions missing:\n\tGL_ARB_multitexture\n\tGL_ARB_vertex_buffer_object\n\tGL_EXT_geometry_shader4.\n"); fprintf(stderr, " PASSED\n"); cutilExit(*argc, argv); exit(-1); } #if defined (_WIN32) if (wglewIsSupported("WGL_EXT_swap_control")) { // disable vertical sync wglSwapIntervalEXT(0); } #endif glEnable(GL_DEPTH_TEST); // load floor texture char* imagePath = cutFindFilePath("floortile.ppm", argv[0]); if (imagePath == 0) { fprintf(stderr, "Error finding floor image file\n"); fprintf(stderr, " FAILED\n"); exit(EXIT_FAILURE); } floorTex = loadTexture(imagePath); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 16.0f); floorProg = new GLSLProgram(floorVS, floorPS); glutReportErrors(); }
int findCapableDevice(int argc, char **argv) { int dev; int bestDev = -1; int deviceCount = 0; if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) { fprintf(stderr, "cudaGetDeviceCount FAILED CUDA Driver and Runtime version may be mismatched.\n"); fprintf(stderr, "\nFAILED\n"); cudaThreadExit(); cutilExit(argc, argv); } for (dev = 0; dev < deviceCount; ++dev) { cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, dev); if (dev == 0) { // This function call returns 9999 for both major & minor fields, if no CUDA capable devices are present if (deviceProp.major == 9999 && deviceProp.minor == 9999) fprintf(stderr,"There is no device supporting CUDA.\n"); else if (deviceCount == 1) fprintf(stderr,"There is 1 device supporting CUDA\n"); else fprintf(stderr,"There are %d devices supporting CUDA\n", deviceCount); } if( checkCUDAProfile( dev ) ) { fprintf(stderr,"\nFound capable device: %d\n", dev ); if( bestDev == -1 ) { bestDev = dev; fprintf(stderr, "Setting active device to %d\n", bestDev ); } } } if( bestDev == -1 ) { fprintf(stderr, "\nNo configuration with available capabilities was found. Test has been waived.\n"); fprintf(stderr, "This sample requires:\n"); fprintf(stderr, "\tGPU Device Compute >= 2.0 is required\n"); fprintf(stderr, "\tCUDA Runtime Version >= 3.1 is required\n"); fprintf(stderr, "PASSED\n"); } return bestDev; }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { // check for command line arguments if (argc > 1) { if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; animate = false; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bOpenGLQA = true; animate = false; fpsLimit = frameCheckNumber; } } if (g_bQAReadback) { // Automated testing runAutoTest(argc, argv); } else { printf("[%s]\n\n" "Left mouse button - rotate\n" "Middle mouse button - pan\n" "Left + middle mouse button - zoom\n" "'w' key - toggle wireframe\n", sSDKsample); runGraphicsTest(argc, argv); } cutilExit(argc, argv); }
int main(int argc, char** argv) { printf("[%s]\n", sSDKsample); if (argc > 1) { if (cutCheckCmdLineFlag(argc, (const char **)argv, "help")) { printHelp(); } if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bOpenGLQA = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "fbo")) { g_bFBODisplay = true; fpsLimit = frameCheckNumber; } } if (g_bQAReadback) { runAutoTest(argc, argv); } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL( &argc, argv ); // use command-line specified CUDA device if possible, otherwise search for capable device if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilGLDeviceInit(argc, argv); int device; cudaGetDevice( &device ); if( checkCUDAProfile( device ) == false ) { cudaThreadExit(); cutilExit(argc, argv); } } else { //cudaGLSetGLDevice (cutGetMaxGflopsDeviceId() ); int dev = findCapableDevice(argc, argv); if( dev != -1 ) cudaGLSetGLDevice( dev ); else { cudaThreadExit(); cutilExit(argc, argv); } } cutilCheckError(cutCreateTimer(&timer)); cutilCheckError(cutResetTimer(timer)); glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutReshapeFunc(reshape); glutIdleFunc(idle); if (g_bOpenGLQA) { loadDefaultImage( argc, argv ); } if (argc > 1) { char *filename; if (cutGetCmdLineArgumentstr(argc, (const char **)argv, "file", &filename)) { initializeData(filename, argc, argv); } } else { loadDefaultImage( argc, argv ); } // If code is not printing the USage, then we execute this path. if (!bQuit) { if (g_bOpenGLQA) { g_CheckRender = new CheckBackBuffer(wWidth, wHeight, 4); g_CheckRender->setPixelFormat(GL_BGRA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } printf("I: display image\n"); printf("T: display Sobel edge detection (computed with tex)\n"); printf("S: display Sobel edge detection (computed with tex+shared memory)\n"); printf("Use the '-' and '=' keys to change the brightness.\n"); printf("b: switch block filter operation (mean/Sobel)\n"); printf("p: swtich point filter operation (threshold on/off)\n"); fflush(stdout); atexit(cleanup); glutMainLoop(); } } cudaThreadExit(); cutilExit(argc, argv); }
void runAutoTest(int argc, char **argv) { printf("[%s] (automated testing w/ readback)\n", sSDKsample); if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilDeviceInit(argc, argv); int device; cudaGetDevice( &device ); if( checkCUDAProfile( device ) == false ) { cudaThreadExit(); cutilExit(argc, argv); } } else { int dev = findCapableDevice(argc, argv); if( dev != -1 ) cudaSetDevice( dev ); else { cudaThreadExit(); cutilExit(argc, argv); } } loadDefaultImage( argc, argv ); if (argc > 1) { char *filename; if (cutGetCmdLineArgumentstr(argc, (const char **)argv, "file", &filename)) { initializeData(filename, argc, argv); } } else { loadDefaultImage( argc, argv ); } g_CheckRender = new CheckBackBuffer(imWidth, imHeight, sizeof(Pixel), false); g_CheckRender->setExecPath(argv[0]); Pixel *d_result; cutilSafeCall( cudaMalloc( (void **)&d_result, imWidth*imHeight*sizeof(Pixel)) ); while (g_SobelDisplayMode <= 2) { printf("AutoTest: %s <%s>\n", sSDKsample, filterMode[g_SobelDisplayMode]); sobelFilter(d_result, imWidth, imHeight, g_SobelDisplayMode, imageScale, blockOp, pointOp ); cutilSafeCall( cudaThreadSynchronize() ); cudaMemcpy(g_CheckRender->imageData(), d_result, imWidth*imHeight*sizeof(Pixel), cudaMemcpyDeviceToHost); g_CheckRender->savePGM(sOriginal[g_Index], false, NULL); if (!g_CheckRender->PGMvsPGM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, 0.15f)) { g_TotalErrors++; } g_Index++; g_SobelDisplayMode = (SobelDisplayMode)g_Index; } cutilSafeCall( cudaFree( d_result ) ); delete g_CheckRender; if (!g_TotalErrors) printf("PASSED\n"); else printf("FAILED\n"); }
int main(int argc, char** argv) { // EDISON ////////////////////////////////////////////////////////////////// sigmaS = 7.0f; sigmaR = 6.5f; edison.minRegion = 20.0f; cutLoadPPMub("image.ppm", &edison.inputImage_, &width, &height); edison.meanShift(); cutSavePPMub("segmimage.ppm", edison.segmImage_, width, height); cutSavePPMub("filtimage.ppm", edison.filtImage_, width, height); unsigned char data[height * width]; memset(data, 0, height * width * sizeof(unsigned char)); for(int i = 0; i < edison.numBoundaries_; i++) { data[edison.boundaries_[i]] = 255; } cutSavePGMub("bndyimage.pgm", data, width, height); //return 0; // EDISON ////////////////////////////////////////////////////////////////// if (argc > 1) { if (cutCheckCmdLineFlag(argc, (const char **)argv, "help")) { printHelp(); } if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bOpenGLQA = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "fbo")) { g_bFBODisplay = true; fpsLimit = frameCheckNumber; } } if (g_bQAReadback) { runAutoTest(argc, argv); } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL( argc, argv ); // use command-line specified CUDA device, otherwise use device with highest Gflops/s if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilGLDeviceInit(argc, argv); } else { cudaGLSetGLDevice (cutGetMaxGflopsDeviceId() ); } int device; struct cudaDeviceProp prop; cudaGetDevice( &device ); cudaGetDeviceProperties( &prop, device ); if(!strncmp( "Tesla", prop.name, 5 )) { printf("This sample needs a card capable of OpenGL and display.\n"); printf("Please choose a different device with the -device=x argument.\n"); cudaThreadExit(); cutilExit(argc, argv); } cutilCheckError(cutCreateTimer(&timer)); cutilCheckError(cutResetTimer(timer)); glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutReshapeFunc(reshape); glutIdleFunc(idle); if (g_bOpenGLQA) { loadDefaultImage( argv[0] ); } if (argc > 1) { char *filename; if (cutGetCmdLineArgumentstr(argc, (const char **)argv, "file", &filename)) { initializeData(filename); } } else { loadDefaultImage( argv[0]); } // If code is not printing the USage, then we execute this path. if (!bQuit) { if (g_bOpenGLQA) { g_CheckRender = new CheckBackBuffer(wWidth, wHeight, 4); g_CheckRender->setPixelFormat(GL_BGRA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } printf("I: display image\n"); printf("T: display Sobel edge detection (computed with tex)\n"); printf("S: display Sobel edge detection (computed with tex+shared memory)\n"); printf("Use the '-' and '=' keys to change the brightness.\n"); fflush(stdout); atexit(cleanup); glutMainLoop(); } } cudaThreadExit(); cutilExit(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Main program //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv){ const unsigned int OPT_N_MAX = 512; unsigned int useDoublePrecision; printf("[binomialOptions]\n"); int devID = cutilDeviceInit(argc, argv); if (devID < 0) { printf("exiting...\n"); cutilExit(argc, argv); exit(0); } cutilSafeCall(cudaGetDevice(&devID)); cudaDeviceProp deviceProp; cutilSafeCall(cudaGetDeviceProperties(&deviceProp, devID)); char *precisionChoice; cutGetCmdLineArgumentstr(argc, (const char **)argv, "type", &precisionChoice); if(precisionChoice == NULL) { useDoublePrecision = 0; } else { if(!strcasecmp(precisionChoice, "double")) useDoublePrecision = 1; else useDoublePrecision = 0; } printf(useDoublePrecision ? "Using double precision...\n" : "Using single precision...\n"); const int OPT_N = deviceEmulation() ? 1 : OPT_N_MAX; TOptionData optionData[OPT_N_MAX]; float callValueBS[OPT_N_MAX], callValueGPU[OPT_N_MAX], callValueCPU[OPT_N_MAX]; double sumDelta, sumRef, gpuTime, errorVal; unsigned int hTimer; int i; cutilCheckError( cutCreateTimer(&hTimer) ); int version = deviceProp.major * 10 + deviceProp.minor; if(useDoublePrecision && version < 13){ printf("Double precision is not supported.\n"); return 0; } printf("Generating input data...\n"); //Generate options set srand(123); for(i = 0; i < OPT_N; i++){ optionData[i].S = randData(5.0f, 30.0f); optionData[i].X = randData(1.0f, 100.0f); optionData[i].T = randData(0.25f, 10.0f); optionData[i].R = 0.06f; optionData[i].V = 0.10f; BlackScholesCall(callValueBS[i], optionData[i]); } printf("Running GPU binomial tree...\n"); cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutResetTimer(hTimer) ); cutilCheckError( cutStartTimer(hTimer) ); if(useDoublePrecision) binomialOptions_SM13(callValueGPU, optionData, OPT_N); else binomialOptions_SM10(callValueGPU, optionData, OPT_N); cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutStopTimer(hTimer) ); gpuTime = cutGetTimerValue(hTimer); printf("Options count : %i \n", OPT_N); printf("Time steps : %i \n", NUM_STEPS); printf("binomialOptionsGPU() time: %f msec\n", gpuTime); printf("Options per second : %f \n", OPT_N / (gpuTime * 0.001)); printf("Running CPU binomial tree...\n"); for(i = 0; i < OPT_N; i++) binomialOptionsCPU(callValueCPU[i], optionData[i]); printf("Comparing the results...\n"); sumDelta = 0; sumRef = 0; printf("GPU binomial vs. Black-Scholes\n"); for(i = 0; i < OPT_N; i++){ sumDelta += fabs(callValueBS[i] - callValueGPU[i]); sumRef += fabs(callValueBS[i]); } if(sumRef >1E-5) printf("L1 norm: %E\n", sumDelta / sumRef); else printf("Avg. diff: %E\n", sumDelta / (double)OPT_N); printf("CPU binomial vs. Black-Scholes\n"); sumDelta = 0; sumRef = 0; for(i = 0; i < OPT_N; i++){ sumDelta += fabs(callValueBS[i]- callValueCPU[i]); sumRef += fabs(callValueBS[i]); } if(sumRef >1E-5) printf("L1 norm: %E\n", sumDelta / sumRef); else printf("Avg. diff: %E\n", sumDelta / (double)OPT_N); printf("CPU binomial vs. GPU binomial\n"); sumDelta = 0; sumRef = 0; for(i = 0; i < OPT_N; i++){ sumDelta += fabs(callValueGPU[i] - callValueCPU[i]); sumRef += callValueCPU[i]; } if(sumRef > 1E-5) printf("L1 norm: %E\n", errorVal = sumDelta / sumRef); else printf("Avg. diff: %E\n", errorVal = sumDelta / (double)OPT_N); printf("Shutting down...\n"); printf("\n[binomialOptions] - Test Summary:\n"); printf((errorVal < 5e-4) ? "PASSED\n" : "FAILED\n"); cutilCheckError( cutDeleteTimer(hTimer) ); cudaThreadExit(); cutilExit(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { int retVal = 0; retVal = xnInit( argc, argv ); printf("[ %s ]\n", sSDKsample); if (argc > 1) { cutGetCmdLineArgumenti( argc, (const char**) argv, "n", (int *) &numParticles); if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt") ) { g_bQAReadback = true; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bQAGLVerify = true; } } if (g_bQAReadback) { // For Automated testing, we do not use OpenGL/CUDA interop if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) { cutilDeviceInit (argc, argv); } else { cudaSetDevice (cutGetMaxGflopsDeviceId() ); } g_CheckRender = new CheckBackBuffer(winWidth, winHeight, 4, false); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); // This code path is used for Automated Testing initParticles(numParticles, false, false); initParams(); if (emitterOn) { runEmitter(); } SimParams ¶ms = psystem->getParams(); params.cursorPos = make_float3(cursorPosLag.x, cursorPosLag.y, cursorPosLag.z); psystem->step(timestep); float4 *pos = NULL, *vel = NULL; int g_TotalErrors = 0; psystem->dumpBin(&pos, &vel); g_CheckRender->dumpBin(pos, numParticles*sizeof(float4), "smokeParticles_pos.bin"); g_CheckRender->dumpBin(vel, numParticles*sizeof(float4), "smokeParticles_vel.bin"); if (!g_CheckRender->compareBin2BinFloat("smokeParticles_pos.bin", sRefBin[0], numParticles*sizeof(float4), MAX_EPSILON_ERROR, THRESHOLD)) g_TotalErrors++; if (!g_CheckRender->compareBin2BinFloat("smokeParticles_vel.bin", sRefBin[1], numParticles*sizeof(float4), MAX_EPSILON_ERROR, THRESHOLD)) g_TotalErrors++; delete psystem; delete g_CheckRender; printf("%s\n", (g_TotalErrors > 0) ? "FAILED" : "PASSED"); cudaThreadExit(); } else { // Normal smokeParticles rendering path // 1st initialize OpenGL context, so we can properly set the GL for CUDA. // This is needed to achieve optimal performance with OpenGL/CUDA interop. initGL( &argc, argv ); if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) { cutilGLDeviceInit (argc, argv); } else { cudaGLSetGLDevice (cutGetMaxGflopsDeviceId() ); } if (g_bQAGLVerify) { g_CheckRender = new CheckBackBuffer(winWidth, winHeight, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } // This is the normal code path for SmokeParticles initParticles(numParticles, true, true); initParams(); initMenus(); glutDisplayFunc(display); glutReshapeFunc(reshape); glutMouseFunc(mouse); glutMotionFunc(motion); glutKeyboardFunc(key); glutKeyboardUpFunc(keyUp); glutSpecialFunc(special); glutIdleFunc(idle); glutMainLoop(); } cutilExit(argc, argv); return retVal; }
//////////////////////////////////////////////////////////////////////////////// // Test driver //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv){ uint *h_SrcKey, *h_SrcVal, *h_DstKey, *h_DstVal; uint *d_SrcKey, *d_SrcVal, *d_BufKey, *d_BufVal, *d_DstKey, *d_DstVal; uint hTimer; const uint N = 4 * 1048576; const uint DIR = 1; const uint numValues = 65536; printf("Allocating and initializing host arrays...\n\n"); cutCreateTimer(&hTimer); h_SrcKey = (uint *)malloc(N * sizeof(uint)); h_SrcVal = (uint *)malloc(N * sizeof(uint)); h_DstKey = (uint *)malloc(N * sizeof(uint)); h_DstVal = (uint *)malloc(N * sizeof(uint)); srand(2009); for(uint i = 0; i < N; i++) h_SrcKey[i] = rand() % numValues; fillValues(h_SrcVal, N); printf("Allocating and initializing CUDA arrays...\n\n"); cutilSafeCall( cudaMalloc((void **)&d_DstKey, N * sizeof(uint)) ); cutilSafeCall( cudaMalloc((void **)&d_DstVal, N * sizeof(uint)) ); cutilSafeCall( cudaMalloc((void **)&d_BufKey, N * sizeof(uint)) ); cutilSafeCall( cudaMalloc((void **)&d_BufVal, N * sizeof(uint)) ); cutilSafeCall( cudaMalloc((void **)&d_SrcKey, N * sizeof(uint)) ); cutilSafeCall( cudaMalloc((void **)&d_SrcVal, N * sizeof(uint)) ); cutilSafeCall( cudaMemcpy(d_SrcKey, h_SrcKey, N * sizeof(uint), cudaMemcpyHostToDevice) ); cutilSafeCall( cudaMemcpy(d_SrcVal, h_SrcVal, N * sizeof(uint), cudaMemcpyHostToDevice) ); printf("Initializing GPU merge sort...\n"); initMergeSort(); printf("Running GPU merge sort...\n"); cutilSafeCall( cudaThreadSynchronize() ); cutResetTimer(hTimer); cutStartTimer(hTimer); mergeSort( d_DstKey, d_DstVal, d_BufKey, d_BufVal, d_SrcKey, d_SrcVal, N, DIR ); cutilSafeCall( cudaThreadSynchronize() ); cutStopTimer(hTimer); printf("Time: %f ms\n", cutGetTimerValue(hTimer)); printf("Reading back GPU merge sort results...\n"); cutilSafeCall( cudaMemcpy(h_DstKey, d_DstKey, N * sizeof(uint), cudaMemcpyDeviceToHost) ); cutilSafeCall( cudaMemcpy(h_DstVal, d_DstVal, N * sizeof(uint), cudaMemcpyDeviceToHost) ); printf("Inspecting the results...\n"); uint keysFlag = validateSortedKeys( h_DstKey, h_SrcKey, 1, N, numValues, DIR ); uint valuesFlag = validateSortedValues( h_DstKey, h_DstVal, h_SrcKey, 1, N ); printf( (keysFlag && valuesFlag) ? "TEST PASSED\n" : "TEST FAILED\n"); printf("Shutting down...\n"); closeMergeSort(); cutilCheckError( cutDeleteTimer(hTimer) ); cutilSafeCall( cudaFree(d_SrcVal) ); cutilSafeCall( cudaFree(d_SrcKey) ); cutilSafeCall( cudaFree(d_BufVal) ); cutilSafeCall( cudaFree(d_BufKey) ); cutilSafeCall( cudaFree(d_DstVal) ); cutilSafeCall( cudaFree(d_DstKey) ); free(h_DstVal); free(h_DstKey); free(h_SrcVal); free(h_SrcKey); cudaThreadExit(); cutilExit(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { // use command-line specified CUDA device, otherwise use device with highest Gflops/s if (!cutCheckCmdLineFlag(argc, (const char **)argv, "noqatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } if (argc > 1) { if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bOpenGLQA = true; fpsLimit = frameCheckNumber; } } printf("[%s] ", sSDKsample); if (g_bQAReadback) printf("(Automated Testing)\n"); if (g_bOpenGLQA) printf("(OpenGL Readback)\n"); // Get the path of the filename char *filename; if (cutGetCmdLineArgumentstr(argc, (const char**) argv, "image", &filename)) { image_filename = filename; } // load image char* image_path = cutFindFilePath(image_filename, argv[0]); if (image_path == 0) { fprintf(stderr, "Error finding image file '%s'\n", image_filename); cudaThreadExit(); exit(EXIT_FAILURE); } cutilCheckError( cutLoadPPM4ub(image_path, (unsigned char **) &h_img, &width, &height)); if (!h_img) { printf("Error opening file '%s'\n", image_path); cudaThreadExit(); exit(-1); } printf("Loaded '%s', %d x %d pixels\n", image_path, width, height); cutGetCmdLineArgumenti(argc, (const char**) argv, "threads", &nthreads); cutGetCmdLineArgumentf(argc, (const char**) argv, "sigma", &sigma); runBenchmark = cutCheckCmdLineFlag(argc, (const char**) argv, "bench"); int device; struct cudaDeviceProp prop; cudaGetDevice( &device ); cudaGetDeviceProperties( &prop, device ); if( !strncmp( "Tesla", prop.name, 5 ) ) { printf("Tesla card detected, running the test in benchmark mode (no OpenGL display)\n"); // runBenchmark = CUTTrue; g_bQAReadback = true; } // Benchmark or AutoTest mode detected, no OpenGL if (runBenchmark == CUTTrue || g_bQAReadback) { if( cutCheckCmdLineFlag( argc, (const char **)argv, "device" ) ) cutilDeviceInit( argc, argv ); else cudaSetDevice( cutGetMaxGflopsDeviceId() ); } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL(argc, argv); if( cutCheckCmdLineFlag( argc, (const char **)argv, "device" ) ) cutilGLDeviceInit( argc, argv ); else cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); } initCudaBuffers(); if (g_bOpenGLQA) { g_CheckRender = new CheckBackBuffer(width, height, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } if (g_bQAReadback) { // This is the automated testing path g_CheckRender = new CheckBackBuffer(width, height, 4, false); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); runAutoTest(argc, argv); cleanup(); cudaThreadExit(); cutilExit(argc, argv); } if (runBenchmark) { benchmark(100); cleanup(); cudaThreadExit(); exit(0); } initGLBuffers(); atexit(cleanup); glutMainLoop(); cudaThreadExit(); cutilExit(argc, argv); }
int main( int argc, char* argv[] ) { int argc2 = 2; char* argv2[2] = { "", "-device=1" }; cudaDeviceProp deviceProp; int devID = cutilChooseCudaDevice( argc2, argv2 ); if( devID < 0 ) { printf( "exiting...\n" ); cutilExit( argc, argv ); exit( 0 ); } cutilSafeCall( cudaGetDeviceProperties( &deviceProp, devID ) ); //Image4f im( "c:/tmp/tulip.png" ); //Image4f im( "c:/tmp/tulip_1080.png" ); // Jiawen version Image4f im( "../../apps/bilateral_grid/input.png" ); //Image4f im( "c:/tmp/church_panorama_5097x2889.pfm" ); im = im.flipUD(); Array2D< float > data( im.width(), im.height() ); Array2D< float > output( im.width(), im.height() ); for( int y = 0; y < im.height(); ++y ) { for( int x = 0; x < im.width(); ++x ) { Vector3f rgb = im.pixel( x, y ).xyz(); // float lum = ColorUtils::rgb2luminance( rgb ); // data( x, y ) = lum; // jrk: just use red data( x, y ) = rgb[0]; } } testBilateralFilter( data, 8, 0.1f, output ); saveArrayAsImage( output, "bf", 8, 0.1f ); testBilateralFilter( data, 16, 0.1f, output ); saveArrayAsImage( output, "bf", 16, 0.1f ); testBilateralFilter( data, 32, 0.2f, output ); saveArrayAsImage( output, "bf", 32, 0.2f ); testBilateralFilter( data, 64, 0.4f, output ); saveArrayAsImage( output, "bf", 64, 0.4f ); #if 0 Image4f edgeImage( "/tmp/step.png" ); edgeImage.flipUD(); Array2D< float > edge( im.width(), im.height() ); for( int y = 0; y < im.height(); ++y ) { for( int x = 0; x < im.width(); ++x ) { edge( x, y ) = edgeImage.pixel( x, y ).x; } } testCrossBilateralFilter( data, edge, 16, 0.1f, output ); saveArrayAsImage( output, "cbf", 16, 0.1f ); testCrossBilateralFilter( data, edge, 32, 0.2f, output ); saveArrayAsImage( output, "cbf", 32, 0.2f ); testCrossBilateralFilter( data, edge, 64, 0.4f, output ); saveArrayAsImage( output, "cbf", 64, 0.4f ); #endif return 0; }
int main(int argc, char **argv) { char *precisionChoice; cutGetCmdLineArgumentstr(argc, (const char **)argv, "type", &precisionChoice); if(precisionChoice == NULL) useDoublePrecision = 0; else { if(!strcasecmp(precisionChoice, "double")) useDoublePrecision = 1; else useDoublePrecision = 0; } const int MAX_GPU_COUNT = 8; const int OPT_N = 256; const int PATH_N = 1 << 18; const unsigned int SEED = 777; //Input data array TOptionData optionData[OPT_N]; //Final GPU MC results TOptionValue callValueGPU[OPT_N]; //"Theoretical" call values by Black-Scholes formula float callValueBS[OPT_N]; //Solver config TOptionPlan optionSolver[MAX_GPU_COUNT]; //OS thread ID CUTThread threadID[MAX_GPU_COUNT]; //GPU number present in the system int GPU_N; int gpuBase, gpuIndex; int i; //Timer unsigned int hTimer; float time; double delta, ref, sumDelta, sumRef, sumReserve; cutilSafeCall( cudaGetDeviceCount(&GPU_N) ); cutilCheckError( cutCreateTimer(&hTimer) ); #ifdef _EMU GPU_N = 1; #endif printf("main(): generating input data...\n"); srand(123); for(i = 0; i < OPT_N; i++) { optionData[i].S = randFloat(5.0f, 50.0f); optionData[i].X = randFloat(10.0f, 25.0f); optionData[i].T = randFloat(1.0f, 5.0f); optionData[i].R = 0.06f; optionData[i].V = 0.10f; callValueGPU[i].Expected = -1.0f; callValueGPU[i].Confidence = -1.0f; } printf("main(): starting %i host threads...\n", GPU_N); //Get option count for each GPU for(i = 0; i < GPU_N; i++) optionSolver[i].optionCount = OPT_N / GPU_N; //Take into account cases with "odd" option counts for(i = 0; i < (OPT_N % GPU_N); i++) optionSolver[i].optionCount++; //Assign GPU option ranges gpuBase = 0; for(i = 0; i < GPU_N; i++) { optionSolver[i].device = i; optionSolver[i].optionData = optionData + gpuBase; optionSolver[i].callValue = callValueGPU + gpuBase; optionSolver[i].seed = SEED; optionSolver[i].pathN = PATH_N; gpuBase += optionSolver[i].optionCount; } //Start the timer cutilCheckError( cutResetTimer(hTimer) ); cutilCheckError( cutStartTimer(hTimer) ); //Start CPU thread for each GPU for(gpuIndex = 0; gpuIndex < GPU_N; gpuIndex++) threadID[gpuIndex] = cutStartThread((CUT_THREADROUTINE)solverThread, &optionSolver[gpuIndex]); //Stop the timer cutilCheckError( cutStopTimer(hTimer) ); time = cutGetTimerValue(hTimer); printf("main(): waiting for GPU results...\n"); cutWaitForThreads(threadID, GPU_N); printf("main(): GPU statistics\n"); for(i = 0; i < GPU_N; i++) { printf("GPU #%i\n", optionSolver[i].device); printf("Options : %i\n", optionSolver[i].optionCount); printf("Simulation paths: %i\n", optionSolver[i].pathN); } printf("\nTotal time (ms.): %f\n", time); printf("Options per sec.: %f\n", OPT_N / (time * 0.001)); #ifdef DO_CPU printf("main(): running CPU MonteCarlo...\n"); TOptionValue callValueCPU; sumDelta = 0; sumRef = 0; for(i = 0; i < OPT_N; i++) { MonteCarloCPU( callValueCPU, optionData[i], NULL, PATH_N ); delta = fabs(callValueCPU.Expected - callValueGPU[i].Expected); ref = callValueCPU.Expected; sumDelta += delta; sumRef += fabs(ref); printf("Exp : %f | %f\t", callValueCPU.Expected, callValueGPU[i].Expected); printf("Conf: %f | %f\n", callValueCPU.Confidence, callValueGPU[i].Confidence); } printf("L1 norm: %E\n", sumDelta / sumRef); #endif printf("main(): comparing Monte Carlo and Black-Scholes results...\n"); sumDelta = 0; sumRef = 0; sumReserve = 0; for(i = 0; i < OPT_N; i++) { BlackScholesCall( callValueBS[i], optionData[i] ); delta = fabs(callValueBS[i] - callValueGPU[i].Expected); ref = callValueBS[i]; sumDelta += delta; sumRef += fabs(ref); if(delta > 1e-6) sumReserve += callValueGPU[i].Confidence / delta; #ifdef PRINT_RESULTS printf("BS: %f; delta: %E\n", callValueBS[i], delta); #endif } sumReserve /= OPT_N; printf("L1 norm : %E\n", sumDelta / sumRef); printf("Average reserve: %f\n", sumReserve); printf((sumReserve > 1.0f) ? "PASSED\n" : "FAILED.\n"); printf("Shutting down...\n"); cutilCheckError( cutDeleteTimer(hTimer) ); cutilExit(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { po::options_description desc("mephitis options"); desc.add_options() ("help", "this help") ("archive,a", po::value<std::string>(), "archive file") ("device,D", po::value<unsigned>(), "device") ("frames,f", po::value<unsigned>(), "number frames to display") ("voxelsize,s", po::value<float>(), "size of voxels (in centimeters)") ("no-processing,n", "display only, no processing") ("verbose,v", "verbose") ("debug,d", "debug") ; po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); if (vm.count("help")) { std::cout << desc << "\n"; exit(0); } std::string archivefile = vm.count("archive") ? vm["archive"].as<std::string>() : "ARCHIVE_NOT_SPECIFIED"; int device = vm.count("device") ? vm["device"].as<unsigned>() : 0; voxelsize = vm.count("voxelsize") ? vm["voxelsize"].as<float>() : 1.0; no_processing = vm.count("no-processing"); if (vm.count("frames")) nframes = vm["frames"].as<unsigned>(); ms::verbose = vm.count("verbose"); ms::debug = vm.count("debug"); ms::regulator<ms::coalesced_points<ms::host> > regulator(queue, 3); ms::archive_source<ms::coalesced_points<ms::host> > source(archivefile, boost::ref(regulator), 5); boost::thread popperthread(boost::bind(&ms::archive_source<ms::coalesced_points<ms::host> >::run, boost::ref(source))); std::cout << "bag popper thread running, sleeping 1 second\n"; boost::this_thread::sleep(boost::posix_time::seconds(1)); std::cout << "done sleeping\n"; // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. if (CUTFalse == initGL(&argc, argv)) { return CUTFalse; } mephitis::set_device(device, true); monitor.run(5); // register callbacks glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); atexit(cleanup); // start rendering mainloop glutMainLoop(); cudaThreadExit(); cutilExit(argc, argv); }
int main(int argc, char **argv){ GpuProfiling::initProf(); uchar *h_Data; uint *h_HistogramCPU, *h_HistogramGPU; uchar *d_Data; uint *d_Histogram; uint hTimer; int PassFailFlag = 1; uint byteCount = 64 * 1048576; uint uiSizeMult = 1; cudaDeviceProp deviceProp; deviceProp.major = 0; deviceProp.minor = 0; int dev; printf("%s\n", sSDKsample); // set logfile name and start logs shrSetLogFileName ("histogram.txt"); shrLog("%s Starting...\n\n", argv[0]); //Use command-line specified CUDA device, otherwise use device with highest Gflops/s if( shrCheckCmdLineFlag(argc, (const char**)argv, "device") ) cutilDeviceInit(argc, argv); else cudaSetDevice( dev = cutGetMaxGflopsDeviceId() ); cutilSafeCall( cudaChooseDevice(&dev, &deviceProp) ); cutilSafeCall( cudaGetDeviceProperties(&deviceProp, dev) ); printf("CUDA device [%s] has %d Multi-Processors, Compute %d.%d\n", deviceProp.name, deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor); int version = deviceProp.major * 0x10 + deviceProp.minor; if(version < 0x11) { printf("There is no device supporting a minimum of CUDA compute capability 1.1 for this SDK sample\n"); printf("PASSED"); cudaThreadExit(); exit(0); cutilExit(argc, argv); } cutilCheckError(cutCreateTimer(&hTimer)); // Optional Command-line multiplier to increase size of array to histogram if (shrGetCmdLineArgumentu(argc, (const char**)argv, "sizemult", &uiSizeMult)) { uiSizeMult = CLAMP(uiSizeMult, 1, 10); byteCount *= uiSizeMult; } shrLog("Initializing data...\n"); shrLog("...allocating CPU memory.\n"); h_Data = (uchar *)malloc(byteCount); h_HistogramCPU = (uint *)malloc(HISTOGRAM256_BIN_COUNT * sizeof(uint)); h_HistogramGPU = (uint *)malloc(HISTOGRAM256_BIN_COUNT * sizeof(uint)); shrLog("...generating input data\n"); srand(2009); for(uint i = 0; i < byteCount; i++) h_Data[i] = rand() % 256; shrLog("...allocating GPU memory and copying input data\n\n"); cutilSafeCall( cudaMalloc((void **)&d_Data, byteCount ) ); cutilSafeCall( cudaMalloc((void **)&d_Histogram, HISTOGRAM256_BIN_COUNT * sizeof(uint) ) ); cutilSafeCall( cudaMemcpy(d_Data, h_Data, byteCount, cudaMemcpyHostToDevice) ); { shrLog("Starting up 64-bin histogram...\n\n"); initHistogram64(); shrLog("Running 64-bin GPU histogram for %u bytes (%u runs)...\n\n", byteCount, numRuns); for(int iter = -1; iter < numRuns; iter++){ //iter == -1 -- warmup iteration if(iter == 0){ cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutResetTimer(hTimer) ); cutilCheckError( cutStartTimer(hTimer) ); } histogram64(d_Histogram, d_Data, byteCount); } cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutStopTimer(hTimer)); double dAvgSecs = 1.0e-3 * (double)cutGetTimerValue(hTimer) / (double)numRuns; shrLog("histogram64() time (average) : %.5f sec, %.4f MB/sec\n\n", dAvgSecs, ((double)byteCount * 1.0e-6) / dAvgSecs); shrLogEx(LOGBOTH | MASTER, 0, "histogram64, Throughput = %.4f MB/s, Time = %.5f s, Size = %u Bytes, NumDevsUsed = %u, Workgroup = %u\n", (1.0e-6 * (double)byteCount / dAvgSecs), dAvgSecs, byteCount, 1, HISTOGRAM64_THREADBLOCK_SIZE); shrLog("\nValidating GPU results...\n"); shrLog(" ...reading back GPU results\n"); cutilSafeCall( cudaMemcpy(h_HistogramGPU, d_Histogram, HISTOGRAM64_BIN_COUNT * sizeof(uint), cudaMemcpyDeviceToHost) ); shrLog(" ...histogram64CPU()\n"); histogram64CPU( h_HistogramCPU, h_Data, byteCount ); shrLog(" ...comparing the results...\n"); for(uint i = 0; i < HISTOGRAM64_BIN_COUNT; i++) if(h_HistogramGPU[i] != h_HistogramCPU[i]) PassFailFlag = 0; shrLog(PassFailFlag ? " ...64-bin histograms match\n\n" : " ***64-bin histograms do not match!!!***\n\n" ); shrLog("Shutting down 64-bin histogram...\n\n\n"); closeHistogram64(); } { shrLog("Initializing 256-bin histogram...\n"); initHistogram256(); shrLog("Running 256-bin GPU histogram for %u bytes (%u runs)...\n\n", byteCount, numRuns); for(int iter = -1; iter < numRuns; iter++){ //iter == -1 -- warmup iteration if(iter == 0){ cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutResetTimer(hTimer) ); cutilCheckError( cutStartTimer(hTimer) ); } histogram256(d_Histogram, d_Data, byteCount); } cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutStopTimer(hTimer)); double dAvgSecs = 1.0e-3 * (double)cutGetTimerValue(hTimer) / (double)numRuns; shrLog("histogram256() time (average) : %.5f sec, %.4f MB/sec\n\n", dAvgSecs, ((double)byteCount * 1.0e-6) / dAvgSecs); shrLogEx(LOGBOTH | MASTER, 0, "histogram256, Throughput = %.4f MB/s, Time = %.5f s, Size = %u Bytes, NumDevsUsed = %u, Workgroup = %u\n", (1.0e-6 * (double)byteCount / dAvgSecs), dAvgSecs, byteCount, 1, HISTOGRAM256_THREADBLOCK_SIZE); shrLog("\nValidating GPU results...\n"); shrLog(" ...reading back GPU results\n"); cutilSafeCall( cudaMemcpy(h_HistogramGPU, d_Histogram, HISTOGRAM256_BIN_COUNT * sizeof(uint), cudaMemcpyDeviceToHost) ); shrLog(" ...histogram256CPU()\n"); histogram256CPU( h_HistogramCPU, h_Data, byteCount ); shrLog(" ...comparing the results\n"); for(uint i = 0; i < HISTOGRAM256_BIN_COUNT; i++) if(h_HistogramGPU[i] != h_HistogramCPU[i]) PassFailFlag = 0; shrLog(PassFailFlag ? " ...256-bin histograms match\n\n" : " ***256-bin histograms do not match!!!***\n\n" ); shrLog("Shutting down 256-bin histogram...\n\n\n"); closeHistogram256(); } shrLog("%s - Test Summary\n", sSDKsample); // pass or fail (for both 64 bit and 256 bit histograms) shrLog("%s\n\n", PassFailFlag ? "PASSED" : "FAILED"); GpuProfiling::printResults(); shrLog("Shutting down...\n"); cutilCheckError(cutDeleteTimer(hTimer)); cutilSafeCall( cudaFree(d_Histogram) ); cutilSafeCall( cudaFree(d_Data) ); free(h_HistogramGPU); free(h_HistogramCPU); free(h_Data); cudaThreadExit(); exit(0); shrEXIT(argc, (const char**)argv); }