//////////////////////////////////////////////////////////////////////////////// //! Run a simple test for CUDA //////////////////////////////////////////////////////////////////////////////// CUTBoolean runTest(int argc, char** argv) { if (!cutCheckCmdLineFlag(argc, (const char **)argv, "noqatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. if (CUTFalse == initGL(argc, argv)) { return CUTFalse; } // use command-line specified CUDA device, otherwise use device with highest Gflops/s if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) cutilGLDeviceInit(argc, argv); else { cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); } // Create the CUTIL timer cutilCheckError( cutCreateTimer( &timer)); // register callbacks glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); if (g_bQAReadback) { g_CheckRender = new CheckBackBuffer(window_width, window_height, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } // create VBO createVBO(&vbo); // run the cuda part runCuda(vbo); // check result of Cuda step checkResultCuda(argc, argv, vbo); atexit(cleanup); // start rendering mainloop glutMainLoop(); cudaThreadExit(); return CUTTrue; }
bool initCUDA( int argc, char **argv) { return true; if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) { cutilGLDeviceInit(argc, argv); } else { cudaGLSetGLDevice (cutGetMaxGflopsDeviceId()); } return true; }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { //start logs shrSetLogFileName ("volumeRender.txt"); shrLog("%s Starting...\n\n", argv[0]); if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bQAGLVerify = true; fpsLimit = frameCheckNumber; } if (g_bQAReadback) { // use command-line specified CUDA device, otherwise use device with highest Gflops/s if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilDeviceInit(argc, argv); } else { cudaSetDevice( cutGetMaxGflopsDeviceId() ); } } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL( &argc, argv ); // use command-line specified CUDA device, otherwise use device with highest Gflops/s if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilGLDeviceInit(argc, argv); } else { cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); } /* int device; struct cudaDeviceProp prop; cudaGetDevice( &device ); cudaGetDeviceProperties( &prop, device ); if( !strncmp( "Tesla", prop.name, 5 ) ) { shrLog("This sample needs a card capable of OpenGL and display.\n"); shrLog("Please choose a different device with the -device=x argument.\n"); cutilExit(argc, argv); } */ } // parse arguments char *filename; if (cutGetCmdLineArgumentstr( argc, (const char**) argv, "file", &filename)) { volumeFilename = filename; } int n; if (cutGetCmdLineArgumenti( argc, (const char**) argv, "size", &n)) { volumeSize.width = volumeSize.height = volumeSize.depth = n; } if (cutGetCmdLineArgumenti( argc, (const char**) argv, "xsize", &n)) { volumeSize.width = n; } if (cutGetCmdLineArgumenti( argc, (const char**) argv, "ysize", &n)) { volumeSize.height = n; } if (cutGetCmdLineArgumenti( argc, (const char**) argv, "zsize", &n)) { volumeSize.depth = n; } // load volume data char* path = shrFindFilePath(volumeFilename, argv[0]); if (path == 0) { shrLog("Error finding file '%s'\n", volumeFilename); exit(EXIT_FAILURE); } size_t size = volumeSize.width*volumeSize.height*volumeSize.depth*sizeof(VolumeType); void *h_volume = loadRawFile(path, size); initCuda(h_volume, volumeSize); free(h_volume); cutilCheckError( cutCreateTimer( &timer)); shrLog("Press '=' and '-' to change density\n" " ']' and '[' to change brightness\n" " ';' and ''' to modify transfer function offset\n" " '.' and ',' to modify transfer function scale\n\n"); // calculate new grid size gridSize = dim3(iDivUp(width, blockSize.x), iDivUp(height, blockSize.y)); if (g_bQAReadback) { g_CheckRender = new CheckBackBuffer(width, height, 4, false); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); uint *d_output; cutilSafeCall(cudaMalloc((void**)&d_output, width*height*sizeof(uint))); cutilSafeCall(cudaMemset(d_output, 0, width*height*sizeof(uint))); float modelView[16] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 4.0f, 1.0f }; invViewMatrix[0] = modelView[0]; invViewMatrix[1] = modelView[4]; invViewMatrix[2] = modelView[8]; invViewMatrix[3] = modelView[12]; invViewMatrix[4] = modelView[1]; invViewMatrix[5] = modelView[5]; invViewMatrix[6] = modelView[9]; invViewMatrix[7] = modelView[13]; invViewMatrix[8] = modelView[2]; invViewMatrix[9] = modelView[6]; invViewMatrix[10] = modelView[10]; invViewMatrix[11] = modelView[14]; // call CUDA kernel, writing results to PBO copyInvViewMatrix(invViewMatrix, sizeof(float4)*3); // Start timer 0 and process n loops on the GPU int nIter = 10; for (int i = -1; i < nIter; i++) { if( i == 0 ) { cudaThreadSynchronize(); cutStartTimer(timer); } render_kernel(gridSize, blockSize, d_output, width, height, density, brightness, transferOffset, transferScale); } cudaThreadSynchronize(); cutStopTimer(timer); // Get elapsed time and throughput, then log to sample and master logs double dAvgTime = cutGetTimerValue(timer)/(nIter * 1000.0); shrLogEx(LOGBOTH | MASTER, 0, "volumeRender, Throughput = %.4f MTexels/s, Time = %.5f s, Size = %u Texels, NumDevsUsed = %u, Workgroup = %u\n", (1.0e-6 * width * height)/dAvgTime, dAvgTime, (width * height), 1, blockSize.x * blockSize.y); cutilCheckMsg("Error: render_kernel() execution FAILED"); cutilSafeCall( cudaThreadSynchronize() ); cutilSafeCall( cudaMemcpy(g_CheckRender->imageData(), d_output, width*height*4, cudaMemcpyDeviceToHost) ); g_CheckRender->savePPM(sOriginal[g_Index], true, NULL); if (!g_CheckRender->PPMvsPPM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, THRESHOLD)) { shrLog("\nFAILED\n\n"); } else { shrLog("\nPASSED\n\n"); } cudaFree(d_output); freeCudaBuffers(); if (g_CheckRender) { delete g_CheckRender; g_CheckRender = NULL; } } else { // This is the normal rendering path for VolumeRender glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); glutReshapeFunc(reshape); glutIdleFunc(idle); initPixelBuffer(); if (g_bQAGLVerify) { g_CheckRender = new CheckBackBuffer(width, height, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } atexit(cleanup); glutMainLoop(); } cudaThreadExit(); shrEXIT(argc, (const char**)argv); }
int main(int argc, char** argv) { printf("[%s]\n", sSDKsample); if (argc > 1) { if (cutCheckCmdLineFlag(argc, (const char **)argv, "help")) { printHelp(); } if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bOpenGLQA = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "fbo")) { g_bFBODisplay = true; fpsLimit = frameCheckNumber; } } if (g_bQAReadback) { runAutoTest(argc, argv); } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL( &argc, argv ); // use command-line specified CUDA device if possible, otherwise search for capable device if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilGLDeviceInit(argc, argv); int device; cudaGetDevice( &device ); if( checkCUDAProfile( device ) == false ) { cudaThreadExit(); cutilExit(argc, argv); } } else { //cudaGLSetGLDevice (cutGetMaxGflopsDeviceId() ); int dev = findCapableDevice(argc, argv); if( dev != -1 ) cudaGLSetGLDevice( dev ); else { cudaThreadExit(); cutilExit(argc, argv); } } cutilCheckError(cutCreateTimer(&timer)); cutilCheckError(cutResetTimer(timer)); glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutReshapeFunc(reshape); glutIdleFunc(idle); if (g_bOpenGLQA) { loadDefaultImage( argc, argv ); } if (argc > 1) { char *filename; if (cutGetCmdLineArgumentstr(argc, (const char **)argv, "file", &filename)) { initializeData(filename, argc, argv); } } else { loadDefaultImage( argc, argv ); } // If code is not printing the USage, then we execute this path. if (!bQuit) { if (g_bOpenGLQA) { g_CheckRender = new CheckBackBuffer(wWidth, wHeight, 4); g_CheckRender->setPixelFormat(GL_BGRA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } printf("I: display image\n"); printf("T: display Sobel edge detection (computed with tex)\n"); printf("S: display Sobel edge detection (computed with tex+shared memory)\n"); printf("Use the '-' and '=' keys to change the brightness.\n"); printf("b: switch block filter operation (mean/Sobel)\n"); printf("p: swtich point filter operation (threshold on/off)\n"); fflush(stdout); atexit(cleanup); glutMainLoop(); } } cudaThreadExit(); cutilExit(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { int retVal = 0; retVal = xnInit( argc, argv ); printf("[ %s ]\n", sSDKsample); if (argc > 1) { cutGetCmdLineArgumenti( argc, (const char**) argv, "n", (int *) &numParticles); if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt") ) { g_bQAReadback = true; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bQAGLVerify = true; } } if (g_bQAReadback) { // For Automated testing, we do not use OpenGL/CUDA interop if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) { cutilDeviceInit (argc, argv); } else { cudaSetDevice (cutGetMaxGflopsDeviceId() ); } g_CheckRender = new CheckBackBuffer(winWidth, winHeight, 4, false); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); // This code path is used for Automated Testing initParticles(numParticles, false, false); initParams(); if (emitterOn) { runEmitter(); } SimParams ¶ms = psystem->getParams(); params.cursorPos = make_float3(cursorPosLag.x, cursorPosLag.y, cursorPosLag.z); psystem->step(timestep); float4 *pos = NULL, *vel = NULL; int g_TotalErrors = 0; psystem->dumpBin(&pos, &vel); g_CheckRender->dumpBin(pos, numParticles*sizeof(float4), "smokeParticles_pos.bin"); g_CheckRender->dumpBin(vel, numParticles*sizeof(float4), "smokeParticles_vel.bin"); if (!g_CheckRender->compareBin2BinFloat("smokeParticles_pos.bin", sRefBin[0], numParticles*sizeof(float4), MAX_EPSILON_ERROR, THRESHOLD)) g_TotalErrors++; if (!g_CheckRender->compareBin2BinFloat("smokeParticles_vel.bin", sRefBin[1], numParticles*sizeof(float4), MAX_EPSILON_ERROR, THRESHOLD)) g_TotalErrors++; delete psystem; delete g_CheckRender; printf("%s\n", (g_TotalErrors > 0) ? "FAILED" : "PASSED"); cudaThreadExit(); } else { // Normal smokeParticles rendering path // 1st initialize OpenGL context, so we can properly set the GL for CUDA. // This is needed to achieve optimal performance with OpenGL/CUDA interop. initGL( &argc, argv ); if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) { cutilGLDeviceInit (argc, argv); } else { cudaGLSetGLDevice (cutGetMaxGflopsDeviceId() ); } if (g_bQAGLVerify) { g_CheckRender = new CheckBackBuffer(winWidth, winHeight, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } // This is the normal code path for SmokeParticles initParticles(numParticles, true, true); initParams(); initMenus(); glutDisplayFunc(display); glutReshapeFunc(reshape); glutMouseFunc(mouse); glutMotionFunc(motion); glutKeyboardFunc(key); glutKeyboardUpFunc(keyUp); glutSpecialFunc(special); glutIdleFunc(idle); glutMainLoop(); } cutilExit(argc, argv); return retVal; }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { shrQAStart(argc, argv); // start logs shrSetLogFileName ("bilateralFilter.txt"); shrLog("%s Starting...\n\n", argv[0]); // use command-line specified CUDA device, otherwise use device with highest Gflops/s cutGetCmdLineArgumenti( argc, (const char**) argv, "threads", &nthreads ); cutGetCmdLineArgumenti( argc, (const char**) argv, "radius", &filter_radius); // load image to process loadImageData(argc, argv); if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { // Running CUDA kernel (bilateralFilter) without visualization (QA Testing/Verification) runAutoTest(argc, argv); shrQAFinishExit(argc, (const char **)argv, (g_TotalErrors == 0 ? QA_PASSED : QA_FAILED)); } else if (cutCheckCmdLineFlag(argc, (const char **)argv, "benchmark")) { // Running CUDA kernel (bilateralFilter) in Benchmarking Mode runBenchmark(argc, argv); shrQAFinishExit(argc, (const char **)argv, (g_TotalErrors == 0 ? QA_PASSED : QA_FAILED)); } else { // Running CUDA kernel (bilateralFilter) in CUDA + OpenGL Visualization Mode if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) { printf("[%s]\n", argv[0]); printf(" Does not explicitly support -device=n in OpenGL mode\n"); printf(" To use -device=n, the sample must be running w/o OpenGL\n\n"); printf(" > %s -device=n -qatest\n", argv[0]); printf("exiting...\n"); exit(0); } // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL( argc, argv ); if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) { cutilGLDeviceInit(argc, argv); } else { cudaGLSetGLDevice (cutGetMaxGflopsDeviceId() ); } initCuda(); initOpenGL(); } atexit(cleanup); printf("Running Standard Demonstration with GLUT loop...\n\n"); printf("Press '+' and '-' to change number of iterations\n" "Press LEFT and RIGHT change euclidean delta\n" "Press UP and DOWN to change gaussian delta\n" "Press '1' to show original image\n" "Press '2' to show result\n\n"); glutMainLoop(); cutilDeviceReset(); shrQAFinishExit(argc, (const char **)argv, (g_TotalErrors == 0 ? QA_PASSED : QA_FAILED)); }
////////////////////////////////////////////////////////////////////////////// // Program main ////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { bool bTestResults = true; shrQAStart(argc, argv); if( cutCheckCmdLineFlag(argc, (const char**)argv, "help") ) { showHelp(); return 0; } shrLog("Run \"nbody -benchmark [-n=<numBodies>]\" to measure perfomance.\n"); shrLog("\t-fullscreen (run n-body simulation in fullscreen mode)\n"); shrLog("\t-fp64 (use double precision floating point values for simulation)\n"); shrLog("\t-numdevices=N (use first N CUDA devices for simulation)\n"); // shrLog("\t-hostmem (stores simulation data in host memory)\n"); // shrLog("\t-cpu (performs simulation on the host)\n"); shrLog("\n"); bFullscreen = (cutCheckCmdLineFlag(argc, (const char**) argv, "fullscreen") != 0); if (bFullscreen) bShowSliders = false; benchmark = (cutCheckCmdLineFlag(argc, (const char**) argv, "benchmark") != 0); compareToCPU = ((cutCheckCmdLineFlag(argc, (const char**) argv, "compare") != 0) || (cutCheckCmdLineFlag(argc, (const char**) argv, "qatest") != 0)); QATest = (cutCheckCmdLineFlag(argc, (const char**) argv, "qatest") != 0); useHostMem = (cutCheckCmdLineFlag(argc, (const char**) argv, "hostmem") != 0); fp64 = (cutCheckCmdLineFlag(argc, (const char**) argv, "fp64") != 0); flopsPerInteraction = fp64 ? 30 : 20; useCpu = (cutCheckCmdLineFlag(argc, (const char**) argv, "cpu") != 0); cutGetCmdLineArgumenti(argc, (const char**) argv, "numdevices", &numDevsRequested); // for multi-device we currently require using host memory -- the devices share // data via the host if (numDevsRequested > 1) useHostMem = true; int numDevsAvailable = 0; bool customGPU = false; cudaGetDeviceCount(&numDevsAvailable); if (numDevsAvailable < numDevsRequested) { shrLog("Error: only %d Devices available, %d requested. Exiting.\n", numDevsAvailable, numDevsRequested); shrQAFinishExit(argc, (const char **)argv, QA_PASSED); } shrLog("> %s mode\n", bFullscreen ? "Fullscreen" : "Windowed"); shrLog("> Simulation data stored in %s memory\n", useHostMem ? "system" : "video" ); shrLog("> %s precision floating point simulation\n", fp64 ? "Double" : "Single"); shrLog("> %d Devices used for simulation\n", numDevsRequested); int devID; cudaDeviceProp props; // Initialize GL and GLUT if necessary if (!benchmark && !compareToCPU) { initGL(&argc, argv); initParameters(); } if (useCpu) { useHostMem = true; compareToCPU = false; bSupportDouble = true; #ifdef OPENMP shrLog("> Simulation with CPU using OpenMP\n"); #else shrLog("> Simulation with CPU\n"); #endif } else { // Now choose the CUDA Device // Either without GL interop: if (benchmark || compareToCPU || useHostMem) { // Note if we are using host memory for the body system, we // don't use CUDA-GL interop. if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { devID = cutilDeviceInit(argc, argv); if (devID < 0) { printf("exiting...\n"); shrQAFinishExit(argc, (const char **)argv, QA_PASSED); } customGPU = true; } else { devID = cutGetMaxGflopsDeviceId(); cudaSetDevice( devID ); } } else // or with GL interop: { if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilGLDeviceInit(argc, argv); customGPU = true; } else { devID = cutGetMaxGflopsDeviceId(); cudaGLSetGLDevice( devID ); } } cutilSafeCall(cudaGetDevice(&devID)); cutilSafeCall(cudaGetDeviceProperties(&props, devID)); bSupportDouble = true; #if CUDART_VERSION < 4000 if (numDevsRequested > 1) { shrLog("MultiGPU n-body requires CUDA 4.0 or later\n"); cutilDeviceReset(); shrQAFinishExit(argc, (const char**)argv, QA_PASSED); } #endif // Initialize devices if(numDevsRequested > 1 && customGPU) { printf("You can't use --numdevices and --device at the same time.\n"); shrQAFinishExit(argc, (const char**)argv, QA_PASSED); } if(customGPU) { cudaDeviceProp props; cutilSafeCall(cudaGetDeviceProperties(&props, devID)); shrLog("> Compute %d.%d CUDA device: [%s]\n", props.major, props.minor, props.name); } else { for (int i = 0; i < numDevsRequested; i++) { cudaDeviceProp props; cutilSafeCall(cudaGetDeviceProperties(&props, i)); shrLog("> Compute %d.%d CUDA device: [%s]\n", props.major, props.minor, props.name); if (useHostMem) { #if CUDART_VERSION >= 2020 if(!props.canMapHostMemory) { fprintf(stderr, "Device %d cannot map host memory!\n", devID); cutilDeviceReset(); shrQAFinishExit(argc, (const char **)argv, QA_PASSED); } if (numDevsRequested > 1) cutilSafeCall(cudaSetDevice(i)); cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost)); #else fprintf(stderr, "This CUDART version does not support <cudaDeviceProp.canMapHostMemory> field\n"); cutilDeviceReset(); shrQAFinishExit(argc, (const char **)argv, QA_PASSED); #endif } } // CC 1.2 and earlier do not support double precision if (props.major*10 + props.minor <= 12) bSupportDouble = false; } //if(numDevsRequested > 1) // cutilSafeCall(cudaSetDevice(devID)); if (fp64 && !bSupportDouble) { fprintf(stderr, "One or more of the requested devices does not support double precision floating-point\n"); cutilDeviceReset(); shrQAFinishExit(argc, (const char **)argv, QA_PASSED); } } numIterations = 0; p = 0; q = 1; cutGetCmdLineArgumenti(argc, (const char**) argv, "i", &numIterations); cutGetCmdLineArgumenti(argc, (const char**) argv, "p", &p); cutGetCmdLineArgumenti(argc, (const char**) argv, "q", &q); if (p == 0) // p not set on command line { p = 256; if (q * p > 256) { p = 256 / q; shrLog("Setting p=%d, q=%d to maintain %d threads per block\n", p, q, 256); } } // default number of bodies is #SMs * 4 * CTA size if (useCpu) #ifdef OPENMP numBodies = 8192; #else numBodies = 4096; #endif else if (numDevsRequested == 1)
////////////////////////////////////////////////////////////////////////////// // Program main ////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { printf("Run \"nbody -benchmark [-n=<numBodies>]\" to measure perfomance.\n\n"); bool benchmark = (cutCheckCmdLineFlag(argc, (const char**) argv, "benchmark") != 0); bool compareToCPU = ((cutCheckCmdLineFlag(argc, (const char**) argv, "compare") != 0) || !(cutCheckCmdLineFlag(argc, (const char**) argv, "noqatest") != 0)); bool regression = (cutCheckCmdLineFlag(argc, (const char**) argv, "regression") != 0); int devID; cudaDeviceProp props; // nBody has a mode that allows it to be run without using GL interop if (benchmark || compareToCPU || regression) { /* if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilDeviceInit(argc, argv); } else { devID = cutGetMaxGflopsDeviceId(); cudaSetDevice( devID ); } */ } else { // This mode shows the OpenGL results rendered // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. glutInit(&argc, argv); glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE); glutInitWindowSize(720, 480); glutCreateWindow("CUDA n-body system"); GLenum err = glewInit(); if (GLEW_OK != err) { printf("GLEW Error: %s\n", glewGetErrorString(err)); } else { #if defined(WIN32) wglSwapIntervalEXT(0); #elif defined(LINUX) glxSwapIntervalSGI(0); #endif } initGL(); initParameters(); if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilGLDeviceInit(argc, argv); } else { devID = cutGetMaxGflopsDeviceId(); cudaGLSetGLDevice( devID ); } } // get number of SMs on this GPU cutilSafeCall(cudaGetDevice(&devID)); cutilSafeCall(cudaGetDeviceProperties(&props, devID)); numIterations = 0; int p = 256; int q = 1; cutGetCmdLineArgumenti(argc, (const char**) argv, "i", &numIterations); cutGetCmdLineArgumenti(argc, (const char**) argv, "p", &p); cutGetCmdLineArgumenti(argc, (const char**) argv, "q", &q); // default number of bodies is #SMs * 4 * CTA size numBodies = compareToCPU ? 4096 : p*q*4*props.multiProcessorCount; cutGetCmdLineArgumenti(argc, (const char**) argv, "n", &numBodies); switch (numBodies) { case 1024: activeParams.m_clusterScale = 1.52f; activeParams.m_velocityScale = 2.f; break; case 2048: activeParams.m_clusterScale = 1.56f; activeParams.m_velocityScale = 2.64f; break; case 4096: activeParams.m_clusterScale = 1.68f; activeParams.m_velocityScale = 2.98f; break; case 8192: activeParams.m_clusterScale = 1.98f; activeParams.m_velocityScale = 2.9f; break; default: case 16384: activeParams.m_clusterScale = 1.54f; activeParams.m_velocityScale = 8.f; break; case 32768: activeParams.m_clusterScale = 1.44f; activeParams.m_velocityScale = 11.f; break; } if (q * p > 256) { p = 256 / q; printf("Setting p=%d, q=%d to maintain %d threads per block\n", p, q, 256); } if (q == 1 && numBodies < p) { p = numBodies; } init(numBodies, p, q, !(benchmark || compareToCPU)); reset(nbody, numBodies, NBODY_CONFIG_SHELL, !(benchmark || compareToCPU)); if (benchmark) { if (numIterations <= 0) numIterations = 100; runBenchmark(numIterations); } else if (compareToCPU || regression) { compareResults(regression, numBodies); } else { glutDisplayFunc(display); glutReshapeFunc(reshape); glutMouseFunc(mouse); glutMotionFunc(motion); glutKeyboardFunc(key); glutSpecialFunc(special); glutIdleFunc(idle); cutilSafeCall(cudaEventRecord(startEvent, 0)); glutMainLoop(); } if (nbodyCPU) delete nbodyCPU; if (nbodyCUDA) delete nbodyCUDA; if (hPos) delete [] hPos; if (hVel) delete [] hVel; if (hColor) delete [] hColor; cutilSafeCall(cudaEventDestroy(startEvent)); cutilSafeCall(cudaEventDestroy(stopEvent)); cutilCheckError(cutDeleteTimer(demoTimer)); return 0; }
int main(int argc, char** argv) { // EDISON ////////////////////////////////////////////////////////////////// sigmaS = 7.0f; sigmaR = 6.5f; edison.minRegion = 20.0f; cutLoadPPMub("image.ppm", &edison.inputImage_, &width, &height); edison.meanShift(); cutSavePPMub("segmimage.ppm", edison.segmImage_, width, height); cutSavePPMub("filtimage.ppm", edison.filtImage_, width, height); unsigned char data[height * width]; memset(data, 0, height * width * sizeof(unsigned char)); for(int i = 0; i < edison.numBoundaries_; i++) { data[edison.boundaries_[i]] = 255; } cutSavePGMub("bndyimage.pgm", data, width, height); //return 0; // EDISON ////////////////////////////////////////////////////////////////// if (argc > 1) { if (cutCheckCmdLineFlag(argc, (const char **)argv, "help")) { printHelp(); } if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bOpenGLQA = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "fbo")) { g_bFBODisplay = true; fpsLimit = frameCheckNumber; } } if (g_bQAReadback) { runAutoTest(argc, argv); } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL( argc, argv ); // use command-line specified CUDA device, otherwise use device with highest Gflops/s if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilGLDeviceInit(argc, argv); } else { cudaGLSetGLDevice (cutGetMaxGflopsDeviceId() ); } int device; struct cudaDeviceProp prop; cudaGetDevice( &device ); cudaGetDeviceProperties( &prop, device ); if(!strncmp( "Tesla", prop.name, 5 )) { printf("This sample needs a card capable of OpenGL and display.\n"); printf("Please choose a different device with the -device=x argument.\n"); cudaThreadExit(); cutilExit(argc, argv); } cutilCheckError(cutCreateTimer(&timer)); cutilCheckError(cutResetTimer(timer)); glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutReshapeFunc(reshape); glutIdleFunc(idle); if (g_bOpenGLQA) { loadDefaultImage( argv[0] ); } if (argc > 1) { char *filename; if (cutGetCmdLineArgumentstr(argc, (const char **)argv, "file", &filename)) { initializeData(filename); } } else { loadDefaultImage( argv[0]); } // If code is not printing the USage, then we execute this path. if (!bQuit) { if (g_bOpenGLQA) { g_CheckRender = new CheckBackBuffer(wWidth, wHeight, 4); g_CheckRender->setPixelFormat(GL_BGRA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } printf("I: display image\n"); printf("T: display Sobel edge detection (computed with tex)\n"); printf("S: display Sobel edge detection (computed with tex+shared memory)\n"); printf("Use the '-' and '=' keys to change the brightness.\n"); fflush(stdout); atexit(cleanup); glutMainLoop(); } } cudaThreadExit(); cutilExit(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { // use command-line specified CUDA device, otherwise use device with highest Gflops/s if (!cutCheckCmdLineFlag(argc, (const char **)argv, "noqatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } if (argc > 1) { if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bOpenGLQA = true; fpsLimit = frameCheckNumber; } } printf("[%s] ", sSDKsample); if (g_bQAReadback) printf("(Automated Testing)\n"); if (g_bOpenGLQA) printf("(OpenGL Readback)\n"); // Get the path of the filename char *filename; if (cutGetCmdLineArgumentstr(argc, (const char**) argv, "image", &filename)) { image_filename = filename; } // load image char* image_path = cutFindFilePath(image_filename, argv[0]); if (image_path == 0) { fprintf(stderr, "Error finding image file '%s'\n", image_filename); cudaThreadExit(); exit(EXIT_FAILURE); } cutilCheckError( cutLoadPPM4ub(image_path, (unsigned char **) &h_img, &width, &height)); if (!h_img) { printf("Error opening file '%s'\n", image_path); cudaThreadExit(); exit(-1); } printf("Loaded '%s', %d x %d pixels\n", image_path, width, height); cutGetCmdLineArgumenti(argc, (const char**) argv, "threads", &nthreads); cutGetCmdLineArgumentf(argc, (const char**) argv, "sigma", &sigma); runBenchmark = cutCheckCmdLineFlag(argc, (const char**) argv, "bench"); int device; struct cudaDeviceProp prop; cudaGetDevice( &device ); cudaGetDeviceProperties( &prop, device ); if( !strncmp( "Tesla", prop.name, 5 ) ) { printf("Tesla card detected, running the test in benchmark mode (no OpenGL display)\n"); // runBenchmark = CUTTrue; g_bQAReadback = true; } // Benchmark or AutoTest mode detected, no OpenGL if (runBenchmark == CUTTrue || g_bQAReadback) { if( cutCheckCmdLineFlag( argc, (const char **)argv, "device" ) ) cutilDeviceInit( argc, argv ); else cudaSetDevice( cutGetMaxGflopsDeviceId() ); } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL(argc, argv); if( cutCheckCmdLineFlag( argc, (const char **)argv, "device" ) ) cutilGLDeviceInit( argc, argv ); else cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); } initCudaBuffers(); if (g_bOpenGLQA) { g_CheckRender = new CheckBackBuffer(width, height, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } if (g_bQAReadback) { // This is the automated testing path g_CheckRender = new CheckBackBuffer(width, height, 4, false); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); runAutoTest(argc, argv); cleanup(); cudaThreadExit(); cutilExit(argc, argv); } if (runBenchmark) { benchmark(100); cleanup(); cudaThreadExit(); exit(0); } initGLBuffers(); atexit(cleanup); glutMainLoop(); cudaThreadExit(); cutilExit(argc, argv); }