// Function to clean up and exit //***************************************************************************** void Cleanup(int iExitCode) { // Cleanup allocated objects shrLog("\nStarting Cleanup...\n\n"); if(pbo_source)deletePBO(&pbo_source); if(pbo_dest)deletePBO(&pbo_dest); if(tex_screen)deleteTexture(&tex_screen); if(ckKernel)clReleaseKernel(ckKernel); if(cpProgram)clReleaseProgram(cpProgram); if(cl_pbos[0])clReleaseMemObject(cl_pbos[0]); if(cl_pbos[1])clReleaseMemObject(cl_pbos[1]); if(cqCommandQueue)clReleaseCommandQueue(cqCommandQueue); if(cxGPUContext)clReleaseContext(cxGPUContext); shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", cExecutableName); // finalize logs and leave shrQAFinish2(bQATest, *pArgc, (const char **)pArgv, (iExitCode == 0) ? QA_PASSED : QA_FAILED); exit (iExitCode); }
// QATest sequence without any GL calls //***************************************************************************** void TestNoGL() { // Warmup call to assure OpenCL driver is awake psystem->update(timestep); // Start timer 0 and process n loops on the GPU const int iCycles = 20; shrDeltaT(0); for (int i = 0; i < iCycles; i++) { psystem->update(timestep); } // Get elapsed time and throughput, then log to sample and master logs double dAvgTime = shrDeltaT(0)/(double)iCycles; shrLogEx(LOGBOTH | MASTER, 0, "oclParticles, Throughput = %.4f KParticles/s, Time = %.5f s, Size = %u particles, NumDevsUsed = %u, Workgroup = %u\n", (1.0e-3 * numParticles)/dAvgTime, dAvgTime, numParticles, 1, 0); // Cleanup and exit shrQAFinish2(true, *pArgc, (const char **)pArgv, QA_PASSED); Cleanup (EXIT_SUCCESS); }
// GLUT key event handler // params commented out to remove unused parameter warnings in Linux //***************************************************************************** void KeyboardGL(unsigned char key, int /*x*/, int /*y*/) { switch (key) { case ' ': // toggle pause in simulation computations bPause = !bPause; shrLog("\nSimulation %s...\n", bPause ? "Paused" : "Running"); break; case 13: psystem->update(timestep); renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles()); break; case '\033':// Escape quits case 'Q': // Q quits case 'q': // q quits bNoPrompt = shrTRUE; shrQAFinish2(false, *pArgc, (const char **)pArgv, QA_PASSED); Cleanup(EXIT_SUCCESS); break; case 'T': // Toggles from (T)our mode to standard mode and back case 't': // Toggles from (t)our mode to standard mode and back bTour = bTour ? shrFALSE : shrTRUE; shrLog("\nTour Mode %s...\n", bTour ? "ON" : "OFF"); break; case 'F': // F toggles main graphics display full screen case 'f': // f toggles main graphics display full screen bFullScreen = !bFullScreen; if (bFullScreen) { iGraphicsWinPosX = glutGet(GLUT_WINDOW_X) - 8; iGraphicsWinPosY = glutGet(GLUT_WINDOW_Y) - 30; iGraphicsWinWidth = min(glutGet(GLUT_WINDOW_WIDTH) , glutGet(GLUT_SCREEN_WIDTH) - 2*iGraphicsWinPosX ); iGraphicsWinHeight = min(glutGet(GLUT_WINDOW_HEIGHT), glutGet(GLUT_SCREEN_HEIGHT)- 2*iGraphicsWinPosY ); printf("(x,y)=(%d,%d), (w,h)=(%d,%d)\n", iGraphicsWinPosX, iGraphicsWinPosY, iGraphicsWinWidth, iGraphicsWinHeight); glutFullScreen(); } else { glutPositionWindow(iGraphicsWinPosX, iGraphicsWinPosY); glutReshapeWindow(iGraphicsWinWidth, iGraphicsWinHeight); } shrLog("\nMain Graphics %s...\n", bFullScreen ? "FullScreen" : "Windowed"); break; case 'V': case 'v': if (M_VIEW != mode) { shrLog("\nMouse View Mode...\n"); mode = M_VIEW; } break; case 'M': case 'm': if (M_MOVE != mode) { shrLog("\nMouse Move Mode...\n"); mode = M_MOVE; } break; case 'P': case 'p': displayMode = (ParticleRenderer::DisplayMode) ((displayMode + 1) % ParticleRenderer::PARTICLE_NUM_MODES); break; case 'D': case 'd': psystem->dumpGrid(); break; case 'U': case 'u': break; case 'R': case 'r': displayEnabled = !displayEnabled; break; case '1': ResetSim(1); break; case '2': ResetSim(2); break; case '3': ResetSim(3); break; case '4': ResetSim(4); break; case 'H': case 'h': displaySliders = !displaySliders; break; } // Trigger fps update and call for refresh TriggerFPSUpdate(); }
// Primary GLUT callback loop function //***************************************************************************** void DisplayGL() { // update the simulation, if not paused double dProcessingTime = 0.0; if (!bPause) { // start timer 0 if it's update time if (iFrameCount >= iFrameTrigger) { shrDeltaT(0); } // do the processing psystem->update(timestep); renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles()); // get processing time from timer 0, if it's update time if (iFrameCount >= iFrameTrigger) { dProcessingTime = shrDeltaT(0); } } // Clear last frame glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Add cube glColor3f(1.0, 1.0, 1.0); glutWireCube(2.0); // Add collider glPushMatrix(); float3 p = psystem->getColliderPos(); glTranslatef(p.x, p.y, p.z); glColor3f(0.75, 0.0, 0.5); glutWireSphere(psystem->getColliderRadius(), 30, 15); glPopMatrix(); // Render if (displayEnabled) { renderer->display(displayMode); } // Display user interface if enabled if (displaySliders) { glDisable(GL_DEPTH_TEST); glBlendFunc(GL_ONE_MINUS_DST_COLOR, GL_ZERO); // invert color glEnable(GL_BLEND); params->Render(0, 0); glDisable(GL_BLEND); glEnable(GL_DEPTH_TEST); } // Flip backbuffer to screen glutSwapBuffers(); // Increment the frame counter, and do fps stuff if it's time if (iFrameCount++ > iFrameTrigger) { // Set the display window title char cTitle[256]; iFramesPerSec = (int)((double)iFrameCount/ shrDeltaT(1)); #ifdef GPU_PROFILING if(!bPause) { #ifdef _WIN32 sprintf_s(cTitle, 256, "%s Particles Simulation (%u particles) | %i fps | Proc. t = %.4f s", cProcessor[iProcFlag], numParticles, iFramesPerSec, dProcessingTime); #else sprintf(cTitle, "%s OpenCL Particles Simulation (%u particles) | %i fps | Proc. t = %.4f s", cProcessor[iProcFlag], numParticles, iFramesPerSec, dProcessingTime); #endif } else { #ifdef _WIN32 sprintf_s(cTitle, 256, "%s Particles Simulation (%u particles) (Paused) | %i fps", cProcessor[iProcFlag], numParticles, iFramesPerSec); #else sprintf(cTitle, "%s OpenCL Particles Simulation (%u particles) (Paused) | %i fps", cProcessor[iProcFlag], numParticles, iFramesPerSec); #endif } #else if(!bPause) { #ifdef _WIN32 sprintf_s(cTitle, 256, "%s Particles Simulation (%u particles)", cProcessor[iProcFlag], numParticles); #else sprintf(cTitle, "%s OpenCL Particles Simulation (%u particles)", cProcessor[iProcFlag], numParticles); #endif } else { #ifdef _WIN32 sprintf_s(cTitle, 256, "%s Particles Simulation (%u particles) (Paused)", cProcessor[iProcFlag], numParticles); #else sprintf(cTitle, "%s OpenCL Particles Simulation (%u particles) (Paused)", cProcessor[iProcFlag], numParticles); #endif } #endif glutSetWindowTitle(cTitle); // Log fps and processing info to console and file shrLog("%s\n", cTitle); // Set based options: QuickTest or cycle demo if (iSetCount++ == iTestSets) { if (bNoPrompt) { shrQAFinish2(false, *pArgc, (const char **)pArgv, QA_PASSED); Cleanup(EXIT_SUCCESS); } if (bTour) { static int iOption = 1; ResetSim(++iOption); if (iOption > 3)iOption = 0; } iSetCount = 0; } // reset the frame count and trigger iFrameCount = 0; iFrameTrigger = (iFramesPerSec > 1) ? iFramesPerSec * 2 : 1; } }
// Main program //***************************************************************************** int main(int argc, char** argv) { // Locals used with command line args int p = 256; // workgroup X dimension int q = 1; // workgroup Y dimension pArgc = &argc; pArgv = argv; shrQAStart(argc, argv); // latch the executable path for other funcs to use cExecutablePath = argv[0]; // start logs and show command line help shrSetLogFileName ("oclNbody.txt"); shrLog("%s Starting...\n\n", cExecutablePath); shrLog("Command line switches:\n"); shrLog(" --qatest\t\tCheck correctness of GPU execution and measure performance)\n"); shrLog(" --noprompt\t\tQuit simulation automatically after a brief period\n"); shrLog(" --n=<numbodies>\tSpecify # of bodies to simulate (default = %d)\n", numBodies); shrLog(" --double\t\tUse double precision floating point values for simulation\n"); shrLog(" --p=<workgroup X dim>\tSpecify X dimension of workgroup (default = %d)\n", p); shrLog(" --q=<workgroup Y dim>\tSpecify Y dimension of workgroup (default = %d)\n\n", q); // Get command line arguments if there are any and set vars accordingly if (argc > 0) { shrGetCmdLineArgumenti(argc, (const char**)argv, "p", &p); shrGetCmdLineArgumenti(argc, (const char**)argv, "q", &q); shrGetCmdLineArgumenti(argc, (const char**)argv, "n", &numBodies); bDouble = (shrTRUE == shrCheckCmdLineFlag(argc, (const char**)argv, "double")); bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt"); bQATest = shrCheckCmdLineFlag(argc, (const char**)argv, "qatest"); } //Get the NVIDIA platform cl_int ciErrNum = oclGetPlatformID(&cpPlatform); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); shrLog("clGetPlatformID...\n\n"); if (bDouble) { shrLog("Double precision execution...\n\n"); } else { shrLog("Single precision execution...\n\n"); } flopsPerInteraction = bDouble ? 30 : 20; //Get all the devices shrLog("Get the Device info and select Device...\n"); ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &uiNumDevices); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); cdDevices = (cl_device_id *)malloc(uiNumDevices * sizeof(cl_device_id) ); ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, uiNumDevices, cdDevices, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Set target device and Query number of compute units on uiTargetDevice shrLog(" # of Devices Available = %u\n", uiNumDevices); if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE) { uiTargetDevice = CLAMP(uiTargetDevice, 0, (uiNumDevices - 1)); } shrLog(" Using Device %u, ", uiTargetDevice); oclPrintDevName(LOGBOTH, cdDevices[uiTargetDevice]); cl_uint uiNumComputeUnits; clGetDeviceInfo(cdDevices[uiTargetDevice], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uiNumComputeUnits), &uiNumComputeUnits, NULL); shrLog(" # of Compute Units = %u\n", uiNumComputeUnits); //Create the context shrLog("clCreateContext...\n"); cxContext = clCreateContext(0, uiNumDevsUsed, &cdDevices[uiTargetDevice], NULL, NULL, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Create a command-queue shrLog("clCreateCommandQueue...\n\n"); cqCommandQueue = clCreateCommandQueue(cxContext, cdDevices[uiTargetDevice], CL_QUEUE_PROFILING_ENABLE, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Log and config for number of bodies shrLog("Number of Bodies = %d\n", numBodies); switch (numBodies) { case 1024: activeParams.m_clusterScale = 1.52f; activeParams.m_velocityScale = 2.f; break; case 2048: activeParams.m_clusterScale = 1.56f; activeParams.m_velocityScale = 2.64f; break; case 4096: activeParams.m_clusterScale = 1.68f; activeParams.m_velocityScale = 2.98f; break; case 7680: case 8192: activeParams.m_clusterScale = 1.98f; activeParams.m_velocityScale = 2.9f; break; default: case 15360: case 16384: activeParams.m_clusterScale = 1.54f; activeParams.m_velocityScale = 8.f; break; case 30720: case 32768: activeParams.m_clusterScale = 1.44f; activeParams.m_velocityScale = 11.f; break; } if ((q * p) > 256) { p = 256 / q; shrLog("Setting p=%d to maintain %d threads per block\n", p, 256); } if ((q == 1) && (numBodies < p)) { p = numBodies; shrLog("Setting p=%d because # of bodies < p\n", p); } shrLog("Workgroup Dims = (%d x %d)\n\n", p, q); // Initialize OpenGL items if using GL if (bQATest == shrFALSE) { assert(0); /* shrLog("Calling InitGL...\n"); InitGL(&argc, argv); */ } else { shrLog("Skipping InitGL...\n"); } // CL/GL interop disabled bUsePBO = (false && (bQATest == shrFALSE)); InitNbody(cdDevices[uiTargetDevice], cxContext, cqCommandQueue, numBodies, p, q, bUsePBO, bDouble); ResetSim(nbody, numBodies, NBODY_CONFIG_SHELL, bUsePBO); // init timers shrDeltaT(DEMOTIME); // timer 0 is for timing demo periods shrDeltaT(FUNCTIME); // timer 1 is for logging function delta t's shrDeltaT(FPSTIME); // timer 2 is for fps measurement // Standard simulation if (bQATest == shrFALSE) { assert(0); /* shrLog("Running standard oclNbody simulation...\n\n"); glutDisplayFunc(DisplayGL); glutReshapeFunc(ReshapeGL); glutMouseFunc(MouseGL); glutMotionFunc(MotionGL); glutKeyboardFunc(KeyboardGL); glutSpecialFunc(SpecialGL); glutIdleFunc(IdleGL); glutMainLoop(); */ } // Compare to host, profile and write out file for regression analysis if (bQATest == shrTRUE) { bool bTestResults = false; shrLog("Running oclNbody Results Comparison...\n\n"); bTestResults = CompareResults(numBodies); //shrLog("Profiling oclNbody...\n\n"); //RunProfiling(100, (unsigned int)(p * q)); // 100 iterations shrQAFinish(argc, (const char **)argv, bTestResults ? QA_PASSED : QA_FAILED); } else { // Cleanup/exit bNoPrompt = shrTRUE; shrQAFinish2(false, *pArgc, (const char **)pArgv, QA_PASSED); } Cleanup(EXIT_SUCCESS); }
// GLUT key event handler //***************************************************************************** void KeyboardGL(unsigned char key, int /*x*/, int /*y*/) { switch (key) { case ' ': // space toggle computation flag on/off bPause = !bPause; shrLog("\nSim %s...\n\n", bPause ? "Paused" : "Running"); break; case 's': case 'S': // Tilda toggles slider display bShowSliders = !bShowSliders; shrLog("\nSlider Display %s...\n\n", bShowSliders ? "ON" : "OFF"); break; case 'p': // 'p' falls through to 'P' case 'P': // p switched between points and blobs displayMode = (ParticleRenderer::DisplayMode)((displayMode + 1) % ParticleRenderer::PARTICLE_NUM_MODES); break; case 'c': // 'c' falls through to 'C' case 'C': // c switches between cycle demo mode and fixed demo mode bTour = bTour ? shrFALSE : shrTRUE; shrLog("\nTour Mode %s...\n\n", bTour ? "ON" : "OFF"); break; case '[': activeDemo = (activeDemo == 0) ? numDemos - 1 : (activeDemo - 1) % numDemos; SelectDemo(activeDemo); break; case ']': activeDemo = (activeDemo + 1) % numDemos; SelectDemo(activeDemo); break; case 'd': // 'd' falls through to 'D' case 'D': // d toggled main graphics display on/off displayEnabled = !displayEnabled; shrLog("\nMain Graphics Display %s...\n\n", displayEnabled ? "ON" : "OFF"); break; case 'f': // 'f' falls through to 'F' case 'F': // f toggles main graphics display full screen bFullScreen = !bFullScreen; if (bFullScreen) { iGraphicsWinPosX = glutGet(GLUT_WINDOW_X) - 8; iGraphicsWinPosY = glutGet(GLUT_WINDOW_Y) - 30; iGraphicsWinWidth = min(glutGet(GLUT_WINDOW_WIDTH) , glutGet(GLUT_SCREEN_WIDTH) - 2*iGraphicsWinPosX ); iGraphicsWinHeight = min(glutGet(GLUT_WINDOW_HEIGHT), glutGet(GLUT_SCREEN_HEIGHT)- 2*iGraphicsWinPosY ); printf("(x,y)=(%d,%d), (w,h)=(%d,%d)\n", iGraphicsWinPosX, iGraphicsWinPosY, iGraphicsWinWidth, iGraphicsWinHeight); glutFullScreen(); } else { glutPositionWindow(iGraphicsWinPosX, iGraphicsWinPosY); glutReshapeWindow(iGraphicsWinWidth, iGraphicsWinHeight); } shrLog("\nMain Graphics %s...\n\n", bFullScreen ? "FullScreen" : "Windowed"); break; case 'o': // 'o' falls through to 'O' case 'O': // 'O' prints Nbody sim physical parameters activeParams.print(); break; case 'T': // Toggles from (T)our mode to standard mode and back case 't': // Toggles from (t)our mode to standard mode and back bTour = bTour ? shrFALSE : shrTRUE; shrLog("\nTour Mode %s...\n", bTour ? "ON" : "OFF"); break; case '1': ResetSim(nbody, numBodies, NBODY_CONFIG_SHELL, true); break; case '2': ResetSim(nbody, numBodies, NBODY_CONFIG_RANDOM, true); break; case '3': ResetSim(nbody, numBodies, NBODY_CONFIG_EXPAND, true); break; case '\033': // escape quits case '\015': // Enter quits case 'Q': // Q quits case 'q': // q (or escape) quits // Cleanup and quit bNoPrompt = shrTRUE; shrQAFinish2(false, *pArgc, (const char **)pArgv, QA_PASSED); Cleanup(EXIT_SUCCESS); break; } // Trigger fps update and call for refresh TriggerFPSUpdate(); glutPostRedisplay(); }
// Primary GLUT callback loop function //***************************************************************************** void DisplayGL() { // update the simulation, unless paused double dProcessingTime = 0.0; if (!bPause) { // start timer FUNCTIME if it's update time if (iFrameCount >= iFrameTrigger) { shrDeltaT(FUNCTIME); } // Run the simlation computations nbody->update(activeParams.m_timestep); nbody->getArray(BodySystem::BODYSYSTEM_POSITION); // Make graphics work with or without CL/GL interop if (bUsePBO) { renderer->setPBO((unsigned int)nbody->getCurrentReadBuffer(), nbody->getNumBodies()); } else { renderer->setPositions((float*)nbody->getCurrentReadBuffer(), nbody->getNumBodies()); } // get processing time from timer FUNCTIME, if it's update time if (iFrameCount >= iFrameTrigger) { dProcessingTime = shrDeltaT(FUNCTIME); } } // Redraw main graphics display, if enabled glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); if (displayEnabled) { // view transform glMatrixMode(GL_MODELVIEW); glLoadIdentity(); for (int c = 0; c < 3; ++c) { camera_trans_lag[c] += (camera_trans[c] - camera_trans_lag[c]) * inertia; camera_rot_lag[c] += (camera_rot[c] - camera_rot_lag[c]) * inertia; } glTranslatef(camera_trans_lag[0], camera_trans_lag[1], camera_trans_lag[2]); glRotatef(camera_rot_lag[0], 1.0, 0.0, 0.0); glRotatef(camera_rot_lag[1], 0.0, 1.0, 0.0); renderer->setSpriteSize(activeParams.m_pointSize); renderer->display(displayMode); } // Display user interface if enabled if (bShowSliders) { glBlendFunc(GL_ONE_MINUS_DST_COLOR, GL_ZERO); // invert color glEnable(GL_BLEND); paramlist->Render(0, 0); glDisable(GL_BLEND); } // Flip backbuffer to screen glutSwapBuffers(); // If frame count has triggerd, increment the frame counter, and do fps stuff if (iFrameCount++ > iFrameTrigger) { // If tour mode is enabled & interval has timed out, switch to next tour/demo mode dElapsedTime += shrDeltaT(DEMOTIME); if (bTour && (dElapsedTime > demoTime)) { dElapsedTime = 0.0; activeDemo = (activeDemo + 1) % numDemos; SelectDemo(activeDemo); } // get the perf and fps stats iFramesPerSec = (int)((double)iFrameCount/ shrDeltaT(FPSTIME)); double dGigaInteractionsPerSecond = 0.0; double dGigaFlops = 0.0; ComputePerfStats(dGigaInteractionsPerSecond, dGigaFlops, dProcessingTime, 1); // If not paused, set the display window title, reset trigger and log info char cTitle[256]; if(!bPause) { #ifdef GPU_PROFILING #ifdef _WIN32 sprintf_s(cTitle, "OpenCL for GPU Nbody Demo (%d bodies): %i fps | %0.4f BIPS | %0.4f GFLOP/s", numBodies, iFramesPerSec, dGigaInteractionsPerSecond, dGigaFlops); #else sprintf(cTitle, "OpenCL for GPU Nbody Demo (%d bodies): %i fps | %0.4f BIPS | %0.4f GFLOP/s", numBodies, iFramesPerSec, dGigaInteractionsPerSecond, dGigaFlops); #endif #else #ifdef _WIN32 sprintf_s(cTitle, "OpenCL for GPU Nbody Demo (%d bodies)", numBodies, iFramesPerSec, dGigaInteractionsPerSecond); #else sprintf(cTitle, "OpenCL for GPU Nbody Demo (%d bodies)", numBodies, iFramesPerSec, dGigaInteractionsPerSecond); #endif #endif #ifndef __EMSCRIPTEN__ glutSetWindowTitle(cTitle); #else printf("%s\n",cTitle); #endif // Log fps and processing info to console and file shrLog("%s\n", cTitle); // if doing quick test, exit if ((bNoPrompt) && (!--iTestSets)) { // Cleanup up and quit shrQAFinish2(false, *pArgc, (const char **)pArgv, QA_PASSED); Cleanup(EXIT_SUCCESS); } // reset the frame counter and adjust trigger iFrameCount = 0; iFrameTrigger = (iFramesPerSec > 1) ? iFramesPerSec * 2 : 1; } } #ifndef __EMSCRIPTEN__ glutReportErrors(); #endif }