void initGL(int *argc, char** argv) { glutInit( argc, argv ); glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); glutInitWindowSize(wWidth, wHeight); glutCreateWindow("Cuda Edge Detection"); glewInit(); if (g_bFBODisplay) { if (!glewIsSupported( "GL_VERSION_2_0 GL_ARB_fragment_program GL_EXT_framebuffer_object" )) { fprintf(stderr, "Error: failed to get minimal extensions for demo\n"); fprintf(stderr, "This sample requires:\n"); fprintf(stderr, " OpenGL version 2.0\n"); fprintf(stderr, " GL_ARB_fragment_program\n"); fprintf(stderr, " GL_EXT_framebuffer_object\n"); cudaThreadExit(); cutilExit(*argc, argv); } } else { if (!glewIsSupported( "GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object" )) { fprintf(stderr, "Error: failed to get minimal extensions for demo\n"); fprintf(stderr, "This sample requires:\n"); fprintf(stderr, " OpenGL version 1.5\n"); fprintf(stderr, " GL_ARB_vertex_buffer_object\n"); fprintf(stderr, " GL_ARB_pixel_buffer_object\n"); cudaThreadExit(); cutilExit(*argc, argv); } } }
void initializeData(char *file, int argc, char **argv) { GLint bsize; unsigned int w, h; size_t file_length= strlen(file); if (!strcmp(&file[file_length-3], "pgm")) { if (cutLoadPGMub(file, &pixels, &w, &h) != CUTTrue) { printf("Failed to load image file: %s\n", file); exit(-1); } g_Bpp = 1; } else if (!strcmp(&file[file_length-3], "ppm")) { if (cutLoadPPM4ub(file, &pixels, &w, &h) != CUTTrue) { printf("Failed to load image file: %s\n", file); exit(-1); } g_Bpp = 4; } else { cudaThreadExit(); cutilExit(argc, argv); } imWidth = (int)w; imHeight = (int)h; setupTexture(imWidth, imHeight, pixels, g_Bpp); // copy function pointer tables to host side for later use setupFunctionTables(); memset(pixels, 0x0, g_Bpp * sizeof(Pixel) * imWidth * imHeight); if (!g_bQAReadback) { // use OpenGL Path glGenBuffers(1, &pbo_buffer); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer); glBufferData(GL_PIXEL_UNPACK_BUFFER, g_Bpp * sizeof(Pixel) * imWidth * imHeight, pixels, GL_STREAM_DRAW); glGetBufferParameteriv(GL_PIXEL_UNPACK_BUFFER, GL_BUFFER_SIZE, &bsize); if ((GLuint)bsize != (g_Bpp * sizeof(Pixel) * imWidth * imHeight)) { printf("Buffer object (%d) has incorrect size (%d).\n", (unsigned)pbo_buffer, (unsigned)bsize); cudaThreadExit(); cutilExit(argc, argv); } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); // register this buffer object with CUDA cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo_buffer, cudaGraphicsMapFlagsWriteDiscard)); glGenTextures(1, &texid); glBindTexture(GL_TEXTURE_2D, texid); glTexImage2D(GL_TEXTURE_2D, 0, ((g_Bpp==1) ? GL_LUMINANCE : GL_BGRA), imWidth, imHeight, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL); glBindTexture(GL_TEXTURE_2D, 0); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glPixelStorei(GL_PACK_ALIGNMENT, 1); } }
// Function to perform final cleanup, which makes sure that all // CUDA data is deallocated, and thread exist is called. Must be // called before destructor is called! void NLConsoleInterface::finalCleanUp() { _synthesizer->finalCleanUp(); cudaThreadExit(); qDebug() << Stats::instance().allTimerStatistics(); }
// Main program int main(int argc, char** argv) { // Create the CUTIL timer cutilCheckError( cutCreateTimer( &timer)); if (CUTFalse == initGL(argc, argv)) { return CUTFalse; } initCuda(argc, argv); CUT_CHECK_ERROR_GL(); // register callbacks glutDisplayFunc(fpsDisplay); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); // start rendering mainloop glutMainLoop(); // clean up cudaThreadExit(); cutilExit(argc, argv); }
// Constructor: forwards the time-point counts to the reg_f3d base class,
// sets every device-side pointer member to NULL and calls cudaThreadExit().
reg_f3d_gpu<T>::reg_f3d_gpu(int refTimePoint,int floTimePoint)
    :reg_f3d<T>::reg_f3d(refTimePoint,floTimePoint)
{
    // First-channel images and mask
    this->currentReference_gpu = NULL;
    this->currentFloating_gpu  = NULL;
    this->currentMask_gpu      = NULL;
    this->warped_gpu           = NULL;

    // Second-channel counterparts
    this->currentReference2_gpu    = NULL;
    this->currentFloating2_gpu     = NULL;
    this->warped2_gpu              = NULL;
    this->warpedGradientImage2_gpu = NULL;

    // Transformation and gradient buffers
    this->controlPointGrid_gpu               = NULL;
    this->deformationFieldImage_gpu          = NULL;
    this->warpedGradientImage_gpu            = NULL;
    this->voxelBasedMeasureGradientImage_gpu = NULL;
    this->nodeBasedGradientImage_gpu         = NULL;

    // Optimiser state
    this->conjugateG_gpu               = NULL;
    this->conjugateH_gpu               = NULL;
    this->bestControlPointPosition_gpu = NULL;

    // Similarity measure
    this->logJointHistogram_gpu = NULL;

    NR_CUDA_SAFE_CALL(cudaThreadExit())

#ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu constructor called\n");
#endif
}
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { runTest(argc, argv); cudaThreadExit(); cutilExit(argc, argv); }
// Destructor: announces the shutdown on stdout, calls cudaThreadExit()
// and frees the host-side matrix objects.
SolverThread::~SolverThread()
{
    std::cout << " solver exit\n";

    // NOTE(review): cudaThreadExit() runs before the two deletes below.
    // If either object frees device memory in its destructor, that would
    // happen after this call - confirm the order is intended.
    cudaThreadExit();

    delete m_hostK;
    delete m_stiffnessMatrix;
}
//-------------------------------------------------------------------------- // CUDA exit //-------------------------------------------------------------------------- void CUDAContext::configExit() { #ifdef EQUALIZER_USE_CUDA // Clean up all runtime-related resources associated with this thread. cudaThreadExit(); #else setError( ERROR_CUDACONTEXT_MISSING_SUPPORT ); #endif }
//////////////////////////////////////////////////////////////////////////////// //! Run a simple test for CUDA //////////////////////////////////////////////////////////////////////////////// CUTBoolean runTest(int argc, char** argv) { if (!cutCheckCmdLineFlag(argc, (const char **)argv, "noqatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. if (CUTFalse == initGL(argc, argv)) { return CUTFalse; } // use command-line specified CUDA device, otherwise use device with highest Gflops/s if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) cutilGLDeviceInit(argc, argv); else { cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); } // Create the CUTIL timer cutilCheckError( cutCreateTimer( &timer)); // register callbacks glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); if (g_bQAReadback) { g_CheckRender = new CheckBackBuffer(window_width, window_height, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } // create VBO createVBO(&vbo); // run the cuda part runCuda(vbo); // check result of Cuda step checkResultCuda(argc, argv, vbo); atexit(cleanup); // start rendering mainloop glutMainLoop(); cudaThreadExit(); return CUTTrue; }
// Stops and deletes the timer, frees the three device buffers and
// calls cudaThreadExit().
void cleanup()
{
    // Timer
    cutilCheckError(cutStopTimer(timer));
    cutilCheckError(cutDeleteTimer(timer));

    // Device buffers
    cudaFree(a_d);
    cudaFree(b_d);
    cudaFree(r_d);

    cudaThreadExit();
}
// Frees the device copy (data_d) of every entry in OP_dat_list and then
// calls cudaThreadExit().
void op_cuda_exit ( )
{
  int i = 0;

  while ( i < OP_dat_index )
  {
    cutilSafeCall ( cudaFree ( OP_dat_list[i]->data_d ) );
    ++i;
  }

  cudaThreadExit ( );
}
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { numParticles = 1024; uint gridDim = 64; numIterations = 1; cutGetCmdLineArgumenti( argc, (const char**) argv, "n", (int *) &numParticles); cutGetCmdLineArgumenti( argc, (const char**) argv, "grid", (int *) &gridDim); gridSize.x = gridSize.y = gridSize.z = gridDim; printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z); bool benchmark = !cutCheckCmdLineFlag(argc, (const char**) argv, "noqatest") != 0; cutGetCmdLineArgumenti( argc, (const char**) argv, "i", &numIterations); cudaInit(argc, argv); glutInit(&argc, argv); glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE); glutInitWindowSize(640, 480); glutCreateWindow("CUDA particles"); initGL(); init(numParticles, gridSize); initParams(); initMenus(); if (benchmark) { if (numIterations <= 0) numIterations = 300; runBenchmark(numIterations); } else { glutDisplayFunc(display); glutReshapeFunc(reshape); glutMouseFunc(mouse); glutMotionFunc(motion); glutKeyboardFunc(key); glutSpecialFunc(special); glutIdleFunc(idle); glutMainLoop(); } if (psystem) delete psystem; cudaThreadExit(); return 0; }
void runGraphicsTest(int argc, char** argv) { printf("MarchingCubes "); if (g_bFBODisplay) printf("[w/ FBO] "); if (g_bOpenGLQA) printf("[Readback Comparisons] "); printf("\n"); if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) { printf("[%s]\n", argv[0]); printf(" Does not explicitly support -device=n in OpenGL mode\n"); printf(" To use -device=n, the sample must be running w/o OpenGL\n\n"); printf(" > %s -device=n -qatest\n", argv[0]); printf("exiting...\n"); exit(0); } // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. if(CUTFalse == initGL(&argc, argv)) { return; } cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); // register callbacks glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); glutIdleFunc(idle); glutReshapeFunc(reshape); initMenus(); // Initialize CUDA buffers for Marching Cubes initMC(argc, argv); cutilCheckError( cutCreateTimer( &timer)); if (g_bOpenGLQA) { g_CheckRender = new CheckBackBuffer(window_width, window_height, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } // start rendering mainloop glutMainLoop(); cudaThreadExit(); }
//////////////////////////////////////////////////////////////////////////////// //! Keyboard events handler //////////////////////////////////////////////////////////////////////////////// void keyboard(unsigned char key, int /*x*/, int /*y*/) { switch(key) { case(27) : cleanup(); cudaThreadExit(); exit(0); case '=': isoValue += 0.01; break; case '-': isoValue -= 0.01; break; case '+': isoValue += 0.1; break; case '_': isoValue -= 0.1; break; case 'w': wireframe = !wireframe; break; case ' ': animate = !animate; break; case 'l': lighting = !lighting; break; case 'r': render = !render; break; case 'c': compute = !compute; break; } printf("isoValue = %f\n", isoValue); printf("voxels = %d\n", activeVoxels); printf("verts = %d\n", totalVerts); printf("occupancy: %d / %d = %.2f%%\n", activeVoxels, numVoxels, activeVoxels*100.0f / (float) numVoxels); if (!compute) { computeIsosurface(); } glutPostRedisplay(); }
/** * \brief Frees the memory used in the plan * \param [in] plan The data and memory locations for the plan. * \sa parseCUDAMEMPlan * \sa makeCUDAMEMPlan * \sa initCUDAMEMPlan * \sa execCUDAMEMPlan * \sa perfCUDAMEMPlan */ void * killCUDAMEMPlan(void *plan) { Plan *p; CUDAMEMdata *d; p = (Plan *)plan; d = (CUDAMEMdata*)p->vptr; CUDA_CALL( cudaThreadSynchronize() ); // if(d->DC) CUDA_CALL( cudaFree((void*)(d->DC)) ); if(d->devicearray) CUDA_CALL( cudaFree((void*)(d->devicearray)) ); if(d->hostarray) CUDA_CALL( cudaFreeHost((void*)(d->hostarray)) ); CUDA_CALL( cudaThreadExit() ); free((void*)(d)); free((void*)(p)); return (void*)NULL; }
// Per-GPU worker: binds to plan->device, generates the normally distributed
// samples, runs the Monte Carlo integrator for the plan and releases the
// device resources again. Exits the process when double precision was
// requested but the device's compute capability is below 1.3.
static CUT_THREADPROC solverThread(TOptionPlan *plan)
{
    // Bind this thread to its GPU and query the compute capability.
    cutilSafeCall( cudaSetDevice(plan->device) );

    cudaDeviceProp deviceProp;
    cutilSafeCall( cudaGetDeviceProperties(&deviceProp, plan->device) );
    const int version = deviceProp.major * 10 + deviceProp.minor;

    const bool doublePrecision = useDoublePrecision;
    if (doublePrecision && version < 13)
    {
        printf("Double precision is not supported on device %i.\n", plan->device);
        exit(0);
    }

    // Storage for the normally distributed samples
    cutilSafeCall( cudaMalloc( (void **)&plan->d_Samples, plan->pathN * sizeof(float) ) );

    if (doublePrecision)
    {
        inverseCND_SM13(plan->d_Samples, NULL, plan->pathN);   // generate samples
        initMonteCarlo_SM13(plan);                             // intermediate buffers
        MonteCarlo_SM13(plan);                                 // main computation
    }
    else
    {
        inverseCND_SM10(plan->d_Samples, NULL, plan->pathN);   // generate samples
        initMonteCarlo_SM10(plan);                             // intermediate buffers
        MonteCarlo_SM10(plan);                                 // main computation
    }

    cutilSafeCall( cudaThreadSynchronize() );

    // Shut down this GPU
    if (doublePrecision)
    {
        closeMonteCarlo_SM13(plan);
    }
    else
    {
        closeMonteCarlo_SM10(plan);
    }

    cutilSafeCall( cudaFree(plan->d_Samples) );

    cudaThreadExit();

    CUT_THREADEND;
}
int main(int argc, char **argv) { cudaError_t err = cudaSuccess; int deviceCount = 0; size_t totalDevMem, freeDevMem; size_t lastLineLength = 0; // MUST be initialized to zero signal(SIGTERM, signalHandler); signal(SIGQUIT, signalHandler); signal(SIGINT, signalHandler); signal(SIGHUP, signalHandler); writeLine(lastLineLength, "Preparing..."); err = cudaGetDeviceCount(&deviceCount); if (err != cudaSuccess) { std::cerr << "ERROR: " << cudaGetErrorString(err) << std::endl; } while (err == cudaSuccess && gRun) { std::ostringstream stream; for (int i=0; i < deviceCount; ++i) { if (err == cudaSuccess) { err = cudaSetDevice(i); if (err == cudaSuccess) { cudaMemGetInfo(&freeDevMem, &totalDevMem); if (i != 0) stream << " : "; stream << "Dev " << i << " (" << (freeDevMem/1024) << " KB of " << (totalDevMem/1048576) << " MB free)"; } } } if (err == cudaSuccess) { writeLine(lastLineLength, stream.str()); } sleep(5); // TODO - make the cycle time an optional command line flag... } cudaThreadExit(); std::cout << std::endl; return 0; }
// Frees the host buffers, stops and deletes the timer, frees the device
// buffers, calls release() and finally cudaThreadExit().
void cleanup()
{
    // Host buffers
    free(a_h);
    free(b_h);
    free(r_h);
    free(control);

    // Timer
    cutilCheckError(cutStopTimer(timer));
    cutilCheckError(cutDeleteTimer(timer));

    // Device buffers
    cudaFree(a_d);
    cudaFree(b_d);
    cudaFree(r_d);

    cutilSafeCall(release());
    checkCUDAError("release");

    cudaThreadExit();
}
//////////////////////////////////////////////////////////////////////////////// //! Run test //////////////////////////////////////////////////////////////////////////////// void runAutoTest(int argc, char** argv) { printf("[%s]\n", sSDKsample); // Cuda init int dev = cutilChooseCudaDevice(argc, argv); cudaDeviceProp deviceProp; cutilSafeCall(cudaGetDeviceProperties(&deviceProp, dev)); printf("Compute capability %d.%d\n", deviceProp.major, deviceProp.minor); int version = deviceProp.major*10 + deviceProp.minor; g_hasDouble = (version >= 13); if (inEmulationMode()) { // workaround since SM13 kernel doesn't produce correct output in emulation mode g_hasDouble = false; } // create FFT plan CUFFT_SAFE_CALL(cufftPlan2d(&fftPlan, meshW, meshH, CUFFT_C2R) ); // allocate memory fftInputW = (meshW / 2)+1; fftInputH = meshH; fftInputSize = (fftInputW*fftInputH)*sizeof(float2); cutilSafeCall(cudaMalloc((void **)&d_h0, fftInputSize) ); cutilSafeCall(cudaMalloc((void **)&d_ht, fftInputSize) ); h_h0 = (float2 *) malloc(fftInputSize); generate_h0(); cutilSafeCall(cudaMemcpy(d_h0, h_h0, fftInputSize, cudaMemcpyHostToDevice) ); cutilSafeCall(cudaMalloc((void **)&d_slope, meshW*meshH*sizeof(float2)) ); cutCreateTimer(&timer); cutStartTimer(timer); prevTime = cutGetTimerValue(timer); // Creating the Auto-Validation Code g_CheckRender = new CheckBackBuffer(windowH, windowH, 4, false); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); runCudaTest(g_hasDouble); cudaThreadExit(); }
// Destructor: frees all device buffers, destroys the CUDPP scan plan and
// calls cudaThreadExit().
CMarchingCubes::~CMarchingCubes(void)
{
    // The volume buffer is the only one freed conditionally.
    if (m_pdVolume)
    {
        cutilSafeCall(cudaFree(m_pdVolume));
    }

    // Lookup tables
    cutilSafeCall(cudaFree(m_pdEdgeTable));
    cutilSafeCall(cudaFree(m_pdTriTable));
    cutilSafeCall(cudaFree(m_pdNumVertsTable));

    // Per-voxel working buffers
    cutilSafeCall(cudaFree(m_pdVoxelVerts));
    cutilSafeCall(cudaFree(m_pdVoxelVertsScan));
    cutilSafeCall(cudaFree(m_pdVoxelOccupied));
    cutilSafeCall(cudaFree(m_pdVoxelOccupiedScan));
    cutilSafeCall(cudaFree(m_pdCompactedVoxelArray));

    cudppDestroyPlan(m_Scanplan);

    cudaThreadExit();
}
int main() { cudaDeviceReset(); cudaDeviceSynchronize(); // print device properties print_device(); // create pointers to data const uint64_t size = N; DataArray* data_arr_ptr = (DataArray*) malloc((size_t) sizeof(DataArray)); // change to global variable <- easier to code // allocate memory for array of streams const uint8_t num_streams = 2; // rewrite on defines? streams_arr = (cudaStream_t*) malloc( (size_t) sizeof(cudaStream_t)*num_streams); // create threads const uint8_t num_threads = 2; printf("host thread id\t %u\ndevice thread id %u\n",KERNEL_THRD, MEMORY_THRD); pthread_t* thread_ptr_arr = (pthread_t*) malloc( (size_t) sizeof(pthread_t)*num_threads ); // alternatively pthread_t* thread_ptr_arr[num_threads]; // init barier for threads pthread_barrier_init (&barrier, NULL, num_threads); // last number tells how many threads should be synchronized by this barier pthread_create(&thread_ptr_arr[KERNEL_THRD], NULL, host_thread, (void*) data_arr_ptr); pthread_create(&thread_ptr_arr[MEMORY_THRD], NULL, device_thread, (void*) data_arr_ptr); void* status; pthread_join(thread_ptr_arr[HOST_THRD], &status); pthread_join(thread_ptr_arr[DEVICE_THRD], &status); //printf("data visible in main thread:\n"); // Cleaning up free(thread_ptr_arr); free(streams_arr); free(data_arr_ptr); cudaThreadExit(); cudaDeviceSynchronize(); printf("Main: program completed. Exiting...\n"); return EXIT_SUCCESS; }
//////////////////////////////////////////////////////////////////////////////// //! Initialize GL //////////////////////////////////////////////////////////////////////////////// CUTBoolean initGL(int *argc, char **argv) { // Create GL context glutInit(argc, argv); glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE | GLUT_DEPTH); glutInitWindowSize(windowW, windowH); glutCreateWindow("CUDA FFT Ocean Simulation"); vertShaderPath = cutFindFilePath("ocean.vert", argv[0]); fragShaderPath = cutFindFilePath("ocean.frag", argv[0]); if (vertShaderPath == 0 || fragShaderPath == 0) { fprintf(stderr, "Error finding shader files!\n"); cudaThreadExit(); exit(EXIT_FAILURE); } // initialize necessary OpenGL extensions glewInit(); if (! glewIsSupported("GL_VERSION_2_0 " )) { fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing."); fflush(stderr); return CUTFalse; } if (!glewIsSupported( "GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object" )) { fprintf(stderr, "Error: failed to get minimal extensions for demo\n"); fprintf(stderr, "This sample requires:\n"); fprintf(stderr, " OpenGL version 1.5\n"); fprintf(stderr, " GL_ARB_vertex_buffer_object\n"); fprintf(stderr, " GL_ARB_pixel_buffer_object\n"); cleanup(); exit(-1); } // default initialization glClearColor(0.0, 0.0, 0.0, 1.0); glEnable(GL_DEPTH_TEST); // load shader shaderProg = loadGLSLProgram(vertShaderPath, fragShaderPath); CUT_CHECK_ERROR_GL(); return CUTTrue; }
int findCapableDevice(int argc, char **argv) { int dev; int bestDev = -1; int deviceCount = 0; if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) { fprintf(stderr, "cudaGetDeviceCount FAILED CUDA Driver and Runtime version may be mismatched.\n"); fprintf(stderr, "\nFAILED\n"); cudaThreadExit(); cutilExit(argc, argv); } for (dev = 0; dev < deviceCount; ++dev) { cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, dev); if (dev == 0) { // This function call returns 9999 for both major & minor fields, if no CUDA capable devices are present if (deviceProp.major == 9999 && deviceProp.minor == 9999) fprintf(stderr,"There is no device supporting CUDA.\n"); else if (deviceCount == 1) fprintf(stderr,"There is 1 device supporting CUDA\n"); else fprintf(stderr,"There are %d devices supporting CUDA\n", deviceCount); } if( checkCUDAProfile( dev ) ) { fprintf(stderr,"\nFound capable device: %d\n", dev ); if( bestDev == -1 ) { bestDev = dev; fprintf(stderr, "Setting active device to %d\n", bestDev ); } } } if( bestDev == -1 ) { fprintf(stderr, "\nNo configuration with available capabilities was found. Test has been waived.\n"); fprintf(stderr, "This sample requires:\n"); fprintf(stderr, "\tGPU Device Compute >= 2.0 is required\n"); fprintf(stderr, "\tCUDA Runtime Version >= 3.1 is required\n"); fprintf(stderr, "PASSED\n"); } return bestDev; }
// Releases the previously created compute context of whichever backend is
// both compiled in and enabled at run time.
void gpuReset()
{
#ifdef USE_OPENCL
    // OpenCL backend
    if (run_opencl)
    {
        clReleaseContext (mocl.context);
    }
#endif

#ifdef USE_CUDA
    // CUDA backend: the call used depends on CUDA_VERSION.
    if (run_cuda)
    {
#if CUDA_VERSION >= 4000
        cudaDeviceReset();
#else
        cudaThreadExit();
#endif
    }
#endif
}
// Destructor: frees every device buffer that is still allocated (non-NULL)
// and then calls cudaThreadExit().
reg_f3d_gpu<T>::~reg_f3d_gpu()
{
    // Images and mask
    if (this->currentReference_gpu)
        cudaCommon_free(&this->currentReference_gpu);
    if (this->currentFloating_gpu)
        cudaCommon_free(&this->currentFloating_gpu);
    if (this->currentMask_gpu)
        cudaCommon_free<int>(&this->currentMask_gpu);
    if (this->warped_gpu)
        cudaCommon_free<float>(&this->warped_gpu);

    // Transformation and gradient buffers
    if (this->controlPointGrid_gpu)
        cudaCommon_free<float4>(&this->controlPointGrid_gpu);
    if (this->deformationFieldImage_gpu)
        cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
    if (this->warpedGradientImage_gpu)
        cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
    if (this->voxelBasedMeasureGradientImage_gpu)
        cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
    if (this->nodeBasedGradientImage_gpu)
        cudaCommon_free<float4>(&this->nodeBasedGradientImage_gpu);

    // Optimiser state
    if (this->conjugateG_gpu)
        cudaCommon_free<float4>(&this->conjugateG_gpu);
    if (this->conjugateH_gpu)
        cudaCommon_free<float4>(&this->conjugateH_gpu);
    if (this->bestControlPointPosition_gpu)
        cudaCommon_free<float4>(&this->bestControlPointPosition_gpu);

    // Similarity measure
    if (this->logJointHistogram_gpu)
        cudaCommon_free<float>(&this->logJointHistogram_gpu);

    // Second-channel buffers
    if (this->currentReference2_gpu)
        cudaCommon_free(&this->currentReference2_gpu);
    if (this->currentFloating2_gpu)
        cudaCommon_free(&this->currentFloating2_gpu);
    if (this->warped2_gpu)
        cudaCommon_free<float>(&this->warped2_gpu);
    if (this->warpedGradientImage2_gpu)
        cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);

    NR_CUDA_SAFE_CALL(cudaThreadExit())

#ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n");
#endif
}
// Initializes GLEW, verifies the required OpenGL features, sets the default
// GL state and creates the particle renderer. Exits the process when a
// required feature is missing.
void initGL()
{
    glewInit();

    const bool supported = glewIsSupported("GL_VERSION_2_0 "
                                           "GL_VERSION_1_5 "
                                           "GL_ARB_multitexture "
                                           "GL_ARB_vertex_buffer_object");
    if (!supported)
    {
        fprintf(stderr, "Required OpenGL extensions missing.");
        cudaThreadExit();
        exit(-1);
    }

    // Default GL state
    glEnable(GL_DEPTH_TEST);
    glClearColor(0.0, 0.0, 0.0, 1.0);

    renderer = new ParticleRenderer;

    checkGLErrors("initGL");
}
void initGL(int argc, char **argv) { glutInit(&argc, argv); glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE); glutInitWindowSize(width, height); glutCreateWindow(sSDKsample); glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutReshapeFunc(reshape); glutIdleFunc(idle); printf("Press '+' and '-' to change filter width\n"); printf("0, 1, 2 - change filter order\n"); glewInit(); if (!glewIsSupported("GL_VERSION_2_0 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object")) { fprintf(stderr, "Required OpenGL extensions missing."); cudaThreadExit(); exit(-1); } }
void runAutoTest(int argc, char **argv) { int devID = cutilChooseCudaDevice(argc, argv); // Initialize CUDA buffers for Marching Cubes initMC(argc, argv); g_CheckRender = new CheckBackBuffer(maxVerts*sizeof(float)*4, 1, 1, false); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); computeIsosurface(); if (g_bQAReadback) { dumpFile<float4>(d_pos, maxVerts, sizeof(float4), "marchCube_posArray.bin"); dumpFile<float4>(d_normal, maxVerts, sizeof(float4), "marchCube_normalArray.bin"); dumpFile<uint>(d_compVoxelArray, numVoxels, sizeof(uint), "marchCube_compVoxelArray.bin"); if (!g_CheckRender->compareBin2BinFloat("marchCube_posArray.bin", "posArray.bin", maxVerts*sizeof(float)*4, EPSILON, THRESHOLD)) g_TotalErrors++; if (!g_CheckRender->compareBin2BinFloat("marchCube_normalArray.bin", "normalArray.bin", maxVerts*sizeof(float)*4, EPSILON, THRESHOLD)) g_TotalErrors++; //printf("sizeof(uint) = %d\n", sizeof(uint)); if (!g_CheckRender->compareBin2BinFloat("marchCube_compVoxelArray.bin", "compVoxelArray.bin", numVoxels*sizeof(uint), EPSILON, THRESHOLD)) g_TotalErrors++; printf("%s\n", (g_TotalErrors > 0) ? "FAILED" : "PASSED"); } cleanup(); cudaThreadExit(); }
void WindowDrawTask::execute(HardwareContext *pContext, DrawEnv *pEnv) { Window *pWindow = pEnv->getWindow(); OSG_ASSERT(pWindow != NULL); switch(_uiTypeTask) { case Init: { #ifdef OSG_DUMP_WINTASK fprintf(stderr, "Init\n"); fflush(stderr); #endif if(_bCreatePrivateContext == true) pWindow->init(); pWindow->doActivate (); pWindow->doFrameInit (_bReinitExtFunctions); pWindow->setupGL (); pWindow->setOpenGLInit(); if(_oInitFunc) { _oInitFunc(); } if(pWindow->getKeepContextActive() == false) pWindow->doDeactivate(); } break; case Activate: { #ifdef OSG_DUMP_WINTASK fprintf(stderr, "Activate\n"); fflush(stderr); #endif if(pWindow->getKeepContextActive() == false) pWindow->doActivate(); } break; case FrameInit: { #ifdef OSG_DUMP_WINTASK fprintf(stderr, "FrameInit\n"); fflush(stderr); #endif if(pWindow->getKeepContextActive() == false) pWindow->doActivate(); pWindow->doFrameInit(); } break; case FrameExit: { #ifdef OSG_DUMP_WINTASK fprintf(stderr, "FrameExit\n"); fflush(stderr); #endif pWindow->doFrameExit(); if(pWindow->getKeepContextActive() == false) pWindow->doDeactivate(); commitChangesAndClear(); } break; case WaitAtBarrier: { #ifdef OSG_DUMP_WINTASK fprintf(stderr, "WaitAtBarrier\n"); fflush(stderr); #endif OSG_ASSERT(_pBarrier != NULL); _pBarrier->enter(); } break; case Swap: { #ifdef OSG_DUMP_WINTASK fprintf(stderr, "Swap\n"); fflush(stderr); #endif pWindow->doSwap(); #ifdef OSG_SWAP_BARRIER OSG_ASSERT(_pBarrier != NULL); _pBarrier->enter(); #endif } break; case EndThread: { if(pWindow->getKeepContextActive() == false) pWindow->doActivate(); pWindow->doFrameExit(); #ifdef OSG_WITH_CUDA if(0x0000 != (pWindow->getInitState() & HardwareContext::CudaInitialized)) { cudaThreadExit(); } #endif pWindow->doDeactivate(); pWindow->_pContextThread->endRunning(); } break; default: break; } }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { //start logs shrSetLogFileName ("volumeRender.txt"); shrLog("%s Starting...\n\n", argv[0]); if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { g_bQAReadback = true; fpsLimit = frameCheckNumber; } if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) { g_bQAGLVerify = true; fpsLimit = frameCheckNumber; } if (g_bQAReadback) { // use command-line specified CUDA device, otherwise use device with highest Gflops/s if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilDeviceInit(argc, argv); } else { cudaSetDevice( cutGetMaxGflopsDeviceId() ); } } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. 
initGL( &argc, argv ); // use command-line specified CUDA device, otherwise use device with highest Gflops/s if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) { cutilGLDeviceInit(argc, argv); } else { cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); } /* int device; struct cudaDeviceProp prop; cudaGetDevice( &device ); cudaGetDeviceProperties( &prop, device ); if( !strncmp( "Tesla", prop.name, 5 ) ) { shrLog("This sample needs a card capable of OpenGL and display.\n"); shrLog("Please choose a different device with the -device=x argument.\n"); cutilExit(argc, argv); } */ } // parse arguments char *filename; if (cutGetCmdLineArgumentstr( argc, (const char**) argv, "file", &filename)) { volumeFilename = filename; } int n; if (cutGetCmdLineArgumenti( argc, (const char**) argv, "size", &n)) { volumeSize.width = volumeSize.height = volumeSize.depth = n; } if (cutGetCmdLineArgumenti( argc, (const char**) argv, "xsize", &n)) { volumeSize.width = n; } if (cutGetCmdLineArgumenti( argc, (const char**) argv, "ysize", &n)) { volumeSize.height = n; } if (cutGetCmdLineArgumenti( argc, (const char**) argv, "zsize", &n)) { volumeSize.depth = n; } // load volume data char* path = shrFindFilePath(volumeFilename, argv[0]); if (path == 0) { shrLog("Error finding file '%s'\n", volumeFilename); exit(EXIT_FAILURE); } size_t size = volumeSize.width*volumeSize.height*volumeSize.depth*sizeof(VolumeType); void *h_volume = loadRawFile(path, size); initCuda(h_volume, volumeSize); free(h_volume); cutilCheckError( cutCreateTimer( &timer)); shrLog("Press '=' and '-' to change density\n" " ']' and '[' to change brightness\n" " ';' and ''' to modify transfer function offset\n" " '.' 
and ',' to modify transfer function scale\n\n"); // calculate new grid size gridSize = dim3(iDivUp(width, blockSize.x), iDivUp(height, blockSize.y)); if (g_bQAReadback) { g_CheckRender = new CheckBackBuffer(width, height, 4, false); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); uint *d_output; cutilSafeCall(cudaMalloc((void**)&d_output, width*height*sizeof(uint))); cutilSafeCall(cudaMemset(d_output, 0, width*height*sizeof(uint))); float modelView[16] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 4.0f, 1.0f }; invViewMatrix[0] = modelView[0]; invViewMatrix[1] = modelView[4]; invViewMatrix[2] = modelView[8]; invViewMatrix[3] = modelView[12]; invViewMatrix[4] = modelView[1]; invViewMatrix[5] = modelView[5]; invViewMatrix[6] = modelView[9]; invViewMatrix[7] = modelView[13]; invViewMatrix[8] = modelView[2]; invViewMatrix[9] = modelView[6]; invViewMatrix[10] = modelView[10]; invViewMatrix[11] = modelView[14]; // call CUDA kernel, writing results to PBO copyInvViewMatrix(invViewMatrix, sizeof(float4)*3); // Start timer 0 and process n loops on the GPU int nIter = 10; for (int i = -1; i < nIter; i++) { if( i == 0 ) { cudaThreadSynchronize(); cutStartTimer(timer); } render_kernel(gridSize, blockSize, d_output, width, height, density, brightness, transferOffset, transferScale); } cudaThreadSynchronize(); cutStopTimer(timer); // Get elapsed time and throughput, then log to sample and master logs double dAvgTime = cutGetTimerValue(timer)/(nIter * 1000.0); shrLogEx(LOGBOTH | MASTER, 0, "volumeRender, Throughput = %.4f MTexels/s, Time = %.5f s, Size = %u Texels, NumDevsUsed = %u, Workgroup = %u\n", (1.0e-6 * width * height)/dAvgTime, dAvgTime, (width * height), 1, blockSize.x * blockSize.y); cutilCheckMsg("Error: render_kernel() execution FAILED"); cutilSafeCall( cudaThreadSynchronize() ); cutilSafeCall( cudaMemcpy(g_CheckRender->imageData(), d_output, width*height*4, 
cudaMemcpyDeviceToHost) ); g_CheckRender->savePPM(sOriginal[g_Index], true, NULL); if (!g_CheckRender->PPMvsPPM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, THRESHOLD)) { shrLog("\nFAILED\n\n"); } else { shrLog("\nPASSED\n\n"); } cudaFree(d_output); freeCudaBuffers(); if (g_CheckRender) { delete g_CheckRender; g_CheckRender = NULL; } } else { // This is the normal rendering path for VolumeRender glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); glutReshapeFunc(reshape); glutIdleFunc(idle); initPixelBuffer(); if (g_bQAGLVerify) { g_CheckRender = new CheckBackBuffer(width, height, 4); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); } atexit(cleanup); glutMainLoop(); } cudaThreadExit(); shrEXIT(argc, (const char**)argv); }