void initGL(int *argc, char** argv)
{
    glutInit( argc, argv );    
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowSize(wWidth, wHeight);
    glutCreateWindow("Cuda Edge Detection");

    glewInit();

    if (g_bFBODisplay) {
        if (!glewIsSupported( "GL_VERSION_2_0 GL_ARB_fragment_program GL_EXT_framebuffer_object" )) {
            fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
            fprintf(stderr, "This sample requires:\n");
            fprintf(stderr, "  OpenGL version 2.0\n");
            fprintf(stderr, "  GL_ARB_fragment_program\n");
            fprintf(stderr, "  GL_EXT_framebuffer_object\n");
            cudaThreadExit();
            cutilExit(*argc, argv);
        } 
    } else {
        if (!glewIsSupported( "GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object" )) {
            fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
            fprintf(stderr, "This sample requires:\n");
            fprintf(stderr, "  OpenGL version 1.5\n");
            fprintf(stderr, "  GL_ARB_vertex_buffer_object\n");
            fprintf(stderr, "  GL_ARB_pixel_buffer_object\n");
            cudaThreadExit();
            cutilExit(*argc, argv);
        }
    }
}
void initializeData(char *file, int argc, char **argv) {
    GLint bsize;
    unsigned int w, h;
    size_t file_length= strlen(file);

    if (!strcmp(&file[file_length-3], "pgm")) {
        if (cutLoadPGMub(file, &pixels, &w, &h) != CUTTrue) {
            printf("Failed to load image file: %s\n", file);
            exit(-1);
        }
        g_Bpp = 1;
    } else if (!strcmp(&file[file_length-3], "ppm")) {
        if (cutLoadPPM4ub(file, &pixels, &w, &h) != CUTTrue) {
            printf("Failed to load image file: %s\n", file);
            exit(-1);
        }
        g_Bpp = 4;
    } else {
        cudaThreadExit();
		cutilExit(argc, argv);
    }
    imWidth = (int)w; imHeight = (int)h;
    setupTexture(imWidth, imHeight, pixels, g_Bpp);
	// copy function pointer tables to host side for later use
	setupFunctionTables();

    memset(pixels, 0x0, g_Bpp * sizeof(Pixel) * imWidth * imHeight);

    if (!g_bQAReadback) {
        // use OpenGL Path
        glGenBuffers(1, &pbo_buffer);
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer); 
        glBufferData(GL_PIXEL_UNPACK_BUFFER, 
                        g_Bpp * sizeof(Pixel) * imWidth * imHeight, 
                        pixels, GL_STREAM_DRAW);  

        glGetBufferParameteriv(GL_PIXEL_UNPACK_BUFFER, GL_BUFFER_SIZE, &bsize); 
        if ((GLuint)bsize != (g_Bpp * sizeof(Pixel) * imWidth * imHeight)) {
            printf("Buffer object (%d) has incorrect size (%d).\n", (unsigned)pbo_buffer, (unsigned)bsize);
            cudaThreadExit();
			cutilExit(argc, argv);
        }

        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

		 // register this buffer object with CUDA
	    cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo_buffer, cudaGraphicsMapFlagsWriteDiscard));	

        glGenTextures(1, &texid);
        glBindTexture(GL_TEXTURE_2D, texid);
        glTexImage2D(GL_TEXTURE_2D, 0, ((g_Bpp==1) ? GL_LUMINANCE : GL_BGRA), 
                    imWidth, imHeight,  0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);
        glBindTexture(GL_TEXTURE_2D, 0);

        glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
        glPixelStorei(GL_PACK_ALIGNMENT, 1);
    }
}
Exemplo n.º 3
0
// Function to perform final cleanup, which makes sure that all
// CUDA data is deallocated, and thread exist is called. Must be
// called before destructor is called!
void NLConsoleInterface::finalCleanUp()
{
    _synthesizer->finalCleanUp();
    cudaThreadExit();

    qDebug() << Stats::instance().allTimerStatistics();
}
Exemplo n.º 4
0
// Main program
int main(int argc, char** argv)
{
  // Create the CUTIL timer
  cutilCheckError( cutCreateTimer( &timer));
   
  if (CUTFalse == initGL(argc, argv)) {
    return CUTFalse;
  }
 
  initCuda(argc, argv);
  CUT_CHECK_ERROR_GL();
 
  // register callbacks
  glutDisplayFunc(fpsDisplay);
  glutKeyboardFunc(keyboard);
  glutMouseFunc(mouse);
  glutMotionFunc(motion);
   
  // start rendering mainloop
  glutMainLoop();
   
  // clean up
  cudaThreadExit();
  cutilExit(argc, argv);
}
Exemplo n.º 5
0
reg_f3d_gpu<T>::reg_f3d_gpu(int refTimePoint,int floTimePoint)
    :reg_f3d<T>::reg_f3d(refTimePoint,floTimePoint)
{
    this->currentReference_gpu=NULL;
    this->currentFloating_gpu=NULL;
    this->currentMask_gpu=NULL;
    this->warped_gpu=NULL;
    this->controlPointGrid_gpu=NULL;
    this->deformationFieldImage_gpu=NULL;
    this->warpedGradientImage_gpu=NULL;
    this->voxelBasedMeasureGradientImage_gpu=NULL;
    this->nodeBasedGradientImage_gpu=NULL;
    this->conjugateG_gpu=NULL;
    this->conjugateH_gpu=NULL;
    this->bestControlPointPosition_gpu=NULL;
    this->logJointHistogram_gpu=NULL;

    this->currentReference2_gpu=NULL;
    this->currentFloating2_gpu=NULL;
    this->warped2_gpu=NULL;
    this->warpedGradientImage2_gpu=NULL;

    NR_CUDA_SAFE_CALL(cudaThreadExit())
#ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu constructor called\n");
#endif
}
Exemplo n.º 6
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
    runTest(argc, argv);

    cudaThreadExit();

    cutilExit(argc, argv);
}
Exemplo n.º 7
0
SolverThread::~SolverThread() 
{
    std::cout<<" solver exit\n";
    cudaThreadExit();
    
    delete m_hostK;
    delete m_stiffnessMatrix;
}
Exemplo n.º 8
0
    //--------------------------------------------------------------------------
    // CUDA exit
    //--------------------------------------------------------------------------
    void CUDAContext::configExit()
    {
#ifdef EQUALIZER_USE_CUDA
        // Clean up all runtime-related resources associated with this thread.
        cudaThreadExit();
#else
        setError( ERROR_CUDACONTEXT_MISSING_SUPPORT );
#endif
    }
Exemplo n.º 9
0
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
CUTBoolean runTest(int argc, char** argv)
{
    if (!cutCheckCmdLineFlag(argc, (const char **)argv, "noqatest") ||
		cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) 
	{
        g_bQAReadback = true;
        fpsLimit = frameCheckNumber;
    }

    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    if (CUTFalse == initGL(argc, argv)) {
        return CUTFalse;
    }

	// use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
        cutilGLDeviceInit(argc, argv);
    else {
        cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
    }

    // Create the CUTIL timer
    cutilCheckError( cutCreateTimer( &timer));

    // register callbacks
    glutDisplayFunc(display);
    glutKeyboardFunc(keyboard);
    glutMouseFunc(mouse);
    glutMotionFunc(motion);

    if (g_bQAReadback) {
        g_CheckRender = new CheckBackBuffer(window_width, window_height, 4);
        g_CheckRender->setPixelFormat(GL_RGBA);
        g_CheckRender->setExecPath(argv[0]);
        g_CheckRender->EnableQAReadback(true);
    }

    // create VBO
    createVBO(&vbo);

    // run the cuda part
    runCuda(vbo);

    // check result of Cuda step
    checkResultCuda(argc, argv, vbo);

    atexit(cleanup);

    // start rendering mainloop
    glutMainLoop();

    cudaThreadExit();

	return CUTTrue;
}
Exemplo n.º 10
0
Arquivo: main.cpp Projeto: elf11/GPGPU
void cleanup()
{
	cutilCheckError(cutStopTimer(timer));
	cutilCheckError(cutDeleteTimer( timer));

	cudaFree(a_d);cudaFree(b_d);cudaFree(r_d);

	cudaThreadExit();
      
}
Exemplo n.º 11
0
void
op_cuda_exit ( )
{
  for ( int i = 0; i < OP_dat_index; i++ )
  {
    cutilSafeCall ( cudaFree ( OP_dat_list[i]->data_d ) );
  }

  cudaThreadExit ( );
}
Exemplo n.º 12
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char** argv) 
{
    numParticles = 1024;
    uint gridDim = 64;
    numIterations = 1;

    cutGetCmdLineArgumenti( argc, (const char**) argv, "n", (int *) &numParticles);
    cutGetCmdLineArgumenti( argc, (const char**) argv, "grid", (int *) &gridDim);
    gridSize.x = gridSize.y = gridSize.z = gridDim;
    printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);

    bool benchmark = !cutCheckCmdLineFlag(argc, (const char**) argv, "noqatest") != 0;
    cutGetCmdLineArgumenti( argc, (const char**) argv, "i", &numIterations);
    
    cudaInit(argc, argv);

    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
    glutInitWindowSize(640, 480);
    glutCreateWindow("CUDA particles");

    initGL();
    init(numParticles, gridSize);
    initParams();
    initMenus();

    if (benchmark)
    {
        if (numIterations <= 0) 
            numIterations = 300;
        runBenchmark(numIterations);
    }
    else
    {
        glutDisplayFunc(display);
        glutReshapeFunc(reshape);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
        glutKeyboardFunc(key);
        glutSpecialFunc(special);
        glutIdleFunc(idle);

        glutMainLoop();
    }

    if (psystem)
        delete psystem;

    cudaThreadExit();

    return 0;
}
Exemplo n.º 13
0
void
runGraphicsTest(int argc, char** argv)
{
	printf("MarchingCubes ");
    if (g_bFBODisplay) printf("[w/ FBO] ");
    if (g_bOpenGLQA) printf("[Readback Comparisons] ");
    printf("\n");

    if ( cutCheckCmdLineFlag(argc, (const char **)argv, "device")) {
		printf("[%s]\n", argv[0]);
		printf("   Does not explicitly support -device=n in OpenGL mode\n");
		printf("   To use -device=n, the sample must be running w/o OpenGL\n\n");
		printf(" > %s -device=n -qatest\n", argv[0]);
		printf("exiting...\n");
        exit(0);
	}

    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    if(CUTFalse == initGL(&argc, argv)) {
        return;
    }

    cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );

    // register callbacks
    glutDisplayFunc(display);
    glutKeyboardFunc(keyboard);
    glutMouseFunc(mouse);
    glutMotionFunc(motion);
    glutIdleFunc(idle);
    glutReshapeFunc(reshape);
    initMenus();

    // Initialize CUDA buffers for Marching Cubes 
    initMC(argc, argv);

    cutilCheckError( cutCreateTimer( &timer));

    if (g_bOpenGLQA) {
        g_CheckRender = new CheckBackBuffer(window_width, window_height, 4);
        g_CheckRender->setPixelFormat(GL_RGBA);
        g_CheckRender->setExecPath(argv[0]);
        g_CheckRender->EnableQAReadback(true);
    }

    // start rendering mainloop
    glutMainLoop();

    cudaThreadExit();
}
Exemplo n.º 14
0
////////////////////////////////////////////////////////////////////////////////
//! Keyboard events handler
////////////////////////////////////////////////////////////////////////////////
void
keyboard(unsigned char key, int /*x*/, int /*y*/)
{
    switch(key) {
    case(27) :
        cleanup();
        cudaThreadExit();
        exit(0);
    case '=':
        isoValue += 0.01;
        break;
    case '-':
        isoValue -= 0.01;
        break;
    case '+':
        isoValue += 0.1;
        break;
    case '_':
        isoValue -= 0.1;
        break;
    case 'w':
        wireframe = !wireframe;
        break;
    case ' ':
        animate = !animate;
        break;
    case 'l':
        lighting = !lighting;
        break;
    case 'r':
        render = !render;
        break;
    case 'c':
        compute = !compute;
        break;
    }

    printf("isoValue = %f\n", isoValue);
    printf("voxels = %d\n", activeVoxels);
    printf("verts = %d\n", totalVerts);
    printf("occupancy: %d / %d = %.2f%%\n", 
           activeVoxels, numVoxels, activeVoxels*100.0f / (float) numVoxels);

    if (!compute) {
        computeIsosurface();        
    }

    glutPostRedisplay();
}
Exemplo n.º 15
0
/**
 * \brief Frees the memory used in the plan
 * \param [in] plan The data and memory locations for the plan.
 * \sa parseCUDAMEMPlan
 * \sa makeCUDAMEMPlan
 * \sa initCUDAMEMPlan
 * \sa execCUDAMEMPlan
 * \sa perfCUDAMEMPlan
 */
void * killCUDAMEMPlan(void *plan) {
	Plan *p;
	CUDAMEMdata *d;
	p = (Plan *)plan;
	d = (CUDAMEMdata*)p->vptr;
	
	CUDA_CALL( cudaThreadSynchronize() );
	// if(d->DC) CUDA_CALL( cudaFree((void*)(d->DC)) );
	if(d->devicearray) CUDA_CALL( cudaFree((void*)(d->devicearray)) );
	if(d->hostarray) CUDA_CALL( cudaFreeHost((void*)(d->hostarray)) );
	CUDA_CALL( cudaThreadExit() );
	free((void*)(d));
	free((void*)(p));
	return (void*)NULL;
}
Exemplo n.º 16
0
static CUT_THREADPROC solverThread(TOptionPlan *plan) {
    //Init GPU
    cutilSafeCall( cudaSetDevice(plan->device) );

    cudaDeviceProp deviceProp;
    cutilSafeCall(cudaGetDeviceProperties(&deviceProp, plan->device));
    int version = deviceProp.major * 10 + deviceProp.minor;
    if(useDoublePrecision && version < 13) {
        printf("Double precision is not supported on device %i.\n", plan->device);
        exit(0);
    }

    //Allocate memory for normally distributed samples
    cutilSafeCall( cudaMalloc(
                       (void **)&plan->d_Samples,
                       plan->pathN * sizeof(float)
                   ) );

    //Generate normally distributed samples
    if(useDoublePrecision)
        inverseCND_SM13(plan->d_Samples, NULL, plan->pathN);
    else
        inverseCND_SM10(plan->d_Samples, NULL, plan->pathN);

    //Allocate intermediate memory for MC integrator
    if(useDoublePrecision)
        initMonteCarlo_SM13(plan);
    else
        initMonteCarlo_SM10(plan);

    //Main computations
    if(useDoublePrecision)
        MonteCarlo_SM13(plan);
    else
        MonteCarlo_SM10(plan);
    cutilSafeCall( cudaThreadSynchronize() );

    //Shut down this GPU
    if(useDoublePrecision)
        closeMonteCarlo_SM13(plan);
    else
        closeMonteCarlo_SM10(plan);
    cutilSafeCall( cudaFree(plan->d_Samples) );

    cudaThreadExit();

    CUT_THREADEND;
}
Exemplo n.º 17
0
int main(int argc, char **argv)
{
  cudaError_t err = cudaSuccess;
  int deviceCount = 0;
  size_t totalDevMem, freeDevMem;
  size_t lastLineLength = 0; // MUST be initialized to zero

  signal(SIGTERM, signalHandler);
  signal(SIGQUIT, signalHandler);
  signal(SIGINT, signalHandler);
  signal(SIGHUP, signalHandler);

  writeLine(lastLineLength, "Preparing...");

  err = cudaGetDeviceCount(&deviceCount);

  if (err != cudaSuccess) {
   std::cerr << "ERROR: " << cudaGetErrorString(err) << std::endl; 
  }

  while (err == cudaSuccess && gRun) {
    
    std::ostringstream stream;

    for (int i=0; i < deviceCount; ++i) {
      if (err == cudaSuccess) {
	err = cudaSetDevice(i);
	if (err == cudaSuccess) {
	  cudaMemGetInfo(&freeDevMem, &totalDevMem);
	  if (i != 0)
	    stream << " : ";
	  stream << "Dev " << i << " (" << (freeDevMem/1024) << " KB of " << (totalDevMem/1048576) << " MB free)";
	}
      }
    }
    if (err == cudaSuccess) {
      writeLine(lastLineLength, stream.str());
    }
    
    sleep(5); // TODO - make the cycle time an optional command line flag...
  }

  cudaThreadExit();

  std::cout << std::endl;

  return 0;
}
Exemplo n.º 18
0
Arquivo: main.cpp Projeto: elf11/GPGPU
void cleanup()
{
	free(a_h);free(b_h);free(r_h);
	free(control);

	cutilCheckError(cutStopTimer(timer));
	cutilCheckError(cutDeleteTimer( timer));

	cudaFree(a_d);cudaFree(b_d);cudaFree(r_d);

	
	cutilSafeCall(release());
	checkCUDAError("release");
	cudaThreadExit();
      
}
Exemplo n.º 19
0
////////////////////////////////////////////////////////////////////////////////
//! Run test
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int argc, char** argv)
{
    printf("[%s]\n", sSDKsample);

    // Cuda init
	int dev = cutilChooseCudaDevice(argc, argv);

    cudaDeviceProp deviceProp;
    cutilSafeCall(cudaGetDeviceProperties(&deviceProp, dev));
    printf("Compute capability %d.%d\n", deviceProp.major, deviceProp.minor);
    int version = deviceProp.major*10 + deviceProp.minor;
    g_hasDouble = (version >= 13);
    if (inEmulationMode()) {
        // workaround since SM13 kernel doesn't produce correct output in emulation mode
        g_hasDouble = false;
    }

    // create FFT plan
    CUFFT_SAFE_CALL(cufftPlan2d(&fftPlan, meshW, meshH, CUFFT_C2R) );

    // allocate memory
    fftInputW = (meshW / 2)+1;
    fftInputH = meshH;
    fftInputSize = (fftInputW*fftInputH)*sizeof(float2);

    cutilSafeCall(cudaMalloc((void **)&d_h0, fftInputSize) );
    cutilSafeCall(cudaMalloc((void **)&d_ht, fftInputSize) );
    h_h0 = (float2 *) malloc(fftInputSize);
    generate_h0();
    cutilSafeCall(cudaMemcpy(d_h0, h_h0, fftInputSize, cudaMemcpyHostToDevice) );

    cutilSafeCall(cudaMalloc((void **)&d_slope, meshW*meshH*sizeof(float2)) );

    cutCreateTimer(&timer);
    cutStartTimer(timer);
    prevTime = cutGetTimerValue(timer);

    // Creating the Auto-Validation Code
    g_CheckRender = new CheckBackBuffer(windowH, windowH, 4, false);
    g_CheckRender->setPixelFormat(GL_RGBA);
    g_CheckRender->setExecPath(argv[0]);
    g_CheckRender->EnableQAReadback(true);

    runCudaTest(g_hasDouble);
    cudaThreadExit();
}
Exemplo n.º 20
0
CMarchingCubes::~CMarchingCubes(void)
{
	if (m_pdVolume)
		cutilSafeCall(cudaFree(m_pdVolume));
	cutilSafeCall(cudaFree(m_pdEdgeTable));
	cutilSafeCall(cudaFree(m_pdTriTable));
	cutilSafeCall(cudaFree(m_pdNumVertsTable));

	cutilSafeCall(cudaFree(m_pdVoxelVerts));
	cutilSafeCall(cudaFree(m_pdVoxelVertsScan));
	cutilSafeCall(cudaFree(m_pdVoxelOccupied));
	cutilSafeCall(cudaFree(m_pdVoxelOccupiedScan));
	cutilSafeCall(cudaFree(m_pdCompactedVoxelArray));

	cudppDestroyPlan(m_Scanplan);
	cudaThreadExit();
}
Exemplo n.º 21
0
int main() {
  
  cudaDeviceReset();
  cudaDeviceSynchronize();
  
  // print device properties
  print_device();
    
  // create pointers to data
  const uint64_t size = N;
  DataArray* data_arr_ptr = (DataArray*) malloc((size_t) sizeof(DataArray)); // change to global variable <- easier to code
  
  // allocate memory for array of streams
  const uint8_t num_streams = 2; // rewrite on defines?
  streams_arr = (cudaStream_t*) malloc( (size_t) sizeof(cudaStream_t)*num_streams);
  
  // create threads
  const uint8_t num_threads = 2;
  printf("host thread id\t %u\ndevice thread id %u\n",KERNEL_THRD, MEMORY_THRD);
  
  pthread_t* thread_ptr_arr = (pthread_t*) malloc( (size_t) sizeof(pthread_t)*num_threads ); // alternatively pthread_t* thread_ptr_arr[num_threads];
  
  // init barier for threads
  pthread_barrier_init (&barrier, NULL, num_threads); // last number tells how many threads should be synchronized by this barier
  
  pthread_create(&thread_ptr_arr[KERNEL_THRD], NULL, host_thread, (void*) data_arr_ptr);
  pthread_create(&thread_ptr_arr[MEMORY_THRD], NULL, device_thread, (void*) data_arr_ptr);
  
  void* status;
  pthread_join(thread_ptr_arr[HOST_THRD], &status);
  pthread_join(thread_ptr_arr[DEVICE_THRD], &status);
  
  //printf("data visible in main thread:\n");  
  
  // Cleaning up
  free(thread_ptr_arr);
  free(streams_arr);
  free(data_arr_ptr);
  
  cudaThreadExit();
  cudaDeviceSynchronize();
  
  printf("Main: program completed. Exiting...\n");
  return EXIT_SUCCESS;
}
Exemplo n.º 22
0
////////////////////////////////////////////////////////////////////////////////
//! Initialize GL
////////////////////////////////////////////////////////////////////////////////
CUTBoolean initGL(int *argc, char **argv)
{
    // Create GL context
    glutInit(argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE | GLUT_DEPTH);
    glutInitWindowSize(windowW, windowH);
    glutCreateWindow("CUDA FFT Ocean Simulation");

    vertShaderPath = cutFindFilePath("ocean.vert", argv[0]);
    fragShaderPath = cutFindFilePath("ocean.frag", argv[0]);
    if (vertShaderPath == 0 || fragShaderPath == 0) {
        fprintf(stderr, "Error finding shader files!\n");
        cudaThreadExit();
        exit(EXIT_FAILURE);
    }

    // initialize necessary OpenGL extensions
    glewInit();
    if (! glewIsSupported("GL_VERSION_2_0 " 
        )) {
        fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing.");
        fflush(stderr);
        return CUTFalse;
    }

    if (!glewIsSupported( "GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object" )) {
	    fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
	    fprintf(stderr, "This sample requires:\n");
	    fprintf(stderr, "  OpenGL version 1.5\n");
	    fprintf(stderr, "  GL_ARB_vertex_buffer_object\n");
	    fprintf(stderr, "  GL_ARB_pixel_buffer_object\n");
            cleanup();
	    exit(-1);
    }

    // default initialization
    glClearColor(0.0, 0.0, 0.0, 1.0);
    glEnable(GL_DEPTH_TEST);

    // load shader
    shaderProg = loadGLSLProgram(vertShaderPath, fragShaderPath);

    CUT_CHECK_ERROR_GL();
    return CUTTrue;
}
Exemplo n.º 23
0
int findCapableDevice(int argc, char **argv)
{
    int dev;
    int bestDev = -1;

    int deviceCount = 0;
    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount FAILED CUDA Driver and Runtime version may be mismatched.\n");
        fprintf(stderr, "\nFAILED\n");
        cudaThreadExit();
        cutilExit(argc, argv);
    }
    for (dev = 0; dev < deviceCount; ++dev) {
        cudaDeviceProp deviceProp;
        cudaGetDeviceProperties(&deviceProp, dev);

        if (dev == 0) {
            // This function call returns 9999 for both major & minor fields, if no CUDA capable devices are present
            if (deviceProp.major == 9999 && deviceProp.minor == 9999)
                fprintf(stderr,"There is no device supporting CUDA.\n");
            else if (deviceCount == 1)
                fprintf(stderr,"There is 1 device supporting CUDA\n");
            else
                fprintf(stderr,"There are %d devices supporting CUDA\n", deviceCount);
        }

        if( checkCUDAProfile( dev ) ) {
            fprintf(stderr,"\nFound capable device: %d\n", dev );
            if( bestDev == -1 ) { 
                bestDev = dev;
                fprintf(stderr, "Setting active device to %d\n", bestDev );
            }
        }
    }

    if( bestDev == -1 ) {
        fprintf(stderr, "\nNo configuration with available capabilities was found.  Test has been waived.\n");
        fprintf(stderr, "This sample requires:\n");
        fprintf(stderr, "\tGPU Device Compute   >= 2.0 is required\n");
        fprintf(stderr, "\tCUDA Runtime Version >= 3.1 is required\n");
        fprintf(stderr, "PASSED\n");
    }
    return bestDev;
}
Exemplo n.º 24
0
void gpuReset() {
  // releases previous contexts

  // opencl version
#ifdef USE_OPENCL
  if (run_opencl) clReleaseContext (mocl.context);
#endif

  // cuda version
#ifdef USE_CUDA
  if (run_cuda) {
#if CUDA_VERSION < 4000
    cudaThreadExit();
#else
    cudaDeviceReset();
#endif
  }
#endif
}
Exemplo n.º 25
0
reg_f3d_gpu<T>::~reg_f3d_gpu()
{
    if(this->currentReference_gpu!=NULL)
        cudaCommon_free(&this->currentReference_gpu);
    if(this->currentFloating_gpu!=NULL)
        cudaCommon_free(&this->currentFloating_gpu);
    if(this->currentMask_gpu!=NULL)
        cudaCommon_free<int>(&this->currentMask_gpu);
    if(this->warped_gpu!=NULL)
        cudaCommon_free<float>(&this->warped_gpu);
    if(this->controlPointGrid_gpu!=NULL)
        cudaCommon_free<float4>(&this->controlPointGrid_gpu);
    if(this->deformationFieldImage_gpu!=NULL)
        cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
    if(this->warpedGradientImage_gpu!=NULL)
        cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
    if(this->voxelBasedMeasureGradientImage_gpu!=NULL)
        cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
    if(this->nodeBasedGradientImage_gpu!=NULL)
        cudaCommon_free<float4>(&this->nodeBasedGradientImage_gpu);
    if(this->conjugateG_gpu!=NULL)
        cudaCommon_free<float4>(&this->conjugateG_gpu);
    if(this->conjugateH_gpu!=NULL)
        cudaCommon_free<float4>(&this->conjugateH_gpu);
    if(this->bestControlPointPosition_gpu!=NULL)
        cudaCommon_free<float4>(&this->bestControlPointPosition_gpu);
    if(this->logJointHistogram_gpu!=NULL)
        cudaCommon_free<float>(&this->logJointHistogram_gpu);

    if(this->currentReference2_gpu!=NULL)
        cudaCommon_free(&this->currentReference2_gpu);
    if(this->currentFloating2_gpu!=NULL)
        cudaCommon_free(&this->currentFloating2_gpu);
    if(this->warped2_gpu!=NULL)
        cudaCommon_free<float>(&this->warped2_gpu);
    if(this->warpedGradientImage2_gpu!=NULL)
        cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);

    NR_CUDA_SAFE_CALL(cudaThreadExit())
#ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n");
#endif
}
Exemplo n.º 26
0
void initGL()
{  
    glewInit();
    if (!glewIsSupported("GL_VERSION_2_0 "
                         "GL_VERSION_1_5 "
			             "GL_ARB_multitexture "
                         "GL_ARB_vertex_buffer_object")) 
    {
        fprintf(stderr, "Required OpenGL extensions missing.");
        cudaThreadExit();
        exit(-1);
    }

    glEnable(GL_DEPTH_TEST);
    glClearColor(0.0, 0.0, 0.0, 1.0);

    renderer = new ParticleRenderer;
    
    checkGLErrors("initGL");
}
void initGL(int argc, char **argv)
{
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
    glutInitWindowSize(width, height);
    glutCreateWindow(sSDKsample);
    glutDisplayFunc(display);
    glutKeyboardFunc(keyboard);
    glutReshapeFunc(reshape);
    glutIdleFunc(idle);

    printf("Press '+' and '-' to change filter width\n");
    printf("0, 1, 2 - change filter order\n");

    glewInit();
    if (!glewIsSupported("GL_VERSION_2_0 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object")) {
        fprintf(stderr, "Required OpenGL extensions missing.");
        cudaThreadExit();
        exit(-1);
    }
}
Exemplo n.º 28
0
void runAutoTest(int argc, char **argv)
{
	int devID = cutilChooseCudaDevice(argc, argv);

    // Initialize CUDA buffers for Marching Cubes 
    initMC(argc, argv);

    g_CheckRender = new CheckBackBuffer(maxVerts*sizeof(float)*4, 1, 1, false);
    g_CheckRender->setPixelFormat(GL_RGBA);
    g_CheckRender->setExecPath(argv[0]);
    g_CheckRender->EnableQAReadback(true);

    computeIsosurface();

    if (g_bQAReadback)
    {
        dumpFile<float4>(d_pos,          maxVerts, sizeof(float4), "marchCube_posArray.bin");
        dumpFile<float4>(d_normal,       maxVerts, sizeof(float4), "marchCube_normalArray.bin");
        dumpFile<uint>(d_compVoxelArray, numVoxels, sizeof(uint),  "marchCube_compVoxelArray.bin");

        if (!g_CheckRender->compareBin2BinFloat("marchCube_posArray.bin",      "posArray.bin",       maxVerts*sizeof(float)*4, EPSILON, THRESHOLD))
           g_TotalErrors++;

        if (!g_CheckRender->compareBin2BinFloat("marchCube_normalArray.bin",   "normalArray.bin",    maxVerts*sizeof(float)*4, EPSILON, THRESHOLD))
           g_TotalErrors++;

	//printf("sizeof(uint) = %d\n", sizeof(uint));

        if (!g_CheckRender->compareBin2BinFloat("marchCube_compVoxelArray.bin", "compVoxelArray.bin", numVoxels*sizeof(uint), EPSILON, THRESHOLD))
           g_TotalErrors++;

        printf("%s\n", (g_TotalErrors > 0) ? "FAILED" : "PASSED");
    }

    cleanup();
    cudaThreadExit();
}
void WindowDrawTask::execute(HardwareContext *pContext, DrawEnv *pEnv)
{
    Window *pWindow = pEnv->getWindow();

    OSG_ASSERT(pWindow != NULL);

    switch(_uiTypeTask)
    {
        case Init:
        {
#ifdef OSG_DUMP_WINTASK
            fprintf(stderr, "Init\n");
            fflush(stderr);
#endif
            if(_bCreatePrivateContext == true)
                pWindow->init();

            pWindow->doActivate   ();
            pWindow->doFrameInit  (_bReinitExtFunctions);
            pWindow->setupGL      ();
            pWindow->setOpenGLInit();

            if(_oInitFunc)
            {
                _oInitFunc();
            }

            if(pWindow->getKeepContextActive() == false)
                pWindow->doDeactivate();
        }
        break;

        case Activate:
        {
#ifdef OSG_DUMP_WINTASK
            fprintf(stderr, "Activate\n");
            fflush(stderr);
#endif
            if(pWindow->getKeepContextActive() == false)
                pWindow->doActivate();
        }
        break;

        case FrameInit:
        {
#ifdef OSG_DUMP_WINTASK
            fprintf(stderr, "FrameInit\n");
            fflush(stderr);
#endif

            if(pWindow->getKeepContextActive() == false)
                pWindow->doActivate();

            pWindow->doFrameInit();
        }
        break;

        case FrameExit:
        {
#ifdef OSG_DUMP_WINTASK
            fprintf(stderr, "FrameExit\n");
            fflush(stderr);
#endif
            pWindow->doFrameExit();

            if(pWindow->getKeepContextActive() == false)
                pWindow->doDeactivate();

            commitChangesAndClear();
        }
        break;

        case WaitAtBarrier:
        {
#ifdef OSG_DUMP_WINTASK
            fprintf(stderr, "WaitAtBarrier\n");
            fflush(stderr);
#endif
            OSG_ASSERT(_pBarrier != NULL);
            _pBarrier->enter();
        }
        break;

        case Swap:
        {           
#ifdef OSG_DUMP_WINTASK
            fprintf(stderr, "Swap\n");
            fflush(stderr);
#endif
            pWindow->doSwap();

#ifdef OSG_SWAP_BARRIER
            OSG_ASSERT(_pBarrier != NULL);

            _pBarrier->enter();
#endif
        }
        break;

        case EndThread:
        {
            if(pWindow->getKeepContextActive() == false)
                pWindow->doActivate();

            pWindow->doFrameExit();
            
#ifdef OSG_WITH_CUDA
            if(0x0000 != (pWindow->getInitState() & 
                          HardwareContext::CudaInitialized))
            {
                cudaThreadExit();
            }
#endif
            pWindow->doDeactivate();
            pWindow->_pContextThread->endRunning();
        }
        break;

        default:
            break;
    }
}
Exemplo n.º 30
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main( int argc, char** argv) 
{
    //start logs
    shrSetLogFileName ("volumeRender.txt");
    shrLog("%s Starting...\n\n", argv[0]); 

    if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") ||
		cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) 
	{
        g_bQAReadback = true;
        fpsLimit = frameCheckNumber;
    }

    if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) 
	{
        g_bQAGLVerify = true;
        fpsLimit = frameCheckNumber;
    }

    if (g_bQAReadback) {
	    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
        if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
            cutilDeviceInit(argc, argv);
        } else {
            cudaSetDevice( cutGetMaxGflopsDeviceId() );
        }

    } else {
        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL( &argc, argv );

	    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
        if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
            cutilGLDeviceInit(argc, argv);
        } else {
            cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
        }
/*
        int device;
        struct cudaDeviceProp prop;
        cudaGetDevice( &device );
        cudaGetDeviceProperties( &prop, device );
        if( !strncmp( "Tesla", prop.name, 5 ) ) {
            shrLog("This sample needs a card capable of OpenGL and display.\n");
            shrLog("Please choose a different device with the -device=x argument.\n");
            cutilExit(argc, argv);
        }
*/
	}

    // parse arguments
    char *filename;
    if (cutGetCmdLineArgumentstr( argc, (const char**) argv, "file", &filename)) {
        volumeFilename = filename;
    }
    int n;
    if (cutGetCmdLineArgumenti( argc, (const char**) argv, "size", &n)) {
        volumeSize.width = volumeSize.height = volumeSize.depth = n;
    }
    if (cutGetCmdLineArgumenti( argc, (const char**) argv, "xsize", &n)) {
        volumeSize.width = n;
    }
    if (cutGetCmdLineArgumenti( argc, (const char**) argv, "ysize", &n)) {
        volumeSize.height = n;
    }
    if (cutGetCmdLineArgumenti( argc, (const char**) argv, "zsize", &n)) {
         volumeSize.depth = n;
    }

    // load volume data
    char* path = shrFindFilePath(volumeFilename, argv[0]);
    if (path == 0) {
        shrLog("Error finding file '%s'\n", volumeFilename);
        exit(EXIT_FAILURE);
    }

    size_t size = volumeSize.width*volumeSize.height*volumeSize.depth*sizeof(VolumeType);
    void *h_volume = loadRawFile(path, size);
    
    initCuda(h_volume, volumeSize);
    free(h_volume);

    cutilCheckError( cutCreateTimer( &timer));

    shrLog("Press '=' and '-' to change density\n"
           "      ']' and '[' to change brightness\n"
           "      ';' and ''' to modify transfer function offset\n"
           "      '.' and ',' to modify transfer function scale\n\n");

    // calculate new grid size
    gridSize = dim3(iDivUp(width, blockSize.x), iDivUp(height, blockSize.y));

    if (g_bQAReadback) {
        g_CheckRender = new CheckBackBuffer(width, height, 4, false);
        g_CheckRender->setPixelFormat(GL_RGBA);
        g_CheckRender->setExecPath(argv[0]);
        g_CheckRender->EnableQAReadback(true);

        uint *d_output;
        cutilSafeCall(cudaMalloc((void**)&d_output, width*height*sizeof(uint)));
        cutilSafeCall(cudaMemset(d_output, 0, width*height*sizeof(uint)));

        float modelView[16] = 
        {
            1.0f, 0.0f, 0.0f, 0.0f,
            0.0f, 1.0f, 0.0f, 0.0f,
            0.0f, 0.0f, 1.0f, 0.0f,
            0.0f, 0.0f, 4.0f, 1.0f
        };

        invViewMatrix[0] = modelView[0]; invViewMatrix[1] = modelView[4]; invViewMatrix[2] = modelView[8]; invViewMatrix[3] = modelView[12];
        invViewMatrix[4] = modelView[1]; invViewMatrix[5] = modelView[5]; invViewMatrix[6] = modelView[9]; invViewMatrix[7] = modelView[13];
        invViewMatrix[8] = modelView[2]; invViewMatrix[9] = modelView[6]; invViewMatrix[10] = modelView[10]; invViewMatrix[11] = modelView[14];

        // call CUDA kernel, writing results to PBO
	    copyInvViewMatrix(invViewMatrix, sizeof(float4)*3);
        
        // Start timer 0 and process n loops on the GPU 
        int nIter = 10;
        for (int i = -1; i < nIter; i++)
        {
            if( i == 0 ) {
                cudaThreadSynchronize();
                cutStartTimer(timer); 
            }
            
            render_kernel(gridSize, blockSize, d_output, width, height, density, brightness, transferOffset, transferScale);
        }
        cudaThreadSynchronize();
        cutStopTimer(timer);
        // Get elapsed time and throughput, then log to sample and master logs
        double dAvgTime = cutGetTimerValue(timer)/(nIter * 1000.0);
        shrLogEx(LOGBOTH | MASTER, 0, "volumeRender, Throughput = %.4f MTexels/s, Time = %.5f s, Size = %u Texels, NumDevsUsed = %u, Workgroup = %u\n", 
               (1.0e-6 * width * height)/dAvgTime, dAvgTime, (width * height), 1, blockSize.x * blockSize.y); 
        

        cutilCheckMsg("Error: render_kernel() execution FAILED");
        cutilSafeCall( cudaThreadSynchronize() );

        cutilSafeCall( cudaMemcpy(g_CheckRender->imageData(), d_output, width*height*4, cudaMemcpyDeviceToHost) );
        g_CheckRender->savePPM(sOriginal[g_Index], true, NULL);

        if (!g_CheckRender->PPMvsPPM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, THRESHOLD)) {
            shrLog("\nFAILED\n\n");
        } else {
            shrLog("\nPASSED\n\n");
        }

        cudaFree(d_output);
    	freeCudaBuffers();

        if (g_CheckRender) {
            delete g_CheckRender; g_CheckRender = NULL;
        }

    } else {
        // This is the normal rendering path for VolumeRender
        glutDisplayFunc(display);
        glutKeyboardFunc(keyboard);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
        glutReshapeFunc(reshape);
        glutIdleFunc(idle);

        initPixelBuffer();

        if (g_bQAGLVerify) {
            g_CheckRender = new CheckBackBuffer(width, height, 4);
            g_CheckRender->setPixelFormat(GL_RGBA);
            g_CheckRender->setExecPath(argv[0]);
            g_CheckRender->EnableQAReadback(true);
        }
        atexit(cleanup);

        glutMainLoop();
    }

    cudaThreadExit();
    shrEXIT(argc, (const char**)argv);
}