예제 #1
0
 // Initialization code to find the best CUDA Device
 int findCudaDevice(int argc, const char **argv)
 {
     cudaDeviceProp deviceProp;
     int devID = 0;
     // If the command-line has a device number specified, use it
     if (checkCmdLineFlag(argc, argv, "device")) {
         devID = getCmdLineArgumentInt(argc, argv, "device=");
         if (devID < 0) {
             printf("Invalid command line parameters\n");
             exit(-1);
         } else {
             devID = gpuDeviceInit(devID);
             if (devID < 0) {
                printf("exiting...\n");
                shrQAFinishExit(argc, (const char **)argv, QA_FAILED);
                exit(-1);
             }
         }
     } else {
         // Otherwise pick the device with highest Gflops/s
         devID = gpuGetMaxGflopsDeviceId();
         checkCudaErrors( cudaSetDevice( devID ) );
         checkCudaErrors( cudaGetDeviceProperties(&deviceProp, devID) );
         printf("> Using CUDA device [%d]: %s\n", devID, deviceProp.name);
     }
     return devID;
 }
예제 #2
0
파일: CUDA.cpp 프로젝트: baritone/openMVS
// initialize the given CUDA device and add it to the array of initialized devices;
// if the given device is -1, the best available device is selected
CUresult initDevice(int deviceID)
{
	Device device;

	if (deviceID >= 0) {
		device.ID = gpuCheckDeviceId(deviceID);
	} else {
		// Otherwise pick the device with the highest Gflops/s
		device.ID = gpuGetMaxGflopsDeviceId();
	}
	if (device.ID < 0) {
		VERBOSE("error: no CUDA capable devices found");
		return CUDA_ERROR_NO_DEVICE;
	}
	checkCudaErrors(cudaSetDevice(device.ID));

	checkCudaErrors(cudaGetDeviceProperties(&device.prop, device.ID));
	if (device.prop.major < 2) {
		VERBOSE("CUDA error: compute capability 2.0 or greater required (available %d.%d for device[%d])", device.ID, device.prop.major, device.prop.minor);
		return CUDA_ERROR_INVALID_DEVICE;
	}
	devices.Insert(device);
	DEBUG("CUDA device[%d] initialized: %s (Compute Capability %d.%d)", device.ID, device.prop.name, device.prop.major, device.prop.minor);

	#if 1
	// dummy memory allocation to work around a bug inside CUDA
	// (this seems to initialize some more things)
	void* cpDummy;
	cudaMalloc(&cpDummy, sizeof(int));
	cudaFree(cpDummy);
	#endif
	return CUDA_SUCCESS;
}
예제 #3
0
파일: main.cpp 프로젝트: pgurniak/Project-0
void initCuda(){
  // Use device with highest Gflops/s
  // Had to update this to remove cutil version
  cudaGLSetGLDevice( gpuGetMaxGflopsDeviceId() );

  initPBO(&pbo);

  // Clean up on program exit
  atexit(cleanupCuda);

  runCuda();
}
예제 #4
0
파일: main.cpp 프로젝트: vivreddy/Project-0
void initCuda(){
  // Use device with highest Gflops/s
#if CUDA_VERSION >= 5000
	cudaGLSetGLDevice( gpuGetMaxGflopsDeviceId() );
#else
	cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
#endif

  initPBO(&pbo);

  // Clean up on program exit
  atexit(cleanupCuda);

  runCuda();
}
예제 #5
0
// Initialization code to find the best CUDA Device
inline int findCudaDevice(int argc, const char **argv)
{
    cudaDeviceProp deviceProp;
    int devID = 0;

    // If the command-line has a device number specified, use it
    if (checkCmdLineFlag(argc, argv, "device"))
    {
        devID = getCmdLineArgumentInt(argc, argv, "device=");

        if (devID < 0)
        {
            printf("Invalid command line parameter\n ");
            exit(EXIT_FAILURE);
        }
        else
        {
            devID = gpuDeviceInit(devID);

            if (devID < 0)
            {
                printf("exiting...\n");
                exit(EXIT_FAILURE);
            }
        }
    }
    else
    {
        // Otherwise pick the device with highest Gflops/s
        devID = gpuGetMaxGflopsDeviceId();
        checkCudaErrors(cudaSetDevice(devID));
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
        printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
    }

    return devID;
}
예제 #6
0
void initialize(int argc, char **argv)
{
    printf("[%s] (OpenGL Mode)\n", sSDKsample);

    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    initGL(&argc, argv);

    int devID;
    cudaDeviceProp deviceProps;

    if (checkCmdLineFlag(argc, (const char **)argv, "device"))
    {
        devID = gpuGLDeviceInit(argc, (const char **)argv);

        if (devID < 0)
        {
            printf("exiting...\n");
            exit(EXIT_SUCCESS);
        }
    }
    else
    {
        devID = gpuGetMaxGflopsDeviceId();
        cudaGLSetGLDevice(devID);
    }

    // get number of SMs on this GPU
    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));
    printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name, deviceProps.multiProcessorCount);

    // Create the timer (for fps measurement)
    sdkCreateTimer(&timer);

    // load image from disk
    loadImageData(argc, argv);

    printf("\n"
           "\tControls\n"
           "\t=/- : Zoom in/out\n"
           "\tb   : Run Benchmark g_FilterMode\n"
           "\tc   : Draw Bicubic Spline Curve\n"
           "\t[esc] - Quit\n\n"

           "\tPress number keys to change filtering g_FilterMode:\n\n"
           "\t1 : nearest filtering\n"
           "\t2 : bilinear filtering\n"
           "\t3 : bicubic filtering\n"
           "\t4 : fast bicubic filtering\n"
           "\t5 : Catmull-Rom filtering\n\n"
          );

    initGLBuffers();

#if USE_BUFFER_TEX
    fprog = compileASMShader(GL_FRAGMENT_PROGRAM_ARB, shaderCode);

    if (!fprog)
    {
        exit(EXIT_SUCCESS);
    }

#endif
}
int main(int argc, char **argv)
{
    char *dump_file = NULL;

#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    pArgc = &argc;
    pArgv = argv;

    printf("%s Starting...\n\n", sSDKsample);

    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
    {
        getCmdLineArgumentString(argc, (const char **)argv,
                                 "file", (char **) &dump_file);

        int kernel = 1;

        if (checkCmdLineFlag(argc, (const char **)argv, "kernel"))
        {
            kernel = getCmdLineArgumentInt(argc, (const char **)argv, "kernel");
        }

        runAutoTest(argc, argv, dump_file, kernel);
    }
    else
    {
        printf("[%s]\n", sSDKsample);

        // use command-line specified CUDA device, otherwise use device with highest Gflops/s
        if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        {
            printf("[%s]\n", argv[0]);
            printf("   Does not explicitly support -device=n in OpenGL mode\n");
            printf("   To use -device=n, the sample must be running w/o OpenGL\n\n");
            printf(" > %s -device=n -qatest\n", argv[0]);
            printf("exiting...\n");
            exit(EXIT_SUCCESS);
        }

        // First load the image, so we know what the size of the image (imageW and imageH)
        printf("Allocating host and CUDA memory and loading image file...\n");
        const char *image_path = sdkFindFilePath("portrait_noise.bmp", argv[0]);

        if (image_path == NULL)
        {
            printf("imageDenoisingGL was unable to find and load image file <portrait_noise.bmp>.\nExiting...\n");
            exit(EXIT_FAILURE);
        }

        LoadBMPFile(&h_Src, &imageW, &imageH, image_path);
        printf("Data init done.\n");

        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(&argc, argv);
        cudaGLSetGLDevice(gpuGetMaxGflopsDeviceId());

        checkCudaErrors(CUDA_MallocArray(&h_Src, imageW, imageH));

        initOpenGLBuffers();
    }

    printf("Starting GLUT main loop...\n");
    printf("Press [1] to view noisy image\n");
    printf("Press [2] to view image restored with knn filter\n");
    printf("Press [3] to view image restored with nlm filter\n");
    printf("Press [4] to view image restored with modified nlm filter\n");
    printf("Press [*] to view smooth/edgy areas [RED/BLUE] Ct's when a filter is active\n");
    printf("Press [f] to print frame rate\n");
    printf("Press [?] to print Noise and Lerp Ct's\n");
    printf("Press [q] to exit\n");

    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    glutMainLoop();
}
예제 #8
0
int main(int argc, char **argv)
{
    int devID;
    cudaDeviceProp deviceProps;
    printf("%s Starting...\n\n", sSDKname);
    printf("[%s] - [OpenGL/CUDA simulation] starting...\n", sSDKname);

    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    if (false == initGL(&argc, argv))
    {
        exit(EXIT_SUCCESS);
    }

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
#ifndef OPTIMUS
    devID = findCudaGLDevice(argc, (const char **)argv);
#else
    devID = gpuGetMaxGflopsDeviceId();
#endif

    // get number of SMs on this GPU
    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));
    printf("CUDA device [%s] has %d Multi-Processors\n",
           deviceProps.name, deviceProps.multiProcessorCount);

    // automated build testing harness
    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
    {
        getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
    }

    // Allocate and initialize host data
    GLint bsize;

    sdkCreateTimer(&timer);
    sdkResetTimer(&timer);

    hvfield = (cData *)malloc(sizeof(cData) * DS);
    memset(hvfield, 0, sizeof(cData) * DS);

    // Allocate and initialize device data
    cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(cData)*DIM, DIM);

    cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS,
               cudaMemcpyHostToDevice);
    // Temporary complex velocity field data
    cudaMalloc((void **)&vxfield, sizeof(cData) * PDS);
    cudaMalloc((void **)&vyfield, sizeof(cData) * PDS);

    setupTexture(DIM, DIM);
    bindTexture();

    // Create particle array in host memory
    particles = (cData *)malloc(sizeof(cData) * DS);
    memset(particles, 0, sizeof(cData) * DS);

#ifdef BROADCAST
	int step = 1;

	// Broadcasted visualization stepping.
	if (argc > 3)
		step = atoi(argv[3]);

	// Create additional space to store particle packets
	// for broadcasting.
	wstep = step; hstep = step;
	int npackets = sizeof(float) * (DIM / wstep) * (DIM / hstep) / UdpBroadcastServer::PacketSize;
	if (sizeof(float) * (DIM / wstep) * (DIM / hstep) % UdpBroadcastServer::PacketSize)
		npackets++;
	packets = (char*)malloc(npackets *
		(UdpBroadcastServer::PacketSize + sizeof(unsigned int)));
#endif

    initParticles(particles, DIM, DIM);

#if defined(OPTIMUS) || defined(BROADCAST)
    // Create particle array in device memory
    cudaMalloc((void **)&particles_gpu, sizeof(cData) * DS);
    cudaMemcpy(particles_gpu, particles, sizeof(cData) * DS, cudaMemcpyHostToDevice);
#endif

    // Create CUFFT transform plan configuration
    cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C);
    cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R);
    // TODO: update kernels to use the new unpadded memory layout for perf
    // rather than the old FFTW-compatible layout
    cufftSetCompatibilityMode(planr2c, CUFFT_COMPATIBILITY_FFTW_PADDING);
    cufftSetCompatibilityMode(planc2r, CUFFT_COMPATIBILITY_FFTW_PADDING);

    glGenBuffersARB(1, &vbo);
    glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo);
    glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(cData) * DS,
                    particles, GL_DYNAMIC_DRAW_ARB);

    glGetBufferParameterivARB(GL_ARRAY_BUFFER_ARB, GL_BUFFER_SIZE_ARB, &bsize);

    if (bsize != (sizeof(cData) * DS))
        goto EXTERR;

    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);

#ifndef OPTIMUS
    checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsNone));
    getLastCudaError("cudaGraphicsGLRegisterBuffer failed");
#endif

    if (ref_file)
    {
        autoTest(argv);
        cleanup();

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        printf("[fluidsGL] - Test Results: %d Failures\n", g_TotalErrors);
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);

    }
    else
    {
#ifdef BROADCAST
		const char *sv_addr = "127.0.0:9097";
		const char *bc_addr = "127.255.255.2:9097";

		// Server address
		if (argc > 2)
			sv_addr = argv[2];

		// Broadcast address
		if (argc > 1)
			bc_addr = argv[1];

		server.reset(new UdpBroadcastServer(sv_addr, bc_addr));

		// Listen to clients' feedbacks in a separate thread.
		{
			pthread_t tid;
			pthread_create(&tid, NULL, &feedback_listener, &step);
		}

		// Broadcast the particles state in a separate thread.
		{
			pthread_t tid;
			pthread_create(&tid, NULL, &broadcaster, &step);
		}
#endif
#if defined (__APPLE__) || defined(MACOSX)
        atexit(cleanup);
#else
        glutCloseFunc(cleanup);
#endif
        glutMainLoop();
    }

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();

    if (!ref_file)
    {
        exit(EXIT_SUCCESS);
    }

    return 0;

EXTERR:
    printf("Failed to initialize GL extensions.\n");

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    exit(EXIT_FAILURE);
}
int main(int argc, char **argv)
{
    pArgc = &argc;
    pArgv = argv;

#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    printf("%s Starting...\n\n", sSDKsample);

    if (checkCmdLineFlag(argc, (const char **)argv, "help"))
    {
        printf("\nUsage: SobelFilter <options>\n");
        printf("\t\t-mode=n (0=original, 1=texture, 2=smem + texture)\n");
        printf("\t\t-file=ref_orig.pgm (ref_tex.pgm, ref_shared.pgm)\n\n");
        exit(EXIT_SUCCESS);
    }

    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
    {
        g_bQAReadback = true;
        runAutoTest(argc, argv);
    }

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if (checkCmdLineFlag(argc, (const char **)argv, "device"))
    {
        printf("   This SDK does not explicitly support -device=n when running with OpenGL.\n");
        printf("   When specifying -device=n (n=0,1,2,....) the sample must not use OpenGL.\n");
        printf("   See details below to run without OpenGL:\n\n");
        printf(" > %s -device=n\n\n", argv[0]);
        printf("exiting...\n");
        exit(EXIT_SUCCESS);
    }

    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    initGL(&argc, argv);
    cudaGLSetGLDevice(gpuGetMaxGflopsDeviceId());

    sdkCreateTimer(&timer);
    sdkResetTimer(&timer);

    glutDisplayFunc(display);
    glutKeyboardFunc(keyboard);
    glutReshapeFunc(reshape);

    loadDefaultImage(argv[0]);

    // If code is not printing the USage, then we execute this path.
    printf("I: display Image (no filtering)\n");
    printf("T: display Sobel Edge Detection (Using Texture)\n");
    printf("S: display Sobel Edge Detection (Using SMEM+Texture)\n");
    printf("Use the '-' and '=' keys to change the brightness.\n");
    fflush(stdout);

#if defined (__APPLE__) || defined(MACOSX)
    atexit(cleanup);
#else
    glutCloseFunc(cleanup);
#endif

    glutTimerFunc(REFRESH_DELAY, timerEvent,0);
    glutMainLoop();
}
예제 #10
0
void initializeCuda()
{
	cudaError_t err = cudaGLSetGLDevice(gpuGetMaxGflopsDeviceId());
}