/////////////////////////////////////////////////////
// Main program
/////////////////////////////////////////////////////
int main(const int argc, const char **argv)
{
    unsigned long start = START_SIZE;
#ifdef NVS
    unsigned long end = END_NVS;
#else
    /* sizeof(unsigned long) = 8 bytes */
    unsigned long end = END_TITAN;
#endif

    int stateDim = 1;   /* default stateDim = 1 */
    if (checkCmdLineFlag(argc, argv, "dim"))
        stateDim = getCmdLineArgumentInt(argc, argv, "dim");

    char *typeInput = 0;
    getCmdLineArgumentString(argc, (const char**)argv, "type", &typeInput);
    if (0 != typeInput){
    if (!strcasecmp(typeInput, "float"))
        runTest<float>(start, end, stateDim);
    else if (!strcasecmp(typeInput, "int"))
        runTest<int>(start, end, stateDim);
    else if (!strcasecmp(typeInput, "double"))
        runTest<double>(start, end, stateDim);
    }
    else
        runTest<double>(start, end, stateDim);


    exit(EXIT_SUCCESS);
}
Beispiel #2
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    pArgc = &argc;
    pArgv = argv;

    // parse arguments
    char *filename;

    printf("Starting bicubicTexture\n");

    if (checkCmdLineFlag(argc, (const char **) argv, "help"))
    {
        printHelp();
        exit(EXIT_SUCCESS);
    }

    if (checkCmdLineFlag(argc, (const char **) argv, "mode"))
    {
        g_FilterMode = (eFilterMode)getCmdLineArgumentInt(argc, (const char **) argv, "mode");

        if (g_FilterMode < MODE_NEAREST && g_FilterMode > MODE_CATMULL_ROM)
        {
            printf("Invalid Mode setting %d\n", g_FilterMode);
            exit(EXIT_FAILURE);
        }
    }

    if (getCmdLineArgumentString(argc, (const char **) argv, "file", &filename))
    {
        dumpFilename = filename;
        fpsLimit     = frameCheckNumber;

        // Running CUDA kernel (bicubicFiltering) without visualization (QA Testing/Verification)
        runAutoTest(argc, argv, (const char *)dumpFilename, g_FilterMode);
    }
    else
    {
        // This runs the CUDA kernel (bicubicFiltering) + OpenGL visualization
        initialize(argc, argv);
        glutMainLoop();
        sdkDeleteTimer(&timer);

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        exit(EXIT_SUCCESS);
    }

    exit(EXIT_SUCCESS);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    char device_name[256];
    char *ref_file = NULL;

    pArgc = &argc;
    pArgv = argv;

    printf("[%s] - Starting...\n", SDK_name);

    if (!findCUDADevice())               // Search for CUDA GPU
    {
        printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
        exit(EXIT_SUCCESS);
    }

    if (!dynlinkLoadD3D10API())              // Search for D3D API (locate drivers, does not mean device is found)
    {
        printf("> D3D10 API libraries NOT found.. Exiting.\n");
        dynlinkUnloadD3D10API();
        exit(EXIT_SUCCESS);
    }

    if (!findDXDevice(device_name))          // Search for D3D Hardware Device
    {
        printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
        dynlinkUnloadD3D10API();
        exit(EXIT_SUCCESS);
    }

    // command line options
    if (argc > 1)
    {
        // automatied build testing harness
        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
            getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
    }

    // run D3D10/CUDA test
    runTest(argc, argv, ref_file);

    //
    // and exit
    //
    printf("%s running on %s exiting...\n", SDK_name, device_name);

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
}
Beispiel #4
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    pArgc = &argc;
    pArgv = argv;

    char *ref_file = NULL;

    printf("%s Starting...\n\n", sSDKsample);

    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
    {
        fpsLimit = frameCheckNumber;
        getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
    }

    if (ref_file)
    {
        chooseCudaDevice(argc, argv, false);

        loadVolumeData(argv[0]);

        runAutoTest(ref_file, argv[0]);
    }
    else
    {
        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(&argc, argv);

        // use command-line specified CUDA device, otherwise use device with highest Gflops/s
        chooseCudaDevice(argc, argv, true);

        // OpenGL buffers
        initGLBuffers();

        loadVolumeData(argv[0]);
    }

    printf("Press space to toggle animation\n"
           "Press '+' and '-' to change displayed slice\n");

#if defined (__APPLE__) || defined(MACOSX)
    atexit(cleanup);
#else
    glutCloseFunc(cleanup);
#endif

    glutMainLoop();

    exit(EXIT_SUCCESS);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    pArgc = &argc;
    pArgv = argv;

    // parse arguments
    char *filename;

#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    printf("Starting bicubicTexture\n");

    if (checkCmdLineFlag(argc, (const char **) argv, "help"))
    {
        printHelp();
        exit(EXIT_SUCCESS);
    }

    if (checkCmdLineFlag(argc, (const char **) argv, "mode"))
    {
        g_FilterMode = (eFilterMode)getCmdLineArgumentInt(argc, (const char **) argv, "mode");

        if (g_FilterMode < 0 || g_FilterMode >= NUM_MODES)
        {
            printf("Invalid Mode setting %d\n", g_FilterMode);
            exit(EXIT_FAILURE);
        }
    }

    if (getCmdLineArgumentString(argc, (const char **) argv, "file", &filename))
    {
        dumpFilename = filename;
        fpsLimit     = frameCheckNumber;

        // Running CUDA kernel (bicubicFiltering) without visualization (QA Testing/Verification)
        runAutoTest(argc, argv, (const char *)dumpFilename, g_FilterMode);
    }
    else
    {
        // This runs the CUDA kernel (bicubicFiltering) + OpenGL visualization
        initialize(argc, argv);
        glutMainLoop();
    }

    exit(EXIT_SUCCESS);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    char device_name[256];
    char *ref_file = NULL;

    pArgc = &argc;
    pArgv = argv;

    printf("[%s] - Starting...\n", SDK_name);

    if (!findCUDADevice())               // Search for CUDA GPU
    {
        printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
        exit(EXIT_SUCCESS);
    }

    if (!dynlinkLoadD3D10API())              // Search for D3D API (locate drivers, does not mean device is found)
    {
        printf("> D3D10 API libraries NOT found.. Exiting.\n");
        dynlinkUnloadD3D10API();
        exit(EXIT_SUCCESS);
    }

    if (!findDXDevice(device_name))          // Search for D3D Hardware Device
    {
        printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
        dynlinkUnloadD3D10API();
        exit(EXIT_SUCCESS);
    }

    // command line options
    if (argc > 1)
    {
        // automatied build testing harness
        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
            getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
    }

    // run D3D10/CUDA test
    runTest(argc, argv, ref_file);

    //
    // and exit
    //
    printf("%s running on %s exiting...\n", SDK_name, device_name);

    cudaDeviceReset();
    exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    char device_name[256];
    char *ref_file = NULL;

    pArgc = &argc;
    pArgv = argv;

    printf("> %s starting...\n", sSDKSample);

    if (!findCUDADevice())                   // Search for CUDA GPU
    {
        printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
        exit(EXIT_SUCCESS);
    }

    if (!dynlinkLoadD3D10API())                  // Search for D3D API (locate drivers, does not mean device is found)
    {
        printf("> D3D10 API libraries NOT found on.. Exiting.\n");
        dynlinkUnloadD3D10API();
        exit(EXIT_SUCCESS);
    }

    if (!findDXDevice(device_name))              // Search for D3D Hardware Device
    {
        printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
        dynlinkUnloadD3D10API();
        exit(EXIT_SUCCESS);
    }

    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", (char **)&ref_file);
        }
    }

    runTest(argc, argv, ref_file);

    //
    // and exit
    //
    printf("%s running on %s exiting...\n", sSDKSample, device_name);
    printf("%s sample finished returned: %s\n", sSDKSample, (g_bPassed ? "OK" : "ERROR!"));
    exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
}
Beispiel #8
0
int main(int argc, char **argv)
{
    int devID;
    cudaDeviceProp deviceProps;
    printf("%s Starting...\n\n", sSDKname);
    printf("[%s] - [OpenGL/CUDA simulation] starting...\n", sSDKname);

    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    if (false == initGL(&argc, argv))
    {
        exit(EXIT_SUCCESS);
    }

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
#ifndef OPTIMUS
    devID = findCudaGLDevice(argc, (const char **)argv);
#else
    devID = gpuGetMaxGflopsDeviceId();
#endif

    // get number of SMs on this GPU
    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));
    printf("CUDA device [%s] has %d Multi-Processors\n",
           deviceProps.name, deviceProps.multiProcessorCount);

    // automated build testing harness
    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
    {
        getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
    }

    // Allocate and initialize host data
    GLint bsize;

    sdkCreateTimer(&timer);
    sdkResetTimer(&timer);

    hvfield = (cData *)malloc(sizeof(cData) * DS);
    memset(hvfield, 0, sizeof(cData) * DS);

    // Allocate and initialize device data
    cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(cData)*DIM, DIM);

    cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS,
               cudaMemcpyHostToDevice);
    // Temporary complex velocity field data
    cudaMalloc((void **)&vxfield, sizeof(cData) * PDS);
    cudaMalloc((void **)&vyfield, sizeof(cData) * PDS);

    setupTexture(DIM, DIM);
    bindTexture();

    // Create particle array in host memory
    particles = (cData *)malloc(sizeof(cData) * DS);
    memset(particles, 0, sizeof(cData) * DS);

#ifdef BROADCAST
	int step = 1;

	// Broadcasted visualization stepping.
	if (argc > 3)
		step = atoi(argv[3]);

	// Create additional space to store particle packets
	// for broadcasting.
	wstep = step; hstep = step;
	int npackets = sizeof(float) * (DIM / wstep) * (DIM / hstep) / UdpBroadcastServer::PacketSize;
	if (sizeof(float) * (DIM / wstep) * (DIM / hstep) % UdpBroadcastServer::PacketSize)
		npackets++;
	packets = (char*)malloc(npackets *
		(UdpBroadcastServer::PacketSize + sizeof(unsigned int)));
#endif

    initParticles(particles, DIM, DIM);

#if defined(OPTIMUS) || defined(BROADCAST)
    // Create particle array in device memory
    cudaMalloc((void **)&particles_gpu, sizeof(cData) * DS);
    cudaMemcpy(particles_gpu, particles, sizeof(cData) * DS, cudaMemcpyHostToDevice);
#endif

    // Create CUFFT transform plan configuration
    cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C);
    cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R);
    // TODO: update kernels to use the new unpadded memory layout for perf
    // rather than the old FFTW-compatible layout
    cufftSetCompatibilityMode(planr2c, CUFFT_COMPATIBILITY_FFTW_PADDING);
    cufftSetCompatibilityMode(planc2r, CUFFT_COMPATIBILITY_FFTW_PADDING);

    glGenBuffersARB(1, &vbo);
    glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo);
    glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(cData) * DS,
                    particles, GL_DYNAMIC_DRAW_ARB);

    glGetBufferParameterivARB(GL_ARRAY_BUFFER_ARB, GL_BUFFER_SIZE_ARB, &bsize);

    if (bsize != (sizeof(cData) * DS))
        goto EXTERR;

    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);

#ifndef OPTIMUS
    checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsNone));
    getLastCudaError("cudaGraphicsGLRegisterBuffer failed");
#endif

    if (ref_file)
    {
        autoTest(argv);
        cleanup();

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        printf("[fluidsGL] - Test Results: %d Failures\n", g_TotalErrors);
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);

    }
    else
    {
#ifdef BROADCAST
		const char *sv_addr = "127.0.0:9097";
		const char *bc_addr = "127.255.255.2:9097";

		// Server address
		if (argc > 2)
			sv_addr = argv[2];

		// Broadcast address
		if (argc > 1)
			bc_addr = argv[1];

		server.reset(new UdpBroadcastServer(sv_addr, bc_addr));

		// Listen to clients' feedbacks in a separate thread.
		{
			pthread_t tid;
			pthread_create(&tid, NULL, &feedback_listener, &step);
		}

		// Broadcast the particles state in a separate thread.
		{
			pthread_t tid;
			pthread_create(&tid, NULL, &broadcaster, &step);
		}
#endif
#if defined (__APPLE__) || defined(MACOSX)
        atexit(cleanup);
#else
        glutCloseFunc(cleanup);
#endif
        glutMainLoop();
    }

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();

    if (!ref_file)
    {
        exit(EXIT_SUCCESS);
    }

    return 0;

EXTERR:
    printf("Failed to initialize GL extensions.\n");

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    exit(EXIT_FAILURE);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    // start logs
    int devID;
    char *ref_file = NULL;
    printf("%s Starting...\n\n", argv[0]);

#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "radius"))
        {
            filter_radius = getCmdLineArgumentInt(argc, (const char **) argv, "radius");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "passes"))
        {
            iterations = getCmdLineArgumentInt(argc, (const char **)argv, "passes");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", (char **)&ref_file);
        }
    }

    // load image to process
    loadImageData(argc, argv);

    if (checkCmdLineFlag(argc, (const char **)argv, "benchmark"))
    {
        // This is a separate mode of the sample, where we are benchmark the kernels for performance
        devID = findCudaDevice(argc, (const char **)argv);

        // Running CUDA kernels (bilateralfilter) in Benchmarking mode
        g_TotalErrors += runBenchmark(argc, argv);

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
    }
    else if (checkCmdLineFlag(argc, (const char **)argv, "radius") ||
             checkCmdLineFlag(argc, (const char **)argv, "passes"))
    {
        // This overrides the default mode.  Users can specify the radius used by the filter kernel
        devID = findCudaDevice(argc, (const char **)argv);
        g_TotalErrors += runSingleTest(ref_file, argv[0]);

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
    }
    else
    {
        // Default mode running with OpenGL visualization and in automatic mode
        // the output automatically changes animation
        printf("\n");

        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(argc, (char **)argv);
        int dev = findCapableDevice(argc, argv);

        if (dev != -1)
        {
            dev = gpuGLDeviceInit(argc, (const char **)argv);

            if (dev == -1)
            {
                exit(EXIT_FAILURE);
            }
        }
        else
        {
            // cudaDeviceReset causes the driver to clean up all state. While
            // not mandatory in normal operation, it is good practice.  It is also
            // needed to ensure correct operation when the application is being
            // profiled. Calling cudaDeviceReset causes all profile data to be
            // flushed before the application exits
            cudaDeviceReset();
            exit(EXIT_SUCCESS);
        }

        // Now we can create a CUDA context and bind it to the OpenGL context
        initCuda();
        initGLResources();

        // sets the callback function so it will call cleanup upon exit
#if defined (__APPLE__) || defined(MACOSX)
        atexit(cleanup);
#else
        glutCloseFunc(cleanup);
#endif

        printf("Running Standard Demonstration with GLUT loop...\n\n");
        printf("Press '+' and '-' to change filter width\n"
               "Press ']' and '[' to change number of iterations\n"
               "Press 'e' and 'E' to change Euclidean delta\n"
               "Press 'g' and 'G' to changle Gaussian delta\n"
               "Press 'a' or  'A' to change Animation mode ON/OFF\n\n");

        // Main OpenGL loop that will run visualization for every vsync
        glutMainLoop();
    }
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    printf("%s Starting...\n\n", argv[0]);

    char *typeInput = 0;
    getCmdLineArgumentString(argc, (const char **) argv, "type", &typeInput);

    ReduceType datatype = REDUCE_INT;

    if (0 != typeInput)
    {
        if (!strcasecmp(typeInput, "float"))
        {
            datatype = REDUCE_FLOAT;
        }
        else if (!strcasecmp(typeInput, "double"))
        {
            datatype = REDUCE_DOUBLE;
        }
        else if (strcasecmp(typeInput, "int"))
        {
            printf("Type %s is not recognized. Using default type int.\n\n", typeInput);
        }
    }

    cudaDeviceProp deviceProp;
    deviceProp.major = 1;
    deviceProp.minor = 0;
    int minimumComputeVersion = 10;

    if (datatype == REDUCE_DOUBLE)
    {
        deviceProp.minor = 3;
        minimumComputeVersion = 13;
    }

    int dev;

    dev = findCudaDevice(argc, (const char **)argv);

    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));

    if ((deviceProp.major * 10 + deviceProp.minor) >= minimumComputeVersion)
    {
        printf("Using Device %d: %s\n\n", dev, deviceProp.name);
        checkCudaErrors(cudaSetDevice(dev));
    }
    else
    {
        printf("Error: the selected device does not support the minimum compute capability of %d.%d.\n\n",
               minimumComputeVersion / 10, minimumComputeVersion % 10);

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }

    printf("Reducing array of type %s\n\n", getReduceTypeString(datatype));

    bool bResult = false;

    switch (datatype)
    {
        default:
        case REDUCE_INT:
            bResult = runTest<int>(argc, argv, datatype);
            break;

        case REDUCE_FLOAT:
            bResult = runTest<float>(argc, argv, datatype);
            break;

        case REDUCE_DOUBLE:
            bResult = runTest<double>(argc, argv, datatype);
            break;
    }

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    printf(bResult ? "Test passed\n" : "Test failed!\n");
}
int
main(int argc, char **argv)
{
    pArgc = &argc;
    pArgv = argv;

    char *ref_file = NULL;

    printf("%s Starting...\n\n", sSDKsample);

    //start logs

    if (checkCmdLineFlag(argc, (const char **)argv, "help"))
    {
        printHelp();
        exit(EXIT_SUCCESS);
    }

    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
    {
        fpsLimit = frameCheckNumber;
        getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
    }

    if (ref_file)
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        {
            int device = findCudaDevice(argc, (const char **)argv);

            if (device < 0)
            {
                printf("No CUDA Capable devices found, exiting...\n");
                exit(EXIT_SUCCESS);
            }

            checkDeviceMeetComputeSpec(argc, argv);
        }
        else
        {
            int dev = findCapableDevice(argc, argv);

            if (dev != -1)
            {
                cudaSetDevice(dev);
            }
            else
            {
                cudaDeviceReset();
                exit(EXIT_SUCCESS);
            }
        }
    }
    else
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        {
            printf("   This SDK does not explicitly support -device=n when running with OpenGL.\n");
            printf("   When specifying -device=n (n=0,1,2,....) the sample must not use OpenGL.\n");
            printf("   See details below to run without OpenGL:\n\n");
            printf(" > %s -device=n -file=output.bin\n\n", argv[0]);
            printf("exiting...\n");
            exit(EXIT_SUCCESS);
        }

        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(&argc, argv);

        int dev = findCapableDevice(argc, argv);

        if (dev != -1)
        {
            cudaGLSetGLDevice(dev);
        }
        else
        {
            exit(EXIT_SUCCESS);
        }
    }

    // load volume data
    initData(argc, argv);

    printf(
        "Press \n"
        "  'SPACE'     to toggle animation\n"
        "  'p'         to toggle pre-integrated transfer function\n"
        "  '+' and '-' to change density (0.01 increments)\n"
        "  ']' and '[' to change brightness\n"
        "  ';' and ''' to modify transfer function offset\n"
        "  '.' and ',' to modify transfer function scale\n\n");

    if (ref_file)
    {
        runSingleTest(ref_file, argv[0]);
    }
    else
    {
        // This is the normal rendering path for VolumeRender
        glutDisplayFunc(display);
        glutKeyboardFunc(keyboard);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
        glutReshapeFunc(reshape);
        glutIdleFunc(idle);

        initPixelBuffer();

        atexit(cleanup);

        glutMainLoop();
    }

    cudaDeviceReset();
}
int main(int argc, char **argv)
{
    // Start logs
    printf("%s Starting...\n\n", argv[0]);

    unsigned int useDoublePrecision;

    char *precisionChoice;
    getCmdLineArgumentString(argc, (const char **)argv, "type", &precisionChoice);

    if (precisionChoice == NULL)
    {
        useDoublePrecision = 0;
    }
    else
    {
        if (!STRCASECMP(precisionChoice, "double"))
        {
            useDoublePrecision = 1;
        }
        else
        {
            useDoublePrecision = 0;
        }
    }

    unsigned int tableCPU[QRNG_DIMENSIONS][QRNG_RESOLUTION];

    float *h_OutputGPU, *d_Output;

    int dim, pos;
    double delta, ref, sumDelta, sumRef, L1norm, gpuTime;

    StopWatchInterface *hTimer = NULL;

    if (sizeof(INT64) != 8)
    {
        printf("sizeof(INT64) != 8\n");
        return 0;
    }

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    int dev = findCudaDevice(argc, (const char **)argv);

    sdkCreateTimer(&hTimer);

    int deviceIndex;
    checkCudaErrors(cudaGetDevice(&deviceIndex));
    cudaDeviceProp deviceProp;
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, deviceIndex));
    int version = deviceProp.major * 10 + deviceProp.minor;

    if (useDoublePrecision && version < 13)
    {
        printf("Double precision not supported.\n");

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        return 0;
    }

    printf("Allocating GPU memory...\n");
    checkCudaErrors(cudaMalloc((void **)&d_Output, QRNG_DIMENSIONS * N * sizeof(float)));

    printf("Allocating CPU memory...\n");
    h_OutputGPU = (float *)malloc(QRNG_DIMENSIONS * N * sizeof(float));

    printf("Initializing QRNG tables...\n\n");
    initQuasirandomGenerator(tableCPU);

    if (useDoublePrecision)
    {
        initTable_SM13(tableCPU);
    }
    else
    {
        initTable_SM10(tableCPU);
    }

    printf("Testing QRNG...\n\n");
    checkCudaErrors(cudaMemset(d_Output, 0, QRNG_DIMENSIONS * N * sizeof(float)));
    int numIterations = 20;

    for (int i = -1; i < numIterations; i++)
    {
        if (i == 0)
        {
            checkCudaErrors(cudaDeviceSynchronize());
            sdkResetTimer(&hTimer);
            sdkStartTimer(&hTimer);
        }

        if (useDoublePrecision)
        {
            quasirandomGenerator_SM13(d_Output, 0, N);
        }
        else
        {
            quasirandomGenerator_SM10(d_Output, 0, N);
        }
    }

    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer)/(double)numIterations*1e-3;
    printf("quasirandomGenerator, Throughput = %.4f GNumbers/s, Time = %.5f s, Size = %u Numbers, NumDevsUsed = %u, Workgroup = %u\n",
           (double)QRNG_DIMENSIONS * (double)N * 1.0E-9 / gpuTime, gpuTime, QRNG_DIMENSIONS*N, 1, 128*QRNG_DIMENSIONS);

    printf("\nReading GPU results...\n");
    checkCudaErrors(cudaMemcpy(h_OutputGPU, d_Output, QRNG_DIMENSIONS * N * sizeof(float), cudaMemcpyDeviceToHost));

    printf("Comparing to the CPU results...\n\n");
    sumDelta = 0;
    sumRef = 0;

    for (dim = 0; dim < QRNG_DIMENSIONS; dim++)
        for (pos = 0; pos < N; pos++)
        {
            ref       = getQuasirandomValue63(pos, dim);
            delta     = (double)h_OutputGPU[dim * N + pos] - ref;
            sumDelta += fabs(delta);
            sumRef   += fabs(ref);
        }

    printf("L1 norm: %E\n", sumDelta / sumRef);

    printf("\nTesting inverseCNDgpu()...\n\n");
    checkCudaErrors(cudaMemset(d_Output, 0, QRNG_DIMENSIONS * N * sizeof(float)));

    for (int i = -1; i < numIterations; i++)
    {
        if (i == 0)
        {
            checkCudaErrors(cudaDeviceSynchronize());
            sdkResetTimer(&hTimer);
            sdkStartTimer(&hTimer);
        }

        if (useDoublePrecision)
        {
            inverseCND_SM13(d_Output, NULL, QRNG_DIMENSIONS * N);
        }
        else
        {
            inverseCND_SM10(d_Output, NULL, QRNG_DIMENSIONS * N);
        }
    }

    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer)/(double)numIterations*1e-3;
    printf("quasirandomGenerator-inverse, Throughput = %.4f GNumbers/s, Time = %.5f s, Size = %u Numbers, NumDevsUsed = %u, Workgroup = %u\n",
           (double)QRNG_DIMENSIONS * (double)N * 1E-9 / gpuTime, gpuTime, QRNG_DIMENSIONS*N, 1, 128);

    printf("Reading GPU results...\n");
    checkCudaErrors(cudaMemcpy(h_OutputGPU, d_Output, QRNG_DIMENSIONS * N * sizeof(float), cudaMemcpyDeviceToHost));

    printf("\nComparing to the CPU results...\n");
    sumDelta = 0;
    sumRef = 0;
    unsigned int distance = ((unsigned int)-1) / (QRNG_DIMENSIONS * N + 1);

    for (pos = 0; pos < QRNG_DIMENSIONS * N; pos++)
    {
        unsigned int d = (pos + 1) * distance;
        ref       = MoroInvCNDcpu(d);
        delta     = (double)h_OutputGPU[pos] - ref;
        sumDelta += fabs(delta);
        sumRef   += fabs(ref);
    }

    printf("L1 norm: %E\n\n", L1norm = sumDelta / sumRef);

    printf("Shutting down...\n");
    sdkDeleteTimer(&hTimer);
    free(h_OutputGPU);
    checkCudaErrors(cudaFree(d_Output));

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    exit(L1norm < 1e-6 ? EXIT_SUCCESS : EXIT_FAILURE);
}
Beispiel #13
0
int main(int argc, char **argv)
{
    char *multiMethodChoice = NULL;
    char *scalingChoice = NULL;
    bool use_threads = true;
    bool bqatest = false;
    bool strongScaling = false;

    pArgc = &argc;
    pArgv = argv;

    printf("%s Starting...\n\n", argv[0]);

    if (checkCmdLineFlag(argc, (const char **)argv, "qatest"))
    {
        bqatest = true;
    }

    getCmdLineArgumentString(argc, (const char **)argv, "method", &multiMethodChoice);
    getCmdLineArgumentString(argc, (const char **)argv, "scaling", &scalingChoice);

    if (checkCmdLineFlag(argc, (const char **)argv, "h") ||
        checkCmdLineFlag(argc, (const char **)argv, "help"))
    {
        usage();
        exit(EXIT_SUCCESS);
    }

    if (multiMethodChoice == NULL)
    {
        use_threads = true;
    }
    else
    {
        if (!strcasecmp(multiMethodChoice, "threaded"))
        {
            use_threads = true;
        }
        else
        {
            use_threads = false;
        }
    }

    if (use_threads == false)
    {
        printf("Using single CPU thread for multiple GPUs\n");
    }

    if (scalingChoice == NULL)
    {
        strongScaling = false;
    }
    else
    {
        if (!strcasecmp(scalingChoice, "strong"))
        {
            strongScaling = true;
        }
        else
        {
            strongScaling = false;
        }
    }


    //GPU number present in the system
    int GPU_N;
    checkCudaErrors(cudaGetDeviceCount(&GPU_N));
    int nOptions = 256;

    nOptions = adjustProblemSize(GPU_N, nOptions);

    // select problem size
    int scale = (strongScaling) ? 1 : GPU_N;
    int OPT_N = nOptions * scale;
    int PATH_N = 262144;
    const unsigned long long SEED = 777;

    // initialize the timers
    hTimer = new StopWatchInterface*[GPU_N];

    for (int i=0; i<GPU_N; i++)
    {
        sdkCreateTimer(&hTimer[i]);
        sdkResetTimer(&hTimer[i]);
    }

    //Input data array
    TOptionData  *optionData   = new TOptionData[OPT_N];
    //Final GPU MC results
    TOptionValue *callValueGPU = new TOptionValue[OPT_N];
    //"Theoretical" call values by Black-Scholes formula
    float *callValueBS = new float[OPT_N];
    //Solver config
    TOptionPlan *optionSolver = new TOptionPlan[GPU_N];
    //OS thread ID
    CUTThread *threadID = new CUTThread[GPU_N];

    int gpuBase, gpuIndex;
    int i;

    float time;

    double delta, ref, sumDelta, sumRef, sumReserve;

    printf("MonteCarloMultiGPU\n");
    printf("==================\n");
    printf("Parallelization method  = %s\n", use_threads ? "threaded" : "streamed");
    printf("Problem scaling         = %s\n", strongScaling? "strong" : "weak");
    printf("Number of GPUs          = %d\n", GPU_N);
    printf("Total number of options = %d\n", OPT_N);
    printf("Number of paths         = %d\n", PATH_N);


    printf("main(): generating input data...\n");
    srand(123);

    for (i=0; i < OPT_N; i++)
    {
        optionData[i].S = randFloat(5.0f, 50.0f);
        optionData[i].X = randFloat(10.0f, 25.0f);
        optionData[i].T = randFloat(1.0f, 5.0f);
        optionData[i].R = 0.06f;
        optionData[i].V = 0.10f;
        callValueGPU[i].Expected   = -1.0f;
        callValueGPU[i].Confidence = -1.0f;
    }

    printf("main(): starting %i host threads...\n", GPU_N);


    //Get option count for each GPU
    for (i = 0; i < GPU_N; i++)
    {
        optionSolver[i].optionCount = OPT_N / GPU_N;
    }

    //Take into account cases with "odd" option counts
    for (i = 0; i < (OPT_N % GPU_N); i++)
    {
        optionSolver[i].optionCount++;
    }

    //Assign GPU option ranges
    gpuBase = 0;

    for (i = 0; i < GPU_N; i++)
    {
        optionSolver[i].device     = i;
        optionSolver[i].optionData = optionData   + gpuBase;
        optionSolver[i].callValue  = callValueGPU + gpuBase;
        // all devices use the same global seed, but start
        // the sequence at a different offset
        optionSolver[i].seed       = SEED;
        optionSolver[i].pathN      = PATH_N;
        gpuBase += optionSolver[i].optionCount;
    }


    if (use_threads || bqatest)
    {
        //Start CPU thread for each GPU
        for (gpuIndex = 0; gpuIndex < GPU_N; gpuIndex++)
        {
            threadID[gpuIndex] = cutStartThread((CUT_THREADROUTINE)solverThread, &optionSolver[gpuIndex]);
        }

        printf("main(): waiting for GPU results...\n");
        cutWaitForThreads(threadID, GPU_N);

        printf("main(): GPU statistics, threaded\n");

        for (i = 0; i < GPU_N; i++)
        {
            cudaDeviceProp deviceProp;
            checkCudaErrors(cudaGetDeviceProperties(&deviceProp, optionSolver[i].device));
            printf("GPU Device #%i: %s\n", optionSolver[i].device, deviceProp.name);
            printf("Options         : %i\n", optionSolver[i].optionCount);
            printf("Simulation paths: %i\n", optionSolver[i].pathN);
            time = sdkGetTimerValue(&hTimer[i]);
            printf("Total time (ms.): %f\n", time);
            printf("Options per sec.: %f\n", OPT_N / (time * 0.001));
        }

        printf("main(): comparing Monte Carlo and Black-Scholes results...\n");
        sumDelta   = 0;
        sumRef     = 0;
        sumReserve = 0;

        for (i = 0; i < OPT_N; i++)
        {
            BlackScholesCall(callValueBS[i], optionData[i]);
            delta     = fabs(callValueBS[i] - callValueGPU[i].Expected);
            ref       = callValueBS[i];
            sumDelta += delta;
            sumRef   += fabs(ref);

            if (delta > 1e-6)
            {
                sumReserve += callValueGPU[i].Confidence / delta;
            }

#ifdef PRINT_RESULTS
            printf("BS: %f; delta: %E\n", callValueBS[i], delta);
#endif

        }

        sumReserve /= OPT_N;
    }

    if (!use_threads || bqatest)
    {
        multiSolver(optionSolver, GPU_N);

        printf("main(): GPU statistics, streamed\n");

        for (i = 0; i < GPU_N; i++)
        {
            cudaDeviceProp deviceProp;
            checkCudaErrors(cudaGetDeviceProperties(&deviceProp, optionSolver[i].device));
            printf("GPU Device #%i: %s\n", optionSolver[i].device, deviceProp.name);
            printf("Options         : %i\n", optionSolver[i].optionCount);
            printf("Simulation paths: %i\n", optionSolver[i].pathN);
        }

        time = sdkGetTimerValue(&hTimer[0]);
        printf("\nTotal time (ms.): %f\n", time);
        printf("\tNote: This is elapsed time for all to compute.\n");
        printf("Options per sec.: %f\n", OPT_N / (time * 0.001));

        printf("main(): comparing Monte Carlo and Black-Scholes results...\n");
        sumDelta   = 0;
        sumRef     = 0;
        sumReserve = 0;

        for (i = 0; i < OPT_N; i++)
        {
            BlackScholesCall(callValueBS[i], optionData[i]);
            delta     = fabs(callValueBS[i] - callValueGPU[i].Expected);
            ref       = callValueBS[i];
            sumDelta += delta;
            sumRef   += fabs(ref);

            if (delta > 1e-6)
            {
                sumReserve += callValueGPU[i].Confidence / delta;
            }

#ifdef PRINT_RESULTS
            printf("BS: %f; delta: %E\n", callValueBS[i], delta);
#endif
        }

        sumReserve /= OPT_N;
    }

#ifdef DO_CPU
    printf("main(): running CPU MonteCarlo...\n");
    TOptionValue callValueCPU;
    sumDelta = 0;
    sumRef   = 0;

    for (i = 0; i < OPT_N; i++)
    {
        MonteCarloCPU(
            callValueCPU,
            optionData[i],
            NULL,
            PATH_N
        );
        delta     = fabs(callValueCPU.Expected - callValueGPU[i].Expected);
        ref       = callValueCPU.Expected;
        sumDelta += delta;
        sumRef   += fabs(ref);
        printf("Exp : %f | %f\t", callValueCPU.Expected,   callValueGPU[i].Expected);
        printf("Conf: %f | %f\n", callValueCPU.Confidence, callValueGPU[i].Confidence);
    }

    printf("L1 norm: %E\n", sumDelta / sumRef);
#endif

    printf("Shutting down...\n");

    for (int i=0; i<GPU_N; i++)
    {
        sdkStartTimer(&hTimer[i]);
        checkCudaErrors(cudaSetDevice(i));

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
    }

    delete[] optionSolver;
    delete[] callValueBS;
    delete[] callValueGPU;
    delete[] optionData;
    delete[] threadID;
    delete[] hTimer;

    printf("Test Summary...\n");
    printf("L1 norm        : %E\n", sumDelta / sumRef);
    printf("Average reserve: %f\n", sumReserve);
    printf(sumReserve > 1.0f ? "Test passed\n" : "Test failed!\n");
    exit(sumReserve > 1.0f ? EXIT_SUCCESS : EXIT_FAILURE);
}
Beispiel #14
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    int devID = 0;
    char *ref_file = NULL;

    pArgc = &argc;
    pArgv = argv;

    // start logs
    printf("%s Starting...\n\n", argv[0]);

    if (checkCmdLineFlag(argc, (const char **)argv, "help"))
    {
        printHelp();
        exit(EXIT_SUCCESS);
    }

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "threads"))
        {
            nthreads      = getCmdLineArgumentInt(argc, (const char **) argv, "threads");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "radius"))
        {
            filter_radius = getCmdLineArgumentInt(argc, (const char **) argv, "radius");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "passes"))
        {
            iterations = getCmdLineArgumentInt(argc, (const char **) argv, "passes");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", (char **)&ref_file);
        }
    }

    // load image to process
    loadImageData(argc, argv);

    if (checkCmdLineFlag(argc, (const char **)argv, "benchmark"))
    {
        // This is a separate mode of the sample, where we are benchmark the kernels for performance
        devID = findCudaDevice(argc, (const char **)argv);

        // Running CUDA kernels (boxfilter) in Benchmarking mode
        g_TotalErrors += runBenchmark();
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
    }
    else if (checkCmdLineFlag(argc, (const char **)argv, "radius") ||
             checkCmdLineFlag(argc, (const char **)argv, "passes"))
    {
        // This overrides the default mode.  Users can specify the radius used by the filter kernel
        devID = findCudaDevice(argc, (const char **)argv);
        g_TotalErrors += runSingleTest(ref_file, argv[0]);
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
    }
    else
    {
        // Default mode running with OpenGL visualization and in automatic mode
        // the output automatically changes animation
        printf("\n");

        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(&argc, argv);
        int dev = findCapableDevice(argc, argv);

        if (dev != -1)
        {
            cudaGLSetGLDevice(dev);
        }
        else
        {
            cudaDeviceReset();
            exit(EXIT_SUCCESS);
        }

        // Now we can create a CUDA context and bind it to the OpenGL context
        initCuda();
        initGLResources();

        // sets the callback function so it will call cleanup upon exit
        atexit(cleanup);

        printf("Running Standard Demonstration with GLUT loop...\n\n");
        printf("Press '+' and '-' to change filter width\n"
               "Press ']' and '[' to change number of iterations\n"
               "Press 'a' or  'A' to change animation ON/OFF\n\n");

        // Main OpenGL loop that will run visualization for every vsync
        glutMainLoop();

        cudaDeviceReset();
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
    }
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    printf("%s Starting...\n\n", sSDKsample);

    printf("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n");

    numParticles = NUM_PARTICLES;
    uint gridDim = GRID_SIZE;
    numIterations = 0;

    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **) argv, "n"))
        {
            numParticles = getCmdLineArgumentInt(argc, (const char **)argv, "n");
        }

        if (checkCmdLineFlag(argc, (const char **) argv, "grid"))
        {
            gridDim = getCmdLineArgumentInt(argc, (const char **) argv, "grid");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", &g_refFile);
            fpsLimit = frameCheckNumber;
            numIterations = 1;
        }
    }

    gridSize.x = gridSize.y = gridSize.z = gridDim;
    printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);
    printf("particles: %d\n", numParticles);

    bool benchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark") != 0;

    if (checkCmdLineFlag(argc, (const char **) argv, "i"))
    {
        numIterations = getCmdLineArgumentInt(argc, (const char **) argv, "i");
    }

    if (g_refFile)
    {
        cudaInit(argc, argv);
    }
    else
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        {
            printf("[%s]\n", argv[0]);
            printf("   Does not explicitly support -device=n in OpenGL mode\n");
            printf("   To use -device=n, the sample must be running w/o OpenGL\n\n");
            printf(" > %s -device=n -file=<*.bin>\n", argv[0]);
            printf("exiting...\n");
            exit(EXIT_SUCCESS);
        }

        initGL(&argc, argv);
        cudaGLInit(argc, argv);
    }

    initParticleSystem(numParticles, gridSize, g_refFile==NULL);
    initParams();

    if (!g_refFile)
    {
        initMenus();
    }

    if (benchmark || g_refFile)
    {
        if (numIterations <= 0)
        {
            numIterations = 300;
        }

        runBenchmark(numIterations, argv[0]);
    }
    else
    {
        glutDisplayFunc(display);
        glutReshapeFunc(reshape);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
        glutKeyboardFunc(key);
        glutSpecialFunc(special);
        glutIdleFunc(idle);

        glutCloseFunc(cleanup);

        glutMainLoop();
    }

    if (psystem)
    {
        delete psystem;
    }

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    exit(g_TotalErrors > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
}
void initData(int argc, char **argv)
{
    // parse arguments
    char *filename;

    if (getCmdLineArgumentString(argc, (const char **) argv, "file", &filename))
    {
        volumeFilename = filename;
    }

    int n;


    if (checkCmdLineFlag(argc, (const char **) argv, "size"))
    {
        n = getCmdLineArgumentInt(argc, (const char **) argv, "size");
        volumeSize.width = volumeSize.height = volumeSize.depth = n;
    }

    if (checkCmdLineFlag(argc, (const char **) argv, "xsize"))
    {
        n = getCmdLineArgumentInt(argc, (const char **) argv, "xsize");
        volumeSize.width = n;
    }

    if (checkCmdLineFlag(argc, (const char **) argv, "ysize"))
    {
        n = getCmdLineArgumentInt(argc, (const char **) argv, "ysize");
        volumeSize.height = n;
    }

    if (checkCmdLineFlag(argc, (const char **) argv, "zsize"))
    {
        n = getCmdLineArgumentInt(argc, (const char **) argv, "zsize");
        volumeSize.depth = n;
    }

    char *path = sdkFindFilePath(volumeFilename, argv[0]);

    if (path == 0)
    {
        printf("Error finding file '%s'\n", volumeFilename);
        exit(EXIT_FAILURE);
    }

    size_t size = volumeSize.width*volumeSize.height*volumeSize.depth*sizeof(VolumeType);
    void *h_volume = loadRawFile(path, size);

    FilterKernel_init();
    Volume_init(&volumeOriginal,volumeSize, h_volume, 0);
    free(h_volume);
    Volume_init(&volumeFilter0, volumeSize, NULL, 1);
    Volume_init(&volumeFilter1, volumeSize, NULL, 1);
    VolumeRender_init();
    VolumeRender_setPreIntegrated(preIntegrated);
    VolumeRender_setVolume(&volumeOriginal);

    sdkCreateTimer(&timer);
    sdkCreateTimer(&animationTimer);
    sdkStartTimer(&animationTimer);

    // calculate new grid size
    gridSize = dim3(iDivUp(width, blockSize.x), iDivUp(height, blockSize.y));
}
int
main(int argc, char *argv[])
{
    printf("%s Starting...\n\n", argv[0]);

    try
    {
        std::string sFilename;
        char *filePath;

        // set your own FreeImage error handler
        FreeImage_SetOutputMessage(FreeImageErrorHandler);

        cudaDeviceInit(argc, (const char **)argv);

        // Min spec is SM 1.0 devices
        if (printfNPPinfo(argc, argv, 1, 0) == false) 
        {
            cudaDeviceReset();
            exit(EXIT_SUCCESS);
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "input"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
        }
        else
        {
            filePath = sdkFindFilePath("Lena.pgm", argv[0]);
        }

        if (filePath)
        {
            sFilename = filePath;
        }
        else
        {
            sFilename = "Lena.pgm";
        }

        // if we specify the filename at the command line, then we only test sFilename
        // otherwise we will check both sFilename[0,1]
        int file_errors = 0;
        std::ifstream infile(sFilename.data(), std::ifstream::in);

        if (infile.good())
        {
            std::cout << "freeImageInteropNPP opened: <" << sFilename.data() << "> successfully!" << std::endl;
            file_errors = 0;
            infile.close();
        }
        else
        {
            std::cout << "freeImageInteropNPP unable to open: <" << sFilename.data() << ">" << std::endl;
            file_errors++;
            infile.close();
        }

        if (file_errors > 0)
        {
            exit(EXIT_FAILURE);
        }

        std::string sResultFilename = sFilename;

        std::string::size_type dot = sResultFilename.rfind('.');

        if (dot != std::string::npos)
        {
            sResultFilename = sResultFilename.substr(0, dot);
        }

        sResultFilename += "_boxFilterFII.pgm";

        if (checkCmdLineFlag(argc, (const char **)argv, "output"))
        {
            char *outputFilePath;
            getCmdLineArgumentString(argc, (const char **)argv, "output", &outputFilePath);
            sResultFilename = outputFilePath;
        }

        FREE_IMAGE_FORMAT eFormat = FreeImage_GetFileType(sFilename.c_str());

        // no signature? try to guess the file format from the file extension
        if (eFormat == FIF_UNKNOWN)
        {
            eFormat = FreeImage_GetFIFFromFilename(sFilename.c_str());
        }

        NPP_ASSERT(eFormat != FIF_UNKNOWN);
        // check that the plugin has reading capabilities ...
        FIBITMAP *pBitmap;

        if (FreeImage_FIFSupportsReading(eFormat))
        {
            pBitmap = FreeImage_Load(eFormat, sFilename.c_str());
        }

        NPP_ASSERT(pBitmap != 0);
        // Dump the bitmap information to the console
        std::cout << (*pBitmap) << std::endl;
        // make sure this is an 8-bit single channel image
        NPP_ASSERT(FreeImage_GetColorType(pBitmap) == FIC_MINISBLACK);
        NPP_ASSERT(FreeImage_GetBPP(pBitmap) == 8);

        unsigned int nImageWidth  = FreeImage_GetWidth(pBitmap);
        unsigned int nImageHeight = FreeImage_GetHeight(pBitmap);
        unsigned int nSrcPitch    = FreeImage_GetPitch(pBitmap);
        unsigned char *pSrcData  = FreeImage_GetBits(pBitmap);

        int nSrcPitchCUDA;
        Npp8u *pSrcImageCUDA = nppiMalloc_8u_C1(nImageWidth, nImageHeight, &nSrcPitchCUDA);
        NPP_ASSERT_NOT_NULL(pSrcImageCUDA);
        // copy image loaded via FreeImage to into CUDA device memory, i.e.
        // transfer the image-data up to the GPU's video-memory
        NPP_CHECK_CUDA(cudaMemcpy2D(pSrcImageCUDA, nSrcPitchCUDA, pSrcData, nSrcPitch,
                                    nImageWidth, nImageHeight, cudaMemcpyHostToDevice));

        // define size of the box filter
        const NppiSize  oMaskSize   = {7, 7};
        const NppiPoint oMaskAchnor = {0, 0};
        // compute maximal result image size
        const NppiSize  oSizeROI = {(int)nImageWidth  - (oMaskSize.width - 1),
                                    (int)nImageHeight - (oMaskSize.height - 1)
                                   };
        // allocate result image memory
        int nDstPitchCUDA;
        Npp8u *pDstImageCUDA = nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA);
        NPP_ASSERT_NOT_NULL(pDstImageCUDA);
        NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA, pDstImageCUDA, nDstPitchCUDA,
                                           oSizeROI, oMaskSize, oMaskAchnor));
        // create the result image storage using FreeImage so we can easily
        // save
        FIBITMAP *pResultBitmap = FreeImage_Allocate(oSizeROI.width, oSizeROI.height, 8 /* bits per pixel */);
        NPP_ASSERT_NOT_NULL(pResultBitmap);
        unsigned int nResultPitch   = FreeImage_GetPitch(pResultBitmap);
        unsigned char *pResultData = FreeImage_GetBits(pResultBitmap);

        NPP_CHECK_CUDA(cudaMemcpy2D(pResultData, nResultPitch, pDstImageCUDA, nDstPitchCUDA,
                                    oSizeROI.width, oSizeROI.height, cudaMemcpyDeviceToHost));
        // now save the result image
        bool bSuccess;
        bSuccess = FreeImage_Save(FIF_PGM, pResultBitmap, sResultFilename.c_str(), 0) == TRUE;
        NPP_ASSERT_MSG(bSuccess, "Failed to save result image.");

        //free nppiImage
        nppiFree(pSrcImageCUDA);
        nppiFree(pDstImageCUDA);

        cudaDeviceReset();
        exit(EXIT_SUCCESS);
    }
    catch (npp::Exception &rException)
    {
        std::cerr << "Program error! The following exception occurred: \n";
        std::cerr << rException << std::endl;
        std::cerr << "Aborting." << std::endl;
        exit(EXIT_FAILURE);
    }
    catch (...)
    {
        std::cerr << "Program error! An unknow type of exception occurred. \n";
        std::cerr << "Aborting." << std::endl;
        exit(EXIT_FAILURE);
    }

    exit(EXIT_SUCCESS);
}
int main(int argc, char **argv)
{
    char *dump_file = NULL;

#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    pArgc = &argc;
    pArgv = argv;

    printf("%s Starting...\n\n", sSDKsample);

    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
    {
        getCmdLineArgumentString(argc, (const char **)argv,
                                 "file", (char **) &dump_file);

        int kernel = 1;

        if (checkCmdLineFlag(argc, (const char **)argv, "kernel"))
        {
            kernel = getCmdLineArgumentInt(argc, (const char **)argv, "kernel");
        }

        runAutoTest(argc, argv, dump_file, kernel);
    }
    else
    {
        printf("[%s]\n", sSDKsample);

        // use command-line specified CUDA device, otherwise use device with highest Gflops/s
        if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        {
            printf("[%s]\n", argv[0]);
            printf("   Does not explicitly support -device=n in OpenGL mode\n");
            printf("   To use -device=n, the sample must be running w/o OpenGL\n\n");
            printf(" > %s -device=n -qatest\n", argv[0]);
            printf("exiting...\n");
            exit(EXIT_SUCCESS);
        }

        // First load the image, so we know what the size of the image (imageW and imageH)
        printf("Allocating host and CUDA memory and loading image file...\n");
        const char *image_path = sdkFindFilePath("portrait_noise.bmp", argv[0]);

        if (image_path == NULL)
        {
            printf("imageDenoisingGL was unable to find and load image file <portrait_noise.bmp>.\nExiting...\n");
            exit(EXIT_FAILURE);
        }

        LoadBMPFile(&h_Src, &imageW, &imageH, image_path);
        printf("Data init done.\n");

        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(&argc, argv);
        cudaGLSetGLDevice(gpuGetMaxGflopsDeviceId());

        checkCudaErrors(CUDA_MallocArray(&h_Src, imageW, imageH));

        initOpenGLBuffers();
    }

    printf("Starting GLUT main loop...\n");
    printf("Press [1] to view noisy image\n");
    printf("Press [2] to view image restored with knn filter\n");
    printf("Press [3] to view image restored with nlm filter\n");
    printf("Press [4] to view image restored with modified nlm filter\n");
    printf("Press [*] to view smooth/edgy areas [RED/BLUE] Ct's when a filter is active\n");
    printf("Press [f] to print frame rate\n");
    printf("Press [?] to print Noise and Lerp Ct's\n");
    printf("Press [q] to exit\n");

    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    glutMainLoop();
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    pArgc = &argc;
    pArgv = argv;
    char *ref_file = NULL;

#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    printf("%s Starting...\n\n", sSDKsample);

    printf("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n");

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
            fpsLimit = frameCheckNumber;
        }
    }

    // Get the path of the filename
    char *filename;

    if (getCmdLineArgumentString(argc, (const char **) argv, "image", &filename))
    {
        image_filename = filename;
    }

    // load image
    char *image_path = sdkFindFilePath(image_filename, argv[0]);

    if (image_path == NULL)
    {
        fprintf(stderr, "Error unable to find and load image file: '%s'\n", image_filename);
        exit(EXIT_FAILURE);
    }

    sdkLoadPPM4ub(image_path, (unsigned char **)&h_img, &width, &height);

    if (!h_img)
    {
        printf("Error unable to load PPM file: '%s'\n", image_path);
        exit(EXIT_FAILURE);
    }

    printf("Loaded '%s', %d x %d pixels\n", image_path, width, height);

    if (checkCmdLineFlag(argc, (const char **)argv, "threads"))
    {
        nthreads = getCmdLineArgumentInt(argc, (const char **) argv, "threads");
    }

    if (checkCmdLineFlag(argc, (const char **)argv, "sigma"))
    {
        sigma = getCmdLineArgumentFloat(argc, (const char **) argv, "sigma");
    }

    runBenchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark");

    int device;
    struct cudaDeviceProp prop;
    cudaGetDevice(&device);
    cudaGetDeviceProperties(&prop, device);

    if (!strncmp("Tesla", prop.name, 5))
    {
        printf("Tesla card detected, running the test in benchmark mode (no OpenGL display)\n");
        //        runBenchmark = true;
        runBenchmark = true;
    }

    // Benchmark or AutoTest mode detected, no OpenGL
    if (runBenchmark == true || ref_file != NULL)
    {
        findCudaDevice(argc, (const char **)argv);
    }
    else
    {
        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(&argc, argv);
        findCudaGLDevice(argc, (const char **)argv);
    }

    initCudaBuffers();

    if (ref_file)
    {
        printf("(Automated Testing)\n");
        bool testPassed = runSingleTest(ref_file, argv[0]);

        cleanup();

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();

        exit(testPassed ? EXIT_SUCCESS : EXIT_FAILURE);
    }

    if (runBenchmark)
    {
        printf("(Run Benchmark)\n");
        benchmark(100);

        cleanup();

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();

        exit(EXIT_SUCCESS);
    }

    initGLBuffers();
    glutMainLoop();

    exit(EXIT_SUCCESS);
}
Beispiel #20
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    printf("%s Starting...\n\n", sSDKsample);

    numParticles = NUM_PARTICLES;
    maxNumParticles = MAX_NUM_PARTICLES;
    uint gridDim = GRID_SIZE;
    numIterations = 0;
    printf("Surely I can get this far\n");

    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **) argv, "n"))
        {
            numParticles = getCmdLineArgumentInt(argc, (const char **)argv, "n");
        }

        if (checkCmdLineFlag(argc, (const char **) argv, "grid"))
        {
            gridDim = getCmdLineArgumentInt(argc, (const char **) argv, "grid");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", &g_refFile);
            fpsLimit = frameCheckNumber;
            numIterations = 1;
        }
    }

    //*******************************************************
    // RMK Hard code for cylindrical coords (y=theta=1)
    // DomainSize
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot-big/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot-big/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-big/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-big/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot-big-refine/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot-big-refine/RVert.txt";
	char Zfile[] = "/home/rkeedy/CFD/BuoyantStrumJet85-big-refine-lighter/ZVert.txt";
	char Rfile[] = "/home/rkeedy/CFD/BuoyantStrumJet85-big-refine-lighter/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet62-big-refine-lighter/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet62-big-refine-lighter/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-big-refine/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-big-refine/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet63-big-refine/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet63-big-refine/RVert.txt";
	numVelNodes.x = filecount(Rfile); //-1;
	numVelNodes.z = filecount(Zfile); //-1;
	numVelNodes.y = 1;
	numCells.x = 80; //47; //24; //29;
	numCells.y = 1;
	numCells.z = 160; //188; //95; //88;
	numParticles = numCells.x*numCells.z*20; //avgnumparticles = 40
	srand( time( NULL ) );
	//numParticles = numCells.x*numCells.z*40;
	printf("vel grid: %d x %d x %d = %d cells\n", numVelNodes.x, numVelNodes.y, numVelNodes.z, numVelNodes.x*numVelNodes.y*numVelNodes.z);
	printf("    grid: %d x %d x %d = %d cells\n", numCells.x, numCells.y, numCells.z, numCells.x*numCells.y*numCells.z);
	//printf("vel grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);

    bool benchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark") != 0;

    if (checkCmdLineFlag(argc, (const char **) argv, "i"))
    {
        numIterations = getCmdLineArgumentInt(argc, (const char **) argv, "i");
    }
    if (g_refFile)
    {
        cudaInit(argc, argv);
    }
    else
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        {
            printf("[%s]\n", argv[0]);
            printf("   Does not explicitly support -device=n in OpenGL mode\n");
            printf("   To use -device=n, the sample must be running w/o OpenGL\n\n");
            printf(" > %s -device=n -file=<*.bin>\n", argv[0]);
            printf("exiting...\n");
            exit(EXIT_SUCCESS);
        }

        initGL(&argc, argv);
        cudaGLInit(argc, argv);
    }

    // Moved code snippet to CellSystem
    //initCellSystem(gridSize);
    // now moved to particlesystem

    printf("Begin initialization\n");

    //initParticleSystem(numParticles, gridSize, g_refFile==NULL);
    initParticleSystem(maxNumParticles, numParticles, numVelNodes, numCells, g_refFile==NULL);
    //printf("Finished with initParticleSystem, %d\n",g_refFile==NULL);
    //cin.ignore();
    initParams();

    printf("Finished with initialization\n");

    if (!g_refFile)
    {
        initMenus();
    }

    if (benchmark || g_refFile)
    {
        if (numIterations <= 0)
        {
            numIterations = 300;
        }

        runBenchmark(numIterations, argv[0]);
    }
    else
    {
        glutDisplayFunc(display);
        glutReshapeFunc(reshape);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
        glutKeyboardFunc(key);
        glutSpecialFunc(special);
        glutIdleFunc(idle);

        atexit(cleanup);

        glutMainLoop();
    }

    if (psystem)
    {
        delete psystem;
    }

    cudaDeviceReset();
    exit(g_TotalErrors > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
}
Beispiel #21
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char *argv[])
{
    char device_name[NAME_LEN];
    char *ref_file = NULL;

    pArgc = &argc;
    pArgv = argv;

    printf("[%s] - Starting...\n", SDK_name);

    if (!findGraphicsGPU(device_name))
    {
        printf("> %s not supported on \"%s\" exiting...\n", SDK_name, device_name);
        exit(EXIT_SUCCESS);
    }

    // command line options
    if (argc > 1)
    {
        // automatied build testing harness
        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
            getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
    }


    //
    // create window
    //
    // Register the window class
#if 1
    WNDCLASSEX wc = { sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L,
                      GetModuleHandle(NULL), NULL, NULL, NULL, NULL,
                      "CUDA/D3D9 Texture InterOP", NULL
                    };
    RegisterClassEx(&wc);

    int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
    int yMenu = ::GetSystemMetrics(SM_CYMENU);
    int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);

    // Create the application's window (padding by window border for uniform BB sizes across OSs)
    HWND hWnd = CreateWindow(wc.lpszClassName, "CUDA/D3D9 Texture InterOP",
                             WS_OVERLAPPEDWINDOW, 0, 0, g_WindowWidth + 2*xBorder, g_WindowHeight+ 2*yBorder+yMenu,
                             NULL, NULL, wc.hInstance, NULL);


#else
    static WNDCLASSEX wc = { sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L, GetModuleHandle(NULL), NULL, NULL, NULL, NULL, "CudaD3D9Tex", NULL };
    RegisterClassEx(&wc);
    HWND hWnd = CreateWindow(
                    "CudaD3D9Tex", "CUDA D3D9 Texture Interop",
                    WS_OVERLAPPEDWINDOW,
                    0, 0, 800, 320,
                    GetDesktopWindow(),
                    NULL,
                    wc.hInstance,
                    NULL);
#endif

    ShowWindow(hWnd, SW_SHOWDEFAULT);
    UpdateWindow(hWnd);

    // Initialize Direct3D
    if (SUCCEEDED(InitD3D9(hWnd)) &&
        SUCCEEDED(InitCUDA()) &&
        SUCCEEDED(InitTextures()))
    {
        if (!g_bDeviceLost)
        {
            RegisterD3D9ResourceWithCUDA();
        }
    }

    //
    // the main loop
    //
    while (false == g_bDone)
    {
        RunCUDA();
        DrawScene();

        //
        // handle I/O
        //
        MSG msg;
        ZeroMemory(&msg, sizeof(msg));

        while (msg.message!=WM_QUIT)
        {
            if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE))
            {
                TranslateMessage(&msg);
                DispatchMessage(&msg);
            }
            else
            {
                RunCUDA();
                DrawScene();

                if (ref_file)
                {
                    for (int count=0; count<g_iFrameToCompare; count++)
                    {
                        RunCUDA();
                        DrawScene();
                    }

                    const char *cur_image_path = "simpleD3D9Texture.ppm";

                    // Save a reference of our current test run image
                    CheckRenderD3D9::BackbufferToPPM(g_pD3DDevice, cur_image_path);

                    // compare to offical reference image, printing PASS or FAIL.
                    g_bPassed = CheckRenderD3D9::PPMvsPPM(cur_image_path, ref_file, argv[0], MAX_EPSILON, 0.15f);

                    g_bDone = true;

                    Cleanup();
                    PostQuitMessage(0);
                }
            }
        }
    };

    // Unregister windows class
    UnregisterClass(wc.lpszClassName, wc.hInstance);

    //
    // and exit
    //
    printf("> %s running on %s exiting...\n", SDK_name, device_name);

    exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
}
Beispiel #22
0
int main(int argc, char **argv)
{
    int devID;
    cudaDeviceProp deviceProps;
    printf("%s Starting...\n\n", sSDKname);
    printf("[%s] - [OpenGL/CUDA simulation] starting...\n", sSDKname);

    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    if (false == initGL(&argc, argv))
    {
        exit(EXIT_SUCCESS);
    }

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    devID = findCudaGLDevice(argc, (const char **)argv);

    // get number of SMs on this GPU
    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));
    printf("CUDA device [%s] has %d Multi-Processors\n",
           deviceProps.name, deviceProps.multiProcessorCount);

    // automated build testing harness
    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
    {
        getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
    }

    // Allocate and initialize host data
    GLint bsize;

    sdkCreateTimer(&timer);
    sdkResetTimer(&timer);

    hvfield = (cData *)malloc(sizeof(cData) * DS);
    memset(hvfield, 0, sizeof(cData) * DS);

    // Allocate and initialize device data
    cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(cData)*DIM, DIM);

    cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS,
               cudaMemcpyHostToDevice);
    // Temporary complex velocity field data
    cudaMalloc((void **)&vxfield, sizeof(cData) * PDS);
    cudaMalloc((void **)&vyfield, sizeof(cData) * PDS);

    setupTexture(DIM, DIM);
    bindTexture();

    // Create particle array
    particles = (cData *)malloc(sizeof(cData) * DS);
    memset(particles, 0, sizeof(cData) * DS);

    initParticles(particles, DIM, DIM);

    // Create CUFFT transform plan configuration
    cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C);
    cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R);
    // TODO: update kernels to use the new unpadded memory layout for perf
    // rather than the old FFTW-compatible layout
    cufftSetCompatibilityMode(planr2c, CUFFT_COMPATIBILITY_FFTW_PADDING);
    cufftSetCompatibilityMode(planc2r, CUFFT_COMPATIBILITY_FFTW_PADDING);

    glGenBuffersARB(1, &vbo);
    glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo);
    glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(cData) * DS,
                    particles, GL_DYNAMIC_DRAW_ARB);

    glGetBufferParameterivARB(GL_ARRAY_BUFFER_ARB, GL_BUFFER_SIZE_ARB, &bsize);

    if (bsize != (sizeof(cData) * DS))
        goto EXTERR;

    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);

    checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsNone));
    getLastCudaError("cudaGraphicsGLRegisterBuffer failed");

    if (ref_file)
    {
        autoTest(argv);
        cleanup();
        cudaDeviceReset();
        printf("[fluidsGL] - Test Results: %d Failures\n", g_TotalErrors);
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);

    }
    else
    {
        atexit(cleanup);
        glutMainLoop();
    }

    cudaDeviceReset();

    if (!ref_file)
    {
        exit(EXIT_SUCCESS);
    }

    return 0;

EXTERR:
    printf("Failed to initialize GL extensions.\n");

    cudaDeviceReset();
    exit(EXIT_FAILURE);
}
void runAutoTest(int argc, char *argv[])
{
    printf("[%s] (automated testing w/ readback)\n", sSDKsample);
    int devID = findCudaDevice(argc, (const char **)argv);

    loadDefaultImage(argv[0]);

    Pixel *d_result;
    checkCudaErrors(cudaMalloc((void **)&d_result, imWidth*imHeight*sizeof(Pixel)));

    char *ref_file = NULL;
    char  dump_file[256];

    int mode = 0;
    mode = getCmdLineArgumentInt(argc, (const char **)argv, "mode");
    getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);

    switch (mode)
    {
        case 0:
            g_SobelDisplayMode = SOBELDISPLAY_IMAGE;
            sprintf(dump_file, "lena_orig.pgm");
            break;

        case 1:
            g_SobelDisplayMode = SOBELDISPLAY_SOBELTEX;
            sprintf(dump_file, "lena_tex.pgm");
            break;

        case 2:
            g_SobelDisplayMode = SOBELDISPLAY_SOBELSHARED;
            sprintf(dump_file, "lena_shared.pgm");
            break;

        default:
            printf("Invalid Filter Mode File\n");
            exit(EXIT_FAILURE);
            break;
    }

    printf("AutoTest: %s <%s>\n", sSDKsample, filterMode[g_SobelDisplayMode]);
    sobelFilter(d_result, imWidth, imHeight, g_SobelDisplayMode, imageScale);
    checkCudaErrors(cudaDeviceSynchronize());

    unsigned char *h_result = (unsigned char *)malloc(imWidth*imHeight*sizeof(Pixel));
    checkCudaErrors(cudaMemcpy(h_result, d_result, imWidth*imHeight*sizeof(Pixel), cudaMemcpyDeviceToHost));
    sdkSavePGM(dump_file, h_result, imWidth, imHeight);

    if (!sdkComparePGM(dump_file, sdkFindFilePath(ref_file, argv[0]), MAX_EPSILON_ERROR, 0.15f, false))
    {
        g_TotalErrors++;
    }

    checkCudaErrors(cudaFree(d_result));
    free(h_result);

    if (g_TotalErrors != 0)
    {
        printf("Test failed!\n");
        exit(EXIT_FAILURE);
    }

    printf("Test passed!\n");
    exit(EXIT_SUCCESS);
}