// Prepares the sample for its OpenGL mode: prints a banner, initializes
// OpenGL, chooses a CUDA device, reports that device's multiprocessor count,
// and loads the input image.
//
// argc, argv: the program's command-line arguments; they are forwarded to the
//             helper calls below (initGL receives a pointer to argc).
void initialize(int argc, char **argv)
    printf("[%s] (OpenGL Mode)\n", sSDKsample);

    // Initialize the OpenGL context first, before any CUDA device is selected.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    initGL(&argc, argv);

    int devID;
    cudaDeviceProp deviceProps;

    // A "device" command-line flag requests device setup through gpuGLDeviceInit.
    // NOTE(review): in the code visible here devID is assigned only when the
    // "device" flag is present, yet it is tested and used below in every case --
    // confirm against the full file that the no-flag path also assigns it.
    if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        devID = gpuGLDeviceInit(argc, (const char **)argv);

        // A negative devID is replaced by the result of gpuGetMaxGflopsDeviceId()
        // (presumably the fastest available device -- verify against the helper).
        if (devID < 0)
        devID = gpuGetMaxGflopsDeviceId();

    // get number of SMs on this GPU
    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));
    printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name, deviceProps.multiProcessorCount);

    // Create the timer (for fps measurement)
    // NOTE(review): no timer-creation statement follows this comment in the
    // visible code -- confirm whether it was dropped.

    // load image from disk
    loadImageData(argc, argv);

           "\t=/- : Zoom in/out\n"
           "\tb   : Run Benchmark g_FilterMode\n"
           "\tc   : Draw Bicubic Spline Curve\n"
           "\t[esc] - Quit\n\n"

           "\tPress number keys to change filtering g_FilterMode:\n\n"
           "\t1 : nearest filtering\n"
           "\t2 : bilinear filtering\n"
           "\t3 : bicubic filtering\n"
           "\t4 : fast bicubic filtering\n"
           "\t5 : Catmull-Rom filtering\n\n"


    fprog = compileASMShader(GL_FRAGMENT_PROGRAM_ARB, shaderCode);

    if (!fprog)

// Program main
int main(int argc, char **argv)
    // start logs
    int devID;
    char *ref_file = NULL;
    printf("%s Starting...\n\n", argv[0]);

#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if (argc > 1)
        if (checkCmdLineFlag(argc, (const char **)argv, "radius"))
            filter_radius = getCmdLineArgumentInt(argc, (const char **) argv, "radius");

        if (checkCmdLineFlag(argc, (const char **)argv, "passes"))
            iterations = getCmdLineArgumentInt(argc, (const char **)argv, "passes");

        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
            getCmdLineArgumentString(argc, (const char **)argv, "file", (char **)&ref_file);

    // load image to process
    loadImageData(argc, argv);

    if (checkCmdLineFlag(argc, (const char **)argv, "benchmark"))
        // This is a separate mode of the sample, where we benchmark the kernels for performance
        devID = findCudaDevice(argc, (const char **)argv);

        // Running CUDA kernels (bilateralfilter) in Benchmarking mode
        g_TotalErrors += runBenchmark(argc, argv);

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
    else if (checkCmdLineFlag(argc, (const char **)argv, "radius") ||
             checkCmdLineFlag(argc, (const char **)argv, "passes"))
        // This overrides the default mode.  Users can specify the radius used by the filter kernel
        devID = findCudaDevice(argc, (const char **)argv);
        g_TotalErrors += runSingleTest(ref_file, argv[0]);

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
        // Default mode: runs with OpenGL visualization; in automatic mode
        // the output animation changes automatically

        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(argc, (char **)argv);
        int dev = findCapableDevice(argc, argv);

        if (dev != -1)
            dev = gpuGLDeviceInit(argc, (const char **)argv);

            if (dev == -1)
            // cudaDeviceReset causes the driver to clean up all state. While
            // not mandatory in normal operation, it is good practice.  It is also
            // needed to ensure correct operation when the application is being
            // profiled. Calling cudaDeviceReset causes all profile data to be
            // flushed before the application exits

        // Now we can create a CUDA context and bind it to the OpenGL context

        // sets the callback function so it will call cleanup upon exit
#if defined (__APPLE__) || defined(MACOSX)

        printf("Running Standard Demonstration with GLUT loop...\n\n");
        printf("Press '+' and '-' to change filter width\n"
               "Press ']' and '[' to change number of iterations\n"
               "Press 'e' and 'E' to change Euclidean delta\n"
               "Press 'g' and 'G' to changle Gaussian delta\n"
               "Press 'a' or  'A' to change Animation mode ON/OFF\n\n");

        // Main OpenGL loop that will run visualization for every vsync