コード例 #1
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    pArgc = &argc;
    pArgv = argv;
    char *ref_file = NULL;

#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    printf("%s Starting...\n\n", sSDKsample);

    printf("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n");

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
            fpsLimit = frameCheckNumber;
        }
    }

    // Get the path of the filename
    char *filename;

    if (getCmdLineArgumentString(argc, (const char **) argv, "image", &filename))
    {
        image_filename = filename;
    }

    // load image
    char *image_path = sdkFindFilePath(image_filename, argv[0]);

    if (image_path == NULL)
    {
        fprintf(stderr, "Error unable to find and load image file: '%s'\n", image_filename);
        exit(EXIT_FAILURE);
    }

    sdkLoadPPM4ub(image_path, (unsigned char **)&h_img, &width, &height);

    if (!h_img)
    {
        printf("Error unable to load PPM file: '%s'\n", image_path);
        exit(EXIT_FAILURE);
    }

    printf("Loaded '%s', %d x %d pixels\n", image_path, width, height);

    if (checkCmdLineFlag(argc, (const char **)argv, "threads"))
    {
        nthreads = getCmdLineArgumentInt(argc, (const char **) argv, "threads");
    }

    if (checkCmdLineFlag(argc, (const char **)argv, "sigma"))
    {
        sigma = getCmdLineArgumentFloat(argc, (const char **) argv, "sigma");
    }

    runBenchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark");

    int device;
    struct cudaDeviceProp prop;
    cudaGetDevice(&device);
    cudaGetDeviceProperties(&prop, device);

    if (!strncmp("Tesla", prop.name, 5))
    {
        printf("Tesla card detected, running the test in benchmark mode (no OpenGL display)\n");
        //        runBenchmark = true;
        runBenchmark = true;
    }

    // Benchmark or AutoTest mode detected, no OpenGL
    if (runBenchmark == true || ref_file != NULL)
    {
        findCudaDevice(argc, (const char **)argv);
    }
    else
    {
        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(&argc, argv);
        findCudaGLDevice(argc, (const char **)argv);
    }

    initCudaBuffers();

    if (ref_file)
    {
        printf("(Automated Testing)\n");
        bool testPassed = runSingleTest(ref_file, argv[0]);

        cleanup();

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();

        exit(testPassed ? EXIT_SUCCESS : EXIT_FAILURE);
    }

    if (runBenchmark)
    {
        printf("(Run Benchmark)\n");
        benchmark(100);

        cleanup();

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();

        exit(EXIT_SUCCESS);
    }

    initGLBuffers();
    glutMainLoop();

    exit(EXIT_SUCCESS);
}
コード例 #2
0
ファイル: main.cpp プロジェクト: kuke/CUDA_PSO
/**
 * Entry point: runs the PSO solver on the CPU (PSO) and on the GPU (CudaPSO)
 * with the same settings, prints both results and the ratio of their reported
 * times, and writes the per-iteration RME values of both runs to a
 * timestamped "<YYYYmmddHHMMSS>.log" file in the current directory.
 *
 * Command-line options (looked up by name):
 *   help     - call helper() and exit
 *   n        - number of particles            (default 1024)
 *   m        - maximum number of iterations   (default 50)
 *   threads  - value passed to CudaPSO::Solve (default 32)
 *
 * @return 0 on success, 1 if the log file could not be opened.
 */
int main(int argc, char **argv)
{
    int numParticles = 1024;
    int maxIters = 50;
    int numThreads = 32;
    // Convergence tolerance. Note: `10^-6` is a bitwise XOR in C++ (it
    // evaluates to -16), so the value must be written as a floating literal.
    float eps = 1e-6f;

    if (checkCmdLineFlag(argc,(const char **)argv,"help"))
    {
        helper();
        return 0;
    }
    // The options are parsed as floats and deliberately truncated to int.
    if (checkCmdLineFlag(argc, (const char **) argv, "n"))
    {
        numParticles = static_cast<int>(getCmdLineArgumentFloat(argc, (const char **)argv, "n"));
    }
    if (checkCmdLineFlag(argc, (const char **) argv, "m"))
    {
        maxIters = static_cast<int>(getCmdLineArgumentFloat(argc, (const char **)argv, "m"));
    }
    if (checkCmdLineFlag(argc, (const char **) argv, "threads"))
    {
        numThreads = static_cast<int>(getCmdLineArgumentFloat(argc, (const char **)argv, "threads"));
    }

    std::cout<<"PSO Algorithm: "<<" n= "<<numParticles<<", m= "<<maxIters<<", threads= "<<numThreads<<std::endl;

    // CPU run
    PSO pso(numParticles);
    float cpu_time = pso.Solve(maxIters, eps);
    std::cout<<"CPU result: "<<std::endl;
    std::cout<<"a: "<<pso.gBest.x<<" b: "<<pso.gBest.y<<" iters: "<<pso.iters<<" time: "<<cpu_time<<"ms"<<std::endl;

    // GPU run with the same particle count, iteration limit and tolerance
    CudaPSO cuda_pso(numParticles);
    float cuda_time = cuda_pso.Solve(maxIters, numThreads, eps);
    std::cout<<"GPU result: "<<std::endl;
    std::cout<<" a: "<<cuda_pso.gBest.x<<" b: "<<cuda_pso.gBest.y<<" iters: "<<cuda_pso.iters<<" time: "<<cuda_time<<"ms"<<std::endl;

    std::cout<<std::endl<<"GPU perf./CPU perf. = "<<cpu_time/cuda_time<<std::endl;

    // Name the log file after the current local time
    time_t now = time(NULL);
    struct tm timeinfo = *localtime(&now);
    char name[50];
    strftime(name, sizeof(name),"%Y%m%d%H%M%S", &timeinfo);
    std::string filename = std::string(name)+std::string(".log");

    std::ofstream fout(filename.c_str(), std::ios::app);
    if (!fout)
    {
        // Do not claim the RMEs were written when the file could not be opened
        std::cerr<<"Error: unable to open log file ./"<<filename<<std::endl;
        return 1;
    }

    fout<<"CPU RME\t\t"<<"GPU RME"<<std::endl;
    // Only the iterations both solvers actually performed have a value in
    // both RME series, so log up to the shorter of the two runs.
    const auto loggedIters = std::min(pso.iters, cuda_pso.iters);
    for (int i=0; i<loggedIters; i++) {
        fout<<pso.RME[i]<<"\t\t"<<cuda_pso.RME[i]<<'\n';
    }
    fout.close();
    std::cout<<std::endl<<"RMEs have been written into ./"<<filename<<std::endl;
    //uncomment if want to output RMEs to terminal
    /*
    std::cout<<std::endl;
    std::cout<<"CPU RME:"<<std::endl;
    for (int i=0; i<pso.iters; i++){
        std::cout<<pso.RME[i]<<"\t";
    }
    std::cout<<std::endl;
    std::cout<<"GPU RME:"<<std::endl;
    for (int i=0; i<cuda_pso.iters; i++){
        std::cout<<cuda_pso.RME[i]<<"\t";
    }
    */
    return 0;
}