//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { pArgc = &argc; pArgv = argv; char *ref_file = NULL; #if defined(__linux__) setenv ("DISPLAY", ":0", 0); #endif printf("%s Starting...\n\n", sSDKsample); printf("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n"); // use command-line specified CUDA device, otherwise use device with highest Gflops/s if (argc > 1) { if (checkCmdLineFlag(argc, (const char **)argv, "file")) { getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file); fpsLimit = frameCheckNumber; } } // Get the path of the filename char *filename; if (getCmdLineArgumentString(argc, (const char **) argv, "image", &filename)) { image_filename = filename; } // load image char *image_path = sdkFindFilePath(image_filename, argv[0]); if (image_path == NULL) { fprintf(stderr, "Error unable to find and load image file: '%s'\n", image_filename); exit(EXIT_FAILURE); } sdkLoadPPM4ub(image_path, (unsigned char **)&h_img, &width, &height); if (!h_img) { printf("Error unable to load PPM file: '%s'\n", image_path); exit(EXIT_FAILURE); } printf("Loaded '%s', %d x %d pixels\n", image_path, width, height); if (checkCmdLineFlag(argc, (const char **)argv, "threads")) { nthreads = getCmdLineArgumentInt(argc, (const char **) argv, "threads"); } if (checkCmdLineFlag(argc, (const char **)argv, "sigma")) { sigma = getCmdLineArgumentFloat(argc, (const char **) argv, "sigma"); } runBenchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark"); int device; struct cudaDeviceProp prop; cudaGetDevice(&device); cudaGetDeviceProperties(&prop, device); if (!strncmp("Tesla", prop.name, 5)) { printf("Tesla card detected, running the test in benchmark mode (no OpenGL display)\n"); // runBenchmark = true; runBenchmark = true; } // Benchmark or AutoTest mode detected, no OpenGL if (runBenchmark == true || ref_file != NULL) { findCudaDevice(argc, (const char **)argv); } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL(&argc, argv); findCudaGLDevice(argc, (const char **)argv); } initCudaBuffers(); if (ref_file) { printf("(Automated Testing)\n"); bool testPassed = runSingleTest(ref_file, argv[0]); cleanup(); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); exit(testPassed ? EXIT_SUCCESS : EXIT_FAILURE); } if (runBenchmark) { printf("(Run Benchmark)\n"); benchmark(100); cleanup(); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); exit(EXIT_SUCCESS); } initGLBuffers(); glutMainLoop(); exit(EXIT_SUCCESS); }
int main(int argc, char **argv) { int numParticles = 1024; int maxIters = 50; int numThreads = 32; float eps = 10^-6; if (checkCmdLineFlag(argc,(const char **)argv,"help")) { helper(); exit(0); } if (checkCmdLineFlag(argc, (const char **) argv, "n")) { numParticles = getCmdLineArgumentFloat(argc, (const char **)argv, "n"); } if (checkCmdLineFlag(argc, (const char **) argv, "m")) { maxIters = getCmdLineArgumentFloat(argc, (const char **)argv, "m"); } if (checkCmdLineFlag(argc, (const char **) argv, "threads")) { numThreads = getCmdLineArgumentFloat(argc, (const char **)argv, "threads"); } std::cout<<"PSO Algorithm: "<<" n= "<<numParticles<<", m= "<<maxIters<<", threads= "<<numThreads<<std::endl; PSO pso(numParticles); float cpu_time = pso.Solve(maxIters, eps); std::cout<<"CPU result: "<<std::endl; std::cout<<"a: "<<pso.gBest.x<<" b: "<<pso.gBest.y<<" iters: "<<pso.iters<<" time: "<<cpu_time<<"ms"<<std::endl; CudaPSO cuda_pso(numParticles); float cuda_time = cuda_pso.Solve(maxIters, numThreads, eps); std::cout<<"GPU result: "<<std::endl; std::cout<<" a: "<<cuda_pso.gBest.x<<" b: "<<cuda_pso.gBest.y<<" iters: "<<cuda_pso.iters<<" time: "<<cuda_time<<"ms"<<std::endl; std::cout<<std::endl<<"GPU perf./CPU perf. = "<<cpu_time/cuda_time<<std::endl; time_t now = time(NULL); struct tm timeinfo = *localtime(&now); char name[50]; strftime(name, sizeof(name),"%Y%m%d%H%M%S", &timeinfo); std::string filename = std::string(name)+std::string(".log"); std::ofstream fout(filename.c_str(), std::ios::app); fout<<"CPU RME\t\t"<<"GPU RME"<<std::endl; for (int i=0; i<std::min(pso.iters, cuda_pso.iters); i++) { fout<<pso.RME[i]<<"\t\t"<<cuda_pso.RME[i]<<std::endl; } fout.close(); std::cout<<std::endl<<"RMEs have been written into ./"<<filename<<std::endl; //uncomment if want to output RMEs to terminal /* std::cout<<std::endl; std::cout<<"CPU RME:"<<std::endl; for (int i=0; i<pso.iters; i++){ std::cout<<pso.RME[i]<<"\t"; } std::cout<<std::endl; std::cout<<"GPU RME:"<<std::endl; for (int i=0; i<cuda_pso.iters; i++){ std::cout<<cuda_pso.RME[i]<<"\t"; } */ return 0; }