/**
 * Initialise a dx-by-dy grid of particles and optionally stamp a label mask
 * into it.
 *
 * Steps performed:
 *   1. Every particle gets a jittered position: cell centre plus
 *      (myrand() - 0.5f), normalised by the grid extent.
 *   2. "data/usi.ppm" is loaded as a 4-channel image. If loading fails the
 *      function returns right away (step 3 is skipped as well).
 *   3. Unless the NO_LABEL environment variable is set, every pure black
 *      pixel (R == G == B == 0) moves the particle at the same (row, column)
 *      to the origin (0, 0).
 *   4. With BROADCAST defined, particles are swapped with randomly chosen
 *      partners to eliminate initial jittering in the UDP client.
 *
 * \param[out] p   particle array holding at least dx * dy elements
 * \param[in]  dx  number of particles per row
 * \param[in]  dy  number of rows
 */
void initParticles(cData *p, int dx, int dy)
{
    // Jittered regular grid.
    // NOTE(review): presumably myrand() yields values in [0, 1) - confirm.
    for (int i = 0; i < dy; i++)
    {
        for (int j = 0; j < dx; j++)
        {
            p[i*dx+j].x = (j+0.5f+(myrand() - 0.5f))/dx;
            p[i*dx+j].y = (i+0.5f+(myrand() - 0.5f))/dy;
        }
    }

    unsigned char *data = NULL;
    unsigned int width = 0, height = 0;
    bool loaded = sdkLoadPPM4ub("data/usi.ppm", &data, &width, &height);

    if (!loaded)
    {
        return;
    }

    const char *no_label = getenv("NO_LABEL");

    if (!no_label)
    {
        const int nchannels = 4;

        // Only the part of the image that overlaps the particle grid may be
        // applied; iterating over the full image would write past the end of
        // p (or into the wrong row) whenever the image is larger than the grid.
        const unsigned int gridW = (dx > 0) ? (unsigned int)dx : 0u;
        const unsigned int gridH = (dy > 0) ? (unsigned int)dy : 0u;
        const unsigned int cols  = (width  < gridW) ? width  : gridW;
        const unsigned int rows  = (height < gridH) ? height : gridH;

        for (unsigned int j = 0; j < rows; j++)
        {
            for (unsigned int i = 0; i < cols; i++)
            {
                // The image row stride is the image width, not the grid width.
                const unsigned char *pixel =
                    &data[nchannels * ((size_t)width * j + i)];

                if ((pixel[0] == 0) && (pixel[1] == 0) && (pixel[2] == 0))
                {
                    const size_t idx = (size_t)j * gridW + i;
                    p[idx].x = 0;
                    p[idx].y = 0;
                }
            }
        }
    }

    // NOTE(review): the CUDA samples helper allocates the pixel buffer with
    // malloc(), so it is released with free() - confirm against the
    // sdkLoadPPM4ub implementation used by this project.
    free(data);

#ifdef BROADCAST
    // Randomly reorder particles to eliminate initial jittering
    // in UDP client.
    for (int i = 0; i < dy; i++)
    {
        for (int j = 0; j < dx; j++)
        {
            int i_rand = myrand() * (dx - 1);   // random column
            int j_rand = myrand() * (dy - 1);   // random row

            // i is the row and j the column here; the element is i*dx+j.
            // (Indexing with j*dx+i is out of range for non-square grids.)
            cData *p0 = &p[i*dx+j];
            cData *p1 = &p[j_rand*dx+i_rand];

            cData p2 = *p0;
            *p0 = *p1;
            *p1 = p2;
        }
    }
#endif
}
/////////////////////////////////////////////////////////////////////////////// /// \brief /// load 4-channel unsigned byte image /// and convert it to single channel FP32 image /// \param[out] img_data pointer to raw image data /// \param[out] img_w image width /// \param[out] img_h image height /// \param[out] img_s image row stride /// \param[in] name image file name /// \param[in] exePath executable file path /// \return true if image is successfully loaded or false otherwise /////////////////////////////////////////////////////////////////////////////// bool LoadImageAsFP32(float *&img_data, int &img_w, int &img_h, int &img_s, const char *name, const char *exePath) { printf("Loading \"%s\" ...\n", name); char *name_ = sdkFindFilePath(name, exePath); if (!name_) { printf("File not found\n"); return false; } unsigned char *data = 0; unsigned int w = 0, h = 0; bool result = sdkLoadPPM4ub(name_, &data, &w, &h); if (result == false) { printf("Invalid file format\n"); return false; } img_w = w; img_h = h; img_s = iAlignUp(img_w); img_data = new float [img_s * h]; // source is 4 channel image const int widthStep = 4 * img_w; for (int i = 0; i < img_h; ++i) { for (int j = 0; j < img_w; ++j) { img_data[j + i * img_s] = ((float) data[j * 4 + i * widthStep]) / 255.0f; } } return true; }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { pArgc = &argc; pArgv = argv; char *ref_file = NULL; #if defined(__linux__) setenv ("DISPLAY", ":0", 0); #endif printf("%s Starting...\n\n", sSDKsample); printf("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n"); // use command-line specified CUDA device, otherwise use device with highest Gflops/s if (argc > 1) { if (checkCmdLineFlag(argc, (const char **)argv, "file")) { getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file); fpsLimit = frameCheckNumber; } } // Get the path of the filename char *filename; if (getCmdLineArgumentString(argc, (const char **) argv, "image", &filename)) { image_filename = filename; } // load image char *image_path = sdkFindFilePath(image_filename, argv[0]); if (image_path == NULL) { fprintf(stderr, "Error unable to find and load image file: '%s'\n", image_filename); exit(EXIT_FAILURE); } sdkLoadPPM4ub(image_path, (unsigned char **)&h_img, &width, &height); if (!h_img) { printf("Error unable to load PPM file: '%s'\n", image_path); exit(EXIT_FAILURE); } printf("Loaded '%s', %d x %d pixels\n", image_path, width, height); if (checkCmdLineFlag(argc, (const char **)argv, "threads")) { nthreads = getCmdLineArgumentInt(argc, (const char **) argv, "threads"); } if (checkCmdLineFlag(argc, (const char **)argv, "sigma")) { sigma = getCmdLineArgumentFloat(argc, (const char **) argv, "sigma"); } runBenchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark"); int device; struct cudaDeviceProp prop; cudaGetDevice(&device); cudaGetDeviceProperties(&prop, device); if (!strncmp("Tesla", prop.name, 5)) { printf("Tesla card detected, running the test in benchmark mode (no OpenGL display)\n"); // runBenchmark = true; runBenchmark = true; } // Benchmark or 
AutoTest mode detected, no OpenGL if (runBenchmark == true || ref_file != NULL) { findCudaDevice(argc, (const char **)argv); } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. initGL(&argc, argv); findCudaGLDevice(argc, (const char **)argv); } initCudaBuffers(); if (ref_file) { printf("(Automated Testing)\n"); bool testPassed = runSingleTest(ref_file, argv[0]); cleanup(); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); exit(testPassed ? EXIT_SUCCESS : EXIT_FAILURE); } if (runBenchmark) { printf("(Run Benchmark)\n"); benchmark(100); cleanup(); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); exit(EXIT_SUCCESS); } initGLBuffers(); glutMainLoop(); exit(EXIT_SUCCESS); }
/**
 * Compare two PPM images after loading both as 4-channel unsigned byte data.
 *
 * The comparison fails (returns false) when either file name is NULL, when
 * either file cannot be loaded, when the image dimensions differ, or when
 * compareData() rejects the pixel data for the given epsilon/threshold.
 *
 * \param[in] src_file       rendered image file name
 * \param[in] ref_file       reference image file name
 * \param[in] epsilon        tolerance forwarded to compareData()
 * \param[in] threshold      threshold forwarded to compareData(); printed as
 *                           a percentage (threshold * 100) in the log
 * \param[in] verboseErrors  when true, progress and diagnostics go to std::cerr
 * \return true if the images are considered equal, false otherwise
 */
inline bool sdkComparePPM(const char *src_file, const char *ref_file,
                          const float epsilon, const float threshold,
                          bool verboseErrors)
{
    unsigned char *src_data = NULL, *ref_data = NULL;
    unsigned long error_count = 0;
    unsigned int ref_width = 0, ref_height = 0;
    unsigned int src_width = 0, src_height = 0;

    if (src_file == NULL || ref_file == NULL)
    {
        if (verboseErrors)
        {
            std::cerr << "PPMvsPPM: src_file or ref_file is NULL.  Aborting comparison\n";
        }

        return false;
    }

    if (verboseErrors)
    {
        std::cerr << "> Compare (a)rendered:  <" << src_file << ">\n";
        std::cerr << ">         (b)reference: <" << ref_file << ">\n";
    }

    if (sdkLoadPPM4ub(ref_file, &ref_data, &ref_width, &ref_height) != true)
    {
        if (verboseErrors)
        {
            std::cerr << "PPMvsPPM: unable to load ref image file: "<< ref_file << "\n";
        }

        return false;
    }

    if (sdkLoadPPM4ub(src_file, &src_data, &src_width, &src_height) != true)
    {
        // This message is emitted regardless of verboseErrors.
        std::cerr << "PPMvsPPM: unable to load src image file: " << src_file << "\n";

        // The reference image was already loaded; do not leak it.
        free(ref_data);
        return false;
    }

    if (src_height != ref_height || src_width != ref_width)
    {
        if (verboseErrors)
            std::cerr << "PPMvsPPM: source and ref size mismatch (" << src_width <<
                      "," << src_height << ")vs(" << ref_width << "," << ref_height << ")\n";

        // Images of different size can never match. Comparing them anyway
        // would read past the end of the smaller buffer, so fail here.
        error_count = 1;
    }
    else
    {
        if (verboseErrors)
            std::cerr << "PPMvsPPM: comparing images size (" << src_width <<
                      "," << src_height << ") epsilon(" << epsilon << "), threshold(" << threshold*100 << "%)\n";

        // 4 bytes per pixel (RGBA as produced by sdkLoadPPM4ub).
        if (compareData(ref_data, src_data, src_width*src_height*4, epsilon, threshold) == false)
        {
            error_count=1;
        }
    }

    // NOTE(review): assumes sdkLoadPPM4ub allocates the buffers with malloc()
    // (as the CUDA samples helper does) - confirm.
    free(src_data);
    free(ref_data);

    if (error_count == 0)
    {
        if (verboseErrors)
        {
            std::cerr << "    OK\n\n";
        }
    }
    else
    {
        if (verboseErrors)
        {
            std::cerr << "    FAILURE!  "<<error_count<<" errors...\n\n";
        }
    }

    return (error_count == 0);  // returns true if all pixels pass
}