int main(int argc, char** argv) { float area[2]; float aTime = 0.0; float bTime = 0.0; int reps = argc > 1 ? atoi(argv[1]) :100; printf("ProgramCount [%d], size-of-float[%lu], size-of-float[%lu]\n",ispc::get_programCount(),sizeof(float), sizeof(float)); for (int i =0; i<reps; i++) { reset_and_start_timer(); area[0]= ptarray_signed_area_aos(polygon_aos,npoints); aTime += get_elapsed_mcycles(); } for (int i =0; i<reps; i++) { reset_and_start_timer(); area[1]= ispc::ptarray_signed_area_aos(polygon_aos,npoints); bTime += get_elapsed_mcycles(); } printf("%-20s: [%.2f] M cycles %s, [%.2f] M cycles %s (%.2fx speedup).\n", "ST_AREA", aTime, "serial_aos", bTime, "ispc_aos", aTime/bTime); printf("%-20s: serial [%.2f], ispc [%.2f]\n", "results", area[0], area[1]); aTime = 0.0; bTime = 0.0; for (int i =0; i<reps; i++) { reset_and_start_timer(); area[0]= ptarray_signed_area_soa(polygon_soa,npoints); aTime += get_elapsed_mcycles(); } printf("sequential code is fine.\n"); for (int i =0; i<reps; i++) { reset_and_start_timer(); area[1]= ispc::ptarray_signed_area_soa(polygon_aos,npoints); bTime += get_elapsed_mcycles(); } printf("%-20s: [%.2f] M cycles %s, [%.2f] M cycles %s (%.2fx speedup).\n", "ST_AREA", aTime, "serial_soa", bTime, "ispc_soa", aTime/bTime); printf("%-20s: serial [%.2f], ispc [%.2f]\n", "results", area[0], area[1]); return 0; }
int main(int argc, char** argv) { if (argc < 2) { printf("usage: deferred_shading <input_file (e.g. data/pp1280x720.bin)> [tasks iterations] [serial iterations]\n"); return 1; } static unsigned int test_iterations[] = {5, 3, 500}; //last value is for nframes, it is scale. if (argc == 5) { for (int i = 0; i < 3; i++) { test_iterations[i] = atoi(argv[2 + i]); } } InputData *input = CreateInputDataFromFile(argv[1]); if (!input) { printf("Failed to load input file \"%s\"!\n", argv[1]); return 1; } Framebuffer framebuffer(input->header.framebufferWidth, input->header.framebufferHeight); int nframes = test_iterations[2]; double ispcCycles = 1e30; for (int i = 0; i < test_iterations[0]; ++i) { framebuffer.clear(); reset_and_start_timer(); for (int j = 0; j < nframes; ++j) ispc::RenderStatic(&input->header, &input->arrays, VISUALIZE_LIGHT_COUNT, framebuffer.r, framebuffer.g, framebuffer.b); double msec = get_elapsed_msec() / nframes; printf("@time of ISPC + TASKS run:\t\t\t[%.3f] msec [%.3f fps]\n", msec, 1.0e3/msec); ispcCycles = std::min(ispcCycles, msec); } printf("[ispc static + tasks]:\t\t[%.3f] msec to render " "%d x %d image\n", ispcCycles, input->header.framebufferWidth, input->header.framebufferHeight); WriteFrame("deferred-ispc-static.ppm", input, framebuffer); DeleteInputData(input); return 0; }
/**
 * Evaluates mandel() for every pixel of the column band
 * [width_start, width_end) across all rows, and prints how long that took.
 *
 * @param x0,y0          coordinates mapped to pixel (0, 0).
 * @param x1,y1          coordinates mapped to pixel (width, height).
 * @param width          full image width in pixels; also the row stride of
 *                       `output`.
 * @param width_start    first column processed by this call (inclusive).
 * @param width_end      last column processed by this call (exclusive).
 * @param height         number of rows processed.
 * @param maxIterations  forwarded unchanged to mandel().
 * @param output         row-major buffer; element j * width + i receives the
 *                       result for pixel (i, j). Only the columns in the band
 *                       are written.
 *
 * NOTE(review): the name suggests this runs concurrently on several threads;
 * whether reset_and_start_timer()/get_elapsed_mcycles() are safe to use that
 * way cannot be determined from this function - confirm.
 */
void mandelbrot_threads(float x0, float y0, float x1, float y1,
                        int width, int width_start, int width_end,
                        int height, int maxIterations, int output[]) {
    const float dx = (x1 - x0) / width;
    const float dy = (y1 - y0) / height;

    reset_and_start_timer();

    for (int j = 0; j < height; j++) {
        // y depends only on the row, so compute it once per row rather than
        // once per pixel.
        const float y = y0 + j * dy;
        for (int i = width_start; i < width_end; ++i) {
            const float x = x0 + i * dx;
            const int index = (j * width + i);
            output[index] = mandel(x, y, maxIterations);
        }
    }

    double dt = get_elapsed_mcycles();
    printf("\n\t\t\t Thread took :\t[%.3f] millon cycles",dt);
}
int main(int argc, char ** argv) { if (argc < 4) { printf("Please input M, N and K\n"); return 1; } int row = atoi(argv[1]); int col = atoi(argv[2]); int num_iterate = atoi(argv[3]); GameOfLife* game; if (row <= 0 || col <= 0) { game = new GameOfLife(6, 4); game->specificInit(); } else { game = new GameOfLife(row, col); game->randomInit(); } if (game->notTooLarge()) { game->print(); } // start to record time consumption reset_and_start_timer(); game->iterateAll(num_iterate); // stop timer and print out total cycles double one_round = get_elapsed_mcycles(); if (game->notTooLarge()) { game->print(); } printf("\n-------- Statistic Infomation --------\n\n"); printf("time consumption:\t\t\t[%.3f] million cycles\n", one_round); delete(game); return 0; }
int main(int argc, char** argv) { if (argc != 2) { printf("usage: deferred_shading <input_file (e.g. data/pp1280x720.bin)>\n"); return 1; } InputData *input = CreateInputDataFromFile(argv[1]); if (!input) { printf("Failed to load input file \"%s\"!\n", argv[1]); return 1; } Framebuffer framebuffer(input->header.framebufferWidth, input->header.framebufferHeight); InitDynamicC(input); #ifdef __cilk InitDynamicCilk(input); #endif // __cilk int nframes = 5; double ispcCycles = 1e30; for (int i = 0; i < 5; ++i) { framebuffer.clear(); reset_and_start_timer(); for (int j = 0; j < nframes; ++j) ispc::RenderStatic(input->header, input->arrays, VISUALIZE_LIGHT_COUNT, framebuffer.r, framebuffer.g, framebuffer.b); double mcycles = get_elapsed_mcycles() / nframes; ispcCycles = std::min(ispcCycles, mcycles); } printf("[ispc static + tasks]:\t\t[%.3f] million cycles to render " "%d x %d image\n", ispcCycles, input->header.framebufferWidth, input->header.framebufferHeight); WriteFrame("deferred-ispc-static.ppm", input, framebuffer); #ifdef __cilk double dynamicCilkCycles = 1e30; for (int i = 0; i < 5; ++i) { framebuffer.clear(); reset_and_start_timer(); for (int j = 0; j < nframes; ++j) DispatchDynamicCilk(input, &framebuffer); double mcycles = get_elapsed_mcycles() / nframes; dynamicCilkCycles = std::min(dynamicCilkCycles, mcycles); } printf("[ispc + Cilk dynamic]:\t\t[%.3f] million cycles to render image\n", dynamicCilkCycles); WriteFrame("deferred-ispc-dynamic.ppm", input, framebuffer); #endif // __cilk double serialCycles = 1e30; for (int i = 0; i < 5; ++i) { framebuffer.clear(); reset_and_start_timer(); for (int j = 0; j < nframes; ++j) DispatchDynamicC(input, &framebuffer); double mcycles = get_elapsed_mcycles() / nframes; serialCycles = std::min(serialCycles, mcycles); } printf("[C++ serial dynamic, 1 core]:\t[%.3f] million cycles to render image\n", serialCycles); WriteFrame("deferred-serial-dynamic.ppm", input, framebuffer); #ifdef __cilk printf("\t\t\t\t(%.2fx speedup 
from static ISPC, %.2fx from Cilk+ISPC)\n", serialCycles/ispcCycles, serialCycles/dynamicCilkCycles); #else printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", serialCycles/ispcCycles); #endif // __cilk DeleteInputData(input); return 0; }
int main(int argc, char* argv[]) { int i; unsigned int SIZEX = 0; unsigned int SIZEY = 0; unsigned int SIZEZ = 0; if (argc == 4) { sscanf(argv [1], "%d", &SIZEX); sscanf(argv [2], "%d", &SIZEY); sscanf(argv [3], "%d", &SIZEZ); }else{ printf("usage: %s xdim ydim zdim\n", argv [0]); return -1; } unsigned int SIZE = SIZEX * SIZEY * SIZEZ; assert(SIZE > 0); printf("allocation size> %d\n", SIZE); float *a = (float*)malloc(sizeof(float) * SIZE); float *b = (float*)malloc(sizeof(float) * SIZE); float *c = (float*)malloc(sizeof(float) * SIZE); #ifdef __NVCUDA__ acc_init(acc_device_nvcuda); #endif #ifdef __NVOPENCL__ #define DEVICE_TYPE acc_device_nvocl printf("compiled for ocl\n"); acc_init(DEVICE_TYPE); acc_list_devices_spec(DEVICE_TYPE); #endif for (i = 0; i < SIZE; ++i) { a [i] = (float)i; b [i] = (float)2 * i; c [i] = 0.0f; } int k; double revsum = 0; int iter = 30; ipmacc_prompt((char*)"IPMACC: memory allocation c\n"); // ISPC host and device are the same, skipping memory allocation ipmacc_prompt((char*)"IPMACC: memory allocation a\n"); // ISPC host and device are the same, skipping memory allocation ipmacc_prompt((char*)"IPMACC: memory allocation b\n"); // ISPC host and device are the same, skipping memory allocation ipmacc_prompt((char*)"IPMACC: memory copyin a\n"); // ISPC host and device are the same, skipping copyin ipmacc_prompt((char*)"IPMACC: memory copyin b\n"); // ISPC host and device are the same, skipping copyin { for(k = 0; k < iter; k++) { reset_and_start_timer(); /* kernel call statement*/ { unsigned int __ispc_n_threads = sysconf(_SC_NPROCESSORS_ONLN); // acc_get_n_cores(acc_device_intelispc); if(getenv("IPMACC_VERBOSE")) printf("IPMACC: Launching ISPC kernel> %d threads + SIMD \n", __ispc_n_threads); __generated_kernel_launch_0(a,c,b,SIZE); } /* kernel call statement*/ // ISPC target is synchronized with CPU // skipping synchronization double dt = get_elapsed_msec(); revsum += 1.0 / dt; printf("@time of openacc run:\t\t\t%.3f msec\n", dt); } } 
ipmacc_prompt((char*)"IPMACC: memory copyout c\n"); // ISPC host and device are the same, skipping copyout printf("harmonic mean openacc run> %.3f msec\n", iter / revsum); for (i = 0; i < SIZE; ++i) { if (c [i] != (a [i] + b [i])) { fprintf(stdout, "Error %d %16.10f!=%16.10f \n", i, c [i], a [i] + b [i]); return -1; } } fprintf(stdout, "OpenACC vectoradd test was successful!\n"); return 0; }
int main(int argc, char *argv[]) { static unsigned int test_iterations[] = {1, 1, 1};//the last two numbers must be equal here int Nx = 256, Ny = 256, Nz = 256; int width = 4; if (argc > 1) { if (strncmp(argv[1], "--scale=", 8) == 0) { RealType scale = atof(argv[1] + 8); Nx = Nx * scale; Ny = Ny * scale; Nz = Nz * scale; } } if ((argc == 4) || (argc == 5)) { for (int i = 0; i < 3; i++) { test_iterations[i] = atoi(argv[argc - 3 + i]); } } RealType *Aserial[2], *Aispc[2]; Aserial[0] = new RealType [Nx * Ny * Nz]; Aserial[1] = new RealType [Nx * Ny * Nz]; Aispc[0] = new RealType [Nx * Ny * Nz]; Aispc[1] = new RealType [Nx * Ny * Nz]; RealType *vsq = new RealType [Nx * Ny * Nz]; RealType coeff[4] = { 0.5, -.25, .125, -.0625 }; // InitData(Nx, Ny, Nz, Aispc, vsq); // // Compute the image using the ispc implementation on one core; report // the minimum time of three runs. // double minTimeISPC = 1e30; // for (unsigned int i = 0; i < test_iterations[0]; ++i) { // reset_and_start_timer(); // loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width, // width, Nz - width, Nx, Ny, Nz, coeff, vsq, // Aispc[0], Aispc[1]); // double dt = get_elapsed_mcycles(); // printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt); // minTimeISPC = std::min(minTimeISPC, dt); // } // printf("[stencil ispc 1 core]:\t\t[%.3f] million cycles\n", minTimeISPC); // InitData(Nx, Ny, Nz, Aispc, vsq); // // // // Compute the image using the ispc implementation with tasks; report // // the minimum time of three runs. 
// // double minTimeISPCTasks = 1e30; // for (unsigned int i = 0; i < test_iterations[1]; ++i) { // reset_and_start_timer(); // loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width, // width, Nz - width, Nx, Ny, Nz, coeff, vsq, // Aispc[0], Aispc[1]); // double dt = get_elapsed_mcycles(); // printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt); // minTimeISPCTasks = std::min(minTimeISPCTasks, dt); // } // printf("[stencil ispc + tasks]:\t\t[%.3f] million cycles\n", minTimeISPCTasks); InitData(Nx, Ny, Nz, Aserial, vsq); // // And run the serial implementation 3 times, again reporting the // minimum time. // double minTimeSerial = 1e30; for (unsigned int i = 0; i < test_iterations[2]; ++i) { reset_and_start_timer(); loop_stencil_serial(0, 6, width, Nx-width, width, Ny - width, width, Nz - width, Nx, Ny, Nz, coeff, vsq, Aserial[0], Aserial[1]); double dt = get_elapsed_msec(); printf("@time of serial run:\t\t\t[%.3f] milli secondes\n", dt); // minTimeSerial = std::min(minTimeSerial, dt); } // printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", // minTimeSerial / minTimeISPC, minTimeSerial / minTimeISPCTasks); // Check for agreement int offset = 0; RealType norm=0; for (int z = 0; z < Nz; ++z){ for (int y = 0; y < Ny; ++y){ for (int x = 0; x < Nx; ++x, ++offset) { RealType value= Aserial[1][offset]; norm += value*value; } } } std::cout << std::setprecision(16)<< "norm: " << sqrt(norm)<<std::endl; }