int main( int argc, char *argv []) { //======================================================================================================================================================150 // CPU/MCPU VARIABLES //======================================================================================================================================================150 // timer long long time0; time0 = get_time(); // timer long long time1; long long time2; long long time3; long long time4; long long time5; long long time6; long long time7; // counters int i, j, k, l, m, n; // system memory par_str par_cpu; dim_str dim_cpu; box_str* box_cpu; FOUR_VECTOR* rv_cpu; fp* qv_cpu; FOUR_VECTOR* fv_cpu; int nh; time1 = get_time(); //======================================================================================================================================================150 // CHECK INPUT ARGUMENTS //======================================================================================================================================================150 // assing default values dim_cpu.arch_arg = 0; dim_cpu.cores_arg = 1; dim_cpu.boxes1d_arg = 1; // go through arguments if(argc==3){ for(dim_cpu.cur_arg=1; dim_cpu.cur_arg<argc; dim_cpu.cur_arg++){ // check if -boxes1d if(strcmp(argv[dim_cpu.cur_arg], "-boxes1d")==0){ // check if value provided if(argc>=dim_cpu.cur_arg+1){ // check if value is a number if(isInteger(argv[dim_cpu.cur_arg+1])==1){ dim_cpu.boxes1d_arg = atoi(argv[dim_cpu.cur_arg+1]); if(dim_cpu.boxes1d_arg<0){ printf("ERROR: Wrong value to -boxes1d argument, cannot be <=0\n"); return 0; } dim_cpu.cur_arg = dim_cpu.cur_arg+1; } // value is not a number else{ printf("ERROR: Value to -boxes1d argument in not a number\n"); return 0; } } // value not provided else{ printf("ERROR: Missing value to -boxes1d argument\n"); return 0; } } // unknown else{ printf("ERROR: Unknown argument\n"); return 0; } } // Print configuration printf("Configuration used: arch = %d, cores = %d, boxes1d = %d\n", dim_cpu.arch_arg, dim_cpu.cores_arg, dim_cpu.boxes1d_arg); } else{ printf("Provide boxes1d argument, example: -boxes1d 16"); return 0; } time2 = get_time(); //======================================================================================================================================================150 // INPUTS //======================================================================================================================================================150 par_cpu.alpha = 0.5; time3 = get_time(); //======================================================================================================================================================150 // DIMENSIONS //======================================================================================================================================================150 // total number of boxes dim_cpu.number_boxes = dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg; // 8*8*8=512 // how many particles space has in each direction dim_cpu.space_elem = dim_cpu.number_boxes * NUMBER_PAR_PER_BOX; //512*100=51,200 dim_cpu.space_mem = dim_cpu.space_elem * sizeof(FOUR_VECTOR); dim_cpu.space_mem2 = dim_cpu.space_elem * sizeof(fp); // box array dim_cpu.box_mem = dim_cpu.number_boxes * sizeof(box_str); time4 = get_time(); //======================================================================================================================================================150 // SYSTEM MEMORY //======================================================================================================================================================150 //====================================================================================================100 // BOX //====================================================================================================100 // allocate boxes box_cpu = (box_str*)malloc(dim_cpu.box_mem); // initialize number of home boxes nh = 0; // home boxes in z direction for(i=0; i<dim_cpu.boxes1d_arg; i++){ // home boxes in y direction for(j=0; j<dim_cpu.boxes1d_arg; j++){ // home boxes in x direction for(k=0; k<dim_cpu.boxes1d_arg; k++){ // current home box box_cpu[nh].x = k; box_cpu[nh].y = j; box_cpu[nh].z = i; box_cpu[nh].number = nh; box_cpu[nh].offset = nh * NUMBER_PAR_PER_BOX; // initialize number of neighbor boxes box_cpu[nh].nn = 0; // neighbor boxes in z direction for(l=-1; l<2; l++){ // neighbor boxes in y direction for(m=-1; m<2; m++){ // neighbor boxes in x direction for(n=-1; n<2; n++){ // check if (this neighbor exists) and (it is not the same as home box) if( (((i+l)>=0 && (j+m)>=0 && (k+n)>=0)==true && ((i+l)<dim_cpu.boxes1d_arg && (j+m)<dim_cpu.boxes1d_arg && (k+n)<dim_cpu.boxes1d_arg)==true) && (l==0 && m==0 && n==0)==false ){ // current neighbor box box_cpu[nh].nei[box_cpu[nh].nn].x = (k+n); box_cpu[nh].nei[box_cpu[nh].nn].y = (j+m); box_cpu[nh].nei[box_cpu[nh].nn].z = (i+l); box_cpu[nh].nei[box_cpu[nh].nn].number = (box_cpu[nh].nei[box_cpu[nh].nn].z * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg) + (box_cpu[nh].nei[box_cpu[nh].nn].y * dim_cpu.boxes1d_arg) + box_cpu[nh].nei[box_cpu[nh].nn].x; box_cpu[nh].nei[box_cpu[nh].nn].offset = box_cpu[nh].nei[box_cpu[nh].nn].number * NUMBER_PAR_PER_BOX; // increment neighbor box box_cpu[nh].nn = box_cpu[nh].nn + 1; } } // neighbor boxes in x direction } // neighbor boxes in y direction } // neighbor boxes in z direction // increment home box nh = nh + 1; } // home boxes in x direction } // home boxes in y direction } // home boxes in z direction //====================================================================================================100 // PARAMETERS, DISTANCE, CHARGE AND FORCE //====================================================================================================100 // random generator seed set to random value - time in this case srand(time(NULL)); // input (distances) rv_cpu = (FOUR_VECTOR*)malloc(dim_cpu.space_mem); for(i=0; i<dim_cpu.space_elem; i=i+1){ rv_cpu[i].v = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0 // rv_cpu[i].v = 0.1; // get a number in the range 0.1 - 1.0 rv_cpu[i].x = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0 // rv_cpu[i].x = 0.2; // get a number in the range 0.1 - 1.0 rv_cpu[i].y = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0 // rv_cpu[i].y = 0.3; // get a number in the range 0.1 - 1.0 rv_cpu[i].z = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0 // rv_cpu[i].z = 0.4; // get a number in the range 0.1 - 1.0 } // input (charge) qv_cpu = (fp*)malloc(dim_cpu.space_mem2); for(i=0; i<dim_cpu.space_elem; i=i+1){ qv_cpu[i] = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0 // qv_cpu[i] = 0.5; // get a number in the range 0.1 - 1.0 } // output (forces) fv_cpu = (FOUR_VECTOR*)malloc(dim_cpu.space_mem); for(i=0; i<dim_cpu.space_elem; i=i+1){ fv_cpu[i].v = 0; // set to 0, because kernels keeps adding to initial value fv_cpu[i].x = 0; // set to 0, because kernels keeps adding to initial value fv_cpu[i].y = 0; // set to 0, because kernels keeps adding to initial value fv_cpu[i].z = 0; // set to 0, because kernels keeps adding to initial value } time5 = get_time(); //======================================================================================================================================================150 // KERNEL //======================================================================================================================================================150 //====================================================================================================100 // GPU_OPENCL //====================================================================================================100 kernel_gpu_opencl_wrapper( par_cpu, dim_cpu, box_cpu, rv_cpu, qv_cpu, fv_cpu); time6 = get_time(); //======================================================================================================================================================150 // SYSTEM MEMORY DEALLOCATION //======================================================================================================================================================150 free(rv_cpu); free(qv_cpu); free(fv_cpu); free(box_cpu); time7 = get_time(); //======================================================================================================================================================150 // DISPLAY TIMING //======================================================================================================================================================150 // printf("Time spent in different stages of the application:\n"); // printf("%15.12f s, %15.12f % : VARIABLES\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time7-time0) * 100); // printf("%15.12f s, %15.12f % : INPUT ARGUMENTS\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time7-time0) * 100); // printf("%15.12f s, %15.12f % : INPUTS\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time7-time0) * 100); // printf("%15.12f s, %15.12f % : dim_cpu\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time7-time0) * 100); // printf("%15.12f s, %15.12f % : SYS MEM: ALO\n", (float) (time5-time4) / 1000000, (float) (time5-time4) / (float) (time7-time0) * 100); // printf("%15.12f s, %15.12f % : KERNEL: COMPUTE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) (time7-time0) * 100); // printf("%15.12f s, %15.12f % : SYS MEM: FRE\n", (float) (time7-time6) / 1000000, (float) (time7-time6) / (float) (time7-time0) * 100); // printf("Total time:\n"); // printf("%.12f s\n", (float) (time7-time0) / 1000000); //======================================================================================================================================================150 // RETURN //======================================================================================================================================================150 return 0.0; // always returns 0.0 }
int main( int argc, char* argv []){ printf("WG size of kernel = %d \n", NUMBER_THREADS); //======================================================================================================================================================150 // VARIABLES //======================================================================================================================================================150 // time long long time0; long long time1; long long time2; long long time3; long long time4; long long time5; long long time6; // inputs image, input paramenters fp* image_ori; // originalinput image int image_ori_rows; int image_ori_cols; long image_ori_elem; // inputs image, input paramenters fp* image; // input image int Nr,Nc; // IMAGE nbr of rows/cols/elements long Ne; // algorithm parameters int niter; // nbr of iterations fp lambda; // update step size // size of IMAGE int r1,r2,c1,c2; // row/col coordinates of uniform ROI long NeROI; // ROI nbr of elements // surrounding pixel indicies int* iN; int* iS; int* jE; int* jW; // counters int iter; // primary loop long i; // image row long j; // image col // memory sizes int mem_size_i; int mem_size_j; time0 = get_time(); //======================================================================================================================================================150 // INPUT ARGUMENTS //======================================================================================================================================================150 if(argc != 5){ printf("ERROR: wrong number of arguments\n"); return 0; } else{ niter = atoi(argv[1]); lambda = atof(argv[2]); Nr = atoi(argv[3]); // it is 502 in the original image Nc = atoi(argv[4]); // it is 458 in the original image } time1 = get_time(); //======================================================================================================================================================150 // READ INPUT FROM FILE //======================================================================================================================================================150 //====================================================================================================100 // READ IMAGE (SIZE OF IMAGE HAS TO BE KNOWN) //====================================================================================================100 image_ori_rows = 502; image_ori_cols = 458; image_ori_elem = image_ori_rows * image_ori_cols; image_ori = (fp*)malloc(sizeof(fp) * image_ori_elem); read_graphics( "../../data/srad/image.pgm", image_ori, image_ori_rows, image_ori_cols, 1); //====================================================================================================100 // RESIZE IMAGE (ASSUMING COLUMN MAJOR STORAGE OF image_orig) //====================================================================================================100 Ne = Nr*Nc; image = (fp*)malloc(sizeof(fp) * Ne); resize( image_ori, image_ori_rows, image_ori_cols, image, Nr, Nc, 1); //====================================================================================================100 // End //====================================================================================================100 time2 = get_time(); //======================================================================================================================================================150 // SETUP //======================================================================================================================================================150 // variables r1 = 0; // top row index of ROI r2 = Nr - 1; // bottom row index of ROI c1 = 0; // left column index of ROI c2 = Nc - 1; // right column index of ROI // ROI image size NeROI = (r2-r1+1)*(c2-c1+1); // number of elements in ROI, ROI size // allocate variables for surrounding pixels mem_size_i = sizeof(int) * Nr; // iN = (int *)malloc(mem_size_i) ; // north surrounding element iS = (int *)malloc(mem_size_i) ; // south surrounding element mem_size_j = sizeof(int) * Nc; // jW = (int *)malloc(mem_size_j) ; // west surrounding element jE = (int *)malloc(mem_size_j) ; // east surrounding element // N/S/W/E indices of surrounding pixels (every element of IMAGE) for (i=0; i<Nr; i++) { iN[i] = i-1; // holds index of IMAGE row above iS[i] = i+1; // holds index of IMAGE row below } for (j=0; j<Nc; j++) { jW[j] = j-1; // holds index of IMAGE column on the left jE[j] = j+1; // holds index of IMAGE column on the right } // N/S/W/E boundary conditions, fix surrounding indices outside boundary of image iN[0] = 0; // changes IMAGE top row index from -1 to 0 iS[Nr-1] = Nr-1; // changes IMAGE bottom row index from Nr to Nr-1 jW[0] = 0; // changes IMAGE leftmost column index from -1 to 0 jE[Nc-1] = Nc-1; // changes IMAGE rightmost column index from Nc to Nc-1 time3= get_time(); //======================================================================================================================================================150 // KERNEL //======================================================================================================================================================150 kernel_gpu_opencl_wrapper( image, // input image Nr, // IMAGE nbr of rows Nc, // IMAGE nbr of cols Ne, // IMAGE nbr of elem niter, // nbr of iterations lambda, // update step size NeROI, // ROI nbr of elements iN, iS, jE, jW, iter, // primary loop mem_size_i, mem_size_j); time4 = get_time(); //======================================================================================================================================================150 // WRITE OUTPUT IMAGE TO FILE //======================================================================================================================================================150 write_graphics( "./output/image_out.pgm", image, Nr, Nc, 1, 255); time5 = get_time(); //======================================================================================================================================================150 // FREE MEMORY //======================================================================================================================================================150 free(image_ori); free(image); free(iN); free(iS); free(jW); free(jE); time6 = get_time(); //======================================================================================================================================================150 // DISPLAY TIMING //======================================================================================================================================================150 printf("Time spent in different stages of the application:\n"); printf("%.12f s, %.12f % : READ COMMAND LINE PARAMETERS\n", (fp) (time1-time0) / 1000000, (fp) (time1-time0) / (fp) (time5-time0) * 100); printf("%.12f s, %.12f % : READ AND RESIZE INPUT IMAGE FROM FILE\n", (fp) (time2-time1) / 1000000, (fp) (time2-time1) / (fp) (time5-time0) * 100); printf("%.12f s, %.12f % : SETUP\n", (fp) (time3-time2) / 1000000, (fp) (time3-time2) / (fp) (time5-time0) * 100); printf("%.12f s, %.12f % : KERNEL\n", (fp) (time4-time3) / 1000000, (fp) (time4-time3) / (fp) (time5-time0) * 100); printf("%.12f s, %.12f % : WRITE OUTPUT IMAGE TO FILE\n", (fp) (time5-time4) / 1000000, (fp) (time5-time4) / (fp) (time5-time0) * 100); printf("%.12f s, %.12f % : FREE MEMORY\n", (fp) (time6-time5) / 1000000, (fp) (time6-time5) / (fp) (time5-time0) * 100); printf("Total time:\n"); printf("%.12f s\n", (fp) (time5-time0) / 1000000); }