int main(void){ //func1(); //Comment top and uncomment this from altera alt_u32 time1; alt_u32 time2; alt_u32 time3; int tamfiltro=5,modo=0,w,h,j; unsigned char** matriz; unsigned char** newmatriz; int filtro[24]; int ow,oh; matriz=pgmread(filename,&h,&w); if (tamfiltro > 0){ rellenar_filtro(filtro,tamfiltro,modo); ow=w-(tamfiltro-1); oh=h-(tamfiltro-1); newmatriz = calloc(ow, sizeof(unsigned char*)); for (j=0;j<ow;j++){ if (( newmatriz[j] = calloc(oh, sizeof(unsigned char))) == NULL){ printf("Memory allocation error. Exit program\n"); exit(1); } } printf("I ended creating the row %d \n", j); } if (alt_timestamp_start() < 0) { printf ("No timestamp device available\n"); } else { time1 = alt_timestamp(); aplicarfiltro(matriz,filtro,newmatriz,ow,oh,tamfiltro); time2 = alt_timestamp(); //func2(); /* second function to monitor */ //time3 = alt_timestamp(); printf ("time in func1 = %u ticks\n", (unsigned int) (time2 - time1)); pgmwrite(outname,ow,oh,newmatriz,"",1); pritnf("Image created successfully\n"); /*printf ("time in func2 = %u ticks\n", (unsigned int) (time3 - time2)); printf ("Number of ticks per second = %u\n", (unsigned int)alt_timestamp_freq()); }*/ return 0; } }
int main() { MPI_Init(NULL, NULL); /* MPI variables (in some sense) */ MPI_Comm comm; MPI_Status status; MPI_Request request; int size, rank, tag; int comm2d, disp, left, right, up, down, reorder; int dims[NDIMS], period[NDIMS], direction[NDIMS]; /* variable for the program */ int nx, ny, nxp, nyp, nxpe, nype; int i, j, iter; int lastcheck, checkinc; double max, delta; double avg, mean; char picName[20] = "edgeCHANGETHIS.pgm"; /* * find the size of the image do the arrays can be defined */ pgmsize(picName, &nx, &ny); comm = MPI_COMM_WORLD; MPI_Comm_size(comm, &size); tag = 1; /* Introduce Cartesian topology */ for(i=0; i<NDIMS; ++i) { dims[i] = 0; period[i] = FALSE; /* TRUE gives Cyclic */ direction[i] = i; /* shift along the same index as element of the array*/ } reorder = TRUE; /* allows the processes to become reordered to hopefully improve efficiency */ disp = 1; /* Shift by 1 */ MPI_Dims_create(size,NDIMS,dims); MPI_Cart_create(comm,NDIMS,dims,period,reorder,&comm2d); MPI_Comm_rank(comm2d,&rank); MPI_Cart_shift(comm2d,direction[1],disp,&left,&right); MPI_Cart_shift(comm2d,direction[0],disp,&up,&down); /* check the array is a reasonable size to be split up among the processors to be used and if not quit */ if(nx < dims[1] || ny < dims[0]) { if(ROOT == rank) { printf("too many processors running on job, %d in x direction but only %d elements, %d in y, %d elements\n", dims[1], nx, dims[0], ny); } return 1; } initialise_local_array_sizes(nx, ny, &nxp, &nyp, &nxpe, &nype, dims, rank, size); /* now declare the arrays necessary (note they can be different sizes on different processes*/ float localBuf[nxp][nyp]; float localEdge[nxp+2][nyp+2], localOld[nxp+2][nyp+2], localNew[nxp+2][nyp+2]; float globalImage[nx][ny]; /* * set the halos of all the appropriate arrays to 255 */ set_halos(localEdge,localOld, localNew, nxp, nyp); if(ROOT == rank) { printf("Reading in Picture\n"); pgmread(picName, globalImage, nx, ny); } /*set up all the datatypes that will need to be used*/ 
/*send contiguous halos*/ MPI_Datatype mcols; MPI_Type_contiguous(nyp, MPI_FLOAT, &mcols); MPI_Type_commit(&mcols); /*send non-conmtiguous halos*/ MPI_Datatype mrows; MPI_Type_vector(nxp, 1, nyp+2, MPI_FLOAT, &mrows); /*nyp+2 since will be used on nyp+2 size arrays*/ MPI_Type_commit(&mrows); /*scatter data to processes with same size arrays as ROOT*/ MPI_Datatype scatter[4]; MPI_Type_vector(nxp, nyp, ny, MPI_FLOAT, &scatter[3]); MPI_Type_commit(&scatter[3]); /*scatter data to processes with different size arrays than ROOT in dim[0]*/ MPI_Type_vector(nxp, nype, ny, MPI_FLOAT, &scatter[0]); MPI_Type_commit(&scatter[0]); /*scatter data to processes with different size arrays than ROOT in dim[1]*/ MPI_Type_vector(nxpe, nyp, ny, MPI_FLOAT, &scatter[1]); MPI_Type_commit(&scatter[1]); /*scatter data to processes with different size arrays than ROOT in dim[0] and dim[1]*/ MPI_Type_vector(nxpe, nype, ny, MPI_FLOAT, &scatter[4]); MPI_Type_commit(&scatter[4]); /* Scatter the data from processer 0 to the rest */ if(ROOT == rank) { printf("Scattering image\n"); scatter_data(globalImage, localBuf, ny, nxp, nyp, dims, rank, comm2d, scatter); } else { MPI_Recv(localBuf, nxp*nyp, MPI_FLOAT, 0, rank, comm2d, &status); } /* * set up the edge data to be used in computation */ for(i=0; i<nxp; ++i) { for(j=0; j<nyp; ++j) { localEdge[i+1][j+1] = localBuf[i][j]; localOld[i+1][j+1] = 255; } } /* * computation loop */ if(ROOT == rank) { printf("Performing update routine for %d iterations\n", ITERATIONS); } double t1, t2; t1 = MPI_Wtime(); tag = 2; lastcheck = checkinc = iter = 0; delta = 1; while(iter < ITERATIONS) { send_halos(localOld, left, right, up, down, comm2d, tag, nxp, nyp, mrows, mcols); avg = 0; for(i=1; i<nxp+1; ++i) { for(j=1; j<nyp+1; ++j) { localNew[i][j] = 0.25*(localOld[i-1][j] + localOld[i+1][j] + localOld[i][j-1] + localOld[i][j+1] - localEdge[i][j]); avg = avg + localNew[i][j]; } } max = 0; for(i=1; i<nxp+1; ++i) { for(j=1; j<nyp+1; ++j) { if(fabs(localNew[i][j] - 
localOld[i][j]) > max) { max = fabs(localNew[i][j] - localOld[i][j]); } localOld[i][j] = localNew[i][j]; } } /* * want to perform a calculation of the average pixel value and delta */ if(iter == lastcheck + checkinc) { lastcheck = iter; MPI_Reduce(&avg, &mean, 1, MPI_DOUBLE, MPI_SUM, ROOT, comm2d); MPI_Allreduce(&max, &delta, 1, MPI_DOUBLE, MPI_MAX, comm2d); if(ROOT == rank) { // printf("iteration %d, average pixel value is %f, current delta %f\n", iter, mean/(nx*ny), delta); } checkinc = (int)(delta*500); if(checkinc > 200) checkinc = 500; } ++iter; if(ITERATIONS == iter) { break; } } t2 = MPI_Wtime(); if(ROOT == rank) { printf("finished after %d iterations, delta was %f\n", iter-1, delta); printf("seconds per iteration: %f\n", (t2-t1)/(iter-1)); } for(i=0; i<nxp; ++i) { for(j=0; j<nyp; ++j) { localBuf[i][j] = localOld[i+1][j+1]; } } tag = 3; if(ROOT == rank) { printf("recieving back data\n"); receive_data(globalImage, localBuf, ny, nxp, nyp, dims, tag, rank, comm2d, scatter); } else { MPI_Issend(localBuf, nxp*nyp, MPI_FLOAT, ROOT, tag, comm2d, &request); MPI_Wait(&request, &status); } if(ROOT == rank) { pgmwrite("parpictureCHANGETHIS.pgm", globalImage, nx, ny); } MPI_Finalize(); return 0; }
/*
 * Entry point of the serial version of the image reconstruction.
 *
 * Reads the edge picture, then repeatedly replaces every interior value by a
 * quarter of (sum of its four neighbours minus the edge value) until either
 * the largest change seen at a check falls to 0.1 or below, or ITERATIONS
 * sweeps have been performed. The result is written to "picture.pgm".
 *
 * The four image arrays are variable-length arrays with automatic storage,
 * sized from the values returned by pgmsize().
 *
 * Returns 0.
 */
int main() {
  int nx, ny, i, j, iter;
  int lastcheck, checkinc;
  double max;
  double avg;
  char picName[20] = "edge512x384.pgm";

  /*
   * find the size of the image so the arrays can be defined
   */
  pgmsize(picName, &nx, &ny);

  float globalImage[nx][ny], localEdge[nx+2][ny+2], localOld[nx+2][ny+2], localNew[nx+2][ny+2];

  /*
   * set the halos of all the appropriate arrays to 255
   */
  for(i=0; i<nx+2; ++i) {
    localEdge[i][0] = localEdge[i][ny+1] = 255;
    localOld[i][0] = localOld[i][ny+1] = 255;
    localNew[i][0] = localNew[i][ny+1] = 255;
  }
  for(j=0; j<ny+2; ++j) {
    localEdge[0][j] = localEdge[nx+1][j] = 255;
    localOld[0][j] = localOld[nx+1][j] = 255;
    localNew[0][j] = localNew[nx+1][j] = 255;
  }

  printf("Reading in Picture\n");
  pgmread(picName, globalImage, nx, ny);

  /*
   * set up the edge data to be used in computation
   */
  for(i=1; i<nx+1; ++i) {
    for(j=1; j<ny+1; ++j) {
      localEdge[i][j] = globalImage[i-1][j-1];
      localOld[i][j] = 255;
    }
  }

  /*
   * computation loop
   */
  printf("Performing update routine for %d iterations\n", ITERATIONS);

  lastcheck = checkinc = iter = 0;
  max = 1;

  while(max > 0.1) {
    /* true on the sweeps where the change (delta) is measured and reported */
    int checking = (iter == lastcheck + checkinc);

    avg = 0;
    for(i=1; i<nx+1; ++i) {
      for(j=1; j<ny+1; ++j) {
        localNew[i][j] = 0.25*(localOld[i-1][j] + localOld[i+1][j] + localOld[i][j-1] + localOld[i][j+1] - localEdge[i][j]);
        avg = avg + localNew[i][j];
      }
    }

    /* max is only recomputed on check sweeps; otherwise it keeps its last value */
    if(checking) max = 0;
    for(i=1; i<nx+1; ++i) {
      for(j=1; j<ny+1; ++j) {
        if(checking && fabs(localNew[i][j] - localOld[i][j]) > max) {
          max = fabs(localNew[i][j] - localOld[i][j]);
        }
        localOld[i][j] = localNew[i][j];
      }
    }

    /*
     * want to perform a calculation of the average pixel value and delta;
     * the gap until the next check is derived from the current delta
     */
    if(checking) {
      lastcheck = iter;
      printf("iteration %d, average pixel value is %f, current delta: %f\n", iter, avg/(nx*ny), max);
      checkinc = (int)(max*500);
      if(checkinc > 200) checkinc = 500;
    }

    ++iter;
    if(ITERATIONS == iter) {
      break;
    }
  }

  /* iter was incremented once per completed sweep, so it is the sweep count */
  printf("finished after %d iterations. Delta was %f\n", iter, max);

  /*
   * set the data back for printing
   */
  for(i=0; i<nx; ++i) {
    for(j=0; j<ny; ++j) {
      globalImage[i][j] = localNew[i+1][j+1];
    }
  }

  pgmwrite("picture.pgm", globalImage, nx, ny);

  return 0;
}
int main(int argc, char *argv[]) { int x, y; int i; int errors; double start_time_inc_data, end_time_inc_data; double cpu_start_time, cpu_end_time; cl_mem d_input, d_output, d_edge, tmp; cl_int err; int width = WIDTH; int height = HEIGHT; cl_context ctxt; cl_command_queue queue; cl_kernel kernel; size_t memSize = (WIDTH+2) * (HEIGHT+2) * sizeof(float); printf("Image size: %dx%d\n", WIDTH, HEIGHT); printf("Local work size: %dx%d\n", LOCAL_W, LOCAL_H); /* initialise OpenCL */ err = initCLDevice(CL_DEVICE_TYPE_GPU, &ctxt, &queue); checkOpenCLError(err, "initCLDevice"); err = getCLKernel(ctxt, "reverse.cl", "reverse2d", &kernel); //err = getCLKernel(ctxt, "reverse.cl", "reverse1d_col", &kernel); //err = getCLKernel(ctxt, "reverse.cl", "reverse1d_row", &kernel); checkOpenCLError(err, "getCLKernel"); /* allocate memory on device */ d_input = clCreateBuffer(ctxt, CL_MEM_READ_WRITE, memSize, NULL, &err); checkOpenCLError(err, "buffer allocation"); d_output = clCreateBuffer(ctxt, CL_MEM_READ_WRITE, memSize, NULL, &err); checkOpenCLError(err, "buffer allocation"); d_edge = clCreateBuffer(ctxt, CL_MEM_READ_ONLY, memSize, NULL, &err); checkOpenCLError(err, "buffer allocation"); /* read in edge data */ datread("edge256x192.dat", (void *)edge, WIDTH, HEIGHT); /* zero buffer so that halo is zeroed */ for (y = 0; y < HEIGHT+2; y++) { for (x = 0; x < WIDTH+2; x++) { host_input[y][x] = 0.0; } } /* copy input to buffer with halo */ for (y = 0; y < HEIGHT; y++) { for (x = 0; x < WIDTH; x++) { host_input[y+1][x+1] = edge[y][x]; } } /* * copy to all the GPU arrays. 
d_output doesn't need to have this data but * this will zero its halo */ start_time_inc_data = get_current_time(); err = clEnqueueWriteBuffer(queue, d_input, CL_TRUE, 0, memSize, host_input, 0, NULL, NULL); checkOpenCLError(err, "buffer write"); err = clEnqueueWriteBuffer(queue, d_output, CL_TRUE, 0, memSize, host_input, 0, NULL, NULL); checkOpenCLError(err, "buffer write"); err = clEnqueueWriteBuffer(queue, d_edge, CL_TRUE, 0, memSize, host_input, 0, NULL, NULL); checkOpenCLError(err, "buffer write"); /* run on GPU */ for (i = 0; i < ITERATIONS; i++) { /* run the kernel */ /* * One of these kernel invocations should be uncommented at a time. Make sure it * matches the kernel actually loaded above. */ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_output); checkOpenCLError(err, "setting kernel arguments"); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_input); checkOpenCLError(err, "setting kernel arguments"); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_edge); checkOpenCLError(err, "setting kernel arguments"); err = clSetKernelArg(kernel, 3, sizeof(int), &width); checkOpenCLError(err, "setting kernel arguments"); size_t globalsize[2] = { WIDTH, HEIGHT }; size_t localsize[2] = { LOCAL_W, LOCAL_H }; err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, &globalsize[0], &localsize[0], 0, NULL, NULL); //size_t globalsize[1] = { HEIGHT }; //size_t localsize[1] = { LOCAL_H }; //err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalsize[0], &localsize[0], 0, NULL, NULL); //err = clSetKernelArg(kernel, 4, sizeof(int), &height); //checkOpenCLError(err, "setting kernel arguments"); //size_t globalsize[1] = { WIDTH }; //size_t localsize[1] = { LOCAL_W }; //err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalsize[0], &localsize[0], 0, NULL, NULL); checkOpenCLError(err, "running kernel"); /* wait for kernel to complete */ clFinish(queue); /* swap the buffer pointers ready for next time */ tmp = d_input; d_input = d_output; d_output = tmp; } err = 
clEnqueueReadBuffer(queue, d_input, CL_TRUE, 0, memSize, gpu_output, 0, NULL, NULL); checkOpenCLError(err, "buffer read"); end_time_inc_data = get_current_time(); /* * run on host for comparison */ cpu_start_time = get_current_time(); for (i = 0; i < ITERATIONS; i++) { /* perform stencil operation */ for (y = 0; y < HEIGHT; y++) { for (x = 0; x < WIDTH; x++) { host_output[y+1][x+1] = (host_input[y+1][x] + host_input[y+1][x+2] + host_input[y][x+1] + host_input[y+2][x+1] \ - edge[y][x]) * 0.25; } } /* copy output back to input buffer */ for (y = 0; y < HEIGHT; y++) { for (x = 0; x < WIDTH; x++) { host_input[y+1][x+1] = host_output[y+1][x+1]; } } } cpu_end_time = get_current_time(); /* check that GPU result matches host result */ errors = 0; for (y = 0; y < HEIGHT; y++) { for (x = 0; x < WIDTH; x++) { float diff = fabs(gpu_output[y+1][x+1] - host_output[y+1][x+1]); if (diff >= MAX_DIFF) { errors++; printf("Error at %d,%d (CPU=%f, GPU=%f)\n", x, y, \ host_output[y+1][x+1], \ gpu_output[y+1][x+1]); } } } if (errors == 0) printf("\n\n ***TEST PASSED SUCCESSFULLY*** \n\n\n"); /* copy result to output buffer */ for (y = 0; y < HEIGHT; y++) { for (x = 0; x < WIDTH; x++) { img[y][x] = gpu_output[y+1][x+1]; } } /* write PGM */ pgmwrite("output.pgm", (void *)img, WIDTH, HEIGHT); clReleaseMemObject(d_input); clReleaseMemObject(d_output); clReleaseMemObject(d_edge); printf("GPU Time (Including Data Transfer): %fs\n", \ end_time_inc_data - start_time_inc_data); printf("CPU Time : %fs\n", \ cpu_end_time - cpu_start_time); return 0; }
/**
 * @brief Write an array to a PGM file; a thin wrapper around pgmwrite.
 *
 * The call is a no-op on every rank other than 0.
 *
 * @param rank     the rank of the calling process
 * @param filename the file to write to
 * @param img_dim  the dimensions of the image (its m and n fields are passed on)
 * @param data     the array to write to disk; the address of data[0][0] is
 *                 handed to pgmwrite (presumably the rows are stored
 *                 contiguously - verify against the allocator)
 */
void image_write (int rank, char * filename, image_dimensions img_dim, real ** data)
{
    /* Only rank 0 performs the write. */
    if (rank != 0)
    {
        return;
    }

    pgmwrite(filename, &data[0][0], img_dim.m, img_dim.n);
}