/*
 * Allocates a table of ow row pointers (oh bytes per row) and then reports
 * the tick count between two alt_timestamp() samples.
 *
 * NOTE(review): this listing looks incomplete -- closing braces, the code
 * that assigns ow/oh, and the terminator of the final block comment are not
 * visible. The notes below describe only what is visible; confirm against
 * the complete source.
 */
int main(void){
//Comment top and uncomment this from altera
	/* Timestamp samples; time3 is referenced only from commented-out code. */
	alt_u32 time1;
	alt_u32 time2;
	alt_u32 time3;
	/* NOTE(review): modo, w, h, matriz and filtro are not used in the visible lines. */
	int tamfiltro=5,modo=0,w,h,j;
	unsigned char** matriz;
	unsigned char** newmatriz;
	int filtro[24];
	/* NOTE(review): ow and oh are read below with no visible assignment -- confirm they are set before use. */
	int ow,oh;
	/* tamfiltro is initialised to 5 above and is not modified in the visible lines. */
	if (tamfiltro > 0){
		/* Zero-initialised table of ow row pointers. NOTE(review): the result is not checked for NULL before newmatriz[j] is written. */
		newmatriz = calloc(ow, sizeof(unsigned char*));
		for (j=0;j<ow;j++){
			/* Each row is oh zero-initialised bytes; a NULL result is reported as an allocation error. */
			if (( newmatriz[j] = calloc(oh, sizeof(unsigned char))) == NULL){
			printf("Memory allocation error. Exit program\n");
		printf("I ended creating the row %d \n", j);
	/* A negative return from alt_timestamp_start() is reported as a missing timestamp device. */
	if (alt_timestamp_start() < 0)
		printf ("No timestamp device available\n");
		/* NOTE(review): the two samples are taken back to back; presumably the code being measured sat between them -- TODO confirm. */
		time1 = alt_timestamp();
		time2 = alt_timestamp();
//func2(); /* second function to monitor */
//time3 = alt_timestamp();
		printf ("time in func1 = %u ticks\n", (unsigned int) (time2 - time1));
		/* NOTE(review): "pritnf" looks like a typo for printf. */
		pritnf("Image created  successfully\n");
/* NOTE(review): the block comment opened on the next line has no visible terminator in this function. */
/*printf ("time in func2 = %u ticks\n",
(unsigned int) (time3 - time2));
printf ("Number of ticks per second = %u\n",
(unsigned int)alt_timestamp_freq());
	return 0;
Ejemplo n.º 2
/*
 * Parallel (MPI) driver. Visible flow: query the image size with pgmsize(),
 * compute per-process sub-array sizes, read the image on ROOT, build derived
 * datatypes, distribute the image via scatter_data()/MPI_Recv, repeatedly
 * update localNew from the four neighbours of localOld minus localEdge, then
 * collect the result and write it with pgmwrite() on ROOT.
 *
 * NOTE(review): this listing looks incomplete. Braces and several comment
 * delimiters are not visible, so indentation is the only remaining hint of
 * nesting. rank, comm2d, left, right, up and down are used without any
 * visible assignment, and no MPI_Init, MPI_Comm_rank, Cartesian-communicator
 * setup, MPI_Type_commit or MPI_Finalize calls appear here. The notes below
 * describe the code as it is visible; confirm against the complete source.
 */
int main()

/* MPI variables (in some sense) */
  MPI_Comm comm;
  MPI_Status status;
  MPI_Request request;
  int size, rank, tag;
  /* NOTE(review): comm2d is declared int but is passed below where the MPI calls take a communicator -- confirm the intended type. */
  int comm2d, disp, left, right, up, down, reorder;
  int dims[NDIMS], period[NDIMS], direction[NDIMS];

/* variable for the program */
  int nx, ny, nxp, nyp, nxpe, nype;
  int i, j, iter;
  int lastcheck, checkinc;
  double max, delta;
  double avg, mean;
  char picName[20] = "edgeCHANGETHIS.pgm";

 * find the size of the image do the arrays can be defined
  pgmsize(picName, &nx, &ny); 

  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &size);
  tag = 1;

/* Introduce Cartesian topology */
  /* NOTE(review): as written, only dims[i] = 0 is the loop body; the indentation suggests the next two assignments were meant to be inside it as well. */
  for(i=0; i<NDIMS; ++i)
    dims[i] = 0;
    period[i] = FALSE;    /* TRUE gives Cyclic */
    direction[i] = i;       /* shift along the same index as element of the array*/
  reorder = TRUE;      /* allows the processes to become reordered to hopefully improve efficiency */
  disp = 1;            /* Shift by 1 */


/* check the array is a reasonable size to be split up among the processors to be used and if not quit */
  /* NOTE(review): dims[] is only zeroed in the visible lines, and without braces the return below is not governed by either if. */
  if(nx < dims[1] || ny < dims[0])
    if(ROOT == rank)
      printf("too many processors running on job, %d in x direction but only %d elements, %d in y, %d elements\n", dims[1], nx, dims[0], ny);
    return 1;

  /* nxp, nyp, nxpe and nype are passed by address and used as array extents below. */
  initialise_local_array_sizes(nx, ny, &nxp, &nyp, &nxpe, &nype, dims, rank, size);

/* now declare the arrays necessary (note they can be different sizes on different processes*/
  /* Variable-length arrays sized at run time; the Edge/Old/New arrays have two more rows and columns than localBuf. */
  float localBuf[nxp][nyp];
  float localEdge[nxp+2][nyp+2], localOld[nxp+2][nyp+2], localNew[nxp+2][nyp+2];
  float globalImage[nx][ny];

 * set the halos of all the appropriate arrays to 255
  set_halos(localEdge,localOld, localNew, nxp, nyp);

  if(ROOT == rank)
    printf("Reading in Picture\n");
    pgmread(picName, globalImage, nx, ny);

/*set up all the datatypes that will need to be used*/
/*send contiguous halos*/
  /* mcols: nyp contiguous MPI_FLOAT elements. */
  MPI_Datatype mcols;
  MPI_Type_contiguous(nyp, MPI_FLOAT, &mcols);

/*send non-conmtiguous halos*/
  /* mrows: nxp blocks of one MPI_FLOAT with a stride of nyp+2 elements. */
  MPI_Datatype mrows;
  MPI_Type_vector(nxp, 1, nyp+2, MPI_FLOAT, &mrows); /*nyp+2 since will be used on nyp+2 size arrays*/

/*scatter data to processes with same size arrays as ROOT*/
  /* NOTE(review): scatter has four elements (indices 0..3), but the last call below writes scatter[4] and scatter[2] is never set in the visible code -- likely meant scatter[2]. */
  MPI_Datatype scatter[4];
  MPI_Type_vector(nxp, nyp, ny,  MPI_FLOAT, &scatter[3]);

/*scatter data to processes with different size arrays than ROOT in dim[0]*/
  MPI_Type_vector(nxp, nype, ny,  MPI_FLOAT, &scatter[0]);

/*scatter data to processes with different size arrays than ROOT in dim[1]*/
  MPI_Type_vector(nxpe, nyp, ny,  MPI_FLOAT, &scatter[1]);

/*scatter data to processes with different size arrays than ROOT in dim[0] and dim[1]*/
  MPI_Type_vector(nxpe, nype, ny,  MPI_FLOAT, &scatter[4]);

  /* Scatter the data from processer 0 to the rest */
  /* NOTE(review): without braces or an else, only the printf is conditional on ROOT; scatter_data() and MPI_Recv() would both run on every rank. Presumably the receive belonged to an else branch -- TODO confirm. */
  if(ROOT == rank)
    printf("Scattering image\n");
    scatter_data(globalImage, localBuf, ny, nxp, nyp, dims, rank, comm2d, scatter);
    MPI_Recv(localBuf, nxp*nyp, MPI_FLOAT, 0, rank, comm2d, &status);

 * set up the edge data to be used in computation
  /* Copies localBuf into localEdge at an offset of one row and one column; localOld is set to 255. */
  for(i=0; i<nxp; ++i)
    for(j=0; j<nyp; ++j)
      localEdge[i+1][j+1] = localBuf[i][j];
      localOld[i+1][j+1] = 255;

 * computation loop
  if(ROOT == rank)
    printf("Performing update routine for %d iterations\n", ITERATIONS);

  /* t1/t2 bracket the iteration loop with MPI_Wtime() samples. */
  double t1, t2;
  t1 = MPI_Wtime();

  tag = 2;
  lastcheck = checkinc = iter = 0;
  delta = 1;

  /* NOTE(review): iter is never advanced in the visible lines -- presumably an increment is missing. */
  while(iter < ITERATIONS)
    send_halos(localOld, left, right, up, down, comm2d, tag, nxp, nyp, mrows, mcols);

    /* Update step: 0.25 * (sum of the four localOld neighbours minus localEdge); avg accumulates the new values. */
    avg = 0;
    for(i=1; i<nxp+1; ++i)
      for(j=1; j<nyp+1; ++j)
        localNew[i][j] = 0.25*(localOld[i-1][j] + localOld[i+1][j] + localOld[i][j-1] + localOld[i][j+1] - localEdge[i][j]);
        avg = avg + localNew[i][j];

    max = 0;

    /* Tracks the largest absolute change between localNew and localOld, then carries localNew over into localOld. */
    for(i=1; i<nxp+1; ++i)
      for(j=1; j<nyp+1; ++j)
        if(fabs(localNew[i][j] - localOld[i][j]) > max)
          max = fabs(localNew[i][j] - localOld[i][j]);
        localOld[i][j] = localNew[i][j];

 * want to perform a calculation of the average pixel value and delta
    /* On a check iteration: sum avg into mean on ROOT, take the maximum of max across comm2d into delta, then derive the next check interval from delta. */
    if(iter == lastcheck + checkinc)
      lastcheck = iter;
      MPI_Reduce(&avg, &mean, 1, MPI_DOUBLE, MPI_SUM, ROOT, comm2d);
      MPI_Allreduce(&max, &delta, 1, MPI_DOUBLE, MPI_MAX, comm2d);
      /* NOTE(review): with the printf commented out, this if governs the checkinc assignment that follows it. */
      if(ROOT == rank)
     //   printf("iteration %d, average pixel value is %f, current delta %f\n", iter, mean/(nx*ny), delta);
      checkinc = (int)(delta*500);
      if(checkinc > 200)
      checkinc = 500;

    /* NOTE(review): no statement for this condition is visible. */
    if(ITERATIONS == iter)

  t2 = MPI_Wtime();

  if(ROOT == rank)
    printf("finished after %d iterations, delta was %f\n", iter-1, delta);
    printf("seconds per iteration: %f\n", (t2-t1)/(iter-1));

  /* Copies localOld (offset by one row and column) back into localBuf. */
  for(i=0; i<nxp; ++i)
    for(j=0; j<nyp; ++j)
      localBuf[i][j] = localOld[i+1][j+1];

  tag = 3;

  /* NOTE(review): as above, no else is visible between receive_data() and the MPI_Issend()/MPI_Wait() pair -- presumably ROOT receives and the other ranks send; TODO confirm. */
  if(ROOT == rank)

    printf("recieving back data\n");
    receive_data(globalImage, localBuf, ny, nxp, nyp, dims, tag, rank, comm2d, scatter);
    MPI_Issend(localBuf, nxp*nyp, MPI_FLOAT, ROOT, tag, comm2d, &request);
    MPI_Wait(&request, &status);

  if(ROOT == rank)
    pgmwrite("parpictureCHANGETHIS.pgm", globalImage, nx, ny);


  return 0;

Ejemplo n.º 3
/*
 * Serial driver. Visible flow: query the image size with pgmsize(), set the
 * border cells of the work arrays to 255, read the image named by picName,
 * repeatedly update localNew from the four neighbours of localOld minus
 * localEdge while max > 0.1, then copy localNew back into globalImage and
 * write "picture.pgm".
 *
 * NOTE(review): this listing looks incomplete. Braces and several comment
 * delimiters are not visible, so indentation is the only remaining hint of
 * nesting, and iter is never advanced in the visible lines. The notes below
 * describe the code as it is visible; confirm against the complete source.
 */
int main()

  int nx, ny, i, j, iter;
  int lastcheck, checkinc;
  double max;
  double avg;
  char picName[20] = "edge512x384.pgm";

 * test that the image to be worked on is the right size for the nx and ny defined
  pgmsize(picName, &nx, &ny); 

  /* Variable-length arrays sized at run time; the Edge/Old/New arrays have two more rows and columns than globalImage. */
  float globalImage[nx][ny], localEdge[nx+2][ny+2], localOld[nx+2][ny+2], localNew[nx+2][ny+2];

 * set the halos of all the appropriate arrays to 255
  /* First and last column of every row. NOTE(review): as written, only the localEdge line is the loop body. */
  for(i=0; i<nx+2; ++i)
    localEdge[i][0] = localEdge[i][ny+1] = 255;
    localOld[i][0] = localOld[i][ny+1] = 255;
    localNew[i][0] = localNew[i][ny+1] = 255;

  /* First and last row of every column. NOTE(review): as written, only the localEdge line is the loop body. */
  for(j=0; j<ny+2; ++j)
    localEdge[0][j] = localEdge[nx+1][j] = 255;
    localOld[0][j] = localOld[nx+1][j] = 255;
    localNew[0][j] = localNew[nx+1][j] = 255;

  printf("Reading in Picture\n");
  pgmread(picName, globalImage, nx, ny);

 * set up the edge data to be used in computation
  /* Copies globalImage into localEdge at an offset of one row and one column; localOld is set to 255. */
  for(i=1; i<nx+1; ++i)
    for(j=1; j<ny+1; ++j)
      localEdge[i][j] = globalImage[i-1][j-1];
      localOld[i][j] = 255;

 * computation loop
  printf("Performing update routine for %d iterations\n", ITERATIONS);

  lastcheck = checkinc = iter = 0;
  /* max starts above the 0.1 threshold tested by the loop condition. */
  max = 1;

  while(max > 0.1)
    /* Update step: 0.25 * (sum of the four localOld neighbours minus localEdge); avg accumulates the new values. */
    avg = 0;
    for(i=1; i<nx+1; ++i)
      for(j=1; j<ny+1; ++j)
        localNew[i][j] = 0.25*(localOld[i-1][j] + localOld[i+1][j] + localOld[i][j-1] + localOld[i][j+1] - localEdge[i][j]);
        avg = avg + localNew[i][j];
    /* max is reset only when iter equals lastcheck + checkinc. */
    if(iter == lastcheck + checkinc)
    max = 0;

    /* The largest absolute change is tracked only when iter equals lastcheck + checkinc; localNew is carried over into localOld. */
    for(i=1; i<nx+1; ++i)
      for(j=1; j<ny+1; ++j)
        if(fabs(localNew[i][j] - localOld[i][j]) > max && iter == lastcheck + checkinc)
          max = fabs(localNew[i][j] - localOld[i][j]);
        localOld[i][j] = localNew[i][j];

 * want to perform a calculation of the average pixel value and delta
    /* On a check iteration: report progress and derive the next check interval from max. */
    if(iter == lastcheck + checkinc)
      lastcheck = iter;
      printf("iteration %d, average pixel value is %f, current delta: %f\n", iter, avg/(nx*ny), max);

      checkinc = (int)(max*500);
      if(checkinc > 200)
      checkinc = 500;

    /* NOTE(review): no statement for this condition is visible. */
    if(ITERATIONS == iter)

  printf("finished after %d iterations. Delta was %f\n", iter-1, max);

 * set the data back for printing
  /* Copies localNew (offset by one row and column) back into globalImage. */
  for(i=0; i<nx; ++i)
    for(j=0; j<ny; ++j)
      globalImage[i][j] = localNew[i+1][j+1];

  pgmwrite("picture.pgm", globalImage, nx, ny);

  return 0;

Ejemplo n.º 4
/*
 * OpenCL driver. Visible flow: create a context, queue and kernel, allocate
 * three device buffers, load edge data with datread(), copy it into a zeroed
 * host buffer with a one-element border, run the kernel ITERATIONS times
 * while swapping the input/output buffers, run the same stencil on the host,
 * compare the two results against MAX_DIFF, write "output.pgm" and print the
 * two elapsed times.
 *
 * NOTE(review): this listing looks incomplete. Closing braces and several
 * comment delimiters are not visible, so indentation is the only remaining
 * hint of nesting. edge, host_input, host_output, gpu_output and img are not
 * declared in the visible lines (presumably file-scope arrays). argc, argv
 * and height are not used outside commented-out code. Confirm the notes
 * below against the complete source.
 */
int main(int argc, char *argv[])
    int x, y;
    int i;
    int errors;

    double start_time_inc_data, end_time_inc_data;
    double cpu_start_time, cpu_end_time;

    cl_mem d_input, d_output, d_edge, tmp;
    cl_int err;

    int width = WIDTH;
    int height = HEIGHT;

    cl_context ctxt;
    cl_command_queue queue;
    cl_kernel kernel;

    /* Size in bytes of one (WIDTH+2) x (HEIGHT+2) float buffer. */
    size_t memSize = (WIDTH+2) * (HEIGHT+2) * sizeof(float);

    printf("Image size: %dx%d\n", WIDTH, HEIGHT);
    printf("Local work size: %dx%d\n", LOCAL_W, LOCAL_H);

    /* initialise OpenCL */
    err = initCLDevice(CL_DEVICE_TYPE_GPU, &ctxt, &queue);
    checkOpenCLError(err, "initCLDevice");
    /* Only the "reverse2d" kernel is active; the two alternatives are commented out. */
    err = getCLKernel(ctxt, "", "reverse2d", &kernel);
    //err = getCLKernel(ctxt, "", "reverse1d_col", &kernel);
    //err = getCLKernel(ctxt, "", "reverse1d_row", &kernel);
    checkOpenCLError(err, "getCLKernel");

    /* allocate memory on device */
    d_input = clCreateBuffer(ctxt, CL_MEM_READ_WRITE, memSize, NULL, &err);
    checkOpenCLError(err, "buffer allocation");
    d_output = clCreateBuffer(ctxt, CL_MEM_READ_WRITE, memSize, NULL, &err);
    checkOpenCLError(err, "buffer allocation");
    d_edge = clCreateBuffer(ctxt, CL_MEM_READ_ONLY, memSize, NULL, &err);
    checkOpenCLError(err, "buffer allocation");
    /* read in edge data */
    datread("edge256x192.dat", (void *)edge, WIDTH, HEIGHT);
    /* zero buffer so that halo is zeroed */
    for (y = 0; y < HEIGHT+2; y++) {
	for (x = 0; x < WIDTH+2; x++) {
	    host_input[y][x] = 0.0;
    /* copy input to buffer with halo */
    for (y = 0; y < HEIGHT; y++) {
	for (x = 0; x < WIDTH; x++) {
	    host_input[y+1][x+1] = edge[y][x];
     * copy to all the GPU arrays. d_output doesn't need to have this data but
     * this will zero its halo
    start_time_inc_data = get_current_time();

    /* Blocking writes (CL_TRUE) of host_input into all three device buffers, d_edge included. */
    err = clEnqueueWriteBuffer(queue, d_input, CL_TRUE, 0, memSize, host_input, 0, NULL, NULL);
    checkOpenCLError(err, "buffer write");
    err = clEnqueueWriteBuffer(queue, d_output, CL_TRUE, 0, memSize, host_input, 0, NULL, NULL);
    checkOpenCLError(err, "buffer write");
    err = clEnqueueWriteBuffer(queue, d_edge, CL_TRUE, 0, memSize, host_input, 0, NULL, NULL);
    checkOpenCLError(err, "buffer write");
    /* run on GPU */
    for (i = 0; i < ITERATIONS; i++) {

	/* run the kernel */
	 * One of these kernel invocations should be uncommented at a time. Make sure it
	 * matches the kernel actually loaded above.

	/* Kernel arguments: 0 = d_output, 1 = d_input, 2 = d_edge, 3 = width. */
	err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_output);
	checkOpenCLError(err, "setting kernel arguments");
	err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_input);
	checkOpenCLError(err, "setting kernel arguments");
	err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_edge);
	checkOpenCLError(err, "setting kernel arguments");
	err = clSetKernelArg(kernel, 3, sizeof(int), &width);
	checkOpenCLError(err, "setting kernel arguments");

	/* Two-dimensional launch: WIDTH x HEIGHT work-items in LOCAL_W x LOCAL_H groups. */
	size_t globalsize[2] = { WIDTH, HEIGHT };
	size_t localsize[2] = { LOCAL_W, LOCAL_H };
	err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, &globalsize[0], &localsize[0], 0, NULL, NULL);

	//size_t globalsize[1] = { HEIGHT };
	//size_t localsize[1] = { LOCAL_H };
	//err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalsize[0], &localsize[0], 0, NULL, NULL);

	//err = clSetKernelArg(kernel, 4, sizeof(int), &height);
	//checkOpenCLError(err, "setting kernel arguments");
	//size_t globalsize[1] = { WIDTH };
	//size_t localsize[1] = { LOCAL_W };
	//err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalsize[0], &localsize[0], 0, NULL, NULL);

	checkOpenCLError(err, "running kernel");

	/* NOTE(review): no wait call is visible after the following comment. */
	/* wait for kernel to complete */
	/* swap the buffer pointers ready for next time */
	tmp = d_input;
	d_input = d_output;
	d_output = tmp;
    /* Blocking read of d_input into gpu_output; after the swap above d_input is the buffer last passed as kernel argument 0 (assuming the swap is inside the loop -- TODO confirm). */
    err = clEnqueueReadBuffer(queue, d_input, CL_TRUE, 0, memSize, gpu_output, 0, NULL, NULL);
    checkOpenCLError(err, "buffer read");
    end_time_inc_data = get_current_time();
     * run on host for comparison
    cpu_start_time = get_current_time();
    for (i = 0; i < ITERATIONS; i++) {
	/* perform stencil operation */
	for (y = 0; y < HEIGHT; y++) {
	    for (x = 0; x < WIDTH; x++) {
		host_output[y+1][x+1] = (host_input[y+1][x] + host_input[y+1][x+2] +
					 host_input[y][x+1] + host_input[y+2][x+1] \
					 - edge[y][x]) * 0.25;
	/* copy output back to input buffer */
	for (y = 0; y < HEIGHT; y++) {
	    for (x = 0; x < WIDTH; x++) {
		host_input[y+1][x+1] = host_output[y+1][x+1];
    cpu_end_time = get_current_time();
    /* check that GPU result matches host result */
    /* NOTE(review): errors is reset here but no increment is visible, and the printf below is cut off after its second continuation line. */
    errors = 0;
    for (y = 0; y < HEIGHT; y++) {
	for (x = 0; x < WIDTH; x++) {
	    float diff = fabs(gpu_output[y+1][x+1] - host_output[y+1][x+1]);
	    if (diff >= MAX_DIFF) {
		printf("Error at %d,%d (CPU=%f, GPU=%f)\n", x, y,	\
		       host_output[y+1][x+1],				\
    if (errors == 0) printf("\n\n ***TEST PASSED SUCCESSFULLY*** \n\n\n");
    /* copy result to output buffer */
    for (y = 0; y < HEIGHT; y++) {
	for (x = 0; x < WIDTH; x++) {
	    img[y][x] = gpu_output[y+1][x+1];
    /* write PGM */
    pgmwrite("output.pgm", (void *)img, WIDTH, HEIGHT);
    /* The GPU interval spans the buffer writes, the kernel loop and the read-back; the CPU interval spans only the host loop. */
    printf("GPU Time (Including Data Transfer): %fs\n", \
	   end_time_inc_data - start_time_inc_data);
    printf("CPU Time                          : %fs\n", \
	   cpu_end_time - cpu_start_time);
    return 0;
Ejemplo n.º 5
/**
 * @brief Write a PGM file from an array. Only rank 0 writes (wrapper for pgmwrite).
 * @param rank the rank of the calling process
 * @param filename the file to write to
 * @param img_dim the dimensions of the image
 * @param data the array to write to disk
 */
 void image_write (int rank, char * filename, image_dimensions img_dim, real ** data) {
    if (rank == 0) {
        pgmwrite(filename, &data[0][0], img_dim.m, img_dim.n);
