Ejemplo n.º 1
0
/*
** VAniso3d
**
** Iteratively smooths the volume `src` with an explicit 6-neighbour
** diffusion update.  For every interior voxel the flux through each of
** the six faces is the one-sided difference to that neighbour multiplied
** by a coefficient obtained from diffusion3d() (defined elsewhere).
**
**   src      input image; must have at least 3 bands (slices)
**   dest     destination passed on to VCopyImage(); may be NULL
**   numiter  number of update sweeps; 0 yields an unmodified copy of src
**   type, kappa, alpha
**            forwarded unchanged to diffusion3d()
**
** Returns the image produced by VCopyImage(src,dest,VAllBands) whose
** interior voxels have been replaced by the smoothed values.  Voxels on
** the outer faces of the volume are never updated and keep their source
** values.
*/
VImage 
VAniso3d(VImage src,VImage dest,VShort numiter,
	 VShort type,VFloat kappa,VFloat alpha)
{
  VImage tmp1=NULL,tmp2=NULL;
  int nbands,nrows,ncols;
  int b,r,c,iter;
  float delta;
  float dx,dy,dz,d,u,v;
  float cdx,cdy,cdz;
  float ux1,ux2,uy1,uy2,uz1,uz2;
  float b1,b2,r1,r2,c1,c2;
  VDouble xmax,xmin;
  VBoolean ignore = TRUE;   /* skip voxels whose magnitude is ~0 */


  nbands = VImageNBands(src);
  nrows  = VImageNRows(src);
  ncols  = VImageNColumns(src);

  if (nbands < 3) VError(" min number of slices is 3");

  /* tmp1 holds the current state, tmp2 receives the next state */
  tmp1 = VConvertImageCopy(src,NULL,VAllBands,VFloatRepn);
  tmp2 = VCreateImage(nbands,nrows,ncols,VFloatRepn);
  if (tmp1 == NULL || tmp2 == NULL)
    VError(" VAniso3d: unable to create work images");

  /*
  ** Seed tmp2 with the current state rather than zeros.  Only interior,
  ** non-skipped voxels are written below, so a zero-filled tmp2 would
  ** (a) wipe the border voxels of tmp1 after the first sweep and
  ** (b) produce an all-zero interior when numiter == 0.
  */
  tmp2 = VCopyImagePixels(tmp1,tmp2,VAllBands);

  /* clamp limits for the float work images */
  xmax = VPixelMaxValue (tmp1);
  xmin = VPixelMinValue (tmp1);

  /* step size of the explicit update;
     presumably chosen for stability with six neighbours - TODO confirm */
  delta = 1.0 / 7.0;

  for (iter=0; iter < numiter; iter++) {

    for (b=1; b<nbands-1; b++) {
      for (r=1; r<nrows-1; r++) {
        for (c=1; c<ncols-1; c++) {

          u  = VPixel(tmp1,b,r,c,VFloat);
          if (ignore && ABS(u) < 1.0e-10) continue;

          c1 = VPixel(tmp1,b,r,c+1,VFloat);
          c2 = VPixel(tmp1,b,r,c-1,VFloat);

          r1 = VPixel(tmp1,b,r+1,c,VFloat);
          r2 = VPixel(tmp1,b,r-1,c,VFloat);

          b1 = VPixel(tmp1,b+1,r,c,VFloat);
          b2 = VPixel(tmp1,b-1,r,c,VFloat);

          /* central differences, used for the two transverse
             directions of every face */
          cdx = c1-c2;
          cdy = r1-r2;
          cdz = b1-b2;

          /* col-dir: one-sided difference along columns */
          dx = c1-u;
          d  = diffusion3d(dx,cdy,cdz,type,kappa,alpha);
          ux1 = d*dx;

          dx = u-c2;
          d  = diffusion3d(dx,cdy,cdz,type,kappa,alpha);
          ux2 = d*dx;

          /* row-dir: one-sided difference along rows */
          dy = r1-u;
          d  = diffusion3d(cdx,dy,cdz,type,kappa,alpha);
          uy1 = d*dy;

          dy = u-r2;
          d  = diffusion3d(cdx,dy,cdz,type,kappa,alpha);
          uy2 = d*dy;

          /* slice-dir: one-sided difference along bands */
          dz = b1-u;
          d  = diffusion3d(cdx,cdy,dz,type,kappa,alpha);
          uz1 = d*dz;

          dz = u-b2;
          d  = diffusion3d(cdx,cdy,dz,type,kappa,alpha);
          uz2 = d*dz;

          /* sum: net flux over the three axes */
          v = u + delta*(ux1 - ux2 + uy1 - uy2 + uz1 - uz2);

          if (v > xmax) v = xmax;
          if (v < xmin) v = xmin;
          VPixel(tmp2,b,r,c,VFloat) = v;
        }
      }
    }
    /* the new state becomes the input of the next sweep */
    tmp1 = VCopyImagePixels(tmp2,tmp1,VAllBands);
  }


  /*
  ** output: start from a copy of src (this also keeps the border voxels)
  ** and overwrite the interior, clamped to the limits of dest's pixel type
  */
  dest = VCopyImage(src,dest,VAllBands);

  xmax = VPixelMaxValue (dest);
  xmin = VPixelMinValue (dest);

  for (b=1; b<nbands-1; b++) {
    for (r=1; r<nrows-1; r++) {
      for (c=1; c<ncols-1; c++) {
        v = VPixel(tmp2,b,r,c,VFloat);
        if (v > xmax) v = xmax;
        if (v < xmin) v = xmin;
        VSetPixel(dest,b,r,c,(VDouble) v);
      }
    }
  }

  VDestroyImage(tmp1);
  VDestroyImage(tmp2);

  return dest;
}
Ejemplo n.º 2
0
void  mainloop
// ====================================================================
//
// purpose    :  time-stepping driver for the diffusion equation solved
//               by FDM.  Repeatedly calls diffusion3d(), advances *time
//               by dt, and reports timing / throughput figures.
//               NOTE(review): this header used to say "2-dimensional",
//               but the code calls diffusion3d() and takes dz.
//
// The loop runs for at most 90000 steps and stops earlier once
// *time + 0.5*dt reaches 0.1.
//
// NOTE(review): f and fn are exchanged through swap() on the local
// parameter copies only; the caller's pointers are not changed, so
// which caller buffer holds the final field presumably depends on the
// number of steps taken - confirm against swap() and the caller.
//
// date       :  2012-5-10
// programmer :  Michel Müller
// place      :  Tokyo Institute of Technology
//
(
   FLOAT    *f,         /* dependent variable                        */
   FLOAT    *fn,        /* updated dependent variable                */
   FLOAT    kappa,      /* diffusion coefficient                     */
   FLOAT    *time,       /* time (in/out: advanced by dt every step)  */
   FLOAT    dt,         /* time step interval                        */
   FLOAT    dx,         /* grid spacing in the x-direction           */
   FLOAT    dy,         /* grid spacing in the y-direction           */
   FLOAT    dz          /* grid spacing in the z-direction           */
)
// --------------------------------------------------------------------
{
	int icnt = 1;

	double start_time;
	double start_computation_time = 0.0;
	double elapsed_time_total;
	double elapsed_computation_time = 0.0;
	clock_t ctime_start_computation_time = 0;
	clock_t ctime_start_total_time;
	double ctime_elapsed_computation_time = 0.0;
	double ctime_elapsed_total_time;

	/* Per-step work, widened to long long before multiplying so the
	 * product cannot overflow the (possibly int) type of the macros. */
	const long long stencilsPerStep =
		(long long)(DIM_X_INNER) * (DIM_Y_INNER) * (DIM_Z_INNER);
	const long long idealCacheBytesPerStep = stencilsPerStep * (FLOAT_BYTE_LENGTH) * 2;
	const long long noCacheBytesPerStep = stencilsPerStep * (FLOAT_BYTE_LENGTH) * 8;

	long long int numOfStencilsComputed = 0;
	long long int idealCacheModelBytesTransferred = 0;
	long long int noCacheModelBytesTransferred = 0;

	start_time = omp_get_wtime();
	/* Keep raw clock ticks; they are converted to seconds only when the
	 * elapsed value is computed, so both operands share the same unit. */
	ctime_start_total_time = clock();

	printf("Starting Reference C Version of 3D Diffusion\n");
	printf("kappa: %e, dt: %e, dx: %e\n", kappa, dt, dx);
	#pragma omp parallel
	#pragma omp master
	{
		printf("num threads: %d\n", omp_get_num_threads( ));
	}

	#pragma acc data copy(f[0:XYZ_SIZE]), create(fn[0:XYZ_SIZE])
	{
		#pragma omp master
		{
			start_computation_time = omp_get_wtime();
			ctime_start_computation_time = clock();
		}

		do {
			/* progress report every 100th step */
			if(icnt % 100 == 0) fprintf(stderr,"time after iteration %4d:%7.5f\n",icnt+1,*time + dt);

			diffusion3d(f,fn,kappa,dt,dx,dy,dz);

			numOfStencilsComputed += stencilsPerStep;
			idealCacheModelBytesTransferred += idealCacheBytesPerStep;
			noCacheModelBytesTransferred += noCacheBytesPerStep;
			swap(&f,&fn);
			*time = *time + dt;

		} while(icnt++ < 90000 && *time + 0.5*dt < 0.1);
		#pragma acc wait

		#pragma omp master
		{
			elapsed_computation_time = omp_get_wtime() - start_computation_time;
			ctime_elapsed_computation_time = (clock() - ctime_start_computation_time) / (double) CLOCKS_PER_SEC;
		}
	}

	elapsed_time_total = omp_get_wtime() - start_time;
	ctime_elapsed_total_time = (clock() - ctime_start_total_time) / (double) CLOCKS_PER_SEC;

	/* Prefer the OMP wall-clock figure; fall back to the clock()-based
	 * one when the OMP timer yielded a non-positive interval. */
	double elapsed_computation_time_combined = elapsed_computation_time;
	if (elapsed_computation_time_combined <= 0.0) {
		elapsed_computation_time_combined = ctime_elapsed_computation_time;
	}

	aprint("Calculated Time= %9.3e [sec]\n",*time);
	aprint("Elapsed Total Time (OMP timer)= %9.3e [sec]\n",elapsed_time_total);
	aprint("Elapsed Total Time (CTime)= %9.3e [sec]\n",ctime_elapsed_total_time);
	aprint("Elapsed Computation Time (OMP timer)= %9.3e [sec]\n",elapsed_computation_time);
	aprint("Elapsed Computation Time (CTime)= %9.3e [sec]\n",ctime_elapsed_computation_time);
	aprint("Performance= %7.2f [million stencils/sec]\n",((double)numOfStencilsComputed)/elapsed_computation_time_combined*1.0e-06);
	aprint("Bandwidth Ideal Cache Model= %7.2f [GB/s]\n",((double)idealCacheModelBytesTransferred)/elapsed_computation_time_combined*1.0e-09);
	aprint("Bandwidth No Cache Model= %7.2f [GB/s]\n",((double)noCacheModelBytesTransferred)/elapsed_computation_time_combined*1.0e-09);
}