예제 #1
0
void smoke3D::init() {
    // Allocate Memory
    u[0] = alloc3D(N+1,N,N);
    u[1] = alloc3D(N,N+1,N);
    u[2] = alloc3D(N,N,N+1);
    c = alloc3D(N,N,N);
    b = alloc3D(N,N,N);

    FOR_EVERY_X_FLOW {
        u[0][i][j][k] = 0.0;
    } END_FOR

    FOR_EVERY_Y_FLOW {
        u[1][i][j][k] = 0.0;
    } END_FOR

    FOR_EVERY_Z_FLOW {
        u[2][i][j][k] = 0.0;
    } END_FOR

    // Mark Wall Inside A Sphere
    int w = SPHERE_R*N;
    for( int i=-w; i<=w; i++ ) {
        for( int j=-w; j<=w; j++ ) {
            for( int k=-w; k<=w; k++ ) {
                if( hypot(hypot((float)i,(float)j),(float)k) < w ) {
                    b[i+N/2][j+N/2][k+N/2] = 1.0;
                }
            }
        }
    }

#if _OPENMP
    printf( "OpenMP Detected.\n" );
#endif
}
예제 #2
0
파일: poisson19.c 프로젝트: 8l/rose
/*
 * Benchmark driver: Jacobi iterations for a 3D Poisson problem using a
 * stencil of the 6 face neighbours and 12 edge neighbours of each point.
 *
 * args[1] (optional): number of points n per direction (default 256).
 *                     Values below 2 are rejected, since h = 1/(n-1).
 *
 * Allocates three (n+2)^3 arrays, fills the right-hand side, starts from a
 * zero guess, runs the sweeps inside a Mint parallel region, then prints
 * the elapsed time, a Gflops estimate and the l2 norm of the difference
 * against sin(pi*i*h)*sin(pi*j*h)*sin(pi*k*h).
 *
 * Returns 0 on success, 1 if the grid size is invalid.
 */
int main (int nargs, char** args)
{
  int n;      /* number of points in each direction */
  double h;   /* grid spacing, same in all the directions */
  double ***u_old, ***u_new, ***rhs;
  double factor, factor2, l2_norm;
  int i,j,k;
  int max_iters=100;

  if (nargs>1)
    n = atoi(args[1]);
  else
    n = 256;

  /* atoi() yields 0 for non-numeric input; n < 2 would make h infinite or
     negative and the allocation sizes meaningless. */
  if (n < 2) {
    fprintf(stderr, "grid size must be at least 2 (got %d)\n", n);
    return 1;
  }

  /* NOTE(review): the arrays hold n+2 points per direction while h is
     1/(n-1) -- confirm this spacing is intended. */
  h = 1.0/(n-1);

  u_old = alloc3D(n+2, n+2, n+2);
  u_new = alloc3D(n+2, n+2, n+2);
  rhs = alloc3D(n+2, n+2, n+2);

  /* fill the right-hand side vector */
  factor = (1.0-h*h*M_PI*M_PI/4)*3.0*M_PI*M_PI;  /* use deferred correction */
  for (k=0; k<= n+1; k++)
    for (j=0; j<= n+1; j++)
      for (i=0; i<= n+1; i++)
        rhs[k][j][i] = 6.*h*h*factor*sin(M_PI*i*h)*sin(M_PI*j*h)*sin(M_PI*k*h);

  /* use initial zero guess */
  for (k=0; k<= n+1; k++)
    for (j=0; j<= n+1; j++)
      for (i=0; i<= n+1; i++)
        u_old[k][j][i] = u_new[k][j][i] = 0.;

  /* Jacobi iterations */

  /* l2_norm is not updated inside the iteration loop below, so the
     tolerance test never stops it early: max_iters sweeps are always run. */
  l2_norm = 1e+12;
  factor = 1.0/24; factor2 = 6.0*h*h;


  printf("\n=====Timings (sec) for 19-Point Jacobi, Solving Poisson Eqn ");
  if(sizeof(REAL) == 4)
    printf(" (Single Precision) =====\n");
  if(sizeof(REAL) == 8)
    printf(" (Double Precision) =====\n");

  printf("Kernel\t Time(sec)\tGflops  \tBW-ideal(GB/s)\tBW-algorithm (N=(%d) iters=%d)\n",n, max_iters);
  printf("------\t----------\t--------\t--------------\t------------\n");

  int nIters=0;


  /* The timed region includes the copy pragmas in both directions. */
  double time_elapsed= getTime();
  double Gflops =0.0;

#pragma mint copy ( u_old,  toDevice, ( n+2 ), n+2, ( n+2 ) )
#pragma mint copy ( u_new,  toDevice, ( n+2 ), n+2, ( n+2 ) )
#pragma mint copy ( rhs, toDevice, ( n+2 ), n+2, ( n+2 ) )

#pragma mint parallel 
  {
    int iters = 0 ; 

  while (iters < max_iters && l2_norm > 1e-9) {
    ++iters;

    /* update each interior point */
#pragma mint for  nest(all) tile(16,16,1)
    for (k=1; k<= n; k++){
      for (j=1; j<= n; j++){
        for (i=1; i<= n; i++)
          u_new[k][j][i] = factor*(rhs[k][j][i]
                                  +factor2*(u_old[k][j][i-1]+u_old[k][j][i+1]
                                            +u_old[k][j-1][i]+u_old[k][j+1][i]
                                            +u_old[k+1][j][i]+u_old[k-1][j][i])
                                  +u_old[k-1][j-1][i]+u_old[k-1][j+1][i]
                                  +u_old[k-1][j][i-1]+u_old[k-1][j][i+1]
                                  +u_old[k][j-1][i-1]+u_old[k][j+1][i-1]
                                  +u_old[k][j-1][i+1]+u_old[k][j+1][i+1]
                                  +u_old[k+1][j-1][i]+u_old[k+1][j+1][i]
                                  +u_old[k+1][j][i-1]+u_old[k+1][j][i+1]);

        }}
    /* pointer swap */
  #pragma mint single
    {
      /* Same type as u_old/u_new so the swap involves no pointer conversion. */
      double ***tmp;
      tmp = u_old; u_old= u_new; u_new = tmp;

      nIters = iters; 
    }

  }
  }
#pragma mint copy ( u_old,  fromDevice, ( n+2 ), ( n+2 ), ( n+2 ) )
  time_elapsed = getTime() - time_elapsed;

  /* Promote to double before multiplying: nIters*n*n*n exceeds INT_MAX
     for moderately large grids (e.g. 100 iterations at n = 512). */
  Gflops = ((double)nIters * n * n * n * 1.0e-9 * FLOPS) / time_elapsed ;

  /* Compare against sin(pi*i*h)*sin(pi*j*h)*sin(pi*k*h) at every point. */
  l2_norm = 0;
  for (k=0; k<= n+1; k++)
    for (j=0; j<= n+1; j++)
      for (i=0; i<= n+1; i++) {
        factor = sin(M_PI*i*h)*sin(M_PI*j*h)*sin(M_PI*k*h);
        l2_norm += (factor-u_old[k][j][i])*(factor-u_old[k][j][i]);
      }

  printf("%s%3.3f \t%5.3f\n", "Poisson19   ", time_elapsed, Gflops);
  printf(":N %d M %d K %d , iteration %d\n", n, n, n , nIters);
  /* NOTE(review): the value printed as ":max:" is `factor`, which at this
     point holds the sine product of the last grid point visited above --
     confirm that is the intended quantity. */
  printf(":max: %20.12e, l2norm: %20.12e\n",factor,sqrt(l2_norm*h*h*h));   
           
  //printf("Total iterations used: %d, l2-norm of error=%e\n",
  //	 nIters,sqrt(l2_norm*h*h*h));
 
  free3D(u_new);
  free3D(u_old);
  free3D(rhs);

  return 0;
}