void smoke3D::init() { // Allocate Memory u[0] = alloc3D(N+1,N,N); u[1] = alloc3D(N,N+1,N); u[2] = alloc3D(N,N,N+1); c = alloc3D(N,N,N); b = alloc3D(N,N,N); FOR_EVERY_X_FLOW { u[0][i][j][k] = 0.0; } END_FOR FOR_EVERY_Y_FLOW { u[1][i][j][k] = 0.0; } END_FOR FOR_EVERY_Z_FLOW { u[2][i][j][k] = 0.0; } END_FOR // Mark Wall Inside A Sphere int w = SPHERE_R*N; for( int i=-w; i<=w; i++ ) { for( int j=-w; j<=w; j++ ) { for( int k=-w; k<=w; k++ ) { if( hypot(hypot((float)i,(float)j),(float)k) < w ) { b[i+N/2][j+N/2][k+N/2] = 1.0; } } } } #if _OPENMP printf( "OpenMP Detected.\n" ); #endif }
int main (int nargs, char** args) { int n; /* number of points in each direction */ double h; /* grid spacing, same in all the directions */ double ***u_old, ***u_new, ***rhs; double factor, factor2, l2_norm; int i,j,k; int max_iters=100; if (nargs>1) n = atoi(args[1]); else n = 256; h = 1.0/(n-1); u_old = alloc3D(n+2, n+2, n+2); u_new = alloc3D(n+2, n+2, n+2); rhs = alloc3D(n+2, n+2, n+2); /* fill the right-hand side vector */ factor = (1.0-h*h*M_PI*M_PI/4)*3.0*M_PI*M_PI; /* use deferred correction */ for (k=0; k<= n+1; k++) for (j=0; j<= n+1; j++) for (i=0; i<= n+1; i++) rhs[k][j][i] = 6.*h*h*factor*sin(M_PI*i*h)*sin(M_PI*j*h)*sin(M_PI*k*h); /* use initial zero guess */ for (k=0; k<= n+1; k++) for (j=0; j<= n+1; j++) for (i=0; i<= n+1; i++) u_old[k][j][i] = u_new[k][j][i] = 0.; /* Jacobi iterations */ l2_norm = 1e+12; factor = 1.0/24; factor2 = 6.0*h*h; printf("\n=====Timings (sec) for 19-Point Jacobi, Solving Poisson Eqn "); if(sizeof(REAL) == 4) printf(" (Single Precision) =====\n"); if(sizeof(REAL) == 8) printf(" (Double Precision) =====\n"); printf("Kernel\t Time(sec)\tGflops \tBW-ideal(GB/s)\tBW-algorithm (N=(%d) iters=%d)\n",n, max_iters); printf("------\t----------\t--------\t--------------\t------------\n"); int nIters=0; double time_elapsed= getTime(); double Gflops =0.0; #pragma mint copy ( u_old, toDevice, ( n+2 ), n+2, ( n+2 ) ) #pragma mint copy ( u_new, toDevice, ( n+2 ), n+2, ( n+2 ) ) #pragma mint copy ( rhs, toDevice, ( n+2 ), n+2, ( n+2 ) ) #pragma mint parallel { int iters = 0 ; while (iters < max_iters && l2_norm > 1e-9) { ++iters; /* update each interior point */ #pragma mint for nest(all) tile(16,16,1) for (k=1; k<= n; k++){ for (j=1; j<= n; j++){ for (i=1; i<= n; i++) u_new[k][j][i] = factor*(rhs[k][j][i] +factor2*(u_old[k][j][i-1]+u_old[k][j][i+1] +u_old[k][j-1][i]+u_old[k][j+1][i] +u_old[k+1][j][i]+u_old[k-1][j][i]) +u_old[k-1][j-1][i]+u_old[k-1][j+1][i] +u_old[k-1][j][i-1]+u_old[k-1][j][i+1] +u_old[k][j-1][i-1]+u_old[k][j+1][i-1] +u_old[k][j-1][i+1]+u_old[k][j+1][i+1] +u_old[k+1][j-1][i]+u_old[k+1][j+1][i] +u_old[k+1][j][i-1]+u_old[k+1][j][i+1]); }} /* pointer swap */ #pragma mint single { REAL*** tmp; tmp = u_old; u_old= u_new; u_new = tmp; nIters = iters; } } } #pragma mint copy ( u_old, fromDevice, ( n+2 ), ( n+2 ), ( n+2 ) ) time_elapsed = getTime() - time_elapsed; Gflops = (double)(nIters * (n) * (n) * (n) * 1.0e-9 * FLOPS) / time_elapsed ; l2_norm = 0; for (k=0; k<= n+1; k++) for (j=0; j<= n+1; j++) for (i=0; i<= n+1; i++) { factor = sin(M_PI*i*h)*sin(M_PI*j*h)*sin(M_PI*k*h); l2_norm += (factor-u_old[k][j][i])*(factor-u_old[k][j][i]); } printf("%s%3.3f \t%5.3f\n", "Poisson19 ", time_elapsed, Gflops); printf(":N %d M %d K %d , iteration %d\n", n, n, n , nIters); printf(":max: %20.12e, l2norm: %20.12e\n",factor,sqrt(l2_norm*h*h*h)); //printf("Total iterations used: %d, l2-norm of error=%e\n", // nIters,sqrt(l2_norm*h*h*h)); free3D(u_new); free3D(u_old); free3D(rhs); return 0; }