double run(struct user_parameters* params) { int matrix_size = params->matrix_size; if (matrix_size <= 0) { matrix_size = 512; params->matrix_size = matrix_size; } int block_size = params->blocksize; if (block_size <= 0) { block_size = 128; params->blocksize = block_size; } if ( (matrix_size % block_size) || (matrix_size % block_size) ) { params->succeed = 0; params->string2display = "*****ERROR: blocsize must divide NX and NY"; return 0; } int niter = params->titer; if (niter <= 0) { niter = 4; params->titer = niter; } int ii,i,jj,j; double *f_ = (double*)malloc(matrix_size * matrix_size * sizeof(double)); double (*f)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])f_; double *u_ = (double*)malloc(matrix_size * matrix_size * sizeof(double)); double *unew_ = (double*)malloc(matrix_size * matrix_size * sizeof(double)); double (*unew)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])unew_; double (*u)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])u_; double dx = 1.0 / (double) (matrix_size - 1); double dy = 1.0 / (double) (matrix_size - 1); rhs(matrix_size, matrix_size, f_, block_size); //Set the initial solution estimate UNEW. //We are "allowed" to pick up the boundary conditions exactly. #pragma omp parallel #pragma omp master for (j = 0; j < matrix_size; j+= block_size) for (i = 0; i < matrix_size; i+= block_size) #pragma omp task firstprivate(i,j) private(ii,jj) for (jj=j; jj<j+block_size; ++jj) for (ii=i; ii<i+block_size; ++ii) { if (ii == 0 || ii == matrix_size - 1 || jj == 0 || jj == matrix_size - 1) { (*unew)[ii][jj] = (*f)[ii][jj]; (*u)[ii][jj] = (*f)[ii][jj]; } else { (*unew)[ii][jj] = 0.0; (*u)[ii][jj] = 0.0; } } /// KERNEL INTENSIVE COMPUTATION START_TIMER; #ifndef _OPENMP sweep_seq(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_); #else sweep(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_, block_size); #endif END_TIMER; #ifdef _OPENMP if(params->check) { check_params(params, matrix_size, block_size, dx, dy, f_, niter, u_, unew_) ; } #else params->succeed = 1; #endif free(f_); free(u_); free(unew_); return TIMER; }
void check_params( struct user_parameters* params, int matrix_size, int block_size, double dx, double dy, double *f_, int niter, double *u_, double *unew_) { double x, y; int i, j; double *udiff_ =(double*)malloc(matrix_size * matrix_size * sizeof(double)); double (*udiff)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])udiff_; double (*unew)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])unew_; double (*u)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])u_; double (*f)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])f_; // Check for convergence. for (j = 0; j < matrix_size; j++) { y = (double) (j) / (double) (matrix_size - 1); for (i = 0; i < matrix_size; i++) { x = (double) (i) / (double) (matrix_size - 1); (*udiff)[i][j] = (*unew)[i][j] - u_exact(x, y); if( (*udiff)[i][j] > 1.0E-6 ) { printf("error: %d, %d: %f\n", i, j, (*udiff)[i][j]); } } } double error = r8mat_rms(matrix_size, matrix_size, udiff_); double error1; // Set the right hand side array F. rhs(matrix_size, matrix_size, f_, block_size); for (j = 0; j < matrix_size; j++) { for (i = 0; i < matrix_size; i++) { if (i == 0 || i == matrix_size - 1 || j == 0 || j == matrix_size - 1) { (*unew)[i][j] = (*f)[i][j]; (*u)[i][j] = (*f)[i][j]; } else { (*unew)[i][j] = 0.0; (*u)[i][j] = 0.0; } } } sweep_seq(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_); // Check for convergence. for (j = 0; j < matrix_size; j++) { y = (double) (j) / (double) (matrix_size - 1); for (i = 0; i < matrix_size; i++) { x = (double) (i) / (double) (matrix_size - 1); (*udiff)[i][j] = (*unew)[i][j] - u_exact(x, y); if( (*udiff)[i][j] > 1.0E-6 ) { printf("error: %d, %d: %f\n", i, j, (*udiff)[i][j]); } } } error1 = r8mat_rms(matrix_size, matrix_size, udiff_); params->succeed = fabs(error - error1) < 1.0E-6; if(!params->succeed) { printf("error = %f, error1 = %f\n", error, error1); } free(udiff_); }
double run(struct user_parameters* params) { int matrix_size = params->matrix_size; if (matrix_size <= 0) { matrix_size = 512; params->matrix_size = matrix_size; } int block_size = params->blocksize; if (block_size <= 0) { block_size = 128; params->blocksize = block_size; } int niter = params->titer; if (niter <= 0) { niter = 4; params->titer = niter; } double dx; double dy; double error; int ii,i; int jj,j; int nx = matrix_size; int ny = matrix_size; double *f = (double *)malloc(nx * nx * sizeof(double)); double *u = (double *)malloc(nx * nx * sizeof(double)); double *unew = (double *)malloc(nx * ny * sizeof(double)); /* test if valid */ if ( (nx % block_size) || (ny % block_size) ) { params->succeed = 0; params->string2display = "*****ERROR: blocsize must divide NX and NY"; return 0; } /// INITIALISATION dx = 1.0 / (double) (nx - 1); dy = 1.0 / (double) (ny - 1); // Set the right hand side array F. rhs(nx, ny, f, block_size); /* Set the initial solution estimate UNEW. We are "allowed" to pick up the boundary conditions exactly. */ #pragma omp parallel { #pragma omp single for (j = 0; j < ny; j+= block_size) { for (i = 0; i < nx; i+= block_size) { #pragma omp task firstprivate(i,j) private(ii,jj) for (jj=j; jj<j+block_size; ++jj) { for (ii=i; ii<i+block_size; ++ii) { if (ii == 0 || ii == nx - 1 || jj == 0 || jj == ny - 1) { (unew)[ii * ny + jj] = (f)[ii * ny + jj]; } else { (unew)[ii * ny + jj] = 0.0; } } } } } } /// KERNEL INTENSIVE COMPUTATION START_TIMER; sweep(nx, ny, dx, dy, f, 0, niter, u, unew, block_size); END_TIMER; if(params->check) { double x; double y; double *udiff = (double *)malloc(nx * ny * sizeof(double)); /// CHECK OUTPUT // Check for convergence. for (j = 0; j < ny; j++) { y = (double) (j) / (double) (ny - 1); for (i = 0; i < nx; i++) { x = (double) (i) / (double) (nx - 1); (udiff)[i * ny + j] = (unew)[i * ny + j] - u_exact(x, y); } } error = r8mat_rms(nx, ny, udiff); double error1; // Set the right hand side array F. rhs(nx, ny, f, block_size); /* Set the initial solution estimate UNEW. We are "allowed" to pick up the boundary conditions exactly. */ for (j = 0; j < ny; j++) { for (i = 0; i < nx; i++) { if (i == 0 || i == nx - 1 || j == 0 || j == ny - 1) { (unew)[i * ny + j] = (f)[i * ny + j]; } else { (unew)[i * ny + j] = 0.0; } } } sweep_seq(nx, ny, dx, dy, f, 0, niter, u, unew); // Check for convergence. for (j = 0; j < ny; j++) { y = (double) (j) / (double) (ny - 1); for (i = 0; i < nx; i++) { x = (double) (i) / (double) (nx - 1); (udiff)[i * ny + j] = (unew)[i * ny + j] - u_exact(x, y); } } error1 = r8mat_rms(nx, ny, udiff); params->succeed = fabs(error - error1) < 1.0E-6; free(udiff); } free(f); free(u); free(unew); return TIMER; }