/*
 * Allocate both temperature fields, including one ghost layer on each side,
 * and set the initial state of the local domain.
 *
 * temperature1  field to allocate and fill; nx and ny must already be set
 * temperature2  second field; allocated here and then handed to copy_field
 *               together with temperature1 (presumably receiving a copy)
 * parallel      rank and size decide which process sets the first and the
 *               last row of the decomposed domain
 */
void initialize(field * temperature1, field * temperature2,
                parallel_data * parallel)
{
    int i, j;

    // Allocate also ghost layers
    temperature1->data =
        malloc_2d(temperature1->nx + 2, temperature1->ny + 2);
    temperature2->data =
        malloc_2d(temperature2->nx + 2, temperature2->ny + 2);

    // Initialize to zero; the storage behind data[0] is cleared as one block
    memset(temperature1->data[0], 0,
           (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double));

    // First and last column of every local row
    for (i = 0; i < temperature1->nx + 2; i++) {
        temperature1->data[i][0] = 85.0;
        temperature1->data[i][temperature1->ny + 1] = 45.0;
    }

    // First row, owned by the first rank
    if (parallel->rank == 0) {
        for (j = 0; j < temperature1->ny + 2; j++)
            temperature1->data[0][j] = 5.0;
    }

    // Last row, owned by the last rank. This is a separate test (not an
    // else-branch) so that a single-process run, where rank 0 is also the
    // last rank, sets both rows.
    if (parallel->rank == parallel->size - 1) {
        for (j = 0; j < temperature1->ny + 2; j++)
            temperature1->data[temperature1->nx + 1][j] = 20.0;
    }

    copy_field(temperature1, temperature2);
}
/*
 * Collect the interior (ghost layers stripped) of every rank's field on
 * rank 0 and write the combined picture to heat_NNNN.png.
 *
 * temperature  local field of this rank
 * iter         iteration number used in the file name
 * parallel     rank, size and communicator of the decomposition
 */
void output(field * temperature, int iter, parallel_data * parallel)
{
    char filename[64];
    double **gathered;          // combined data, rank 0 only
    double **staging;           // buffer for MPI sends and receives
    int row, src;
    const int local_count = temperature->nx * temperature->ny;
    const int height = temperature->nx * parallel->size;
    const int width = temperature->ny;

    staging = malloc_2d(temperature->nx, temperature->ny);

    if (parallel->rank != 0) {
        // Pack the interior rows and ship them to rank 0
        for (row = 0; row < temperature->nx; row++) {
            memcpy(staging[row], &temperature->data[row + 1][1],
                   temperature->ny * sizeof(double));
        }
        MPI_Send(&staging[0][0], local_count, MPI_DOUBLE, 0, 22,
                 parallel->comm);
        free_2d(staging);
        return;
    }

    // Rank 0: start with its own interior ...
    gathered = malloc_2d(height, width);
    for (row = 0; row < temperature->nx; row++) {
        memcpy(gathered[row], &temperature->data[row + 1][1],
               temperature->ny * sizeof(double));
    }

    // ... then append the block of every other rank in rank order
    for (src = 1; src < parallel->size; src++) {
        MPI_Recv(&staging[0][0], local_count, MPI_DOUBLE, src, 22,
                 parallel->comm, MPI_STATUS_IGNORE);
        memcpy(&gathered[src * temperature->nx][0], staging[0],
               local_count * sizeof(double));
    }

    sprintf(filename, "%s_%04d.png", "heat", iter);
    save_png(gathered[0], height, width, filename, 'c');

    free_2d(gathered);
    free_2d(staging);
}
/*
 * Write the interior of a temperature field (ghost layers stripped) to a
 * PNG file named heat_NNNN.png, NNNN being the zero-padded iteration.
 *
 * temperature  field whose rows 1..nx and columns 1..ny are written
 * iter         iteration number used in the file name
 */
void output(field * temperature, int iter)
{
    char filename[64];
    // The actual write routine takes only the actual data
    // (without ghost layers) so we need array for that
    int height, width;
    double **full_data;
    int i;

    height = temperature->nx;
    width = temperature->ny;

    // Copy the inner data
    full_data = malloc_2d(height, width);
    for (i = 0; i < temperature->nx; i++) {
        memcpy(full_data[i], &temperature->data[i + 1][1],
               temperature->ny * sizeof(double));
    }

    snprintf(filename, sizeof filename, "%s_%04d.png", "heat", iter);
    save_png(full_data[0], height, width, filename, 'c');

    // Arrays from malloc_2d are released with free_2d; plain free() would
    // only release the outer pointer.
    free_2d(full_data);
}
/*
 * Smoke test for malloc_2d: allocate a 10 x 5 array of doubles, fill it
 * with 0, 1, 2, ... in row-major order and print it one row per line.
 */
int main(void)
{
    enum { ROWS = 10, COLS = 5 };
    double **grid = malloc_2d(ROWS, COLS, sizeof(double));
    int r, c;

    if (grid == NULL) {
        return EXIT_FAILURE;
    }

    // Element (r, c) receives its row-major position
    for (r = 0; r < ROWS; r++) {
        for (c = 0; c < COLS; c++) {
            grid[r][c] = r * COLS + c;
        }
    }

    printf("\n");
    for (r = 0; r < ROWS; r++) {
        for (c = 0; c < COLS; c++) {
            printf("%2f ", grid[r][c]);
        }
        printf("\n");
    }

    printf("Test completed succesfully.\n");
    return EXIT_SUCCESS;
}
/*
 * Allocate both temperature fields, including one ghost layer on each side,
 * expose each of them through an MPI RMA window and set the initial state
 * of the local domain.
 *
 * temperature1  field to allocate and fill; nx and ny must already be set
 * temperature2  second field; allocated here and then handed to copy_field
 *               together with temperature1 (presumably receiving a copy)
 * parallel      communicator for the windows; rank and size decide which
 *               process sets the first and the last row
 */
void initialize(field * temperature1, field * temperature2,
                parallel_data * parallel)
{
    int i, j;

    // Allocate also ghost layers
    temperature1->data =
        malloc_2d(temperature1->nx + 2, temperature1->ny + 2);
    temperature2->data =
        malloc_2d(temperature2->nx + 2, temperature2->ny + 2);

    // Create RMA window. In principle, only borders would be needed
    // but it is simpler to expose the whole array
    MPI_Win_create(&temperature1->data[0][0],
                   (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double),
                   sizeof(double), MPI_INFO_NULL, parallel->comm,
                   &temperature1->rma_window);
    MPI_Win_create(&temperature2->data[0][0],
                   (temperature2->nx + 2) * (temperature2->ny + 2) * sizeof(double),
                   sizeof(double), MPI_INFO_NULL, parallel->comm,
                   &temperature2->rma_window);

    // Initialize to zero; the storage behind data[0] is cleared as one block
    memset(temperature1->data[0], 0,
           (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double));

    // First and last column of every local row
    for (i = 0; i < temperature1->nx + 2; i++) {
        temperature1->data[i][0] = 30.0;
        temperature1->data[i][temperature1->ny + 1] = -10.0;
    }

    // First row, owned by the first rank
    if (parallel->rank == 0) {
        for (j = 0; j < temperature1->ny + 2; j++)
            temperature1->data[0][j] = 15.0;
    }

    // Last row, owned by the last rank. This is a separate test (not an
    // else-branch) so that a single-process run, where rank 0 is also the
    // last rank, sets both rows.
    if (parallel->rank == parallel->size - 1) {
        for (j = 0; j < temperature1->ny + 2; j++)
            temperature1->data[temperature1->nx + 1][j] = -25.0;
    }

    copy_field(temperature1, temperature2);
}
// Allocate memory for a temperature field and initialise it to zero void allocate_field(field *temperature) { // Allocate also ghost layers temperature->data = malloc_2d(temperature->nx + 2, temperature->ny + 2); // Initialize to zero memset(temperature->data[0], 0.0, (temperature->nx + 2) * (temperature->ny + 2) * sizeof(double)); }
/*
 * Allocate both temperature fields (ghost layers included), zero the first
 * one and apply fixed values on those edges of the local block that lie on
 * the edge of the Cartesian process grid.
 *
 * The edges are written in the order left, upper, right, lower, so where two
 * edges share a corner the later one determines the corner value.
 */
void initialize(field * temperature1, field * temperature2,
                parallel_data * parallel)
{
    int dims[2], coords[2], periods[2];
    int r, c;
    const int rows = temperature1->nx + 2;
    const int cols = temperature1->ny + 2;

    // Storage for interior plus ghost layers
    temperature1->data = malloc_2d(rows, cols);
    temperature2->data =
        malloc_2d(temperature2->nx + 2, temperature2->ny + 2);

    // Everything starts at zero
    memset(temperature1->data[0], 0, rows * cols * sizeof(double));

    // Where does this process sit in the process grid?
    MPI_Cart_get(parallel->comm, 2, dims, periods, coords);

    // Left boundary
    if (coords[1] == 0) {
        for (r = 0; r < rows; r++) {
            temperature1->data[r][0] = 30.0;
        }
    }

    // Upper boundary
    if (coords[0] == 0) {
        for (c = 0; c < cols; c++) {
            temperature1->data[0][c] = 15.0;
        }
    }

    // Right boundary
    if (coords[1] == dims[1] - 1) {
        for (r = 0; r < rows; r++) {
            temperature1->data[r][cols - 1] = -10.0;
        }
    }

    // Lower boundary
    if (coords[0] == dims[0] - 1) {
        for (c = 0; c < cols; c++) {
            temperature1->data[rows - 1][c] = -25.0;
        }
    }

    copy_field(temperature1, temperature2);
}
/*
 * Allocate both temperature fields, including one ghost layer on each side,
 * zero the interior and set fixed values on the four edges.
 *
 * temperature1  field to allocate and fill; nx and ny must already be set
 * temperature2  second field; allocated here and then handed to copy_field
 *               together with temperature1 (presumably receiving a copy)
 */
void initialize(field * temperature1, field * temperature2)
{
    int i, j;

    // Allocate also ghost layers
    temperature1->data =
        malloc_2d(temperature1->nx + 2, temperature1->ny + 2);
    temperature2->data =
        malloc_2d(temperature2->nx + 2, temperature2->ny + 2);

    // Initialize to zero. Without this the interior would keep whatever
    // malloc_2d left in the freshly allocated storage.
    for (i = 0; i < temperature1->nx + 2; i++) {
        for (j = 0; j < temperature1->ny + 2; j++) {
            temperature1->data[i][j] = 0.0;
        }
    }

    // First and last column
    for (i = 0; i < temperature1->nx + 2; i++) {
        temperature1->data[i][0] = 30.0;
        temperature1->data[i][temperature1->ny + 1] = -10.0;
    }

    // First and last row (written last, so they also define the corners)
    for (j = 0; j < temperature1->ny + 2; j++) {
        temperature1->data[0][j] = 15.0;
        temperature1->data[temperature1->nx + 1][j] = -25.0;
    }

    copy_field(temperature1, temperature2);
}
/*
 * Read an initial temperature field from a text file.
 *
 * The file starts with a header "# nx ny" followed by nx * ny floating
 * point values, which are stored row by row into the interior of
 * temperature1. The ghost layers are then filled by replicating the
 * neighbouring interior value, and both fields are handed to copy_field.
 *
 * temperature1  field that receives the data (metadata and storage are
 *               set up here)
 * temperature2  second field, set up with the same dimensions
 * filename      path of the input file
 *
 * Terminates the program with an error message if the file cannot be
 * opened or its contents cannot be parsed.
 */
void read_input(field * temperature1, field * temperature2, char *filename)
{
    FILE *fp;
    int nx, ny, i, j;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        fprintf(stderr, "Could not open input file %s\n", filename);
        exit(EXIT_FAILURE);
    }

    // Read the header; nx and ny size the allocations below, so they must
    // have been parsed and be positive
    if (fscanf(fp, "# %d %d \n", &nx, &ny) != 2 || nx <= 0 || ny <= 0) {
        fprintf(stderr, "Invalid header in input file %s\n", filename);
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    initialize_field_metadata(temperature1, nx, ny);
    initialize_field_metadata(temperature2, nx, ny);

    // Allocate arrays (including ghost layers)
    temperature1->data = malloc_2d(nx + 2, ny + 2);
    temperature2->data = malloc_2d(nx + 2, ny + 2);

    // Read the actual data
    for (i = 1; i < nx + 1; i++) {
        for (j = 1; j < ny + 1; j++) {
            if (fscanf(fp, "%lf", &temperature1->data[i][j]) != 1) {
                fprintf(stderr, "Error while reading the input file %s\n",
                        filename);
                fclose(fp);
                exit(EXIT_FAILURE);
            }
        }
    }

    // Set the boundary values: first the side columns of the interior rows,
    // then the complete first and last rows (corners included)
    for (i = 1; i < nx + 1; i++) {
        temperature1->data[i][0] = temperature1->data[i][1];
        temperature1->data[i][ny + 1] = temperature1->data[i][ny];
    }
    for (j = 0; j < ny + 2; j++) {
        temperature1->data[0][j] = temperature1->data[1][j];
        temperature1->data[nx + 1][j] = temperature1->data[nx][j];
    }

    copy_field(temperature1, temperature2);

    fclose(fp);
}
/*
 * Assemble the full temperature field on rank 0 and write it to
 * heat_NNNN.png. Every other rank sends its interior, described by
 * parallel->subarraytype, to rank 0 with a synchronous send.
 *
 * temperature  local field; nx_full / ny_full give the size of the picture
 * iter         iteration number used in the file name
 * parallel     rank, size, communicator and the subarray datatype
 */
void output(field * temperature, int iter, parallel_data * parallel)
{
    char filename[64];
    double **picture;
    int coords[2];
    int row, src;
    int row_offset, col_offset;
    const int height = temperature->nx_full;
    const int width = temperature->ny_full;

    if (parallel->rank != 0) {
        // Send data
        MPI_Ssend(&temperature->data[1][1], 1, parallel->subarraytype, 0, 22,
                  parallel->comm);
        return;
    }

    // Rank 0 copies its own interior (without ghost layers) first
    picture = malloc_2d(height, width);
    for (row = 0; row < temperature->nx; row++) {
        memcpy(picture[row], &temperature->data[row + 1][1],
               temperature->ny * sizeof(double));
    }

    // Receive each remaining block straight into its place, which follows
    // from the sender's coordinates in the process grid
    for (src = 1; src < parallel->size; src++) {
        MPI_Cart_coords(parallel->comm, src, 2, coords);
        row_offset = coords[0] * temperature->nx;
        col_offset = coords[1] * temperature->ny;
        MPI_Recv(&picture[row_offset][col_offset], 1, parallel->subarraytype,
                 src, 22, parallel->comm, MPI_STATUS_IGNORE);
    }

    sprintf(filename, "%s_%04d.png", "heat", iter);
    save_png(picture[0], height, width, filename, 'c');
    free_2d(picture);
}
/*
 * Solve the knapsack instance with a dynamic-programming table of
 * (k->n + 1) x (k->cap + 1) entries, filled row by row, and store the
 * value found for all k->n items at capacity k->cap in k->solution.cost.
 *
 * table[i][w] is built from row i - 1: either item i - 1 is left out, or,
 * when its weight does not exceed w, its cost is added to the entry of the
 * remaining capacity, whichever max() selects.
 */
void knapsack_solve_dynamic_up(knapsack_t *k)
{
    int **table = NULL;
    int *table_storage = NULL;
    int item, room;

    malloc_2d(&table, &table_storage, k->n + 1, k->cap + 1);

    /* start from an all-zero table */
    item = 0;
    while (item <= k->n) {
        for (room = 0; room <= k->cap; room++) {
            table[item][room] = 0;
        }
        item++;
    }

    for (item = 1; item <= k->n; item++) {
        for (room = 0; room <= k->cap; room++) {
            if (k->items[item - 1].weight > room) {
                /* over limit, just copy the previous line */
                table[item][room] = table[item - 1][room];
                continue;
            }
            table[item][room] =
                max(k->items[item - 1].cost +
                        table[item - 1][room - k->items[item - 1].weight],
                    table[item - 1][room]);
        }
    }

    k->solution.cost = table[k->n][k->cap];

    free(table);
    free(table_storage);
}
int main() { int i,j,k,l; memcount = 0; readgrid(); printf("Grid reading finished, memcount = %ld\n",memcount); double tol = 1e-6; //0.1; double kappa = 0.0, beta = 0.5, omega = 1 - kappa/beta, alpha0 = 1-omega; double tin = 1000.0, tw = 0.0; double dh1 = 0.0, dh2 = 100.0; double sigma = 5.67e-8; buildcoeffs(alpha0,beta); printf("Built co-efficients, memcount = %ld\n",memcount); tb = (double*) malloc(sizeof(double)*nbcfaces); t = (double*) malloc(sizeof(double)*ncells); memcount += sizeof(double)*nbcfaces + sizeof(double)*ncells; //printf("Memcount = %ld\n",memcount); for( i = 0 ; i < ncells ; i++ ) t[i] = 0.0; for( i = 0 ; i < nbcfaces ; i++){ if( bcname[i] == BOTTOM ){ int face_num = bf_to_f[i]; if( xf[face_num] >= dh1 && xf[face_num] <= dh2 ) tb[i] = tin; else tb[i] = tw; } else tb[i] = tw; } /* for( i = 0 ; i < nbcfaces ; i++ ) */ /* printf("%lf\n",tb[i]); */ phic_old = malloc_2d(ncells,4); phic_new = malloc_2d(ncells,4); sc = malloc_2d(ncells,4); memcount += sizeof(double)*4*ncells*3 + sizeof(double*)*3*ncells; //printf("Memcount = %ld\n",memcount); for( i = 0 ; i < ncells ; i++ ){ for( j = 0 ; j < 4 ;j++){ phic_old[i][j] = 0.0; phic_new[i][j] = 0.0; } } sb = malloc_2d(nbcfaces,4); phib = malloc_2d(nbcfaces,4); memcount += sizeof(double)*4*nbcfaces*2 + sizeof(double*)*2*nbcfaces; //printf("Memcount = %ld\n",memcount); for( i = 0 ; i < nbcfaces ; i++ ){ for( j = 0 ; j < 4 ; j++ ) phib[i][j] = 0.0; } phiv = malloc_2d(nnodes,4); memcount += sizeof(double)*4*nnodes + sizeof(double*)*nnodes; //printf("Memcount = %ld\n",memcount); for( i = 0 ; i < nnodes ; i++ ) for( j = 0 ; j < 4 ; j++ ) phiv[i][j] = 0.0; dphi = malloc_2d(nfaces,4); memcount += sizeof(double)*4*nfaces + sizeof(double*)*nfaces; //printf("Memcount = %ld, dphi = %p\n",memcount,dphi); for( i = 0 ; i < nfaces ; i++ ) for( j = 0 ; j < 4 ; j++ ) dphi[i][j] = 0.0; double resid0,resid1,resid2,resid3; resid0 = tol * 2; resid1 = tol * 2; resid2 = tol * 2; resid3 = tol * 2; for( i = 0 ; i < nbcfaces ; i++ ){ 
sb[i][0] = 4.0 * sigma * tb[i] * tb[i] * tb[i] * tb[i]; sb[i][1] = 0.0; sb[i][2] = 4.0 * sigma * tb[i] * tb[i] * tb[i] * tb[i]; sb[i][3] = 0.0; } for( i = 0 ; i < ncells ; i++ ){ sc[i][0] = 0.0; sc[i][1] = 0.0; sc[i][2] = 0.0; sc[i][3] = -20.0 * alpha0 * sigma * t[i]*t[i]*t[i]*t[i] * beta * volcell[i]; } int count = 0; double start_t = rtclock(); while ( resid0 > tol || resid1 > tol || resid2 > tol || resid3 > tol ){ /* while(count < 1 ){ */ for(i = 0 ; i < nbcfaces ; i++ ){ double new_phib[4]; double lhs_matrix[4][4]; int currf = bf_to_f[i]; assert( currf >=0 && currf < nfaces); assert( bf_to_c[i] >=0 && bf_to_c[i] < nfaces); for( j = 0 ; j < 4 ; j++ ) new_phib[j] = sb[i][j]; for( k = 0 ; k < 4 ; k++ ) for( l = 0 ; l < 4 ; l++ ){ new_phib[k] += bc[i][4*k+l] * dphi[currf][l] + bb[i][4*k+l]*phic_new[bf_to_c[i]][l]; lhs_matrix[k][l] = ba[i][4*k+l]; } inverse_multiply(new_phib,lhs_matrix); for( j = 0 ; j < 4 ; j++ ) phib[i][j] = new_phib[j]; } for( i = 0 ; i < nnodes ; i++ ) for( j = 0 ; j < 4 ; j++ ) phiv[i][j] = 0.0; for( i = 0 ; i < nbcfaces ; i++ ){ int currf = bf_to_f[i]; int n1 = lfv0[currf]; int n2 = lfv1[currf]; assert(currf >=0 && currf < nfaces ); assert(n1 >=0 && n1 < nnodes ); assert(n2 >=0 && n2 < nnodes ); for( j = 0 ; j < 4 ; j++ ){ phiv[n1][j] += phib[i][j] * wbfv[i][0]; phiv[n2][j] += phib[i][j] * wbfv[i][1]; } } for( i= 0 ; i < ncells ; i++ ){ for( j = ia_cv[i] ; j < ia_cv[i+1] ; j++ ){ int currv = lcv[j]; assert(currv >=0 && currv < nnodes); if( bnode[currv] == 0 ) for( k = 0 ; k < 4 ; k++ ) phiv[currv][k] += phic_new[i][k] * wcv[j]; } for( j = 0 ; j < 4 ; j++ ) phic_old[i][j] = phic_new[i][j]; } for( i = 0 ; i < nfaces ; i++ ){ int n1 = lfv0[i]; int n2 = lfv1[i]; assert(n1 >= 0 && n1 < nnodes ); assert(n2 >= 0 && n2 < nnodes ); double xv1 = xv[n1]; double xv2 = xv[n2]; double yv1 = yv[n1]; double yv2 = yv[n2]; /* if( i == 0 ) { */ /* printf("i= %d, phiv[%d]=[%lf,%lf,%lf,%lf], 
phiv[%d]=[%lf,%lf,%lf,%lf]\n",i,n1,phiv[n1][0],phiv[n1][1],phiv[n1][2],phiv[n1][3],n2,phiv[n2][0],phiv[n2][1],phiv[n2][2],phiv[n2][3]); */ /* } */ for( j = 0 ; j < 4 ; j++ ) dphi[i][j] = ( phiv[n2][j] - phiv[n1][j] ) / sqrt( ( xv2 - xv1 ) * ( xv2 - xv1 ) + ( yv2 - yv1 ) * ( yv2 - yv1 ) ); /* if( i == 0 ) */ /* printf("i=%d,dphi[%d]=[%lf,%lf,%lf,%lf]\n",i,i,dphi[i][0],dphi[i][1],dphi[i][2],dphi[i][3]); */ } for( i = 0 ; i < ncells ; i++ ){ double diag_matrix[4][4]; double new_phi[4]; for( j = 0 ; j < 4 ; j++ ){ for( k = 0 ; k < 4 ; k++ ) diag_matrix[j][k] = 0.0; new_phi[j] = sc[i][j]; diag_matrix[j][j] = vol_vec[i][j]; } for( j = ia_cf[i] ; j < ia_cf[i+1] ; j++ ){ int currf = lcf[j]; if( bface[currf] == 0 ){ int currcell; double alpha; if( lfc0[currf] == i ){ currcell = lfc1[currf]; alpha = 1; } else{ alpha = -1; currcell = lfc0[currf]; } for( k = 0 ; k < 4 ; k++ ) for( l = 0 ; l < 4 ; l++ ){ new_phi[k] -= (fclink[currf][k*4+l] * phic_old[currcell][l] + alpha * ftlink[currf][k*4+l] * dphi[currf][l]) ; diag_matrix[k][l] -= fclink[currf][k*4+l]; } } else{ int currbf = f_to_bf[currf]; assert(currbf >= 0 ); for( k = 0 ; k < 4 ; k++ ) for( l = 0 ; l < 4 ; l++ ){ new_phi[k] -= (fclink[currf][k*4+l] * phib[currbf][l] + ftlink[currf][k*4+l] * dphi[currf][l]) ; diag_matrix[k][l] -= fclink[currf][k*4+l]; } } } inverse_multiply(new_phi,diag_matrix); for( j = 0 ; j < 4 ; j++ ) phic_new[i][j] = new_phi[j]; } resid0 = 0.0; resid1 = 0.0; resid2 = 0.0; resid3 = 0.0; for( i = 0 ; i < ncells ; i++ ){ double temp = phic_new[i][0] - phic_old[i][0]; resid0 += temp * temp; temp = phic_new[i][1] - phic_old[i][1]; resid1 += temp * temp; temp = phic_new[i][2] - phic_old[i][2]; resid2 += temp * temp; temp = phic_new[i][3] - phic_old[i][3]; resid3 += temp * temp; } resid0 = sqrt(resid0) / ncells; resid1 = sqrt(resid1) / ncells; resid2 = sqrt(resid2) / ncells; resid3 = sqrt(resid3) / ncells; count++; #ifndef NDEBUG printf("%d %lf %lf %lf %lf\n",count,resid0,resid1,resid2,resid3); #endif } 
double stop_t = rtclock(); printf("[IEC]:OriginalTime:%lf\n",stop_t - start_t); //printf("%d %lf %lf %lf %lf\n",count,resid0,resid1,resid2,resid3); /* print_output(); */ free(xc); free(yc); free(xf); free(yf); free(volcell); free(areaf); free(vecfx); free(vecfy); free(xv); free(yv); free(ia_cf); free(ia_cv); free(lcf); free(lcv); /* free_2d_int(lfc); */ /* free_2d_int(lfv); */ free(lfc0); free(lfc1); free(lfv0); free(lfv1); free(bface); free(f_to_bf); free(bf_to_f); free(bf_to_c); free(bnode); free(bctype); free(bcname); free(wcv); free_2d(wbfv); free(t); free(tb); free_2d(sc); free_2d(sb); free_2d(phic_new); free_2d(phic_old); free_2d(phib); free_2d(phiv); free_2d(dphi); free_2d(fclink); free_2d(ftlink); free_2d(vol_vec); free_2d(ba); free_2d(bb); free_2d(bc); return 0; }
void readgrid() { FILE* one_file = fopen("gridfolder/one.out","r"); int i,j,k; fscanf(one_file,"%d %d %d %d",&ncells,&nfaces,&nnodes,&nbcfaces); //printf("one.out : %d %d %d %d\n",ncells,nfaces,nnodes,nbcfaces); fclose(one_file); xc = (double*) malloc(ncells*sizeof(double)); yc = (double*) malloc(ncells*sizeof(double)); volcell = (double*) malloc(ncells*sizeof(double)); memcount += sizeof(double)*ncells*3; FILE* celldata_file = fopen("gridfolder/celldata.out","r"); for( i = 0 ; i < ncells ; i++ ){ int temp; fscanf(celldata_file,"%d %lf %lf %lf",&temp,xc+i,yc+i,volcell+i); } //printf("celldata.out : %lf %lf %lf\n",xc[ncells-1],yc[ncells-1],volcell[ncells-1]); fclose(celldata_file); xf = (double*) malloc(nfaces*sizeof(double)); yf = (double*) malloc(nfaces*sizeof(double)); areaf = (double*) malloc(nfaces*sizeof(double)); vecfx = (double*) malloc(nfaces*sizeof(double)); vecfy = (double*) malloc(nfaces*sizeof(double)); memcount += sizeof(double)*nfaces*5; FILE* facedata_file = fopen("gridfolder/facedata.out","r"); for( i = 0 ; i < nfaces ; i++ ){ int temp; fscanf(facedata_file,"%d %lf %lf %lf %lf %lf",&temp,xf+i,yf+i,areaf+i,vecfx+i,vecfy+i); } //printf("facedata.out : %lf %lf %lf %lf %lf\n",xf[nfaces-1],yf[nfaces-1],areaf[nfaces-1],vecfx[nfaces-1],vecfy[nfaces-1]); fclose(facedata_file); xv = (double*) malloc(nnodes*sizeof(double)); yv = (double*) malloc(nnodes*sizeof(double)); memcount += sizeof(double)*nnodes*2; FILE* nodes_file = fopen("gridfolder/nodesdata.out","r"); for( i = 0 ; i < nnodes ; i++ ){ int temp; fscanf(nodes_file,"%d %lf %lf",&temp,xv+i,yv+i); } //printf("nodesdata.out : %lf %lf\n",xv[nnodes-1],yv[nnodes-1]); fclose(nodes_file); ia_cf = (int*)malloc(sizeof(int)*(ncells+1)); ia_cv = (int*)malloc(sizeof(int)*(ncells+1)); memcount += sizeof(int)*(ncells+1)*3; ia_cf[0] = 0; ia_cv[0] = 0; FILE* nfcells_file = fopen("gridfolder/nfcells.out","r"); for( i = 0 ; i < ncells ; i++ ){ int temp; fscanf(nfcells_file,"%d",&temp); ia_cv[i+1] = ia_cv[i] + temp; 
ia_cf[i+1] = ia_cv[i+1]; } //printf("nfcells.out : %d %d\n",ia_cv[ncells],ia_cf[ncells]); fclose(nfcells_file); lcv = (int*) malloc(ia_cv[ncells]*sizeof(int)); lcf= (int*) malloc(ia_cf[ncells]*sizeof(int)); memcount += sizeof(int)*(ia_cv[ncells]+ia_cf[ncells]); FILE* celldata2_file = fopen("gridfolder/celldata2.out","r"); for( i = 0 ; i < ncells ; i++ ){ int temp; fscanf(celldata2_file,"%d",&temp); assert(temp == i+1); for( j = ia_cf[i] ; j < ia_cf[i+1] ; j++ ){ int temp2; fscanf(celldata2_file,"%d",&temp2); lcf[j] = temp2 -1 ; } fscanf(celldata2_file,"%d",&temp); assert(temp == i+1); for( j = ia_cv[i] ; j < ia_cv[i+1] ; j++ ){ int temp2; fscanf(celldata2_file,"%d",&temp2); lcv[j] = temp2 -1; } } fclose(celldata2_file); //printf("celldata2.out: %d %d\n",lcv[ia_cv[ncells]-1],lcf[ia_cf[ncells]-1]); /* lfc = malloc_2d_int(nfaces,2); */ lfc0 = (int*)malloc(nfaces*sizeof(int)); lfc1 = (int*)malloc(nfaces*sizeof(int)); memcount += sizeof(int)*2*nfaces + sizeof(int*)*nfaces; /* lfv = malloc_2d_int(nfaces,2); */ lfv0 = (int*)malloc(nfaces*sizeof(int)); lfv1 = (int*)malloc(nfaces*sizeof(int)); memcount += sizeof(int)*2*nfaces + sizeof(int*)*nfaces; FILE* facedata2_file = fopen("gridfolder/facedata2.out","r"); for( i = 0 ; i < nfaces ; i++ ){ int temp,temp1,temp2; fscanf(facedata2_file,"%d %d %d",&temp,&temp1,&temp2); lfc0[i] = temp1 - 1; lfc1[i] = temp2 - 1; fscanf(facedata2_file,"%d %d %d",&temp,&temp1,&temp2); lfv0[i] = temp1 - 1; lfv1[i] = temp2 - 1; } fclose(facedata2_file); //printf("facedat2.out: %d %d\n",lfc[nfaces-1][0],lfv[nfaces-1][1]); bface = (int*) malloc(sizeof(int)*nfaces); f_to_bf = (int*) malloc(sizeof(int)*nfaces); memcount += sizeof(int)*2*nfaces; FILE* facedata3_file = fopen("gridfolder/facedata3.out","r"); for( i = 0 ; i < nfaces ; i++ ){ int temp; fscanf(facedata3_file,"%d %d %d",&temp,bface+i,f_to_bf+i); f_to_bf[i]--; } fclose(facedata3_file); //printf("facedata3.out: %d %d\n",bface[nfaces-1],f_to_bf[nfaces-1]); bcname = (enum bdy_type*) 
malloc(sizeof(enum bdy_type)*nbcfaces); bctype = (int*) malloc(sizeof(int)*nbcfaces); bf_to_f = (int*) malloc(sizeof(int)*nbcfaces); memcount += sizeof(int)*2*nbcfaces + sizeof(enum bdy_type)*nbcfaces; FILE* boundarydata_file = fopen("gridfolder/boundarydata.out","r"); char btype_names[][7] = {"bottom","left","top","right"}; for( i = 0 ; i < nbcfaces ; i++ ){ int temp; char name[10]; fscanf(boundarydata_file,"%d %d %d %s",&temp,bf_to_f+i,bctype+i,name); /* printf("Name : %s\n",name); */ if( strcmp(name,btype_names[0]) == 0 ) bcname[i] = BOTTOM; else if( strcmp(name,btype_names[1]) == 0 ) bcname[i] = LEFT; else if( strcmp(name,btype_names[2]) == 0 ) bcname[i] = TOP; else if( strcmp(name,btype_names[3]) == 0 ) bcname[i] = RIGHT; else assert(0); bf_to_f[i]--; } fclose(boundarydata_file); //printf("boundarydata.out: %d %d\n",bctype[nbcfaces-1],bf_to_f[nbcfaces-1]); bf_to_c = (int*) malloc(sizeof(int)*nbcfaces); memcount += sizeof(int)*nbcfaces ; for( i = 0 ; i < nbcfaces ; i++ ) bf_to_c[i] = lfc0[bf_to_f[i]]; bnode = (int*) malloc(sizeof(int)*nnodes); memcount += sizeof(int)*nnodes ; for( i = 0 ; i < nnodes ; i++ ) bnode[i] = 0; for( i = 0 ; i < nbcfaces ; i++){ bnode[ lfv0[bf_to_f[i]] ] = 1; bnode[ lfv1[bf_to_f[i]] ] = 1; } double *wv = (double*) malloc(sizeof(double)*nnodes); wcv = (double*) malloc(sizeof(double)*ia_cv[ncells]); memcount += sizeof(double)*nnodes + sizeof(double)*lcv[ncells]; for( i = 0 ; i < nnodes ; i++ ) wv[i] = 0.0; for( i = 0 ; i < ncells ; i++){ double xcell = xc[i]; double ycell = yc[i]; for( j = ia_cv[i] ; j < ia_cv[i+1] ; j++ ) if( bnode[lcv[j]] == 0 ){ double xnode = xv[lcv[j]]; double ynode = yv[lcv[j]]; wcv[j] = 1.0 / sqrt( (xcell - xnode)*(xcell - xnode) + (ycell - ynode)*(ycell-ynode)); wv[lcv[j]] += wcv[j]; } } for( i = 0 ; i < ncells ; i++ ) for( j = ia_cv[i] ; j < ia_cv[i+1] ; j++ ) if( bnode[lcv[j]] == 0 ) wcv[j] /= wv[lcv[j]]; wbfv = malloc_2d(nbcfaces,2); memcount += sizeof(double)*2*nbcfaces + sizeof(double*)*nbcfaces ; for( i = 0 
; i < nbcfaces ; i++ ){ int gf = bf_to_f[i]; double xbdy = xf[gf]; double ybdy = yf[gf]; int n1 = lfv0[gf]; assert(bnode[n1] == 1 ); double xn1 = xv[n1]; double yn1 = yv[n1]; double inv_dist = sqrt( (xbdy - xn1)*(xbdy-xn1) + (ybdy-yn1)*(ybdy-yn1) ); wbfv[i][0] = 1.0 / inv_dist; int n2 = lfv1[gf]; assert(bnode[n2] == 1 ); double xn2 = xv[n2]; double yn2 = yv[n2]; wbfv[i][1] = 1.0 / sqrt((xbdy - xn2)*(xbdy-xn2) + (ybdy-yn2)*(ybdy-yn2) ); wv[n1] += wbfv[i][0]; wv[n2] += wbfv[i][1]; } for( i = 0 ; i < nbcfaces ; i++ ){ wbfv[i][0] /= wv[ lfv0[bf_to_f[i]] ]; wbfv[i][1] /= wv[ lfv1[bf_to_f[i]] ]; } free(wv); memcount -= sizeof(double)*nnodes; }
/*
 * Build the coefficient tables used by the iteration in main:
 *   vol_vec[cell][0..3]   cell volume times gamma[r][4]
 *   fclink[face][0..15],
 *   ftlink[face][0..15]   per-face 4x4 blocks (row-major, index 4*row+col)
 *   ba, bb, bc[bface][0..15]  per-boundary-face 4x4 blocks
 *
 * alpha0, beta  model parameters; beta divides or multiplies every gamma
 *               entry, alpha0 only enters gamma[3][4]
 *
 * Allocates all six tables with malloc_2d and adds their size to memcount.
 * Reads the grid arrays (xc, yc, xf, yf, xv, yv, areaf, vecfx, vecfy, lfc0,
 * lfc1, lfv0, lfv1, bface, f_to_bf, bcname, volcell), so the grid must have
 * been read before this is called.
 */
void buildcoeffs(double alpha0, double beta) {
    double c83,c11,c7_3,c61,c1,c2,c3,c4,c5,c6,sinx,cosx;
    const double alpha1 = 3.0, alpha2 = 5.0, alpha3 = 7.0;
    int i;

    /* Combinations of 1/alpha1 and 1/alpha3 shared by several gamma entries */
    c83 = 8.00 / alpha3 + 3.00 / alpha1;
    c11 = 1.00 / alpha3 + 1.00 / alpha1;
    c7_3 = 7.00 / alpha3 - 3.00 / alpha1;
    c61 = 6.00 / alpha3 + 1.00 / alpha1;

    /* gamma[r][0..3] scale the face blocks of row r, gamma[r][4] the
       cell-volume term of row r */
    double gamma[4][5];
    gamma[0][0] = 2.00 * c83 / beta;
    gamma[0][1] = - c11 / beta;
    gamma[0][2] = 2.00 * c7_3 / beta;
    gamma[0][3] = 5.00 / alpha1 / beta;
    gamma[0][4] = -2.00 * alpha2 * beta;
    gamma[1][0] = -6.00 * c11 / beta;
    gamma[1][1] = c61 / beta;
    gamma[1][2] = -6.00 * c11 / beta;
    gamma[1][3] = -5.00 / alpha1 / beta;
    gamma[1][4] = - alpha2 * beta;
    gamma[2][0] = -2.00 * c7_3 / beta;
    gamma[2][1] = - c11 / beta;
    gamma[2][2] = 2.00 * c83 / beta;
    gamma[2][3] = 5.00 / alpha1 / beta;
    gamma[2][4] = -2.00 * alpha2 * beta;
    gamma[3][0] = 6.00 / alpha1 / beta;
    gamma[3][1] = - 1.00 / alpha1 / beta;
    gamma[3][2] = 6.00 / alpha1 / beta;
    gamma[3][3] = 5.00 / alpha1 / beta;
    gamma[3][4] = -5.00 * alpha0 * beta;

    vol_vec = malloc_2d(ncells,4);
    memcount += sizeof(double)*4*ncells + sizeof(double*)*ncells ;
    for( i = 0 ; i < ncells ; i++ ){
        vol_vec[i][0] = volcell[i] * gamma[0][4];
        vol_vec[i][1] = volcell[i] * gamma[1][4];
        vol_vec[i][2] = volcell[i] * gamma[2][4];
        vol_vec[i][3] = volcell[i] * gamma[3][4];
    }

    ftlink = malloc_2d(nfaces,16);
    fclink = malloc_2d(nfaces,16);
    memcount += sizeof(double)*16*nfaces*2 + sizeof(double*)*nfaces*2 ;
    ba = malloc_2d(nbcfaces,16);
    bb = malloc_2d(nbcfaces,16);
    bc = malloc_2d(nbcfaces,16);
    memcount += sizeof(double)*16*nbcfaces*3 + sizeof(double*)*nbcfaces*3 ;

    for( i = 0 ; i < nfaces ; i++ ){
        double x1,y1,x2,y2,xv1,xv2,yv1,yv2;
        /* (x1,y1) is the centre of the first adjacent cell; (x2,y2) is the
           centre of the second cell, or the face centre on a boundary face */
        if( bface[i] == 0 ){
            x1 = xc[lfc0[i]];
            y1 = yc[lfc0[i]];
            x2 = xc[lfc1[i]];
            y2 = yc[lfc1[i]];
        }
        else{
            x1 = xc[lfc0[i]];
            y1 = yc[lfc0[i]];
            x2 = xf[i];
            y2 = yf[i];
        }
        xv1 = xv[lfv0[i]];
        yv1 = yv[lfv0[i]];
        xv2 = xv[lfv1[i]];
        yv2 = yv[lfv1[i]];

        /* Projection of (x2-x1, y2-y1) onto (vecfx, vecfy) */
        /* NOTE(review): delta == 0 passes the assert but is used as a
           divisor below - confirm the grid guarantees delta > 0 */
        double delta = vecfx[i] * ( x2 - x1 ) + vecfy[i] * ( y2 - y1 );
        assert(delta >= 0.0 );

        /* Unit vector from the first to the second node of the face, and the
           projection of (x2-x1, y2-y1) onto it */
        double tgtx = ( xv2 - xv1 ) / sqrt( ( xv2 - xv1 ) * ( xv2 - xv1 ) + ( yv2 - yv1 ) * ( yv2 - yv1 ) );
        double tgty = ( yv2 - yv1 ) / sqrt( ( xv2 - xv1 ) * ( xv2 - xv1 ) + ( yv2 - yv1 ) * ( yv2 - yv1 ) );
        double tdotl = ( x2 - x1 ) * tgtx + ( y2 - y1 ) * tgty;

        /* Geometric factors reused by the blocks below */
        c1 = areaf[i] / delta;
        c2 = 2.0 * vecfx[i] * vecfy[i] * c1;
        c3 = ( vecfx[i] * tgty + vecfy[i] * tgtx ) * areaf[i];
        c4 = ( vecfx[i] * tgty - vecfy[i] * tgtx ) * areaf[i];
        c5 = ( vecfx[i] * vecfx[i] - vecfy[i] * vecfy[i] ) * c1;
        c6 = ( vecfx[i] * tgtx - vecfy[i] * tgty ) * areaf[i];

        /* Row 0 */
        fclink[i][0] = gamma[0][0]*c1;
        ftlink[i][0] = - gamma[0][0] * tdotl * c1;
        fclink[i][1] = gamma[0][1]*c2;
        ftlink[i][1] = gamma[0][1]*(c3 - tdotl * c2);
        fclink[i][2] = 0.0;
        ftlink[i][2] = gamma[0][2]*c4;
        fclink[i][3] = gamma[0][3]*c2;
        ftlink[i][3] = gamma[0][3]*(c3 - tdotl * c2);

        /* Row 1 */
        fclink[i][4] = gamma[1][0] * c2 ;
        fclink[i][5] = gamma[1][1] * c1;
        fclink[i][6] = gamma[1][2] * c5 ;
        fclink[i][7] = gamma[1][3] * c1;
        ftlink[i][4] = gamma[1][0] * (c3 - tdotl * c2 );
        ftlink[i][5] = - gamma[1][1] * tdotl * c1;
        ftlink[i][6] = gamma[1][2] * (c6 - tdotl * c5 );
        ftlink[i][7] = - gamma[1][3] * tdotl * c1;

        /* Row 2 */
        fclink[i][8] = 0.0;
        fclink[i][9] = gamma[2][1] * c5;
        fclink[i][10] = gamma[2][2] * c1;
        fclink[i][11] = gamma[2][3] * c5 ;
        ftlink[i][8] = gamma[2][0] * c4 ;
        ftlink[i][9] = gamma[2][1] * (c6 - tdotl * c5);
        /* NOTE(review): the other diagonal entries (0, 5, 15) use
           gamma[r][r], and fclink[i][10] uses gamma[2][2]; gamma[2][1]
           here looks like a possible typo - confirm against the model */
        ftlink[i][10] = - gamma[2][1] * tdotl * c1;
        ftlink[i][11] = gamma[2][3] * (c6 - tdotl * c5);

        /* Row 3 */
        fclink[i][12] = gamma[3][0] * c2;
        fclink[i][13] = gamma[3][1] * c1;
        fclink[i][14] = gamma[3][2] * c5 ;
        fclink[i][15] = gamma[3][3] * c1;
        ftlink[i][12] = gamma[3][0] * (c3 - tdotl * c2 );
        ftlink[i][13] = - gamma[3][1] * tdotl * c1;
        ftlink[i][14] = gamma[3][2] * (c6 - tdotl * c5);
        ftlink[i][15] = - gamma[3][3] * tdotl * c1;

        if( bface[i] != 0 ){
            /* This sinx shadows the function-level sinx; cosz is never used;
               cosx is the function-level variable. */
            double cosz,sinx;
            int currbf = f_to_bf[i];
            /* NOTE(review): every case sets sinx = 0.0, and only the sign of
               cosx differs (BOTTOM/TOP +1, RIGHT/LEFT -1) - confirm this is
               the intended orientation per side */
            switch (bcname[currbf]){
                case BOTTOM:
                    cosx = 1.0;
                    sinx = 0.0;
                    break;
                case RIGHT:
                    cosx = -1.0;
                    sinx = 0.0;
                    break;
                case TOP:
                    cosx = 1.0;
                    sinx = 0.0;
                    break;
                case LEFT:
                    cosx = -1.0;
                    sinx = 0.0;
                    break;
                default:
                    assert(0);
            }

            /* Row 0 of the boundary blocks */
            ba[currbf][0] = -3.0 * sinx / 4.0 - 12.0 * sinx / ( 5.0 * alpha1 * beta * delta );
            ba[currbf][1] = -1.0 / 8.0 - 2.0 / ( 5.0 * alpha1 * beta * delta );
            ba[currbf][2] = -3.0 * cosx / 4.0 - 12.0 * cosx / (5.0 * alpha1 * beta * delta );
            ba[currbf][3] = 1.0 + 2.0 / ( alpha1 * beta * delta );
            bb[currbf][0] = -12.0 * sinx / ( 5.0 * alpha1 * beta * delta );
            bb[currbf][1] = -2.0 / ( 5.0 * alpha1 * beta * delta );
            bb[currbf][2] = -12.0 * cosx / (5.0 * alpha1 * beta * delta );
            bb[currbf][3] = + 2.0 / ( alpha1 * beta * delta );
            bc[currbf][0] = 12.0 * cosx / ( 5.0 * alpha1 * beta ) - 12.0 * sinx * tdotl / ( 5.0 * alpha1 * beta * delta );
            bc[currbf][1] = -2.0 * tdotl / ( 5.0 * alpha1 * beta * delta );
            bc[currbf][2] = -12.0 * sinx / ( 5.0 * alpha1 * beta ) - 12.0 * cosx * tdotl / (5.0 * alpha1 * beta * delta );
            bc[currbf][3] = 2.0 * tdotl / ( alpha1 * delta * beta );

            /* Row 1 */
            ba[currbf][4] = -3.0 * cosx / 2.0 - 12.0 * cosx / ( 5.0 * alpha1 * delta * beta );
            ba[currbf][5] = 0.0;
            ba[currbf][6] = 3.0 * sinx / 2.0 + 12.0 * sinx / (5.0 * alpha1 * delta * beta);
            ba[currbf][7] = 0.0;
            bb[currbf][4] = -12.0 * cosx / ( 5.0 * alpha1 * delta * beta );
            bb[currbf][5] = 0.0;
            bb[currbf][6] = +12.0 * sinx / (5.0 * alpha1 * delta * beta);
            bb[currbf][7] = 0.0;
            bc[currbf][4] = -12.0 * sinx / ( 5.0 * alpha1 * beta ) - 12.0 * cosx * tdotl / ( 5.0 * alpha1 * delta * beta );
            bc[currbf][5] = 2.0 / ( 5.0 * alpha1 * beta );
            bc[currbf][6] = -12.0 * cosx / ( 5.0 * alpha1 * beta ) + 12.0 * sinx * tdotl / (5.0 * alpha1 * delta * beta );
            bc[currbf][7] = -2.0 / ( alpha1 * beta );

            /* Row 2 */
            ba[currbf][8] = 3.0 * sinx + 72.0 * sinx / ( 5.0 * alpha3 * beta * delta );
            ba[currbf][9] = 1.0 / 2.0 + 12.0 / ( 5.0 * alpha3 * beta * delta );
            ba[currbf][10] = 3.0 * cosx + 72.0 * cosx / (5.0 * alpha3 * beta * delta );
            ba[currbf][11] = 1.0 ;
            bb[currbf][8] = +72.0 * sinx / ( 5.0 * alpha3 * delta * beta );
            bb[currbf][9] = +12.0 / ( 5.0 * alpha3 * delta * beta );
            bb[currbf][10] = +72.0 * cosx / (5.0 * alpha3 * delta * beta );
            bb[currbf][11] = 0.0;
            bc[currbf][8] = 48.0 * cosx / ( 5.0 * alpha3 * beta ) + 72.0 * sinx * tdotl / ( 5.0 * alpha3 * beta * delta );
            bc[currbf][9] = 12.0 * tdotl / ( 5.0 * alpha3 * delta * beta );
            bc[currbf][10] = -48.0 * sinx / ( 5.0 * alpha3 * beta ) + 72.0 * cosx * tdotl / (5.0 * alpha3 * delta * beta );
            bc[currbf][11] = 0.0;

            /* Row 3 */
            ba[currbf][12] = sinx / 2.0 + 8.0 * sinx / ( 5.0 * alpha3 * delta * beta );
            ba[currbf][13] = -1.0 / 4.0 - 4.0 / ( 5.0 * alpha3 * beta * delta );
            ba[currbf][14] = cosx / 2.0 + 8.0 * cosx / (5.0 * alpha3 * delta * beta );
            ba[currbf][15] = 0.0 ;
            bb[currbf][12] = 8.0 * sinx / ( 5.0 * alpha3 * delta * beta );
            bb[currbf][13] = -4.0 / ( 5.0 * alpha3 * delta * beta );
            bb[currbf][14] = 8.0 * cosx / (5.0 * alpha3 * delta * beta );
            bb[currbf][15] = 0.0;
            bc[currbf][12] = 16.0 * cosx / ( 5.0 * alpha3 * beta ) + 8.0 * sinx * tdotl / ( 5.0 * alpha3 * beta * delta );
            bc[currbf][13] = -4.0 * tdotl / ( 5.0 * alpha3 * delta * beta );
            bc[currbf][14] = -16.0 * sinx / ( 5.0 * alpha3 * beta ) + 8.0 * cosx * tdotl / (5.0 * alpha3 * delta * beta );
            bc[currbf][15] = 0.0;
        }
    }
}
/*
 * Read an initial temperature field from a text file and distribute it
 * over the processes with MPI_Scatter.
 *
 * Every process opens the file and reads the header "# nx ny"; only rank 0
 * reads the nx * ny values. Each process then receives nx_local * ny
 * values into the interior of temperature1, the ghost layers are filled by
 * replicating the neighbouring interior value, and both fields are handed
 * to copy_field.
 *
 * temperature1  field that receives the local data
 * temperature2  second field, set up with the same dimensions
 * filename      path of the input file
 * parallel      set up here through parallel_initialize
 *
 * Calls MPI_Abort if the file cannot be opened or parsed.
 */
void read_input(field * temperature1, field * temperature2, char *filename,
                parallel_data * parallel)
{
    FILE *fp;
    int nx, ny, i, j;
    double **full_data;
    double **inner_data;
    int nx_local;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        fprintf(stderr, "Could not open input file %s\n", filename);
        MPI_Abort(MPI_COMM_WORLD, -1);
        return;
    }

    // Read the header; nx and ny size everything below, so they must have
    // been parsed and be positive
    if (fscanf(fp, "# %d %d \n", &nx, &ny) != 2 || nx <= 0 || ny <= 0) {
        fprintf(stderr, "Invalid header in input file %s\n", filename);
        fclose(fp);
        MPI_Abort(MPI_COMM_WORLD, -1);
        return;
    }

    parallel_initialize(parallel, nx, ny);
    initialize_field_metadata(temperature1, nx, ny, parallel);
    initialize_field_metadata(temperature2, nx, ny, parallel);

    // Allocate arrays (including ghost layers)
    temperature1->data =
        malloc_2d(temperature1->nx + 2, temperature1->ny + 2);
    temperature2->data =
        malloc_2d(temperature2->nx + 2, temperature2->ny + 2);

    inner_data = malloc_2d(temperature1->nx, temperature1->ny);

    if (parallel->rank == 0) {
        // Full array
        full_data = malloc_2d(nx, ny);
        // Read the actual data
        for (i = 0; i < nx; i++) {
            for (j = 0; j < ny; j++) {
                if (fscanf(fp, "%lf", &full_data[i][j]) != 1) {
                    fprintf(stderr, "Error while reading the input file %s\n",
                            filename);
                    fclose(fp);
                    MPI_Abort(parallel->comm, -1);
                    return;
                }
            }
        }
    } else {
        // dummy array for full data
        full_data = malloc_2d(1, 1);
    }

    nx_local = temperature1->nx;

    MPI_Scatter(full_data[0], nx_local * ny, MPI_DOUBLE, inner_data[0],
                nx_local * ny, MPI_DOUBLE, 0, parallel->comm);

    // Copy to the array containing also boundaries
    for (i = 0; i < nx_local; i++) {
        memcpy(&temperature1->data[i + 1][1], &inner_data[i][0],
               ny * sizeof(double));
    }

    // Set the boundary values: side columns of the interior rows first
    // (row 0 is not touched here because it holds no data yet), then the
    // complete first and last rows, corners included
    for (i = 1; i < nx_local + 1; i++) {
        temperature1->data[i][0] = temperature1->data[i][1];
        temperature1->data[i][ny + 1] = temperature1->data[i][ny];
    }
    for (j = 0; j < ny + 2; j++) {
        temperature1->data[0][j] = temperature1->data[1][j];
        temperature1->data[nx_local + 1][j] =
            temperature1->data[nx_local][j];
    }

    copy_field(temperature1, temperature2);

    free_2d(full_data);
    free_2d(inner_data);
    fclose(fp);
}
/*
 * Read an initial temperature field from a text file and distribute it
 * over a Cartesian process grid.
 *
 * Every process opens the file and reads the header "# nx ny"; only rank 0
 * reads the nx * ny values. Rank 0 keeps its own block and sends every
 * other rank its block, described by parallel->subarraytype and located
 * through the receiver's grid coordinates. The ghost layers are then filled
 * by replicating the neighbouring interior value, and both fields are
 * handed to copy_field.
 *
 * temperature1  field that receives the local data
 * temperature2  second field, set up with the same dimensions
 * filename      path of the input file
 * parallel      set up here through parallel_initialize
 *
 * Calls MPI_Abort if the file cannot be opened or parsed.
 */
void read_input(field * temperature1, field * temperature2, char *filename,
                parallel_data * parallel)
{
    FILE *fp;
    int nx, ny, i, j;
    double **full_data;
    int coords[2];
    int ix, jy, p;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        fprintf(stderr, "Could not open input file %s\n", filename);
        MPI_Abort(MPI_COMM_WORLD, -1);
        return;
    }

    // Read the header; nx and ny size everything below, so they must have
    // been parsed and be positive
    if (fscanf(fp, "# %d %d \n", &nx, &ny) != 2 || nx <= 0 || ny <= 0) {
        fprintf(stderr, "Invalid header in input file %s\n", filename);
        fclose(fp);
        MPI_Abort(MPI_COMM_WORLD, -1);
        return;
    }

    parallel_initialize(parallel, nx, ny);
    initialize_field_metadata(temperature1, nx, ny, parallel);
    initialize_field_metadata(temperature2, nx, ny, parallel);

    // Allocate arrays (including ghost layers)
    temperature1->data =
        malloc_2d(temperature1->nx + 2, temperature1->ny + 2);
    temperature2->data =
        malloc_2d(temperature2->nx + 2, temperature2->ny + 2);

    if (parallel->rank == 0) {
        // Full array
        full_data = malloc_2d(nx, ny);
        // Read the actual data
        for (i = 0; i < nx; i++) {
            for (j = 0; j < ny; j++) {
                if (fscanf(fp, "%lf", &full_data[i][j]) != 1) {
                    fprintf(stderr, "Error while reading the input file %s\n",
                            filename);
                    fclose(fp);
                    MPI_Abort(parallel->comm, -1);
                    return;
                }
            }
        }
        // Copy to own local array
        for (i = 0; i < temperature1->nx; i++) {
            memcpy(&temperature1->data[i + 1][1], full_data[i],
                   temperature1->ny * sizeof(double));
        }
        // Send to other processes
        for (p = 1; p < parallel->size; p++) {
            MPI_Cart_coords(parallel->comm, p, 2, coords);
            ix = coords[0] * temperature1->nx;
            jy = coords[1] * temperature1->ny;
            MPI_Send(&full_data[ix][jy], 1, parallel->subarraytype, p, 44,
                     parallel->comm);
        }
    } else {
        // Receive data
        MPI_Recv(&temperature1->data[1][1], 1, parallel->subarraytype, 0, 44,
                 parallel->comm, MPI_STATUS_IGNORE);
    }

    // Set the boundary values: side columns of the interior rows first
    // (row 0 is not touched here because it holds no data yet), then the
    // complete first and last rows, corners included
    for (i = 1; i < temperature1->nx + 1; i++) {
        temperature1->data[i][0] = temperature1->data[i][1];
        temperature1->data[i][temperature1->ny + 1] =
            temperature1->data[i][temperature1->ny];
    }
    for (j = 0; j < temperature1->ny + 2; j++) {
        temperature1->data[0][j] = temperature1->data[1][j];
        temperature1->data[temperature1->nx + 1][j] =
            temperature1->data[temperature1->nx][j];
    }

    copy_field(temperature1, temperature2);

    if (parallel->rank == 0) {
        free_2d(full_data);
    }

    fclose(fp);
}
/*
 * Stencil benchmark driver.
 *
 * Usage: no argument       -> nx = ny = 2400
 *        one argument      -> nx = ny = argv[1]
 *        two arguments     -> nx = argv[1], ny = argv[2]
 * Anything else, or a non-positive size, prints the usage text and
 * returns -1.
 *
 * Two (nx+2) x (ny+2) arrays are updated alternately, two updates per loop
 * pass, until norm drops below eps or iter exceeds maxiter. Prints the
 * final norm, the iteration count, the elapsed CPU time and the update
 * rate in million lattice updates per second.
 */
int main(int argc, char **argv)
{
    float eps;
    float **u, **unew;
    float norm = 0e0, mlups = 0e0;
    int maxiter, nx, ny, iter, ndef = 2400;
    clock_t t_start, t_end;
    float dt;

    eps = 0.5e-3;
    maxiter = (int)(1e0/eps);

    switch (argc) {
    case 1:
        nx = ndef;
        ny = nx;
        break;
    case 2:
        nx = atoi(argv[1]);
        ny = nx;
        break;
    case 3:
        nx = atoi(argv[1]);
        ny = atoi(argv[2]);
        break;
    default:
        usage(argv[0]);
        return -1;
    }

    // atoi yields 0 for non-numeric input; reject sizes that cannot
    // describe a grid
    if (nx <= 0 || ny <= 0) {
        usage(argv[0]);
        return -1;
    }

    printf("Stencil: nx,ny,maxiter,eps=%d %d %d %18.16f\n",nx, ny, maxiter, eps);

    u = malloc_2d(nx+2, ny+2);
    unew = malloc_2d(nx+2, ny+2);

    // The arrays are (nx+2) x (ny+2), so the second extent of the data
    // clause must be ny+2 for non-square grids
#pragma acc data create(u[0:nx+2][0:ny+2],unew[0:nx+2][0:ny+2])
    {
        init(u, nx, ny);
        init(unew, nx, ny);

        t_start = clock();

        // Start above eps so that the loop runs at least once
        norm = eps + 1;
        iter = 0;
        while (iter <= maxiter && norm >= eps) {
            // Two updates per pass; only the second one computes the norm
            update(unew, u, NULL, nx, ny);
            update(u, unew, &norm, nx, ny);
            iter = iter + 2;
            if (iter % 100 == 0 || norm < eps) {
                printf(": norm, eps= %18.16f %18.16f\n", norm, eps);
            }
        }
    }

    free_2d(u);
    free_2d(unew);

    // Multiply in double: iter * nx * ny exceeds INT_MAX already for the
    // default 2400 x 2400 grid
    mlups = (float)((double)iter * nx * ny * 1.0e-6);
    t_end = clock();
    dt = ((float)(t_end-t_start)) / CLOCKS_PER_SEC;
    printf("'Stencil: norm =%18.16f with iter = %d\n",norm,iter);
    printf("'Stencil: Time =%18.16f sec, MLups/s=%18.16f\n",dt, (float) mlups/dt);

    return 0;
}