/** Initialize the cell interactions for the domain-decomposition cell
 *  system. Initializes the list of interacting neighbor cells of each
 *  local cell. The created list of interacting neighbor cells is used
 *  by the Verlet algorithm (see verlet.c) to build the Verlet lists.
 */
void dd_init_cell_interactions()
{
  int m, n, o, p, q, r, ind1, ind2, c_cnt = 0, n_cnt;

  /* initialize cell neighbor structures */
  dd.cell_inter = (IA_Neighbor_List *) realloc(dd.cell_inter, local_cells.n*sizeof(IA_Neighbor_List));
  for (m = 0; m < local_cells.n; m++) {
    dd.cell_inter[m].nList = NULL;
    dd.cell_inter[m].n_neighbors = 0;
  }

  /* loop over all local cells */
  DD_LOCAL_CELLS_LOOP(m, n, o) {
    dd.cell_inter[c_cnt].nList = (IA_Neighbor *) realloc(dd.cell_inter[c_cnt].nList, CELLS_MAX_NEIGHBORS*sizeof(IA_Neighbor));
    dd.cell_inter[c_cnt].n_neighbors = CELLS_MAX_NEIGHBORS;

    n_cnt = 0;
    ind1 = get_linear_index(m, n, o, dd.ghost_cell_grid);
    /* loop over all neighbor cells */
    for (p = o-1; p <= o+1; p++)
      for (q = n-1; q <= n+1; q++)
        for (r = m-1; r <= m+1; r++) {
          ind2 = get_linear_index(r, q, p, dd.ghost_cell_grid);
          if (ind2 >= ind1) {
            dd.cell_inter[c_cnt].nList[n_cnt].cell_ind = ind2;
            dd.cell_inter[c_cnt].nList[n_cnt].pList    = &cells[ind2];
            init_pairList(&dd.cell_inter[c_cnt].nList[n_cnt].vList);
            n_cnt++;
          }
        }
    c_cnt++;
  }
}
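/* get_linear_index() is used throughout this section to map a 3D grid
 * coordinate onto the 1D cell/field arrays. A minimal sketch of the
 * assumed x-fastest ordering is given below; the offsets used elsewhere
 * (e.g. node_index[2] = node_index[0] + halo_grid[0] in
 * map_position_to_lattice(), or yperiod/zperiod in lb_bounce_back())
 * are consistent with this convention. The helper name and signature
 * here are illustrative, not taken from the actual headers.
 */
static inline int linear_index_sketch(int x, int y, int z, const int grid[3])
{
  /* x runs fastest, then y, then z */
  return x + grid[0]*(y + grid[1]*z);
}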
void Lattice::get_data_for_local_index(index_t* ind, void** data) {
  index_t index_in_halogrid[3];
  index_in_halogrid[0] = ind[0] + this->halo_size;
  index_in_halogrid[1] = ind[1] + this->halo_size;
  index_in_halogrid[2] = ind[2] + this->halo_size;
  (*data) = ((char*)this->_data)
          + get_linear_index(index_in_halogrid[0], index_in_halogrid[1], index_in_halogrid[2], this->halo_grid)*this->element_size;
}
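/* Usage sketch (assumptions: a Lattice "lat" initialized with dim = 3 so
 * that each site stores three doubles, and _data already allocated). It
 * reads the field stored at the local, non-halo site (i,j,k); the
 * function and variable names are illustrative only.
 */
void print_local_site(Lattice& lat, index_t i, index_t j, index_t k)
{
  index_t ind[3] = { i, j, k };
  double* site;
  lat.get_data_for_local_index(ind, (void**)&site);
  /* with element_size = dim*sizeof(double), site points at dim doubles */
  printf("site (%ld,%ld,%ld): %f %f %f\n",
         (long)i, (long)j, (long)k, site[0], site[1], site[2]);
}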
/** Calculate temperature of the LB fluid.
 * \param result Fluid temperature
 */
void lb_calc_fluid_temp(double *result) {
  int x, y, z, index;
  double local_rho, local_j2;
  double temp = 0.0;

  for (x=1; x<=lblattice.grid[0]; x++) {
    for (y=1; y<=lblattice.grid[1]; y++) {
      for (z=1; z<=lblattice.grid[2]; z++) {
        index = get_linear_index(x,y,z,lblattice.halo_grid);

        lb_calc_local_j(&lbfluid[index]);
        lb_calc_local_rho(&lbfluid[index]);
        local_rho = *lbfluid[index].rho;
        local_j2  = scalar(lbfluid[index].j,lbfluid[index].j);

        temp += local_j2;
      }
    }
  }

  temp *= 1./(lbpar.rho*lblattice.grid_volume*lbpar.tau*lbpar.tau*pow(lblattice.agrid,4));

  MPI_Reduce(&temp, result, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
}
/** Calculate momentum of the LB fluid.
 * \param result Fluid momentum
 */
void lb_calc_fluid_momentum(double *result) {
  int x, y, z, index;
  double j[3], momentum[3] = { 0.0, 0.0, 0.0 };

  for (x=1; x<=lblattice.grid[0]; x++) {
    for (y=1; y<=lblattice.grid[1]; y++) {
      for (z=1; z<=lblattice.grid[2]; z++) {
        index = get_linear_index(x,y,z,lblattice.halo_grid);

        lb_calc_local_j(index,j);
        momentum[0] += j[0] + lbfields[index].force[0];
        momentum[1] += j[1] + lbfields[index].force[1];
        momentum[2] += j[2] + lbfields[index].force[2];
      }
    }
  }

  momentum[0] *= lblattice.agrid/lbpar.tau;
  momentum[1] *= lblattice.agrid/lbpar.tau;
  momentum[2] *= lblattice.agrid/lbpar.tau;

  MPI_Reduce(momentum, result, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
}
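/* Usage sketch: the observable functions above reduce their sums with
 * MPI_Reduce to rank 0, so every node must call them, but only the
 * master node receives a valid result. The driver below is illustrative
 * and not part of the original sources.
 */
void report_fluid_observables()
{
  double temp = 0.0, momentum[3] = { 0.0, 0.0, 0.0 };
  lb_calc_fluid_temp(&temp);          /* collective: all nodes participate */
  lb_calc_fluid_momentum(momentum);
  if (this_node == 0)                 /* results only meaningful on rank 0 */
    printf("T = %f  momentum = (%f, %f, %f)\n",
           temp, momentum[0], momentum[1], momentum[2]);
}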
/** Initialize a planar boundary specified by a wall constraint.
 * @param plane The \ref Constraint_wall struct describing the boundary.
 */
static void lb_init_constraint_wall(Constraint_wall* plane) {
  int x, y, z;
  double pos[3], dist;

  for (x=0; x<lblattice.halo_grid[0]; x++) {
    for (y=0; y<lblattice.halo_grid[1]; y++) {
      for (z=0; z<lblattice.halo_grid[2]; z++) {
        pos[0] = my_left[0] + (x-1)*lblattice.agrid;
        pos[1] = my_left[1] + (y-1)*lblattice.agrid;
        pos[2] = my_left[2] + (z-1)*lblattice.agrid;

        dist = scalar(pos,plane->n) - plane->d;

        if (fabs(dist) < lblattice.agrid) {
          lbfluid[get_linear_index(x,y,z,lblattice.halo_grid)].boundary = 1;
        }
      }
    }
  }
}
int Lattice::global_pos_to_lattice_index_checked(double pos[3], int* index) {
  int i;
  for (i=0; i<3; i++)
    if (fabs(fmod(pos[i]-this->offset[i],this->agrid[i])) > ROUND_ERROR_PREC)
      return ES_ERROR;

  int ind[3];
  for (i=0; i<3; i++)
    ind[i] = (int) round((pos[i]-this->offset[i])/this->agrid[i]);

  *index = get_linear_index(this->halo_size + ind[0],
                            this->halo_size + ind[1],
                            this->halo_size + ind[2],
                            this->halo_grid);
  return ES_OK;
}
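/* Usage sketch: the checked lookup returns ES_ERROR when the position does
 * not coincide with a lattice site (within ROUND_ERROR_PREC), so callers
 * should test the return value before using the index. The wrapper below
 * is illustrative only and not part of the original sources.
 */
int try_lookup_site(Lattice& lat, double pos[3])
{
  int index;
  if (lat.global_pos_to_lattice_index_checked(pos, &index) != ES_OK) {
    fprintf(stderr, "position (%f,%f,%f) is not a lattice site\n",
            pos[0], pos[1], pos[2]);
    return -1;
  }
  return index;  /* linear index into the halo grid */
}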
int Lattice::init(double *agrid, double* offset, int halo_size, size_t dim) {
  this->dim = dim;

  /* determine the number of local lattice nodes */
  for (int d=0; d<3; d++) {
    this->agrid[d] = agrid[d];
    this->global_grid[d] = (int)dround(box_l[d]/agrid[d]);
    this->offset[d] = offset[d];
    this->local_index_offset[d] = (int) ceil((my_left[d]-this->offset[d])/this->agrid[d]);
    this->local_offset[d] = this->offset[d] + this->local_index_offset[d]*this->agrid[d];
    this->grid[d] = (int) ceil((my_right[d] - this->local_offset[d] - ROUND_ERROR_PREC)/this->agrid[d]);
  }

  /* sanity checks */
  for (int dir=0; dir<3; dir++) {
    /* check if local_box_l is compatible with lattice spacing */
    if (fabs(local_box_l[dir]-this->grid[dir]*agrid[dir]) > ROUND_ERROR_PREC*box_l[dir]) {
      char *errtxt = runtime_error(256);
      ERROR_SPRINTF(errtxt,
                    "{097 Lattice spacing agrid[%d]=%f "
                    "is incompatible with local_box_l[%d]=%f "
                    "(box_l[%d]=%f node_grid[%d]=%d)} ",
                    dir, agrid[dir],
                    dir, local_box_l[dir],
                    dir, box_l[dir],
                    dir, node_grid[dir]);
    }
  }

  this->element_size = this->dim*sizeof(double);

  LATTICE_TRACE(fprintf(stderr,"%d: box_l (%.3f,%.3f,%.3f) grid (%d,%d,%d) node_neighbors (%d,%d,%d,%d,%d,%d)\n",this_node,local_box_l[0],local_box_l[1],local_box_l[2],this->grid[0],this->grid[1],this->grid[2],node_neighbors[0],node_neighbors[1],node_neighbors[2],node_neighbors[3],node_neighbors[4],node_neighbors[5]));

  this->halo_size = halo_size;
  /* determine the total number of nodes including the halo */
  this->halo_grid[0] = this->grid[0] + 2*halo_size;
  this->halo_grid[1] = this->grid[1] + 2*halo_size;
  this->halo_grid[2] = this->grid[2] + 2*halo_size;

  this->grid_volume = this->grid[0]*this->grid[1]*this->grid[2];
  this->halo_grid_volume = this->halo_grid[0]*this->halo_grid[1]*this->halo_grid[2];
  this->halo_grid_surface = this->halo_grid_volume - this->grid_volume;
  this->halo_offset = get_linear_index(halo_size,halo_size,halo_size,this->halo_grid);

  this->interpolation_type = INTERPOLATION_LINEAR;

  allocate_memory();
  return ES_OK;
}
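/* Initialization sketch: a cubic lattice with spacing 1.0, no offset, a
 * one-site halo and one double per site. The values and the name
 * "my_lattice" are illustrative assumptions, not taken from the sources.
 */
void setup_example_lattice(Lattice& my_lattice)
{
  double agrid[3]  = { 1.0, 1.0, 1.0 };
  double offset[3] = { 0.0, 0.0, 0.0 };
  if (my_lattice.init(agrid, offset, /*halo_size=*/1, /*dim=*/1) != ES_OK) {
    fprintf(stderr, "lattice initialization failed\n");
  }
}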
void Lattice::interpolate_linear(double* pos, double* value) { int left_halo_index[3]; double d[3]; if (this->halo_size <= 0) { char* c = runtime_error(128); ERROR_SPRINTF(c, "Error in interpolate_linear: halo size is 0"); return; } for (int dim = 0; dim<3; dim++) { left_halo_index[dim]=(int) floor((pos[dim]-this->local_offset[dim])/this->agrid[dim]) + this->halo_size; d[dim]=((pos[dim]-this->local_offset[dim])/this->agrid[dim] - floor((pos[dim]-this->local_offset[dim])/this->agrid[dim])); if (left_halo_index[dim] < 0 || left_halo_index[dim] >= this->halo_grid[dim]) { char* c = runtime_error(128); ERROR_SPRINTF(c, "Error in interpolate_linear: Particle out of range"); return; } } double w[8]; index_t index[8]; w[0] = (1-d[0])*(1-d[1])*(1-d[2]); index[0]=get_linear_index( left_halo_index[0], left_halo_index[1], left_halo_index[2], this->halo_grid); w[1] = ( +d[0])*(1-d[1])*(1-d[2]); index[1]=get_linear_index( left_halo_index[0]+1, left_halo_index[1], left_halo_index[2], this->halo_grid); w[2] = (1-d[0])*( +d[1])*(1-d[2]); index[2]=get_linear_index( left_halo_index[0], left_halo_index[1]+1, left_halo_index[2], this->halo_grid); w[3] = ( +d[0])*( +d[1])*(1-d[2]); index[3]=get_linear_index( left_halo_index[0]+1, left_halo_index[1]+1, left_halo_index[2], this->halo_grid); w[4] = (1-d[0])*(1-d[1])*( +d[2]); index[4]=get_linear_index( left_halo_index[0], left_halo_index[1], left_halo_index[2]+1, this->halo_grid); w[5] = ( +d[0])*(1-d[1])*( +d[2]); index[5]=get_linear_index( left_halo_index[0]+1, left_halo_index[1], left_halo_index[2]+1, this->halo_grid); w[6] = (1-d[0])*( +d[1])*( +d[2]); index[6]=get_linear_index( left_halo_index[0], left_halo_index[1]+1, left_halo_index[2]+1, this->halo_grid); w[7] = ( +d[0])*( +d[1])*( +d[2]); index[7]=get_linear_index( left_halo_index[0]+1, left_halo_index[1]+1, left_halo_index[2]+1, this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { value[i] = 0; } double* local_value; for (unsigned int i=0; i<8; i++) { get_data_for_linear_index(index[i], (void**) &local_value); for (unsigned int j = 0; j<this->dim; j++) { value[j]+=w[i]*local_value[j]; } } }
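/* The eight weights used in interpolate_linear() above are the standard
 * trilinear factors w = prod_k (d[k] or 1-d[k]), one factor per dimension
 * depending on whether the upper or lower neighbor is taken. A compact,
 * equivalent way to generate them (a sketch only, not the code actually
 * used above):
 */
static void trilinear_weights_sketch(const double d[3], double w[8])
{
  for (int corner = 0; corner < 8; corner++) {
    w[corner] = 1.0;
    for (int k = 0; k < 3; k++) {
      /* bit k of the corner id selects the upper (+d) or lower (1-d) factor */
      w[corner] *= ((corner >> k) & 1) ? d[k] : 1.0 - d[k];
    }
  }
}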
/** Calculate the mass of the LB fluid.
 * \param result Fluid mass
 */
void lb_calc_fluid_mass(double *result) {
  int x, y, z, index;
  double mass = 0.0;

  for (x=1; x<=lblattice.grid[0]; x++) {
    for (y=1; y<=lblattice.grid[1]; y++) {
      for (z=1; z<=lblattice.grid[2]; z++) {
        index = get_linear_index(x,y,z,lblattice.halo_grid);

        lb_calc_local_rho(&lbfluid[index]);
        mass += *lbfluid[index].rho;
      }
    }
  }

  MPI_Reduce(&mass, result, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
}
/** Calculate the mass of the LB fluid.
 * \param result Fluid mass
 */
void lb_calc_fluid_mass(double *result) {
  int x, y, z, index;
  double sum_rho = 0.0, rho = 0.0;

  for (x=1; x<=lblattice.grid[0]; x++) {
    for (y=1; y<=lblattice.grid[1]; y++) {
      for (z=1; z<=lblattice.grid[2]; z++) {
        index = get_linear_index(x,y,z,lblattice.halo_grid);

        lb_calc_local_rho(index,&rho);
        sum_rho += rho;
      }
    }
  }

  MPI_Reduce(&sum_rho, result, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
}
/** Fill a communication cell pointer list. Fill the cell pointers of
    all cells which are inside a rectangular subgrid of the 3D cell
    grid (\ref DomainDecomposition::ghost_cell_grid) starting from the
    lower left corner lc up to the high top corner hc. The cell
    pointer list part_lists must already be large enough.
    \param part_lists List of cell pointers to store the result.
    \param lc         lower left corner of the subgrid.
    \param hc         high up corner of the subgrid.
 */
int dd_fill_comm_cell_lists(Cell **part_lists, int lc[3], int hc[3])
{
  int i, m, n, o, c = 0;

  /* sanity check */
  for (i=0; i<3; i++) {
    if (lc[i]<0 || lc[i] >= dd.ghost_cell_grid[i]) return 0;
    if (hc[i]<0 || hc[i] >= dd.ghost_cell_grid[i]) return 0;
    if (lc[i] > hc[i]) return 0;
  }

  for (o=lc[0]; o<=hc[0]; o++)
    for (n=lc[1]; n<=hc[1]; n++)
      for (m=lc[2]; m<=hc[2]; m++) {
        i = get_linear_index(o,n,m,dd.ghost_cell_grid);
        CELL_TRACE(fprintf(stderr,"%d: dd_fill_comm_cell_list: add cell %d\n",this_node,i));
        part_lists[c] = &cells[i];
        c++;
      }
  return c;
}
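/* Caller sketch: the loop above visits every cell in the inclusive box
 * [lc,hc], so part_lists must hold at least
 * (hc[0]-lc[0]+1)*(hc[1]-lc[1]+1)*(hc[2]-lc[2]+1) pointers. The helper
 * below is an illustrative way to size the buffer before calling; it is
 * not part of the original sources.
 */
int comm_cell_list_size(const int lc[3], const int hc[3])
{
  int n = 1;
  for (int i = 0; i < 3; i++)
    n *= hc[i] - lc[i] + 1;
  return n;  /* matches the count returned by dd_fill_comm_cell_lists() */
}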
void Lattice::map_position_to_lattice(const double pos[3], index_t node_index[8], double delta[6]) {
  int dir, ind[3];
  double lpos, rel;

  /* determine the elementary lattice cell containing the particle
     and the relative position of the particle in this cell */
  for (dir=0; dir<3; dir++) {
    lpos = pos[dir] - my_left[dir];
    rel = lpos/this->agrid[dir] + 0.5; // +1 for halo offset
    ind[dir] = (int)floor(rel);

    /* surrounding elementary cell is not completely inside this box,
       adjust if this is due to round off errors */
    if (ind[dir] < 0) {
      if (fabs(rel) < ROUND_ERROR_PREC) {
        ind[dir] = 0; // TODO
      } else {
        fprintf(stderr,"%d: map_position_to_lattice: position (%f,%f,%f) not inside a local plaquette in dir %d ind[dir]=%d rel=%f lpos=%f.\n",this_node,pos[0],pos[1],pos[2],dir,ind[dir],rel,lpos);
      }
    }
    else if (ind[dir] > this->grid[dir]) {
      if (lpos - local_box_l[dir] < ROUND_ERROR_PREC*local_box_l[dir])
        ind[dir] = this->grid[dir];
      else
        fprintf(stderr,"%d: map_position_to_lattice: position (%f,%f,%f) not inside a local plaquette in dir %d ind[dir]=%d rel=%f lpos=%f.\n",this_node,pos[0],pos[1],pos[2],dir,ind[dir],rel,lpos);
    }

    delta[3+dir] = rel - ind[dir]; // delta_x/a
    delta[dir]   = 1.0 - delta[3+dir];
  }

  node_index[0] = get_linear_index(ind[0],ind[1],ind[2],this->halo_grid);
  node_index[1] = node_index[0] + 1;
  node_index[2] = node_index[0] + this->halo_grid[0];
  node_index[3] = node_index[0] + this->halo_grid[0] + 1;
  node_index[4] = node_index[0] + this->halo_grid[0]*this->halo_grid[1];
  node_index[5] = node_index[4] + 1;
  node_index[6] = node_index[4] + this->halo_grid[0];
  node_index[7] = node_index[4] + this->halo_grid[0] + 1;
}
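/* Interpolation sketch: the eight node indices and six delta values
 * returned above form the usual trilinear stencil. delta[dir] is the
 * weight of the lower neighbor and delta[3+dir] that of the upper
 * neighbor in direction dir; node_index[c] enumerates the corners with
 * bit 0/1/2 of c selecting the upper neighbor in x/y/z. The loop below
 * shows how a scalar field stored on the halo grid could be
 * interpolated; "field" and "lat" are illustrative names, not part of
 * the original sources.
 */
double interpolate_scalar_sketch(Lattice& lat, const double pos[3], const double* field)
{
  index_t node_index[8];
  double delta[6], value = 0.0;
  lat.map_position_to_lattice(pos, node_index, delta);
  for (int c = 0; c < 8; c++) {
    double w = 1.0;
    for (int d = 0; d < 3; d++)
      w *= ((c >> d) & 1) ? delta[3+d] : delta[d];
    value += w * field[node_index[c]];
  }
  return value;
}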
/** Calculate temperature of the LB fluid.
 * \param result Fluid temperature
 */
void lb_calc_fluid_temp(double *result) {
  int x, y, z, index;
  double rho, j[3];
  double temp = 0.0;

  for (x=1; x<=lblattice.grid[0]; x++) {
    for (y=1; y<=lblattice.grid[1]; y++) {
      for (z=1; z<=lblattice.grid[2]; z++) {
        index = get_linear_index(x,y,z,lblattice.halo_grid);

        lb_calc_local_fields(index, &rho, j, NULL);
        temp += scalar(j,j);
      }
    }
  }

  temp *= 1./(3.*lbpar.rho*lblattice.grid_volume*lbpar.tau*lbpar.tau*lblattice.agrid);

  MPI_Reduce(&temp, result, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
}
/** Initialize lattice.
 *
 * This function initializes the variables describing the lattice
 * layout. Important: The lattice data is <em>not</em> allocated here!
 *
 * \param lattice pointer to the lattice
 * \param agrid   lattice spacing
 * \param tau     time step for lattice dynamics
 */
void init_lattice(Lattice *lattice, double agrid, double tau) {
  int dir;

  /* determine the number of local lattice nodes */
  lattice->grid[0] = local_box_l[0]/agrid;
  lattice->grid[1] = local_box_l[1]/agrid;
  lattice->grid[2] = local_box_l[2]/agrid;

  /* sanity checks */
  for (dir=0; dir<3; dir++) {
    /* check if local_box_l is compatible with lattice spacing */
    if (fabs(local_box_l[dir]-lattice->grid[dir]*agrid) > ROUND_ERROR_PREC*box_l[dir]) {
      char *errtxt = runtime_error(128);
      ERROR_SPRINTF(errtxt, "{097 Lattice spacing agrid=%f is incompatible with local_box_l[%d]=%f (box_l[%d]=%f node_grid[%d]=%d) %f} ",agrid,dir,local_box_l[dir],dir,box_l[dir],dir,node_grid[dir],local_box_l[dir]-lattice->grid[dir]*agrid);
      return;
    }
  }

  /* set the lattice spacing */
  lattice->agrid = agrid;
  lattice->tau   = tau;

  LATTICE_TRACE(fprintf(stderr,"%d: box_l (%.3f,%.3f,%.3f) grid (%d,%d,%d) node_neighbors (%d,%d,%d,%d,%d,%d)\n",this_node,local_box_l[0],local_box_l[1],local_box_l[2],lattice->grid[0],lattice->grid[1],lattice->grid[2],node_neighbors[0],node_neighbors[1],node_neighbors[2],node_neighbors[3],node_neighbors[4],node_neighbors[5]));

  /* determine the total number of nodes including the halo */
  lattice->halo_grid[0] = lattice->grid[0] + 2;
  lattice->halo_grid[1] = lattice->grid[1] + 2;
  lattice->halo_grid[2] = lattice->grid[2] + 2;

  lattice->grid_volume = lattice->grid[0]*lattice->grid[1]*lattice->grid[2];
  lattice->halo_grid_volume = lattice->halo_grid[0]*lattice->halo_grid[1]*lattice->halo_grid[2];
  lattice->halo_grid_surface = lattice->halo_grid_volume - lattice->grid_volume;
  lattice->halo_offset = get_linear_index(1,1,1,lattice->halo_grid);
}
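/* Worked example (assuming the x-fastest linear indexing sketched near the
 * top of this section): with local_box_l = (5,10,10) and agrid = 1.0 the
 * local grid is (5,10,10), the halo grid (7,12,12), grid_volume = 500,
 * halo_grid_volume = 1008, halo_grid_surface = 508, and
 * halo_offset = get_linear_index(1,1,1,halo_grid) = 1 + 7*1 + 7*12*1 = 92,
 * i.e. the linear index of the first non-halo site.
 */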
/** Calculate a velocity profile for the LB fluid. */
void lb_calc_velocity_profile(double *velprof, int vcomp, int pdir, int x1, int x2) {
  int index, dir[3];
  double local_rho, local_j;

  /* \todo generalize and parallelize */

  dir[(pdir+1)%3] = x1;
  dir[(pdir+2)%3] = x2;
  for (dir[pdir]=1; dir[pdir]<=lblattice.grid[pdir]; dir[pdir]++) {
    index = get_linear_index(dir[0],dir[1],dir[2],lblattice.halo_grid);

    lb_calc_local_j(&lbfluid[index]);
    lb_calc_local_rho(&lbfluid[index]);
    local_rho = *lbfluid[index].rho;
    local_j   = lbfluid[index].j[vcomp];

    if (local_j == 0) {
      velprof[dir[pdir]-1] = 0.0;
    } else {
      velprof[dir[pdir]-1] = local_j/local_rho * lblattice.agrid/lbpar.tau;
    }
  }
}
void lb_bounce_back() { #ifdef D3Q19 #ifndef PULL int k,i,l; int yperiod = lblattice.halo_grid[0]; int zperiod = lblattice.halo_grid[0]*lblattice.halo_grid[1]; int next[19]; int x,y,z; double population_shift; double modes[19]; next[0] = 0; // ( 0, 0, 0) = next[1] = 1; // ( 1, 0, 0) + next[2] = - 1; // (-1, 0, 0) next[3] = yperiod; // ( 0, 1, 0) + next[4] = - yperiod; // ( 0,-1, 0) next[5] = zperiod; // ( 0, 0, 1) + next[6] = - zperiod; // ( 0, 0,-1) next[7] = (1+yperiod); // ( 1, 1, 0) + next[8] = - (1+yperiod); // (-1,-1, 0) next[9] = (1-yperiod); // ( 1,-1, 0) next[10] = - (1-yperiod); // (-1, 1, 0) + next[11] = (1+zperiod); // ( 1, 0, 1) + next[12] = - (1+zperiod); // (-1, 0,-1) next[13] = (1-zperiod); // ( 1, 0,-1) next[14] = - (1-zperiod); // (-1, 0, 1) + next[15] = (yperiod+zperiod); // ( 0, 1, 1) + next[16] = - (yperiod+zperiod); // ( 0,-1,-1) next[17] = (yperiod-zperiod); // ( 0, 1,-1) next[18] = - (yperiod-zperiod); // ( 0,-1, 1) + int reverse[] = { 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17 }; /* bottom-up sweep */ // for (k=lblattice.halo_offset;k<lblattice.halo_grid_volume;k++) { for (z=0; z<lblattice.grid[2]+2; z++) { for (y=0; y<lblattice.grid[1]+2; y++) { for (x=0; x<lblattice.grid[0]+2; x++) { k= get_linear_index(x,y,z,lblattice.halo_grid); if (lbfields[k].boundary) { lb_calc_modes(k, modes); for (i=0; i<19; i++) { population_shift=0; for (l=0; l<3; l++) { population_shift-=lbpar.agrid*lbpar.agrid*lbpar.agrid*lbpar.agrid*lbpar.agrid*lbpar.rho[0]*2*lbmodel.c[i][l]*lbmodel.w[i]*lb_boundaries[lbfields[k].boundary-1].velocity[l]/lbmodel.c_sound_sq; } if ( x-lbmodel.c[i][0] > 0 && x -lbmodel.c[i][0] < lblattice.grid[0]+1 && y-lbmodel.c[i][1] > 0 && y -lbmodel.c[i][1] < lblattice.grid[1]+1 && z-lbmodel.c[i][2] > 0 && z -lbmodel.c[i][2] < lblattice.grid[2]+1) { if ( !lbfields[k-next[i]].boundary ) { for (l=0; l<3; l++) { lb_boundaries[lbfields[k].boundary-1].force[l]+=(2*lbfluid[1][i][k]+population_shift)*lbmodel.c[i][l]; } lbfluid[1][reverse[i]][k-next[i]] = lbfluid[1][i][k]+ population_shift; } else { lbfluid[1][reverse[i]][k-next[i]] = lbfluid[1][i][k] = 0.0; } } } } } } } #else #error Bounce back boundary conditions are only implemented for PUSH scheme! #endif #else #error Bounce back boundary conditions are only implemented for D3Q19! #endif }
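/* The hard-coded next[] table in lb_bounce_back() is the linear-index
 * offset of the neighbor reached by velocity c_i under the assumed
 * x-fastest halo-grid indexing:
 *   next[i] = c_i[0] + yperiod*c_i[1] + zperiod*c_i[2]
 * with yperiod = halo_grid[0] and zperiod = halo_grid[0]*halo_grid[1].
 * A sketch that would fill the table from a generic velocity set
 * (illustrative only, not the code used above):
 */
static void fill_next_offsets_sketch(const int c[19][3], const int halo_grid[3], int next[19])
{
  const int yperiod = halo_grid[0];
  const int zperiod = halo_grid[0]*halo_grid[1];
  for (int i = 0; i < 19; i++)
    next[i] = c[i][0] + yperiod*c[i][1] + zperiod*c[i][2];
}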
void Lattice::interpolate_linear_gradient(double* pos, double* value) { int left_halo_index[3]; double d[3]; if (this->halo_size <= 0) { runtimeErrorMsg() << "Error in interpolate_linear: halo size is 0"; return; } for (int dim = 0; dim<3; dim++) { left_halo_index[dim]=(int) floor((pos[dim]-this->local_offset[dim])/this->agrid[dim]) + this->halo_size; d[dim]=((pos[dim]-this->local_offset[dim])/this->agrid[dim] - floor((pos[dim]-this->local_offset[dim])/this->agrid[dim])); if (left_halo_index[dim] < 0 || left_halo_index[dim] >= this->halo_grid[dim]) { runtimeErrorMsg() <<"Error in interpolate_linear: Particle out of range"; return; } } index_t index; double* local_value; for (unsigned int i = 0; i<3*this->dim; i++) { value[i] = 0; } index=get_linear_index( left_halo_index[0], left_halo_index[1], left_halo_index[2], this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { get_data_for_linear_index(index, (void**) &local_value); value[3*i ]+= ( -1 )*(1-d[1])*(1-d[2]) * local_value[i] / this->agrid[0]; value[3*i+1]+= (1-d[0])*( -1 )*(1-d[2]) * local_value[i] / this->agrid[1]; value[3*i+2]+= (1-d[0])*(1-d[1])*( -1 ) * local_value[i] / this->agrid[2]; } index=get_linear_index( left_halo_index[0]+1, left_halo_index[1], left_halo_index[2], this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { get_data_for_linear_index(index, (void**) &local_value); value[3*i ]+= ( +1 )*(1-d[1])*(1-d[2]) * local_value[i] / this->agrid[0]; value[3*i+1]+= ( +d[0])*( -1 )*(1-d[2]) * local_value[i] / this->agrid[1]; value[3*i+2]+= ( +d[0])*(1-d[1])*( -1 ) * local_value[i] / this->agrid[2]; } index=get_linear_index( left_halo_index[0], left_halo_index[1]+1, left_halo_index[2], this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { get_data_for_linear_index(index, (void**) &local_value); value[3*i ]+= ( -1 )*( +d[1])*(1-d[2]) * local_value[i] / this->agrid[0]; value[3*i+1]+= (1-d[0])*( +1 )*(1-d[2]) * local_value[i] / this->agrid[1]; value[3*i+2]+= (1-d[0])*( +d[1])*( -1 ) * local_value[i] / this->agrid[2]; } index=get_linear_index( left_halo_index[0]+1, left_halo_index[1]+1, left_halo_index[2], this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { get_data_for_linear_index(index, (void**) &local_value); value[3*i ]+= ( +1 )*( +d[1])*(1-d[2]) * local_value[i] / this->agrid[0]; value[3*i+1]+= ( +d[0])*( +1 )*(1-d[2]) * local_value[i] / this->agrid[1]; value[3*i+2]+= ( +d[0])*( +d[1])*( -1 ) * local_value[i] / this->agrid[2]; } index=get_linear_index( left_halo_index[0] , left_halo_index[1] , left_halo_index[2] + 1, this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { get_data_for_linear_index(index, (void**) &local_value); value[3*i ]+= ( -1 )*(1-d[1])*( +d[2]) * local_value[i] / this->agrid[0]; value[3*i+1]+= (1-d[0])*( -1 )*( +d[2]) * local_value[i] / this->agrid[1]; value[3*i+2]+= (1-d[0])*(1-d[1])*( +1 ) * local_value[i] / this->agrid[2]; } index=get_linear_index( left_halo_index[0]+1, left_halo_index[1], left_halo_index[2]+1, this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { get_data_for_linear_index(index, (void**) &local_value); value[3*i ]+= ( +1 )*(1-d[1])*( +d[2]) * local_value[i] / this->agrid[0]; value[3*i+1]+= ( +d[0])*( -1 )*( +d[2]) * local_value[i] / this->agrid[1]; value[3*i+2]+= ( +d[0])*(1-d[1])*( +1 ) * local_value[i] / this->agrid[2]; } index=get_linear_index( left_halo_index[0], left_halo_index[1]+1, left_halo_index[2]+1, this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { get_data_for_linear_index(index, (void**) &local_value); 
value[3*i ]+= ( -1 )*( +d[1])*( +d[2]) * local_value[i] / this->agrid[0]; value[3*i+1]+= (1-d[0])*( +1 )*( +d[2]) * local_value[i] / this->agrid[1]; value[3*i+2]+= (1-d[0])*( +d[1])*( +1 ) * local_value[i] / this->agrid[2]; } index=get_linear_index( left_halo_index[0]+1, left_halo_index[1]+1, left_halo_index[2]+1, this->halo_grid); for (unsigned int i = 0; i<this->dim; i++) { get_data_for_linear_index(index, (void**) &local_value); value[3*i ]+= ( +1 )*( +d[1])*( +d[2]) * local_value[i] / this->agrid[0]; value[3*i+1]+= ( +d[0])*( +1 )*( +d[2]) * local_value[i] / this->agrid[1]; value[3*i+2]+= ( +d[0])*( +d[1])*( +1 ) * local_value[i] / this->agrid[2]; } }
/** Calculate a velocity profile for the LB fluid. */ void lb_calc_velprof(double *result, int *params) { int index, dir[3], grid[3]; int newroot=0, subrank, involved=0; double rho, j[3], *velprof; MPI_Comm slice_comm = NULL; MPI_Status status; if (this_node != 0) params = malloc(4*sizeof(int)); MPI_Bcast(params, 4, MPI_INT, 0, MPI_COMM_WORLD); int vcomp = params[0]; int pdir = params[1]; int x1 = params[2]; int x2 = params[3]; dir[pdir] = 0; dir[(pdir+1)%3] = x1; dir[(pdir+2)%3] = x2; //fprintf(stderr,"%d: (%d,%d,%d)\n",this_node,dir[0],dir[1],dir[2]); newroot = map_lattice_to_node(&lblattice, dir, grid); map_node_array(this_node, node_pos); //fprintf(stderr,"%d: newroot=%d (%d,%d,%d)\n",this_node,newroot,grid[0],grid[1],grid[2]); if ( (grid[(pdir+1)%3] == node_pos[(pdir+1)%3]) && (grid[(pdir+2)%3] == node_pos[(pdir+2)%3]) ) { involved = 1; } MPI_Comm_split(MPI_COMM_WORLD, involved, this_node, &slice_comm); MPI_Comm_rank(slice_comm, &subrank); if (this_node == newroot) result = realloc(result,box_l[pdir]/lblattice.agrid*sizeof(double)); //fprintf(stderr,"%d (%d,%d): result=%p vcomp=%d pdir=%d x1=%d x2=%d involved=%d\n",this_node,subrank,newroot,result,vcomp,pdir,x1,x2,involved); if (involved) { velprof = malloc(lblattice.grid[pdir]*sizeof(double)); //dir[(pdir+1)%3] += 1; //dir[(pdir+2)%3] += 1; for (dir[pdir]=1;dir[pdir]<=lblattice.grid[pdir];dir[pdir]++) { index = get_linear_index(dir[0],dir[1],dir[2],lblattice.halo_grid); lb_calc_local_fields(index, &rho, j, NULL); //fprintf(stderr,"%p %d %.12e %.12e %d\n",lbfluid[0],index,rho,j[0],vcomp); if (rho < ROUND_ERROR_PREC) { velprof[dir[pdir]-1] = 0.0; } else { //velprof[dir[pdir]-1] = local_j / (SQR(lbpar.agrid)*lbpar.tau); velprof[dir[pdir]-1] = j[vcomp]/rho * lblattice.agrid/lbpar.tau; //fprintf(stderr,"%f %f %f\n",velprof[dir[pdir]-1],local_j,local_rho); } //if (dir[pdir]==lblattice.grid[pdir]) { // int i; // fprintf(stderr,"(%d,%d,%d)\n",dir[0],dir[1],dir[2]); // fprintf(stderr,"%d\n",lbfluid[index].boundary); // for (i=0;i<lbmodel.n_veloc;i++) fprintf(stderr,"local_n[%p][%d]=%.12e\n",lbfluid[index].n,i,lbfluid[index].n[i]+lbmodel.coeff[i][0]*lbpar.rho); // fprintf(stderr,"local_rho=%e\n",local_rho); // fprintf(stderr,"local_j=%e\n",local_j); //} } MPI_Gather(velprof, lblattice.grid[pdir], MPI_DOUBLE, result, lblattice.grid[pdir], MPI_DOUBLE, newroot, slice_comm); free(velprof); } MPI_Comm_free(&slice_comm); if (newroot != 0) { if (this_node == newroot) { MPI_Send(result, lblattice.grid[pdir]*node_grid[pdir], MPI_DOUBLE, 0, REQ_VELPROF, MPI_COMM_WORLD); free(result); } if (this_node == 0) { //fprintf(stderr,"%d: I'm just here!\n",this_node); MPI_Recv(result, lblattice.grid[pdir]*node_grid[pdir], MPI_DOUBLE, newroot, REQ_VELPROF, MPI_COMM_WORLD, &status); //fprintf(stderr,"%d: And now I'm here!\n",this_node); } } if (this_node !=0) free(params); }
/** Calculate a density profile of the fluid. */ void lb_calc_densprof(double *result, int *params) { int index, dir[3], grid[3]; int newroot=0, subrank, involved=0; double *profile; MPI_Comm slice_comm = NULL; MPI_Status status; if (this_node !=0) params = malloc(3*sizeof(int)); MPI_Bcast(params, 3, MPI_INT, 0, MPI_COMM_WORLD); int pdir = params[0]; int x1 = params[1]; int x2 = params[2]; dir[pdir] = 0; dir[(pdir+1)%3] = x1; dir[(pdir+2)%3] = x2; newroot = map_lattice_to_node(&lblattice, dir, grid); map_node_array(this_node, node_pos); if ( (grid[(pdir+1)%3] == node_pos[(pdir+1)%3]) && (grid[(pdir+2)%3] == node_pos[(pdir+2)%3]) ) { involved = 1; } MPI_Comm_split(MPI_COMM_WORLD, involved, this_node, &slice_comm); MPI_Comm_rank(slice_comm, &subrank); if (this_node == newroot) result = realloc(result,box_l[pdir]/lblattice.agrid*sizeof(double)); if (involved) { profile = malloc(lblattice.grid[pdir]*sizeof(double)); //dir[(pdir+1)%3] += 1; //dir[(pdir+2)%3] += 1; for (dir[pdir]=1;dir[pdir]<=lblattice.grid[pdir];dir[pdir]++) { index = get_linear_index(dir[0],dir[1],dir[2],lblattice.halo_grid); lb_calc_local_rho(index,&profile[dir[pdir]-1]); //profile[dir[pdir]-1] = *lbfluid[index].rho; //if (dir[pdir]==lblattice.grid[pdir]) { // int i; // fprintf(stderr,"(%d,%d,%d)\n",dir[0],dir[1],dir[2]); // fprintf(stderr,"%d\n",lbfluid[index].boundary); // for (i=0;i<lbmodel.n_veloc;i++) fprintf(stderr,"local_n[%p][%d]=%.12e\n",lbfluid[index].n,i,lbfluid[index].n[i]+lbmodel.coeff[i][0]*lbpar.rho); // fprintf(stderr,"local_rho=%e\n",*lbfluid[index].rho); //} } MPI_Gather(profile, lblattice.grid[pdir], MPI_DOUBLE, result, lblattice.grid[pdir], MPI_DOUBLE, 0, slice_comm); free(profile); } MPI_Comm_free(&slice_comm); if (newroot != 0) { if (this_node == newroot) { MPI_Send(result, lblattice.grid[pdir]*node_grid[pdir], MPI_DOUBLE, 0, REQ_DENSPROF, MPI_COMM_WORLD); free(result); } if (this_node == 0) { MPI_Recv(result, lblattice.grid[pdir]*node_grid[pdir], MPI_DOUBLE, newroot, REQ_DENSPROF, MPI_COMM_WORLD, &status); } } if (this_node != 0) free(params); }
void Lattice::set_data_for_local_grid_index(index_t* ind, void* data) {
  memmove(((char*)this->_data)
          + get_linear_index(ind[0]+this->halo_size, ind[1]+this->halo_size, ind[2]+this->halo_size, this->halo_grid)*this->element_size,
          data, this->element_size);
}
/** Initialize boundary conditions for all constraints in the system. */ void lb_init_boundaries() { int n, x, y, z, node_domain_position[3], offset[3]; char *errtxt; double pos[3], dist, dist_tmp=0.0, dist_vec[3]; int the_boundary=-1; map_node_array(this_node, node_domain_position); offset[0] = node_domain_position[0]*lblattice.grid[0]; offset[1] = node_domain_position[1]*lblattice.grid[1]; offset[2] = node_domain_position[2]*lblattice.grid[2]; for (n=0;n<lblattice.halo_grid_volume;n++) { lbfields[n].boundary = 0; } if (lblattice.halo_grid_volume==0) return; for (z=0; z<lblattice.grid[2]+2; z++) { for (y=0; y<lblattice.grid[1]+2; y++) { for (x=0; x<lblattice.grid[0]+2; x++) { pos[0] = (offset[0]+(x-1))*lblattice.agrid; pos[1] = (offset[1]+(y-1))*lblattice.agrid; pos[2] = (offset[2]+(z-1))*lblattice.agrid; dist = 1e99; for (n=0;n<n_lb_boundaries;n++) { switch (lb_boundaries[n].type) { case LB_BOUNDARY_WAL: calculate_wall_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.wal, &dist_tmp, dist_vec); break; case LB_BOUNDARY_SPH: calculate_sphere_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.sph, &dist_tmp, dist_vec); break; case LB_BOUNDARY_CYL: calculate_cylinder_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.cyl, &dist_tmp, dist_vec); break; case LB_BOUNDARY_POR: calculate_pore_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.pore, &dist_tmp, dist_vec); break; default: errtxt = runtime_error(128); ERROR_SPRINTF(errtxt, "{109 lbboundary type %d not implemented in lb_init_boundaries()\n", lb_boundaries[n].type); } // if (abs(dist) > abs(dist_tmp) || n == 0) { if (dist_tmp<dist) { //If you try to create a wall of finite thickness ...|xxx|..., it makes every node a wall node! We still leave it like that, since it allows for corners without problems. We will add a box type to allow for walls of finite thickness. (Georg Rempfer, Stefan Kesselheim, 05.10.2011) dist = dist_tmp; the_boundary = n; } } if (dist <= 0 && n_lb_boundaries > 0) { lbfields[get_linear_index(x,y,z,lblattice.halo_grid)].boundary = the_boundary+1; } else { lbfields[get_linear_index(x,y,z,lblattice.halo_grid)].boundary=0; } } } } }
void Lattice::get_data_for_halo_index(index_t* ind, void** data) {
  (*data) = ((char*)this->_data)
          + get_linear_index(ind[0], ind[1], ind[2], this->halo_grid)*this->element_size;
}
int fft_find_comm_groups(int grid1[3], int grid2[3], int *node_list1, int *node_list2, int *group, int *pos, int *my_pos) { int i; /* communication group cell size on grid1 and grid2 */ int s1[3], s2[3]; /* The communication group cells build the same super grid on grid1 and grid2 */ int ds[3]; /* communication group size */ int g_size=1; /* comm. group cell index */ int gi[3]; /* position of a node in a grid */ int p1[3], p2[3]; /* node identity */ int n; /* this_node position in the communication group. */ int c_pos=-1; /* flag for group identification */ int my_group=0; FFT_TRACE(fprintf(stderr,"%d: fft_find_comm_groups:\n",this_node)); FFT_TRACE(fprintf(stderr,"%d: for grid1=(%d,%d,%d) and grids=(%d,%d,%d)\n", this_node,grid1[0],grid1[1],grid1[2],grid2[0],grid2[1],grid2[2])); /* calculate dimension of comm. group cells for both grids */ if( (grid1[0]*grid1[1]*grid1[2]) != (grid2[0]*grid2[1]*grid2[2]) ) return -1; /* unlike number of nodes */ for(i=0;i<3;i++) { s1[i] = grid1[i] / grid2[i]; if(s1[i] == 0) s1[i] = 1; else if(grid1[i] != grid2[i]*s1[i]) return -1; /* grids do not match!!! */ s2[i] = grid2[i] / grid1[i]; if(s2[i] == 0) s2[i] = 1; else if(grid2[i] != grid1[i]*s2[i]) return -1; /* grids do not match!!! */ ds[i] = grid2[i] / s2[i]; g_size *= s2[i]; } /* calc node_list2 */ /* loop through all comm. group cells */ for(gi[2] = 0; gi[2] < ds[2]; gi[2]++) for(gi[1] = 0; gi[1] < ds[1]; gi[1]++) for(gi[0] = 0; gi[0] < ds[0]; gi[0]++) { /* loop through all nodes in that comm. group cell */ for(i=0;i<g_size;i++) { p1[0] = (gi[0]*s1[0]) + (i%s1[0]); p1[1] = (gi[1]*s1[1]) + ((i/s1[0])%s1[1]); p1[2] = (gi[2]*s1[2]) + (i/(s1[0]*s1[1])); p2[0] = (gi[0]*s2[0]) + (i%s2[0]); p2[1] = (gi[1]*s2[1]) + ((i/s2[0])%s2[1]); p2[2] = (gi[2]*s2[2]) + (i/(s2[0]*s2[1])); n = node_list1[ get_linear_index(p1[0],p1[1],p1[2],grid1) ]; node_list2[ get_linear_index(p2[0],p2[1],p2[2],grid2) ] = n ; pos[3*n+0] = p2[0]; pos[3*n+1] = p2[1]; pos[3*n+2] = p2[2]; if(my_group==1) group[i] = n; if(n==this_node && my_group==0) { my_group = 1; c_pos = i; my_pos[0] = p2[0]; my_pos[1] = p2[1]; my_pos[2] = p2[2]; i=-1; /* restart the loop */ } } my_group=0; } /* permute comm. group according to the nodes position in the group */ /* This is necessary to have matching node pairs during communication! */ while( c_pos>0 ) { n=group[g_size-1]; for(i=g_size-1; i>0; i--) group[i] = group[i-1]; group[0] = n; c_pos--; } return g_size; }
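/* Example (illustrative numbers): for node grids grid1 = (2,2,2) and
 * grid2 = (4,2,1), both with 8 nodes, fft_find_comm_groups() forms
 * ds = (2,2,1) communication-group cells of g_size = 2 nodes each:
 * every group cell covers a 1x1x2 block of grid1 and a 2x1x1 block of
 * grid2, so each group only needs to exchange data among its own two
 * nodes when switching between the two decompositions.
 */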
/** Initialize boundary conditions for all constraints in the system. */ void lb_init_boundaries() { int n, x, y, z; //char *errtxt; double pos[3], dist, dist_tmp=0.0, dist_vec[3]; if (lattice_switch & LATTICE_LB_GPU) { #if defined (LB_GPU) && defined (LB_BOUNDARIES_GPU) int number_of_boundnodes = 0; int *host_boundary_node_list= (int*)Utils::malloc(sizeof(int)); int *host_boundary_index_list= (int*)Utils::malloc(sizeof(int)); size_t size_of_index; int boundary_number = -1; // the number the boundary will actually belong to. #ifdef EK_BOUNDARIES ekfloat *host_wallcharge_species_density = NULL; float node_wallcharge = 0.0f; int wallcharge_species = -1, charged_boundaries = 0; int node_charged = 0; for(n = 0; n < int(n_lb_boundaries); n++) lb_boundaries[n].net_charge = 0.0; if (ek_initialized) { host_wallcharge_species_density = (ekfloat*) Utils::malloc(ek_parameters.number_of_nodes * sizeof(ekfloat)); for(n = 0; n < int(n_lb_boundaries); n++) { if(lb_boundaries[n].charge_density != 0.0) { charged_boundaries = 1; break; } } if (pdb_charge_lattice) { charged_boundaries = 1; } for(n = 0; n < int(ek_parameters.number_of_species); n++) if(ek_parameters.valency[n] != 0.0) { wallcharge_species = n; break; } if(wallcharge_species == -1 && charged_boundaries) { runtimeErrorMsg() <<"no charged species available to create wall charge\n"; } } #endif for(z=0; z<int(lbpar_gpu.dim_z); z++) { for(y=0; y<int(lbpar_gpu.dim_y); y++) { for (x=0; x<int(lbpar_gpu.dim_x); x++) { pos[0] = (x+0.5)*lbpar_gpu.agrid; pos[1] = (y+0.5)*lbpar_gpu.agrid; pos[2] = (z+0.5)*lbpar_gpu.agrid; dist = 1e99; #ifdef EK_BOUNDARIES if (ek_initialized) { host_wallcharge_species_density[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] = 0.0f; node_charged = 0; node_wallcharge = 0.0f; } #endif for (n=0; n < n_lb_boundaries; n++) { switch (lb_boundaries[n].type) { case LB_BOUNDARY_WAL: calculate_wall_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.wal, &dist_tmp, dist_vec); break; case LB_BOUNDARY_SPH: calculate_sphere_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.sph, &dist_tmp, dist_vec); break; case LB_BOUNDARY_CYL: calculate_cylinder_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.cyl, &dist_tmp, dist_vec); break; case LB_BOUNDARY_RHOMBOID: calculate_rhomboid_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.rhomboid, &dist_tmp, dist_vec); break; case LB_BOUNDARY_POR: calculate_pore_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.pore, &dist_tmp, dist_vec); break; case LB_BOUNDARY_STOMATOCYTE: calculate_stomatocyte_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.stomatocyte, &dist_tmp, dist_vec); break; case LB_BOUNDARY_HOLLOW_CONE: calculate_hollow_cone_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.hollow_cone, &dist_tmp, dist_vec); break; case LB_BOUNDARY_SPHEROCYLINDER: calculate_spherocylinder_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.spherocyl, &dist_tmp, dist_vec); break; case LB_BOUNDARY_VOXEL: // voxel data do not need dist //calculate_voxel_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.voxel, &dist_tmp, dist_vec); dist_tmp=1e99; break; default: runtimeErrorMsg() <<"lbboundary type "<< lb_boundaries[n].type << " not implemented in lb_init_boundaries()\n"; } if (dist > dist_tmp || n == 0) { dist = dist_tmp; boundary_number = n; } #ifdef EK_BOUNDARIES if (ek_initialized) { if(dist_tmp <= 0 && lb_boundaries[n].charge_density != 0.0f) { 
node_charged = 1; node_wallcharge += lb_boundaries[n].charge_density * ek_parameters.agrid*ek_parameters.agrid*ek_parameters.agrid; lb_boundaries[n].net_charge += lb_boundaries[n].charge_density * ek_parameters.agrid*ek_parameters.agrid*ek_parameters.agrid; } } #endif } #ifdef EK_BOUNDARIES if(pdb_boundary_lattice && pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x]) { dist = -1; boundary_number = n_lb_boundaries; // Makes sure that boundary_number is not used by a constraint } #endif if (dist <= 0 && boundary_number >= 0 && (n_lb_boundaries > 0 || pdb_boundary_lattice)) { size_of_index = (number_of_boundnodes+1)*sizeof(int); host_boundary_node_list = (int *) Utils::realloc(host_boundary_node_list, size_of_index); host_boundary_index_list = (int *) Utils::realloc(host_boundary_index_list, size_of_index); host_boundary_node_list[number_of_boundnodes] = x + lbpar_gpu.dim_x*y + lbpar_gpu.dim_x*lbpar_gpu.dim_y*z; host_boundary_index_list[number_of_boundnodes] = boundary_number + 1; number_of_boundnodes++; //printf("boundindex %i: \n", number_of_boundnodes); } #ifdef EK_BOUNDARIES if (ek_initialized) { ek_parameters.number_of_boundary_nodes = number_of_boundnodes; if(wallcharge_species != -1) { if(pdb_charge_lattice && pdb_charge_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] != 0.0f) { node_charged = 1; node_wallcharge += pdb_charge_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x]; } if(node_charged) host_wallcharge_species_density[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] = node_wallcharge / ek_parameters.valency[wallcharge_species]; else if(dist <= 0) host_wallcharge_species_density[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] = 0.0f; else host_wallcharge_species_density[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] = ek_parameters.density[wallcharge_species] * ek_parameters.agrid*ek_parameters.agrid*ek_parameters.agrid; } } #endif } } } /**call of cuda fkt*/ float* boundary_velocity = (float *) Utils::malloc(3*(n_lb_boundaries+1)*sizeof(float)); for (n=0; n<n_lb_boundaries; n++) { boundary_velocity[3*n+0]=lb_boundaries[n].velocity[0]; boundary_velocity[3*n+1]=lb_boundaries[n].velocity[1]; boundary_velocity[3*n+2]=lb_boundaries[n].velocity[2]; } boundary_velocity[3*n_lb_boundaries+0] = 0.0f; boundary_velocity[3*n_lb_boundaries+1] = 0.0f; boundary_velocity[3*n_lb_boundaries+2] = 0.0f; if (n_lb_boundaries || pdb_boundary_lattice) lb_init_boundaries_GPU(n_lb_boundaries, number_of_boundnodes, host_boundary_node_list, host_boundary_index_list, boundary_velocity); free(boundary_velocity); free(host_boundary_node_list); free(host_boundary_index_list); #ifdef EK_BOUNDARIES if (ek_initialized) { ek_init_species_density_wallcharge(host_wallcharge_species_density, wallcharge_species); free(host_wallcharge_species_density); } #endif #endif /* defined (LB_GPU) && defined (LB_BOUNDARIES_GPU) */ } else { #if defined (LB) && defined (LB_BOUNDARIES) int node_domain_position[3], offset[3]; int the_boundary=-1; map_node_array(this_node, node_domain_position); offset[0] = node_domain_position[0]*lblattice.grid[0]; offset[1] = node_domain_position[1]*lblattice.grid[1]; offset[2] = node_domain_position[2]*lblattice.grid[2]; for (n=0;n<lblattice.halo_grid_volume;n++) { lbfields[n].boundary = 0; } if (lblattice.halo_grid_volume==0) return; for (z=0; z<lblattice.grid[2]+2; z++) { for (y=0; y<lblattice.grid[1]+2; y++) { for (x=0; 
x<lblattice.grid[0]+2; x++) { pos[0] = (offset[0]+(x-0.5))*lblattice.agrid[0]; pos[1] = (offset[1]+(y-0.5))*lblattice.agrid[1]; pos[2] = (offset[2]+(z-0.5))*lblattice.agrid[2]; dist = 1e99; for (n=0;n<n_lb_boundaries;n++) { switch (lb_boundaries[n].type) { case LB_BOUNDARY_WAL: calculate_wall_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.wal, &dist_tmp, dist_vec); break; case LB_BOUNDARY_SPH: calculate_sphere_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.sph, &dist_tmp, dist_vec); break; case LB_BOUNDARY_CYL: calculate_cylinder_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.cyl, &dist_tmp, dist_vec); break; case LB_BOUNDARY_RHOMBOID: calculate_rhomboid_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.rhomboid, &dist_tmp, dist_vec); break; case LB_BOUNDARY_POR: calculate_pore_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.pore, &dist_tmp, dist_vec); break; case LB_BOUNDARY_STOMATOCYTE: calculate_stomatocyte_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.stomatocyte, &dist_tmp, dist_vec); break; case LB_BOUNDARY_HOLLOW_CONE: calculate_hollow_cone_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.hollow_cone, &dist_tmp, dist_vec); break; case LB_BOUNDARY_VOXEL: // voxel data do not need dist dist_tmp=1e99; //calculate_voxel_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.voxel, &dist_tmp, dist_vec); break; default: runtimeErrorMsg() <<"lbboundary type " << lb_boundaries[n].type << " not implemented in lb_init_boundaries()\n"; } if (dist_tmp<dist || n == 0) { dist = dist_tmp; the_boundary = n; } } if (dist <= 0 && the_boundary >= 0 && n_lb_boundaries > 0) { lbfields[get_linear_index(x,y,z,lblattice.halo_grid)].boundary = the_boundary+1; //printf("boundindex %i: \n", get_linear_index(x,y,z,lblattice.halo_grid)); } else { lbfields[get_linear_index(x,y,z,lblattice.halo_grid)].boundary = 0; } } } } //printf("init voxels\n\n"); // SET VOXEL BOUNDARIES DIRECTLY int xxx,yyy,zzz=0; char line[80]; for (n=0;n<n_lb_boundaries;n++) { switch (lb_boundaries[n].type) { case LB_BOUNDARY_VOXEL: //lbfields[get_linear_index(lb_boundaries[n].c.voxel.pos[0],lb_boundaries[n].c.voxel.pos[1],lb_boundaries[n].c.voxel.pos[2],lblattice.halo_grid)].boundary = n+1; FILE *fp; //fp=fopen("/home/mgusenbauer/Daten/Copy/DUK/GentlePump/Optimierer/NSvsLBM/geometry_files/bottleneck_fine_voxel_data_d20_converted_noMirror.csv", "r"); //fp=fopen("/home/mgusenbauer/Daten/Copy/DUK/GentlePump/Optimierer/NSvsLBM/geometry_files/bottleneck_fine_voxel_data_d80_converted_noMirror.csv", "r"); //fp=fopen("/home/mgusenbauer/Daten/Copy/DUK/GentlePump/Optimierer/NSvsLBM/geometry_files/bottleneck_fine_voxel_data_d80_converted.csv", "r"); fp=fopen(lb_boundaries[n].c.voxel.filename, "r"); while(fgets(line, 80, fp) != NULL) { /* get a line, up to 80 chars from fp, done if NULL */ sscanf (line, "%d %d %d", &xxx,&yyy,&zzz); //printf("%d %d %d\n", xxx,yyy,zzz); //lbfields[get_linear_index(xxx,yyy+30,zzz,lblattice.halo_grid)].boundary = n+1; lbfields[get_linear_index(xxx,yyy,zzz,lblattice.halo_grid)].boundary = n+1; } fclose(fp); break; default: break; } } // CHECK FOR BOUNDARY NEIGHBOURS AND SET FLUID NORMAL VECTOR //int neighbours = {0,0,0,0,0,0}; //int x=0,y=0,z=0; //double nn[]={0.0,0.0,0.0,0.0,0.0,0.0}; //for (n=0;n<n_lb_boundaries;n++) { //switch (lb_boundaries[n].type) { //case LB_BOUNDARY_VOXEL: //x=lb_boundaries[n].c.voxel.pos[0]; //y=lb_boundaries[n].c.voxel.pos[1]; 
//z=lb_boundaries[n].c.voxel.pos[2]; //if(((x-1) >= 0) && (lbfields[get_linear_index(x-1,y,z,lblattice.halo_grid)].boundary == 0)) nn[0] = -1.0;//neighbours[0] = -1; //if(((x+1) <= lblattice.grid[0]) && (lbfields[get_linear_index(x+1,y,z,lblattice.halo_grid)].boundary == 0)) nn[1] = 1.0;//neighbours[1] = 1; ////printf("%.0lf %.0lf ",nn[0],nn[1]); //lb_boundaries[n].c.voxel.n[0] = nn[0]+nn[1]; ////nn=0.0; //if(((y-1) >= 0) && (lbfields[get_linear_index(x,y-1,z,lblattice.halo_grid)].boundary == 0)) nn[2] = -1.0;//neighbours[2] = -1; //if(((y+1) <= lblattice.grid[1]) && (lbfields[get_linear_index(x,y+1,z,lblattice.halo_grid)].boundary == 0)) nn[3] = 1.0;//neighbours[3] = 1; ////printf("%.0lf %.0lf ",nn[2],nn[3]); //lb_boundaries[n].c.voxel.n[1] = nn[2]+nn[3]; ////nn=0.0; //if(((z-1) >= 0) && (lbfields[get_linear_index(x,y,z-1,lblattice.halo_grid)].boundary == 0)) nn[4] = -1.0;//neighbours[4] = -1; //if(((z+1) <= lblattice.grid[2]) && (lbfields[get_linear_index(x,y,z+1,lblattice.halo_grid)].boundary == 0)) nn[5] = 1.0;//neighbours[5]= 1; ////printf("%.0lf %.0lf ",nn[4],nn[5]); //lb_boundaries[n].c.voxel.n[2] = nn[4]+nn[5]; //nn[0]=0.0,nn[1]=0.0,nn[2]=0.0,nn[3]=0.0,nn[4]=0.0,nn[5]=0.0; ////printf("t %d pos: %.0lf %.0lf %.0lf, fluid normal %.0lf %.0lf %.0lf\n",n, x,y,z,lb_boundaries[n].c.voxel.normal[0],lb_boundaries[n].c.voxel.normal[1],lb_boundaries[n].c.voxel.normal[2]); ////printf("boundaries: %d %d %d %d %d %d\n",lbfields[get_linear_index(x-1,y,z,lblattice.halo_grid)].boundary,lbfields[get_linear_index(x+1,y,z,lblattice.halo_grid)].boundary,lbfields[get_linear_index(x,y-1,z,lblattice.halo_grid)].boundary,lbfields[get_linear_index(x,y+1,z,lblattice.halo_grid)].boundary,lbfields[get_linear_index(x,y,z-1,lblattice.halo_grid)].boundary,lbfields[get_linear_index(x,y,z+1,lblattice.halo_grid)].boundary); //break; //default: //break; //} //} //// DO THE SAME FOR THE CONSTRAINTS: CONSTRAINTS MUST BE SET AND THE SAME AS LB_BOUNDARY !!! 
//for(n=0;n<n_constraints;n++) { //switch(constraints[n].type) { //case CONSTRAINT_VOXEL: //x=constraints[n].c.voxel.pos[0]; //y=constraints[n].c.voxel.pos[1]; //z=constraints[n].c.voxel.pos[2]; //if(((x-1) >= 0) && (lbfields[get_linear_index(x-1,y,z,lblattice.halo_grid)].boundary == 0)) nn[0] = -1.0;//neighbours[0] = -1; //if(((x+1) <= lblattice.grid[0]) && (lbfields[get_linear_index(x+1,y,z,lblattice.halo_grid)].boundary == 0)) nn[1] = 1.0;//neighbours[1] = 1; ////printf("%.0lf %.0lf ",nn[0],nn[1]); //constraints[n].c.voxel.n[0] = nn[0]+nn[1]; ////nn=0.0; //if(((y-1) >= 0) && (lbfields[get_linear_index(x,y-1,z,lblattice.halo_grid)].boundary == 0)) nn[2] = -1.0;//neighbours[2] = -1; //if(((y+1) <= lblattice.grid[1]) && (lbfields[get_linear_index(x,y+1,z,lblattice.halo_grid)].boundary == 0)) nn[3] = 1.0;//neighbours[3] = 1; ////printf("%.0lf %.0lf ",nn[2],nn[3]); //constraints[n].c.voxel.n[1] = nn[2]+nn[3]; ////nn=0.0; //if(((z-1) >= 0) && (lbfields[get_linear_index(x,y,z-1,lblattice.halo_grid)].boundary == 0)) nn[4] = -1.0;//neighbours[4] = -1; //if(((z+1) <= lblattice.grid[2]) && (lbfields[get_linear_index(x,y,z+1,lblattice.halo_grid)].boundary == 0)) nn[5] = 1.0;//neighbours[5]= 1; ////printf("%.0lf %.0lf ",nn[4],nn[5]); //constraints[n].c.voxel.n[2] = nn[4]+nn[5]; //nn[0]=0.0,nn[1]=0.0,nn[2]=0.0,nn[3]=0.0,nn[4]=0.0,nn[5]=0.0; //break; //default: //break; //} //} //#ifdef VOXEL_BOUNDARIES /* for (z=0; z<lblattice.grid[2]+2; z++) { for (y=0; y<lblattice.grid[1]+2; y++) { for (x=0; x<lblattice.grid[0]+2; x++) { lbfields[get_linear_index(x,y,z,lblattice.halo_grid)].boundary = 1; } } } static const char filename[] = "/home/mgusenbauer/Daten/Copy/DUK/GentlePump/Optimierer/voxels/stl/data_final.csv"; FILE *file = fopen ( filename, "r" ); int coords[3]; printf("start new\n"); if ( file != NULL ){ char line [ 128 ]; // or other suitable maximum line size while ( fgets ( line, sizeof line, file ) != NULL ) {// read a line //fputs ( line, stdout ); // write the line //coords = line.Split(' ').Select(n => Convert.ToInt32(n)).ToArray(); //printf("readline: %s\n",line); int i; sscanf(line, "%d %d %d", &coords[0],&coords[1],&coords[2]); //printf("%d %d %d\n", coords[0],coords[1],coords[2]); lbfields[get_linear_index(coords[0]+5,coords[1]+5,coords[2]+5,lblattice.halo_grid)].boundary = 0; } fclose ( file ); } printf("end new\n"); */ #endif } }
/** Init cell interactions for the Lees-Edwards cell system. * initializes the interacting neighbor cell list of a cell. The * created list of interacting neighbor cells is used by the verlet * algorithm (see verlet.cpp) to build the verlet lists. */ void le_dd_init_cell_interactions() { int m,n,o,p,q,r,ind1,ind2,c_cnt=0,n_cnt=0; int extra_cells = 0; /* initialize cell neighbor structures */ dd.cell_inter = (IA_Neighbor_List *) realloc(dd.cell_inter,local_cells.n*sizeof(IA_Neighbor_List)); for(m=0; m<local_cells.n; m++) { dd.cell_inter[m].nList = NULL; dd.cell_inter[m].n_neighbors=0; } /* loop over non-ghost cells */ for(o=1; o<=dd.cell_grid[2]; o++) { for(n=1; n<=dd.cell_grid[1]; n++) { for(m=1; m<=dd.cell_grid[0]; m++) { /* plenty for most cases */ dd.cell_inter[c_cnt].nList = (IA_Neighbor *) realloc(dd.cell_inter[c_cnt].nList, 14*sizeof(IA_Neighbor)); n_cnt=0; ind1 = get_linear_index(m,n,o,dd.ghost_cell_grid); /* loop all 'conventional' neighbor cells */ for(p=o-1; p<=o+1; p++) { /*z-loop*/ for(q=n-1; q<=n+1; q++) { /*y-loop*/ for(r=m-1; r<=m+2; r++) { /*x-loop*/ /* Extra neighbours in x only for some cases */ if( (q == 0 && node_pos[1] == 0) || (q == dd.cell_grid[1]+1 && node_pos[1] == node_grid[1]-1) ){ extra_cells++; dd.cell_inter[c_cnt].nList = (IA_Neighbor *) realloc(dd.cell_inter[c_cnt].nList, (extra_cells+14)*sizeof(IA_Neighbor)); }else{ if( r == m + 2 ) continue; } ind2 = get_linear_index(r,q,p,dd.ghost_cell_grid); if(ind2 >= ind1) { dd.cell_inter[c_cnt].nList[n_cnt].cell_ind = ind2; dd.cell_inter[c_cnt].nList[n_cnt].pList = &cells[ind2]; init_pairList(&dd.cell_inter[c_cnt].nList[n_cnt].vList); #ifdef LE_DEBUG dd.cell_inter[c_cnt].nList[n_cnt].my_pos[0] = my_left[0] + r * dd.cell_size[0]; dd.cell_inter[c_cnt].nList[n_cnt].my_pos[1] = my_left[1] + q * dd.cell_size[1]; dd.cell_inter[c_cnt].nList[n_cnt].my_pos[2] = my_left[2] + p * dd.cell_size[2]; #endif n_cnt++; } } } } dd.cell_inter[c_cnt].n_neighbors = n_cnt; c_cnt++; } } } #ifdef LE_DEBUG FILE *cells_fp; char cLogName[64]; int c,nn,this_n; double myPos[3]; sprintf(cLogName, "cells_map%i.dat", this_node); cells_fp = fopen(cLogName,"w"); /* print out line segments showing the vector from each cell to each neighbour cell*/ for(c=0;c<c_cnt;c++){ myPos[0] = my_left[0] + dd.cell_size[0] * ( 1 + c % dd.cell_grid[0] ); myPos[1] = my_left[1] + dd.cell_size[1] * ( 1 + (c / dd.cell_grid[0]) % dd.cell_grid[1]); myPos[2] = my_left[2] + dd.cell_size[2] * ( 1 + (c / (dd.cell_grid[0] * dd.cell_grid[1]))); for(nn=0;nn<dd.cell_inter[c].n_neighbors;nn++){ this_n = dd.cell_inter[c].nList[nn].cell_ind; fprintf(cells_fp,"%i %i %i %f %f %f %f %f %f\n",c,nn,this_n, myPos[0], myPos[1], myPos[2], dd.cell_inter[c].nList[nn].my_pos[0], dd.cell_inter[c].nList[nn].my_pos[1], dd.cell_inter[c].nList[nn].my_pos[2]); } } fclose(cells_fp); #endif }
/** Initialize boundary conditions for all constraints in the system. */ void lb_init_boundaries() { int n, x, y, z; char *errtxt; double pos[3], dist, dist_tmp=0.0, dist_vec[3]; if (lattice_switch & LATTICE_LB_GPU) { #if defined (LB_GPU) && defined (LB_BOUNDARIES_GPU) int number_of_boundnodes = 0; int *host_boundary_node_list= (int*)malloc(sizeof(int)); int *host_boundary_index_list= (int*)malloc(sizeof(int)); size_t size_of_index; int boundary_number = -1; // the number the boundary will actually belong to. for(z=0; z<lbpar_gpu.dim_z; z++) { for(y=0; y<lbpar_gpu.dim_y; y++) { for (x=0; x<lbpar_gpu.dim_x; x++) { pos[0] = (x+0.5)*lbpar_gpu.agrid; pos[1] = (y+0.5)*lbpar_gpu.agrid; pos[2] = (z+0.5)*lbpar_gpu.agrid; dist = 1e99; for (n=0;n<n_lb_boundaries;n++) { switch (lb_boundaries[n].type) { case LB_BOUNDARY_WAL: calculate_wall_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.wal, &dist_tmp, dist_vec); break; case LB_BOUNDARY_SPH: calculate_sphere_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.sph, &dist_tmp, dist_vec); break; case LB_BOUNDARY_CYL: calculate_cylinder_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.cyl, &dist_tmp, dist_vec); break; case LB_BOUNDARY_RHOMBOID: calculate_rhomboid_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.rhomboid, &dist_tmp, dist_vec); break; case LB_BOUNDARY_POR: calculate_pore_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.pore, &dist_tmp, dist_vec); break; default: errtxt = runtime_error(128); ERROR_SPRINTF(errtxt, "{109 lbboundary type %d not implemented in lb_init_boundaries()\n", lb_boundaries[n].type); } if (dist > dist_tmp || n == 0) { dist = dist_tmp; boundary_number = n; } } if (dist <= 0 && boundary_number >= 0 && n_lb_boundaries > 0) { size_of_index = (number_of_boundnodes+1)*sizeof(int); host_boundary_node_list = realloc(host_boundary_node_list, size_of_index); host_boundary_index_list = realloc(host_boundary_index_list, size_of_index); host_boundary_node_list[number_of_boundnodes] = x + lbpar_gpu.dim_x*y + lbpar_gpu.dim_x*lbpar_gpu.dim_y*z; host_boundary_index_list[number_of_boundnodes] = boundary_number + 1; number_of_boundnodes++; // printf("boundindex %i: \n", number_of_boundnodes); } } } } /**call of cuda fkt*/ float* boundary_velocity = malloc(3*n_lb_boundaries*sizeof(float)); for (n=0; n<n_lb_boundaries; n++) { boundary_velocity[3*n+0]=lb_boundaries[n].velocity[0]; boundary_velocity[3*n+1]=lb_boundaries[n].velocity[1]; boundary_velocity[3*n+2]=lb_boundaries[n].velocity[2]; } if (n_lb_boundaries) lb_init_boundaries_GPU(n_lb_boundaries, number_of_boundnodes, host_boundary_node_list, host_boundary_index_list, boundary_velocity); free(boundary_velocity); free(host_boundary_node_list); free(host_boundary_index_list); #endif } else { #if defined (LB) && defined (LB_BOUNDARIES) int node_domain_position[3], offset[3]; int the_boundary=-1; map_node_array(this_node, node_domain_position); offset[0] = node_domain_position[0]*lblattice.grid[0]; offset[1] = node_domain_position[1]*lblattice.grid[1]; offset[2] = node_domain_position[2]*lblattice.grid[2]; for (n=0;n<lblattice.halo_grid_volume;n++) { lbfields[n].boundary = 0; } if (lblattice.halo_grid_volume==0) return; for (z=0; z<lblattice.grid[2]+2; z++) { for (y=0; y<lblattice.grid[1]+2; y++) { for (x=0; x<lblattice.grid[0]+2; x++) { pos[0] = (offset[0]+(x-0.5))*lblattice.agrid; pos[1] = (offset[1]+(y-0.5))*lblattice.agrid; pos[2] = (offset[2]+(z-0.5))*lblattice.agrid; dist = 1e99; for 
(n=0;n<n_lb_boundaries;n++) { switch (lb_boundaries[n].type) { case LB_BOUNDARY_WAL: calculate_wall_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.wal, &dist_tmp, dist_vec); break; case LB_BOUNDARY_SPH: calculate_sphere_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.sph, &dist_tmp, dist_vec); break; case LB_BOUNDARY_CYL: calculate_cylinder_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.cyl, &dist_tmp, dist_vec); break; case LB_BOUNDARY_RHOMBOID: calculate_rhomboid_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.rhomboid, &dist_tmp, dist_vec); break; case LB_BOUNDARY_POR: calculate_pore_dist((Particle*) NULL, pos, (Particle*) NULL, &lb_boundaries[n].c.pore, &dist_tmp, dist_vec); break; default: errtxt = runtime_error(128); ERROR_SPRINTF(errtxt, "{109 lbboundary type %d not implemented in lb_init_boundaries()\n", lb_boundaries[n].type); } if (dist_tmp<dist || n == 0) { dist = dist_tmp; the_boundary = n; } } if (dist <= 0 && the_boundary >= 0 && n_lb_boundaries > 0) { lbfields[get_linear_index(x,y,z,lblattice.halo_grid)].boundary = the_boundary+1; //printf("boundindex %i: \n", get_linear_index(x,y,z,lblattice.halo_grid)); } else { lbfields[get_linear_index(x,y,z,lblattice.halo_grid)].boundary = 0; } } } } #endif } }
int dfft_init(double **data, int *local_mesh_dim, int *local_mesh_margin, int* global_mesh_dim, double *global_mesh_off, int *ks_pnum) { int i,j; /* helpers */ int mult[3]; int n_grid[4][3]; /* The four node grids. */ int my_pos[4][3]; /* The position of this_node in the node grids. */ int *n_id[4]; /* linear node identity lists for the node grids. */ int *n_pos[4]; /* positions of nodes in the node grids. */ /* FFTW WISDOM stuff. */ char wisdom_file_name[255]; FILE *wisdom_file; int wisdom_status; FFT_TRACE(fprintf(stderr,"%d: dipolar dfft_init():\n",this_node)); dfft.max_comm_size=0; dfft.max_mesh_size=0; for(i=0;i<4;i++) { n_id[i] = (int *) malloc(1*n_nodes*sizeof(int)); n_pos[i] = (int *) malloc(3*n_nodes*sizeof(int)); } /* === node grids === */ /* real space node grid (n_grid[0]) */ for(i=0;i<3;i++) { n_grid[0][i] = node_grid[i]; my_pos[0][i] = node_pos[i]; } for(i=0;i<n_nodes;i++) { map_node_array(i,&(n_pos[0][3*i+0])); n_id[0][get_linear_index( n_pos[0][3*i+0],n_pos[0][3*i+1],n_pos[0][3*i+2], n_grid[0])] = i; } /* FFT node grids (n_grid[1 - 3]) */ calc_2d_grid(n_nodes,n_grid[1]); /* resort n_grid[1] dimensions if necessary */ dfft.plan[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1], mult); dfft.plan[0].n_permute = 0; for(i=1;i<4;i++) dfft.plan[i].n_permute = (dfft.plan[1].row_dir+i)%3; for(i=0;i<3;i++) { n_grid[2][i] = n_grid[1][(i+1)%3]; n_grid[3][i] = n_grid[1][(i+2)%3]; } dfft.plan[2].row_dir = (dfft.plan[1].row_dir-1)%3; dfft.plan[3].row_dir = (dfft.plan[1].row_dir-2)%3; /* === communication groups === */ /* copy local mesh off real space charge assignment grid */ for(i=0;i<3;i++) dfft.plan[0].new_mesh[i] = local_mesh_dim[i]; for(i=1; i<4;i++) { dfft.plan[i].g_size=fft_find_comm_groups(n_grid[i-1], n_grid[i], n_id[i-1], n_id[i], dfft.plan[i].group, n_pos[i], my_pos[i]); if(dfft.plan[i].g_size==-1) { /* try permutation */ j = n_grid[i][(dfft.plan[i].row_dir+1)%3]; n_grid[i][(dfft.plan[i].row_dir+1)%3] = n_grid[i][(dfft.plan[i].row_dir+2)%3]; n_grid[i][(dfft.plan[i].row_dir+2)%3] = j; dfft.plan[i].g_size=fft_find_comm_groups(n_grid[i-1], n_grid[i], n_id[i-1], n_id[i], dfft.plan[i].group, n_pos[i], my_pos[i]); if(dfft.plan[i].g_size==-1) { fprintf(stderr,"%d: dipolar INTERNAL ERROR: fft_find_comm_groups error\n", this_node); errexit(); } } dfft.plan[i].send_block = (int *)realloc(dfft.plan[i].send_block, 6*dfft.plan[i].g_size*sizeof(int)); dfft.plan[i].send_size = (int *)realloc(dfft.plan[i].send_size, 1*dfft.plan[i].g_size*sizeof(int)); dfft.plan[i].recv_block = (int *)realloc(dfft.plan[i].recv_block, 6*dfft.plan[i].g_size*sizeof(int)); dfft.plan[i].recv_size = (int *)realloc(dfft.plan[i].recv_size, 1*dfft.plan[i].g_size*sizeof(int)); dfft.plan[i].new_size = fft_calc_local_mesh(my_pos[i], n_grid[i], global_mesh_dim, global_mesh_off, dfft.plan[i].new_mesh, dfft.plan[i].start); permute_ifield(dfft.plan[i].new_mesh,3,-(dfft.plan[i].n_permute)); permute_ifield(dfft.plan[i].start,3,-(dfft.plan[i].n_permute)); dfft.plan[i].n_ffts = dfft.plan[i].new_mesh[0]*dfft.plan[i].new_mesh[1]; /* === send/recv block specifications === */ for(j=0; j<dfft.plan[i].g_size; j++) { int k, node; /* send block: this_node to comm-group-node i (identity: node) */ node = dfft.plan[i].group[j]; dfft.plan[i].send_size[j] = fft_calc_send_block(my_pos[i-1], n_grid[i-1], &(n_pos[i][3*node]), n_grid[i], global_mesh_dim, global_mesh_off, &(dfft.plan[i].send_block[6*j])); permute_ifield(&(dfft.plan[i].send_block[6*j]),3,-(dfft.plan[i-1].n_permute)); 
permute_ifield(&(dfft.plan[i].send_block[6*j+3]),3,-(dfft.plan[i-1].n_permute)); if(dfft.plan[i].send_size[j] > dfft.max_comm_size) dfft.max_comm_size = dfft.plan[i].send_size[j]; /* First plan send blocks have to be adjusted, since the CA grid may have an additional margin outside the actual domain of the node */ if(i==1) { for(k=0;k<3;k++) dfft.plan[1].send_block[6*j+k ] += local_mesh_margin[2*k]; } /* recv block: this_node from comm-group-node i (identity: node) */ dfft.plan[i].recv_size[j] = fft_calc_send_block(my_pos[i], n_grid[i], &(n_pos[i-1][3*node]), n_grid[i-1], global_mesh_dim, global_mesh_off,&(dfft.plan[i].recv_block[6*j])); permute_ifield(&(dfft.plan[i].recv_block[6*j]),3,-(dfft.plan[i].n_permute)); permute_ifield(&(dfft.plan[i].recv_block[6*j+3]),3,-(dfft.plan[i].n_permute)); if(dfft.plan[i].recv_size[j] > dfft.max_comm_size) dfft.max_comm_size = dfft.plan[i].recv_size[j]; } for(j=0;j<3;j++) dfft.plan[i].old_mesh[j] = dfft.plan[i-1].new_mesh[j]; if(i==1) dfft.plan[i].element = 1; else { dfft.plan[i].element = 2; for(j=0; j<dfft.plan[i].g_size; j++) { dfft.plan[i].send_size[j] *= 2; dfft.plan[i].recv_size[j] *= 2; } } /* DEBUG */ for(j=0;j<n_nodes;j++) { /* MPI_Barrier(comm_cart); */ if(j==this_node) FFT_TRACE(fft_print_fft_plan(dfft.plan[i])); } } /* Factor 2 for complex fields */ dfft.max_comm_size *= 2; dfft.max_mesh_size = (local_mesh_dim[0]*local_mesh_dim[1]*local_mesh_dim[2]); for(i=1;i<4;i++) if(2*dfft.plan[i].new_size > dfft.max_mesh_size) dfft.max_mesh_size = 2*dfft.plan[i].new_size; FFT_TRACE(fprintf(stderr,"%d: dfft.max_comm_size = %d, dfft.max_mesh_size = %d\n", this_node,dfft.max_comm_size,dfft.max_mesh_size)); /* === pack function === */ for(i=1;i<4;i++) { dfft.plan[i].pack_function = fft_pack_block_permute2; FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 2 \n",this_node,i)); } (*ks_pnum)=6; if(dfft.plan[1].row_dir==2) { dfft.plan[1].pack_function = fft_pack_block; FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 0 \n",this_node,1)); (*ks_pnum)=4; } else if(dfft.plan[1].row_dir==1) { dfft.plan[1].pack_function = fft_pack_block_permute1; FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 1 \n",this_node,1)); (*ks_pnum)=5; } /* Factor 2 for complex numbers */ dfft.send_buf = (double *)realloc(dfft.send_buf, dfft.max_comm_size*sizeof(double)); dfft.recv_buf = (double *)realloc(dfft.recv_buf, dfft.max_comm_size*sizeof(double)); (*data) = (double *)realloc((*data), dfft.max_mesh_size*sizeof(double)); dfft.data_buf = (double *)realloc(dfft.data_buf, dfft.max_mesh_size*sizeof(double)); if(!(*data) || !dfft.data_buf || !dfft.recv_buf || !dfft.send_buf) { fprintf(stderr,"%d: Could not allocate FFT data arays\n",this_node); errexit(); } fftw_complex *c_data = (fftw_complex *) (*data); /* === FFT Routines (Using FFTW / RFFTW package)=== */ for(i=1;i<4;i++) { dfft.plan[i].dir = FFTW_FORWARD; /* FFT plan creation. Attention: destroys contents of c_data/data and c_data_buf/data_buf. 
*/ wisdom_status = FFTW_FAILURE; sprintf(wisdom_file_name,"dfftw3_1d_wisdom_forw_n%d.file", dfft.plan[i].new_mesh[2]); if( (wisdom_file=fopen(wisdom_file_name,"r"))!=NULL ) { wisdom_status = fftw_import_wisdom_from_file(wisdom_file); fclose(wisdom_file); } if(dfft.init_tag==1) fftw_destroy_plan(dfft.plan[i].our_fftw_plan); //printf("dfft.plan[%d].n_ffts=%d\n",i,dfft.plan[i].n_ffts); dfft.plan[i].our_fftw_plan = fftw_plan_many_dft(1,&dfft.plan[i].new_mesh[2],dfft.plan[i].n_ffts, c_data,NULL,1,dfft.plan[i].new_mesh[2], c_data,NULL,1,dfft.plan[i].new_mesh[2], dfft.plan[i].dir,FFTW_PATIENT); if( wisdom_status == FFTW_FAILURE && (wisdom_file=fopen(wisdom_file_name,"w"))!=NULL ) { fftw_export_wisdom_to_file(wisdom_file); fclose(wisdom_file); } dfft.plan[i].fft_function = fftw_execute; } /* === The BACK Direction === */ /* this is needed because slightly different functions are used */ for(i=1;i<4;i++) { dfft.back[i].dir = FFTW_BACKWARD; wisdom_status = FFTW_FAILURE; sprintf(wisdom_file_name,"dfftw3_1d_wisdom_back_n%d.file", dfft.plan[i].new_mesh[2]); if( (wisdom_file=fopen(wisdom_file_name,"r"))!=NULL ) { wisdom_status = fftw_import_wisdom_from_file(wisdom_file); fclose(wisdom_file); } if(dfft.init_tag==1) fftw_destroy_plan(dfft.back[i].our_fftw_plan); dfft.back[i].our_fftw_plan = fftw_plan_many_dft(1,&dfft.plan[i].new_mesh[2],dfft.plan[i].n_ffts, c_data,NULL,1,dfft.plan[i].new_mesh[2], c_data,NULL,1,dfft.plan[i].new_mesh[2], dfft.back[i].dir,FFTW_PATIENT); if( wisdom_status == FFTW_FAILURE && (wisdom_file=fopen(wisdom_file_name,"w"))!=NULL ) { fftw_export_wisdom_to_file(wisdom_file); fclose(wisdom_file); } dfft.back[i].fft_function = fftw_execute; dfft.back[i].pack_function = fft_pack_block_permute1; FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 1 \n",this_node,i)); } if(dfft.plan[1].row_dir==2) { dfft.back[1].pack_function = fft_pack_block; FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 0 \n",this_node,1)); } else if(dfft.plan[1].row_dir==1) { dfft.back[1].pack_function = fft_pack_block_permute2; FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 2 \n",this_node,1)); } dfft.init_tag=1; /* free(data); */ for(i=0;i<4;i++) { free(n_id[i]); free(n_pos[i]); } return dfft.max_mesh_size; }