void cells_re_init(int new_cs) { CellPList tmp_local; Cell *tmp_cells; int tmp_n_cells,i; CELL_TRACE(fprintf(stderr, "%d: cells_re_init: convert type (%d->%d)\n", this_node, cell_structure.type, new_cs)); invalidate_ghosts(); /* CELL_TRACE({ int p; for (p = 0; p < n_total_particles; p++) if (local_particles[p]) fprintf(stderr, "%d: cells_re_init: got particle %d\n", this_node, p); } ); */ topology_release(cell_structure.type); /* MOVE old local_cell list to temporary buffer */ memcpy(&tmp_local,&local_cells,sizeof(CellPList)); init_cellplist(&local_cells); /* MOVE old cells to temporary buffer */ tmp_cells = cells; tmp_n_cells = n_cells; cells = NULL; n_cells = 0; topology_init(new_cs, &tmp_local); particle_invalidate_part_node(); /* finally deallocate the old cells */ realloc_cellplist(&tmp_local,0); for(i=0;i<tmp_n_cells;i++) realloc_particlelist(&tmp_cells[i],0); free(tmp_cells); CELL_TRACE(fprintf(stderr, "%d: old cells deallocated\n",this_node)); /* CELL_TRACE({ int p; for (p = 0; p < n_total_particles; p++) if (local_particles[p]) fprintf(stderr, "%d: cells_re_init: now got particle %d\n", this_node, p); } ); */ /* to enforce initialization of the ghost cells */ resort_particles = 1; #ifdef ADDITIONAL_CHECKS check_cells_consistency(); #endif }
void local_sort_particles() { CELL_TRACE(fprintf(stderr, "%d: entering local_sort_particles\n", this_node)); /* first distribute strictly on nodes */ cells_resort_particles(CELL_GLOBAL_EXCHANGE); CELL_TRACE(fprintf(stderr, "%d: sorting local cells\n", this_node)); /* now sort the local cells */ for (int c = 0; c < local_cells.n; c++) { Cell *cell = local_cells.cell[c]; Particle *p = cell->part; int np = cell->n; #ifdef CELL_DEBUG for (int id = 0; id < np; ++id) { Cell *tgt_cell = cell_structure.position_to_cell(p[id].r.p); if (tgt_cell != cell) { fprintf(stderr, "%d: particle %d at position %lf %lf %lf is not in its expected cell. Have %ld, expected %ld\n", this_node, p[id].p.identity, p[id].r.p[0], p[id].r.p[1], p[id].r.p[2], (cell - cells)/sizeof(Cell*), (tgt_cell - cells) /sizeof(Cell*)); } } #endif qsort(p, np, sizeof(Particle), compare_particles); update_local_particles(cell); } CELL_TRACE(dump_particle_ordering()); CELL_TRACE(fprintf(stderr, "%d: leaving local_sort_particles\n", this_node)); }
void cells_on_geometry_change(int flags) { if (max_cut > 0.0) { max_range = max_cut + skin; } else /* if no interactions yet, we also don't need a skin */ max_range = 0.0; CELL_TRACE(fprintf(stderr,"%d: on_geometry_change with max range %f\n", this_node, max_range)); switch (cell_structure.type) { case CELL_STRUCTURE_DOMDEC: dd_on_geometry_change(flags); break; case CELL_STRUCTURE_LAYERED: /* there is no fast version, always redo everything. */ cells_re_init(CELL_STRUCTURE_LAYERED); break; case CELL_STRUCTURE_NSQUARE: /* this cell system doesn't need to react, just tell the others */ on_boxl_change(); break; } }
/** Revert the order of a communicator.
 *
 *  After the call the communication steps are stored in reverse order
 *  and the communication types are exchanged: GHOST_SEND becomes
 *  GHOST_RECV and vice versa. For GHOST_LOCL steps the first and the
 *  second half of the particle list array swap places.
 *
 *  \param comm communicator that is modified in place.
 */
void dd_revert_comm_order(GhostCommunicator *comm)
{
  int lo, hi, k, j;

  CELL_TRACE(fprintf(stderr,"%d: dd_revert_comm_order: anz comm: %d\n",this_node,comm->num));

  /* revert order: swap from both ends towards the middle */
  for (lo = 0, hi = comm->num - 1; lo < hi; lo++, hi--) {
    GhostCommunication held = comm->comm[lo];
    comm->comm[lo] = comm->comm[hi];
    comm->comm[hi] = held;
  }

  /* exchange SEND/RECV */
  for (k = 0; k < comm->num; k++) {
    GhostCommunication *step = &comm->comm[k];

    if (step->type == GHOST_SEND) {
      step->type = GHOST_RECV;
    }
    else if (step->type == GHOST_RECV) {
      step->type = GHOST_SEND;
    }
    else if (step->type == GHOST_LOCL) {
      const int half = step->n_part_lists/2;
      for (j = 0; j < half; j++) {
        ParticleList *held_list   = step->part_lists[j];
        step->part_lists[j]       = step->part_lists[j + half];
        step->part_lists[j + half] = held_list;
      }
    }
  }
}
/** Release the resources held by the N-squared topology: the
 *  me_do_ghosts cell pointer list is shrunk to zero entries and the
 *  four ghost communicators stored in cell_structure are freed.
 */
void nsq_topology_release()
{
  CELL_TRACE(fprintf(stderr,"%d: nsq_topology_release:\n",this_node));
  /* free ghost cell pointer list */
  realloc_cellplist(&me_do_ghosts, 0);
  /* free the communicators, one per ghost communication purpose */
  free_comm(&cell_structure.ghost_cells_comm);
  free_comm(&cell_structure.exchange_ghosts_comm);
  free_comm(&cell_structure.update_ghost_pos_comm);
  free_comm(&cell_structure.collect_ghost_force_comm);
}
/** First-time setup of the cell system.
 *
 *  Must be called while local_cells is still unallocated; otherwise an
 *  internal error is reported and errexit() is called. A copy of the
 *  (empty) local cell list is handed to dd_topology_init().
 */
void cells_pre_init()
{
  CellPList empty_list;

  CELL_TRACE(fprintf(stderr, "%d: cells_pre_init\n",this_node));

  /* here local_cells has to be a NULL pointer */
  if (local_cells.cell) {
    fprintf(stderr,"INTERNAL ERROR: wrong usage of cells_pre_init!\n");
    errexit();
  }

  empty_list = local_cells;
  dd_topology_init(&empty_list);
}
void cells_resort_particles(int global_flag) { CELL_TRACE(fprintf(stderr, "%d: entering cells_resort_particles %d\n", this_node, global_flag)); invalidate_ghosts(); particle_invalidate_part_node(); n_verlet_updates++; switch (cell_structure.type) { case CELL_STRUCTURE_LAYERED: layered_exchange_and_sort_particles(global_flag); break; case CELL_STRUCTURE_NSQUARE: nsq_balance_particles(global_flag); break; case CELL_STRUCTURE_DOMDEC: dd_exchange_and_sort_particles(global_flag); break; } #ifdef ADDITIONAL_CHECKS /* at the end of the day, everything should be consistent again */ check_particle_consistency(); #endif ghost_communicator(&cell_structure.ghost_cells_comm); ghost_communicator(&cell_structure.exchange_ghosts_comm); resort_particles = 0; rebuild_verletlist = 1; on_resort_particles(); CELL_TRACE(dump_particle_ordering()); CELL_TRACE(fprintf(stderr, "%d: leaving cells_resort_particles\n", this_node)); }
/** Create communicators for cell structure domain decomposition. (see \ref GhostCommunicator) */ void le_dd_prepare_comm(le_dd_comms_manager *mgr, GhostCommunicator *comm, int data_parts) { static int le_cells_state_physical = 1; int dir,lr,i,cnt, num, n_comm_cells[3], send_rec, thisCommCount; int lc[3],hc[3], neighbor_index; #ifdef LE_DEBUG if( comms_log != NULL ){ fclose(comms_log);comms_log=NULL;} char vLogName[64]; sprintf(vLogName, "%i_comms_%i.dat", comms_count++,this_node); comms_log = fopen(vLogName, "w"); #endif CELL_TRACE(fprintf(stderr,"%d: neighbours:", this_node)); CELL_TRACE(for(i = 0; i < my_neighbor_count; i++)fprintf(stderr," %d",node_neighbors[i]);)
void realloc_cells(int size) { int i; CELL_TRACE(fprintf(stderr, "%d: realloc_cells %d\n", this_node, size)); /* free all memory associated with cells to be deleted. */ for(i=size; i<n_cells; i++) { realloc_particlelist(&cells[i],0); } /* resize the cell list */ if(size != n_cells) { cells = (Cell *) realloc(cells, sizeof(Cell)*size); } /* initialize new cells */ for(i=n_cells; i<size; i++) { init_particlelist(&cells[i]); } n_cells = size; }
/** Fill a communication cell pointer list.
 *
 *  Stores pointers to all cells inside the rectangular subgrid
 *  [lc, hc] (both corners inclusive) of the 3D cell grid
 *  (\ref DomainDecomposition::ghost_cell_grid) into part_lists. The
 *  first coordinate varies slowest, the third fastest. part_lists must
 *  already be large enough.
 *
 *  \param part_lists List of cell pointers to store the result.
 *  \param lc lower left corner of the subgrid.
 *  \param hc high up corner of the subgrid.
 *  \return number of cells stored, or 0 if a corner lies outside the
 *          grid or lc exceeds hc in any direction.
 */
int dd_fill_comm_cell_lists(Cell **part_lists, int lc[3], int hc[3])
{
  int d, x, y, z;
  int n_filled = 0;

  /* sanity check: both corners inside the grid and properly ordered */
  for (d = 0; d < 3; d++) {
    const int extent = dd.ghost_cell_grid[d];
    if (lc[d] < 0 || lc[d] >= extent ||
        hc[d] < 0 || hc[d] >= extent ||
        lc[d] > hc[d])
      return 0;
  }

  for (x = lc[0]; x <= hc[0]; x++) {
    for (y = lc[1]; y <= hc[1]; y++) {
      for (z = lc[2]; z <= hc[2]; z++) {
        const int idx = get_linear_index(x,y,z,dd.ghost_cell_grid);
        CELL_TRACE(fprintf(stderr,"%d: dd_fill_comm_cell_list: add cell %d\n",this_node,idx));
        part_lists[n_filled++] = &cells[idx];
      }
    }
  }

  return n_filled;
}
/** Calculate cell grid dimensions, cell sizes and number of cells. * Calculates the cell grid, based on \ref local_box_l and \ref * max_range. If the number of cells is larger than \ref * max_num_cells, it increases max_range until the number of cells is * smaller or equal \ref max_num_cells. It sets: \ref * DomainDecomposition::cell_grid, \ref * DomainDecomposition::ghost_cell_grid, \ref * DomainDecomposition::cell_size, \ref * DomainDecomposition::inv_cell_size, and \ref n_cells. */ void dd_create_cell_grid() { int i,n_local_cells,new_cells,min_ind; double cell_range[3], min_size, scale, volume; CELL_TRACE(fprintf(stderr, "%d: dd_create_cell_grid: max_range %f\n",this_node,max_range)); CELL_TRACE(fprintf(stderr, "%d: dd_create_cell_grid: local_box %f-%f, %f-%f, %f-%f,\n",this_node,my_left[0],my_right[0],my_left[1],my_right[1],my_left[2],my_right[2])); /* initialize */ cell_range[0]=cell_range[1]=cell_range[2] = max_range; if (max_range < ROUND_ERROR_PREC*box_l[0]) { /* this is the initialization case */ #ifdef LEES_EDWARDS dd.cell_grid[0] = 2; dd.cell_grid[1] = 1; dd.cell_grid[2] = 1; n_local_cells = 2; #else n_local_cells = dd.cell_grid[0] = dd.cell_grid[1] = dd.cell_grid[2]=1; #endif } else { /* Calculate initial cell grid */ volume = local_box_l[0]; for(i=1;i<3;i++) volume *= local_box_l[i]; scale = pow(max_num_cells/volume, 1./3.); for(i=0;i<3;i++) { /* this is at least 1 */ dd.cell_grid[i] = (int)ceil(local_box_l[i]*scale); cell_range[i] = local_box_l[i]/dd.cell_grid[i]; if ( cell_range[i] < max_range ) { /* ok, too many cells for this direction, set to minimum */ dd.cell_grid[i] = (int)floor(local_box_l[i]/max_range); if ( dd.cell_grid[i] < 1 ) { ostringstream msg; msg << "interaction range " << max_range << " in direction " << i << " is larger than the local box size " << local_box_l[i]; runtimeError(msg); dd.cell_grid[i] = 1; } #ifdef LEES_EDWARDS if ( (i == 0) && (dd.cell_grid[0] < 2) ) { ostringstream msg; msg << "interaction range " << max_range << " 
in direction " << i << " is larger than half the local box size " << local_box_l[i] << "/2"; runtimeError(msg); dd.cell_grid[0] = 2; } #endif cell_range[i] = local_box_l[i]/dd.cell_grid[i]; } } /* It may be necessary to asymmetrically assign the scaling to the coordinates, which the above approach will not do. For a symmetric box, it gives a symmetric result. Here we correct that. */ for (;;) { n_local_cells = dd.cell_grid[0] * dd.cell_grid[1] * dd.cell_grid[2]; /* done */ if (n_local_cells <= max_num_cells) break; /* find coordinate with the smallest cell range */ min_ind = 0; min_size = cell_range[0]; #ifdef LEES_EDWARDS for (i = 2; i >= 1; i--) {/*preferably have thin slices in z or y... this is more efficient for Lees Edwards*/ #else for (i = 1; i < 3; i++) { #endif if (dd.cell_grid[i] > 1 && cell_range[i] < min_size) { min_ind = i; min_size = cell_range[i]; } } CELL_TRACE(fprintf(stderr, "%d: minimal coordinate %d, size %f, grid %d\n", this_node,min_ind, min_size, dd.cell_grid[min_ind])); dd.cell_grid[min_ind]--; cell_range[min_ind] = local_box_l[min_ind]/dd.cell_grid[min_ind]; } CELL_TRACE(fprintf(stderr, "%d: final %d %d %d\n", this_node, dd.cell_grid[0], dd.cell_grid[1], dd.cell_grid[2])); /* sanity check */ if (n_local_cells < min_num_cells) { ostringstream msg; msg << "number of cells "<< n_local_cells << " is smaller than minimum " << min_num_cells << " (interaction range too large or min_num_cells too large)"; runtimeError(msg); } } /* quit program if unsuccesful */ if(n_local_cells > max_num_cells) { ostringstream msg; msg << "no suitable cell grid found "; runtimeError(msg); } /* now set all dependent variables */ new_cells=1; for(i=0;i<3;i++) { dd.ghost_cell_grid[i] = dd.cell_grid[i]+2; #ifdef LEES_EDWARDS //Hack alert: only the boundary y-layers actually need the extra-thick ghost cell grid, //so some memory (and copies) are wasted in the name of simpler code. 
if( i == 0 ){dd.ghost_cell_grid[i]++;} #endif new_cells *= dd.ghost_cell_grid[i]; dd.cell_size[i] = local_box_l[i]/(double)dd.cell_grid[i]; dd.inv_cell_size[i] = 1.0 / dd.cell_size[i]; } max_skin = dmin(dmin(dd.cell_size[0],dd.cell_size[1]),dd.cell_size[2]) - max_cut; /* allocate cell array and cell pointer arrays */ realloc_cells(new_cells); realloc_cellplist(&local_cells, local_cells.n = n_local_cells); realloc_cellplist(&ghost_cells, ghost_cells.n = new_cells-n_local_cells); CELL_TRACE(fprintf(stderr, "%d: dd_create_cell_grid, n_cells=%d, local_cells.n=%d, ghost_cells.n=%d, dd.ghost_cell_grid=(%d,%d,%d)\n", this_node, n_cells,local_cells.n,ghost_cells.n,dd.ghost_cell_grid[0],dd.ghost_cell_grid[1],dd.ghost_cell_grid[2])); } /** Fill local_cells list and ghost_cells list for use with domain decomposition. \ref cells::cells is assumed to be a 3d grid with size \ref DomainDecomposition::ghost_cell_grid . */ void dd_mark_cells() { int m,n,o,cnt_c=0,cnt_l=0,cnt_g=0; DD_CELLS_LOOP(m,n,o) { #ifdef LEES_EDWARDS /* convenient for LE if a cell knows where it is*/ cells[cnt_c].myIndex[0] = m; cells[cnt_c].myIndex[1] = n; cells[cnt_c].myIndex[2] = o; #endif if(DD_IS_LOCAL_CELL(m,n,o)) local_cells.cell[cnt_l++] = &cells[cnt_c++]; else ghost_cells.cell[cnt_g++] = &cells[cnt_c++]; }
void check_particle_consistency() { Particle *part; Cell *cell; int n, np, dir, c, p; int cell_part_cnt=0, ghost_part_cnt=0, local_part_cnt=0; int cell_err_cnt=0; /* checks: part_id, part_pos, local_particles id */ for (c = 0; c < local_cells.n; c++) { cell = local_cells.cell[c]; cell_part_cnt += cell->n; part = cell->part; np = cell->n; for(n=0; n<cell->n ; n++) { if(part[n].p.identity < 0 || part[n].p.identity > max_seen_particle) { fprintf(stderr,"%d: check_particle_consistency: ERROR: Cell %d Part %d has corrupted id=%d\n", this_node,c,n,cell->part[n].p.identity); errexit(); } for(dir=0;dir<3;dir++) { if(PERIODIC(dir) && (part[n].r.p[dir] < -ROUND_ERROR_PREC || part[n].r.p[dir] - box_l[dir] > ROUND_ERROR_PREC)) { fprintf(stderr,"%d: check_particle_consistency: ERROR: illegal pos[%d]=%f of part %d id=%d in cell %d\n", this_node,dir,part[n].r.p[dir],n,part[n].p.identity,c); errexit(); } } if(local_particles[part[n].p.identity] != &part[n]) { fprintf(stderr,"%d: check_particle_consistency: ERROR: address mismatch for part id %d: local: %p cell: %p in cell %d\n", this_node,part[n].p.identity,local_particles[part[n].p.identity], &part[n],c); errexit(); } } } for (c = 0; c < ghost_cells.n; c++) { cell = ghost_cells.cell[c]; if(cell->n>0) { ghost_part_cnt += cell->n; fprintf(stderr,"%d: check_particle_consistency: WARNING: ghost_cell %d contains %d particles!\n", this_node,c,cell->n); } } CELL_TRACE(fprintf(stderr,"%d: check_particle_consistency: %d particles in cells, %d particles in ghost_cells.\n", this_node,cell_part_cnt, ghost_part_cnt)); /* checks: local particle id */ for(n=0; n< max_seen_particle+1; n++) { if(local_particles[n] != NULL) { local_part_cnt ++; if(local_particles[n]->p.identity != n) { fprintf(stderr,"%d: check_particle_consistency: ERROR: local_particles part %d has corrupted id %d\n", this_node,n,local_particles[n]->p.identity); errexit(); } } } CELL_TRACE(fprintf(stderr,"%d: check_particle_consistency: %d particles in local_particles.\n", 
this_node,local_part_cnt)); /* EXIT on severe errors */ if(cell_err_cnt>0) { fprintf(stderr,"%d: check_particle_consistency: %d ERRORS detected in cell structure!\n",this_node,cell_err_cnt); errexit(); } if(local_part_cnt != cell_part_cnt) { fprintf(stderr,"%d: check_particle_consistency: ERROR: %d parts in cells but %d parts in local_particles\n", this_node,cell_part_cnt,local_part_cnt); for (c = 0; c < local_cells.n; c++) { for(p = 0; p < local_cells.cell[c]->n; p++) fprintf(stderr, "%d: got particle %d in cell %d\n", this_node, local_cells.cell[c]->part[p].p.identity, c); } for(p = 0; p < n_total_particles; p++) if (local_particles[p]) fprintf(stderr, "%d: got particle %d in local_particles\n", this_node, p); if(ghost_part_cnt==0) errexit(); } if(ghost_part_cnt>0) { fprintf(stderr,"%d: check_particle_consistency: ERROR: Found %d illegal ghost particles!\n", this_node,ghost_part_cnt); errexit(); } }
void nsq_balance_particles() { int i, n, surplus, s_node, tmp, lack, l_node, transfer; int pp = cells_get_n_particles(); int *ppnode = malloc(n_nodes*sizeof(int)); /* minimal difference between node shares */ int minshare = n_total_particles/n_nodes; int maxshare = minshare + 1; CELL_TRACE(fprintf(stderr, "%d: nsq_balance_particles: load %d-%d\n", this_node, minshare, maxshare)); MPI_Allgather(&pp, 1, MPI_INT, ppnode, 1, MPI_INT, MPI_COMM_WORLD); for (;;) { /* find node with most excessive particles */ surplus = -1; s_node = -1; for (n = 0; n < n_nodes; n++) { tmp = ppnode[n] - minshare; CELL_TRACE(fprintf(stderr, "%d: nsq_balance_particles: node %d has %d\n", this_node, n, ppnode[n])); if (tmp > surplus) { surplus = tmp; s_node = n; } } CELL_TRACE(fprintf(stderr, "%d: nsq_balance_particles: excess %d on node %d\n", this_node, surplus, s_node)); /* find node with most lacking particles */ lack = -1; l_node = -1; for (n = 0; n < n_nodes; n++) { tmp = maxshare - ppnode[n]; if (tmp > lack) { lack = tmp; l_node = n; } } CELL_TRACE(fprintf(stderr, "%d: nsq_balance_particles: lack %d on node %d\n", this_node, lack, l_node)); /* should not happen: minshare or maxshare wrong or more likely, the algorithm */ if (s_node == -1 || l_node == -1) { fprintf(stderr, "%d: Particle load balancing failed\n", this_node); break; } /* exit if all nodes load is withing min and max share */ if (lack <= 1 && surplus <= 1) break; transfer = lack < surplus ? 
lack : surplus; if (s_node == this_node) { ParticleList send_buf; init_particlelist(&send_buf); realloc_particlelist(&send_buf, send_buf.n = transfer); for (i = 0; i < transfer; i++) { memcpy(&send_buf.part[i], &local->part[--local->n], sizeof(Particle)); } realloc_particlelist(local, local->n); update_local_particles(local); send_particles(&send_buf, l_node); #ifdef ADDITIONAL_CHECKS check_particle_consistency(); #endif } else if (l_node == this_node) { recv_particles(local, s_node); #ifdef ADDITIONAL_CHECKS check_particle_consistency(); #endif } ppnode[s_node] -= transfer; ppnode[l_node] += transfer; } CELL_TRACE(fprintf(stderr, "%d: nsq_balance_particles: done\n", this_node)); free(ppnode); }
/** Set up the N-squared cell structure.
 *
 *  One cell per node is allocated; the cell of this node becomes the
 *  single local cell, all others are ghost cells. The list
 *  me_do_ghosts collects the ghost cells this node is paired with for
 *  the interaction work, and the four ghost communicators are
 *  prepared. Finally all particles found in the cells of old are
 *  appended to the local cell.
 *
 *  \param old list of cells whose particles are taken over.
 */
void nsq_topology_init(CellPList *old)
{
  int node, slot, k, q;
  int max_partners;

  CELL_TRACE(fprintf(stderr, "%d: nsq_topology_init, %d\n", this_node, old->n));

  cell_structure.type             = CELL_STRUCTURE_NSQUARE;
  cell_structure.position_to_node = map_position_node_array;
  cell_structure.position_to_cell = nsq_position_to_cell;

  realloc_cells(n_nodes);

  /* mark cells */
  local = &cells[this_node];
  realloc_cellplist(&local_cells, local_cells.n = 1);
  local_cells.cell[0] = local;

  realloc_cellplist(&ghost_cells, ghost_cells.n = n_nodes - 1);
  slot = 0;
  for (node = 0; node < n_nodes; node++) {
    if (node == this_node)
      continue;
    ghost_cells.cell[slot++] = &cells[node];
  }

  /* distribute force calculation work */
  max_partners = (n_nodes + 3)/2;
  init_cellplist(&me_do_ghosts);
  realloc_cellplist(&me_do_ghosts, max_partners);
  for (node = 0; node < n_nodes; node++) {
    const int diff = node - this_node;
    /* simple load balancing formula. Basically diff % n, where n >= n_nodes, n odd.
       The node itself is also left out, as it is treated differently */
    const int even_above = (diff > 0 && (diff % 2) == 0);
    const int odd_below  = (diff < 0 && ((-diff) % 2) == 1);
    if (even_above || odd_below) {
      CELL_TRACE(fprintf(stderr, "%d: doing interactions with %d\n", this_node, node));
      me_do_ghosts.cell[me_do_ghosts.n++] = &cells[node];
    }
  }

  /* create communicators */
  nsq_prepare_comm(&cell_structure.ghost_cells_comm,         GHOSTTRANS_PARTNUM);
  nsq_prepare_comm(&cell_structure.exchange_ghosts_comm,     GHOSTTRANS_PROPRTS | GHOSTTRANS_POSITION);
  nsq_prepare_comm(&cell_structure.update_ghost_pos_comm,    GHOSTTRANS_POSITION);
  nsq_prepare_comm(&cell_structure.collect_ghost_force_comm, GHOSTTRANS_FORCE);

  /* here we just decide what to transfer where */
  if (n_nodes > 1) {
    for (node = 0; node < n_nodes; node++) {
      /* use the prefetched send buffers. Node 0 transmits first and
         never prefetches. */
      int bcast_type = GHOST_BCST;
      if (this_node != 0 && this_node == node)
        bcast_type = GHOST_BCST | GHOST_PREFETCH;

      cell_structure.ghost_cells_comm.comm[node].type         = bcast_type;
      cell_structure.exchange_ghosts_comm.comm[node].type     = bcast_type;
      cell_structure.update_ghost_pos_comm.comm[node].type    = bcast_type;
      cell_structure.collect_ghost_force_comm.comm[node].type = GHOST_RDCE;
    }
    /* first round: all nodes except the first one prefetch their send data */
    if (this_node != 0) {
      cell_structure.ghost_cells_comm.comm[0].type      |= GHOST_PREFETCH;
      cell_structure.exchange_ghosts_comm.comm[0].type  |= GHOST_PREFETCH;
      cell_structure.update_ghost_pos_comm.comm[0].type |= GHOST_PREFETCH;
    }
  }

  /* copy particles */
  for (k = 0; k < old->n; k++) {
    Particle *src = old->cell[k]->part;
    const int n_src = old->cell[k]->n;
    for (q = 0; q < n_src; q++)
      append_unindexed_particle(local, &src[q]);
  }
  update_local_particles(local);
}
void check_particles() { Particle *part; int *is_here; Cell *cell; int n, np, dir, c, p; int cell_part_cnt=0, local_part_cnt=0; int cell_err_cnt=0; double skin2 = (skin != -1) ? skin/2 : 0; CELL_TRACE(fprintf(stderr, "%d: entering check_particles\n", this_node)); /* check the consistency of particle_nodes */ /* to this aim the array is broadcasted temporarily */ if (this_node != 0) particle_node = malloc((max_seen_particle + 1)*sizeof(int)); is_here = malloc((max_seen_particle + 1)*sizeof(int)); memset(is_here, 0, (max_seen_particle + 1)*sizeof(int)); MPI_Bcast(particle_node, max_seen_particle + 1, MPI_INT, 0, MPI_COMM_WORLD); /* checks: part_id, part_pos, local_particles id */ for (c = 0; c < local_cells.n; c++) { cell = local_cells.cell[c]; cell_part_cnt += cell->n; part = cell->part; np = cell->n; for(n=0; n<cell->n ; n++) { if(part[n].p.identity < 0 || part[n].p.identity > max_seen_particle) { fprintf(stderr,"%d: check_particles: ERROR: Cell %d Part %d has corrupted id=%d\n", this_node,c,n,cell->part[n].p.identity); errexit(); } is_here[part[n].p.identity] = 1; for(dir=0;dir<3;dir++) { if(PERIODIC(dir) && (part[n].r.p[dir] < -skin2 || part[n].r.p[dir] > box_l[dir] + skin2)) { fprintf(stderr,"%d: check_particles: ERROR: illegal pos[%d]=%f of part %d id=%d in cell %d\n", this_node,dir,part[n].r.p[dir],n,part[n].p.identity,c); errexit(); } } if(local_particles[part[n].p.identity] != &part[n]) { fprintf(stderr,"%d: check_particles: ERROR: address mismatch for part id %d: local: %p cell: %p in cell %d\n", this_node,part[n].p.identity,local_particles[part[n].p.identity], &part[n],c); errexit(); } if (particle_node[part[n].p.identity] != this_node) { fprintf(stderr,"%d: check_particles: ERROR: node for particle %d wrong\n", this_node,part[n].p.identity); errexit(); } } } CELL_TRACE(fprintf(stderr,"%d: check_particles: %d particles in local cells.\n", this_node,cell_part_cnt)); /* checks: local particle id */ for(n = 0; n <= max_seen_particle; n++) { 
if(local_particles[n] != NULL) { local_part_cnt ++; if(local_particles[n]->p.identity != n) { fprintf(stderr,"%d: check_particles: ERROR: local_particles part %d has corrupted id %d\n", this_node,n,local_particles[n]->p.identity); errexit(); } } } CELL_TRACE(fprintf(stderr,"%d: check_particles: %d particles in local_particles.\n", this_node,local_part_cnt)); /* EXIT on severe errors */ if(cell_err_cnt>0) { fprintf(stderr,"%d: check_particles: %d ERRORS detected in cell structure!\n",this_node,cell_err_cnt); errexit(); } /* check whether the particles on my node are actually here */ for (p = 0; p <= max_seen_particle; p++) { if (particle_node[p] == this_node) { if (!is_here[p]) { fprintf(stderr,"%d: check_particles: ERROR: particle %d on this node, but not in local cell\n", this_node, p); } } } free(is_here); if (this_node != 0) { free(particle_node); particle_node = NULL; } else { /* check whether the total count of particles is ok */ c = 0; for (p = 0; p <= max_seen_particle; p++) if (particle_node[p] != -1) c++; if (c != n_total_particles) { fprintf(stderr,"%d: check_particles: #particles in particle_node inconsistent\n", this_node); errexit(); } CELL_TRACE(fprintf(stderr,"%d: check_particles: %d particles in particle_node.\n", this_node,c)); } CELL_TRACE(fprintf(stderr, "%d: leaving check_particles\n", this_node)); }
/** Calculate cell grid dimensions, cell sizes and number of cells. * Calculates the cell grid, based on \ref local_box_l and \ref * max_range. If the number of cells is larger than \ref * max_num_cells, it increases max_range until the number of cells is * smaller or equal \ref max_num_cells. It sets: \ref * DomainDecomposition::cell_grid, \ref * DomainDecomposition::ghost_cell_grid, \ref * DomainDecomposition::cell_size, \ref * DomainDecomposition::inv_cell_size, and \ref n_cells. */ void dd_create_cell_grid() { int i,n_local_cells,new_cells,min_ind; double cell_range[3], min_size, scale, volume; CELL_TRACE(fprintf(stderr, "%d: dd_create_cell_grid: max_range %f\n",this_node,max_range)); CELL_TRACE(fprintf(stderr, "%d: dd_create_cell_grid: local_box %f-%f, %f-%f, %f-%f,\n",this_node,my_left[0],my_right[0],my_left[1],my_right[1],my_left[2],my_right[2])); /* initialize */ cell_range[0]=cell_range[1]=cell_range[2] = max_range; if (max_range < ROUND_ERROR_PREC*box_l[0]) { /* this is the initialization case */ n_local_cells = dd.cell_grid[0] = dd.cell_grid[1] = dd.cell_grid[2]=1; } else { /* Calculate initial cell grid */ volume = local_box_l[0]; for(i=1;i<3;i++) volume *= local_box_l[i]; scale = pow(max_num_cells/volume, 1./3.); for(i=0;i<3;i++) { /* this is at least 1 */ dd.cell_grid[i] = (int)ceil(local_box_l[i]*scale); cell_range[i] = local_box_l[i]/dd.cell_grid[i]; if ( cell_range[i] < max_range ) { /* ok, too many cells for this direction, set to minimum */ dd.cell_grid[i] = (int)floor(local_box_l[i]/max_range); if ( dd.cell_grid[i] < 1 ) { char *error_msg = runtime_error(ES_INTEGER_SPACE + 2*ES_DOUBLE_SPACE + 128); ERROR_SPRINTF(error_msg, "{002 interaction range %g in direction %d is larger than the local box size %g} ", max_range, i, local_box_l[i]); dd.cell_grid[i] = 1; } cell_range[i] = local_box_l[i]/dd.cell_grid[i]; } } /* It may be necessary to asymmetrically assign the scaling to the coordinates, which the above approach will not do. 
For a symmetric box, it gives a symmetric result. Here we correct that. */ for (;;) { n_local_cells = dd.cell_grid[0]; for (i = 1; i < 3; i++) n_local_cells *= dd.cell_grid[i]; /* done */ if (n_local_cells <= max_num_cells) break; /* find coordinate with the smallest cell range */ min_ind = 0; min_size = cell_range[0]; for (i = 1; i < 3; i++) if (dd.cell_grid[i] > 1 && cell_range[i] < min_size) { min_ind = i; min_size = cell_range[i]; } CELL_TRACE(fprintf(stderr, "%d: minimal coordinate %d, size %f, grid %d\n", this_node,min_ind, min_size, dd.cell_grid[min_ind])); dd.cell_grid[min_ind]--; cell_range[min_ind] = local_box_l[min_ind]/dd.cell_grid[min_ind]; } CELL_TRACE(fprintf(stderr, "%d: final %d %d %d\n", this_node, dd.cell_grid[0], dd.cell_grid[1], dd.cell_grid[2])); /* sanity check */ if (n_local_cells < min_num_cells) { char *error_msg = runtime_error(ES_INTEGER_SPACE + 2*ES_DOUBLE_SPACE + 128); ERROR_SPRINTF(error_msg, "{001 number of cells %d is smaller than minimum %d (interaction range too large or min_num_cells too large)} ", n_local_cells, min_num_cells); } } /* quit program if unsuccesful */ if(n_local_cells > max_num_cells) { char *error_msg = runtime_error(128); ERROR_SPRINTF(error_msg, "{003 no suitable cell grid found} "); } /* now set all dependent variables */ new_cells=1; for(i=0;i<3;i++) { dd.ghost_cell_grid[i] = dd.cell_grid[i]+2; new_cells *= dd.ghost_cell_grid[i]; dd.cell_size[i] = local_box_l[i]/(double)dd.cell_grid[i]; dd.inv_cell_size[i] = 1.0 / dd.cell_size[i]; } max_skin = dmin(dmin(dd.cell_size[0],dd.cell_size[1]),dd.cell_size[2]) - max_cut; /* allocate cell array and cell pointer arrays */ realloc_cells(new_cells); realloc_cellplist(&local_cells, local_cells.n = n_local_cells); realloc_cellplist(&ghost_cells, ghost_cells.n = new_cells-n_local_cells); CELL_TRACE(fprintf(stderr, "%d: dd_create_cell_grid, n_cells=%d, local_cells.n=%d, ghost_cells.n=%d, dd.ghost_cell_grid=(%d,%d,%d)\n", this_node, 
n_cells,local_cells.n,ghost_cells.n,dd.ghost_cell_grid[0],dd.ghost_cell_grid[1],dd.ghost_cell_grid[2])); }
static void recalc_maximal_cutoff_nonbonded() { int i, j; CELL_TRACE(fprintf(stderr, "%d: recalc_maximal_cutoff_nonbonded\n", this_node)); recalc_global_maximal_nonbonded_cutoff(); CELL_TRACE(fprintf(stderr, "%d: recalc_maximal_cutoff_nonbonded: max_cut_global = %f\n", this_node, max_cut_global)); max_cut_nonbonded = max_cut_global; for (i = 0; i < n_particle_types; i++) for (j = i; j < n_particle_types; j++) { double max_cut_current = 0; IA_parameters *data = get_ia_param(i, j); #ifdef LENNARD_JONES if(max_cut_current < (data->LJ_cut+data->LJ_offset)) max_cut_current = (data->LJ_cut+data->LJ_offset); #endif #ifdef INTER_DPD { double max_cut_tmp = (data->dpd_r_cut > data->dpd_tr_cut) ? data->dpd_r_cut : data->dpd_tr_cut; if (max_cut_current < max_cut_tmp) max_cut_current = max_cut_tmp; } #endif #ifdef LENNARD_JONES_GENERIC if (max_cut_current < (data->LJGEN_cut+data->LJGEN_offset)) max_cut_current = (data->LJGEN_cut+data->LJGEN_offset); #endif #ifdef LJ_ANGLE if (max_cut_current < (data->LJANGLE_cut)) max_cut_current = (data->LJANGLE_cut); #endif #ifdef SMOOTH_STEP if (max_cut_current < data->SmSt_cut) max_cut_current = data->SmSt_cut; #endif #ifdef HERTZIAN if (max_cut_current < data->Hertzian_sig) max_cut_current = data->Hertzian_sig; #endif #ifdef GAUSSIAN if (max_cut_current < data->Gaussian_cut) max_cut_current = data->Gaussian_cut; #endif #ifdef BMHTF_NACL if (max_cut_current < data->BMHTF_cut) max_cut_current = data->BMHTF_cut; #endif #ifdef MORSE if (max_cut_current < data->MORSE_cut) max_cut_current = data->MORSE_cut; #endif #ifdef BUCKINGHAM if (max_cut_current < data->BUCK_cut) max_cut_current = data->BUCK_cut; #endif #ifdef SOFT_SPHERE if (max_cut_current < data->soft_cut) max_cut_current = data->soft_cut; #endif #ifdef HAT if (max_cut_current < data->HAT_r) max_cut_current = data->HAT_r; #endif #ifdef LJCOS { double max_cut_tmp = data->LJCOS_cut + data->LJCOS_offset; if (max_cut_current < max_cut_tmp) max_cut_current = max_cut_tmp; } #endif #ifdef 
LJCOS2 { double max_cut_tmp = data->LJCOS2_cut + data->LJCOS2_offset; if (max_cut_current < max_cut_tmp) max_cut_current = max_cut_tmp; } #endif #ifdef GAY_BERNE if (max_cut_current < data->GB_cut) max_cut_current = data->GB_cut; #endif #ifdef TABULATED if (max_cut_current < data->TAB_maxval) max_cut_current = data->TAB_maxval; #endif #ifdef TUNABLE_SLIP if (max_cut_current < data->TUNABLE_SLIP_r_cut) max_cut_current = data->TUNABLE_SLIP_r_cut; #endif #ifdef CATALYTIC_REACTIONS if (max_cut_current < data->REACTION_range) max_cut_current = data->REACTION_range; #endif #ifdef MOL_CUT if (data->mol_cut_type != 0) { if (max_cut_current < data->mol_cut_cutoff) max_cut_current = data->mol_cut_cutoff; max_cut_current += 2.0* max_cut_bonded; } #endif IA_parameters *data_sym = get_ia_param(j, i); /* no interaction ever touched it, at least no real short-ranged one (that writes to the nonbonded energy) */ data_sym->particlesInteract = data->particlesInteract = (max_cut_current > 0.0); /* take into account any electrostatics */ if (max_cut_global > max_cut_current) max_cut_current = max_cut_global; data_sym->max_cut = data->max_cut = max_cut_current; if (max_cut_current > max_cut_nonbonded) max_cut_nonbonded = max_cut_current; CELL_TRACE(fprintf(stderr, "%d: pair %d,%d max_cut total %f\n", this_node, i, j, data->max_cut)); } }
static void recalc_maximal_cutoff_nonbonded() { int i, j; CELL_TRACE(fprintf(stderr, "%d: recalc_maximal_cutoff_nonbonded\n", this_node)); recalc_global_maximal_nonbonded_and_long_range_cutoff(); CELL_TRACE(fprintf(stderr, "%d: recalc_maximal_cutoff_nonbonded: max_cut_global = %f\n", this_node, max_cut_global)); max_cut_nonbonded = max_cut_global; for (i = 0; i < n_particle_types; i++) for (j = i; j < n_particle_types; j++) { double max_cut_current = 0; IA_parameters *data = get_ia_param(i, j); #ifdef LENNARD_JONES if(max_cut_current < (data->LJ_cut+data->LJ_offset)) max_cut_current = (data->LJ_cut+data->LJ_offset); #endif #ifdef INTER_DPD { double max_cut_tmp = (data->dpd_r_cut > data->dpd_tr_cut) ? data->dpd_r_cut : data->dpd_tr_cut; if (max_cut_current < max_cut_tmp) max_cut_current = max_cut_tmp; } #endif #ifdef LENNARD_JONES_GENERIC if (max_cut_current < (data->LJGEN_cut+data->LJGEN_offset)) max_cut_current = (data->LJGEN_cut+data->LJGEN_offset); #endif #ifdef LJ_ANGLE if (max_cut_current < (data->LJANGLE_cut)) max_cut_current = (data->LJANGLE_cut); #endif #ifdef SMOOTH_STEP if (max_cut_current < data->SmSt_cut) max_cut_current = data->SmSt_cut; #endif #ifdef HERTZIAN if (max_cut_current < data->Hertzian_sig) max_cut_current = data->Hertzian_sig; #endif #ifdef GAUSSIAN if (max_cut_current < data->Gaussian_cut) max_cut_current = data->Gaussian_cut; #endif #ifdef BMHTF_NACL if (max_cut_current < data->BMHTF_cut) max_cut_current = data->BMHTF_cut; #endif #ifdef MORSE if (max_cut_current < data->MORSE_cut) max_cut_current = data->MORSE_cut; #endif #ifdef BUCKINGHAM if (max_cut_current < data->BUCK_cut) max_cut_current = data->BUCK_cut; #endif #ifdef SOFT_SPHERE if (max_cut_current < data->soft_cut) max_cut_current = data->soft_cut; #endif #ifdef AFFINITY if (max_cut_current < data->affinity_cut) max_cut_current = data->affinity_cut; #endif #ifdef MEMBRANE_COLLISION if (max_cut_current < data->membrane_cut) max_cut_current = data->membrane_cut; #endif #ifdef HAT if 
(max_cut_current < data->HAT_r) max_cut_current = data->HAT_r; #endif #ifdef LJCOS { double max_cut_tmp = data->LJCOS_cut + data->LJCOS_offset; if (max_cut_current < max_cut_tmp) max_cut_current = max_cut_tmp; } #endif #ifdef LJCOS2 { double max_cut_tmp = data->LJCOS2_cut + data->LJCOS2_offset; if (max_cut_current < max_cut_tmp) max_cut_current = max_cut_tmp; } #endif #ifdef COS2 { double max_cut_tmp = data->COS2_cut + data->COS2_offset; if (max_cut_current < max_cut_tmp) max_cut_current = max_cut_tmp; } #endif #ifdef GAY_BERNE if (max_cut_current < data->GB_cut) max_cut_current = data->GB_cut; #endif #ifdef TABULATED if (max_cut_current < data->TAB_maxval) max_cut_current = data->TAB_maxval; #endif #ifdef TUNABLE_SLIP if (max_cut_current < data->TUNABLE_SLIP_r_cut) max_cut_current = data->TUNABLE_SLIP_r_cut; #endif #ifdef CATALYTIC_REACTIONS if (max_cut_current < data->REACTION_range) max_cut_current = data->REACTION_range; #endif #ifdef MOL_CUT if (data->mol_cut_type != 0) { if (max_cut_current < data->mol_cut_cutoff) max_cut_current = data->mol_cut_cutoff; max_cut_current += 2.0* max_cut_bonded; } #endif IA_parameters *data_sym = get_ia_param(j, i); /* no interaction ever touched it, at least no real short-ranged one (that writes to the nonbonded energy) */ data_sym->particlesInteract = data->particlesInteract = (max_cut_current > 0.0); /* Bigger cutoffs are chosen due to dpd and the like. Coulomb and dipolar interactions are handled in the Verlet lists separately. */ max_cut_current =std::max(max_cut_current,max_cut_global_without_coulomb_and_dipolar); data_sym->max_cut = data->max_cut = max_cut_current; if (max_cut_current > max_cut_nonbonded) max_cut_nonbonded = max_cut_current; CELL_TRACE(fprintf(stderr, "%d: pair %d,%d max_cut total %f\n", this_node, i, j, data->max_cut)); } }
/** Create communicators for cell structure domain decomposition.
    (see \ref GhostCommunicator)

    The number of communication steps is counted first and handed to
    prepare_comm(); afterwards one entry of comm->comm[] is filled per
    step. For each direction (x, y, z) and each side (lr = 0, 1):
    - if there is only one node in that direction, a single local copy
      (GHOST_LOCL) is set up whose buffer holds the send cells followed
      by the receive cells;
    - otherwise a GHOST_SEND and a GHOST_RECV entry are set up, ordered
      by the parity of the node position so that a node sends in one
      step and receives in the other.

    The part_lists buffers are obtained with malloc() here; nothing in
    this function releases them.

    @param comm        communicator to set up
    @param data_parts  passed on to prepare_comm(); if it contains
                       GHOSTTRANS_POSSHFTD, a position shift is stored
                       for communications that cross a boundary */
void dd_prepare_comm(GhostCommunicator *comm, int data_parts)
{
  int n_comms = 0;              /* number of communication steps */
  int slot = 0;                 /* entry of comm->comm[] being filled */
  int lo[3], hi[3];             /* inclusive cell index range to collect */
  int widened[3] = {0, 0, 0};   /* 1 once a direction has been processed */

  /* calculate number of communications */
  for (int dir = 0; dir < 3; dir++) {
    for (int lr = 0; lr < 2; lr++) {
#ifdef PARTIAL_PERIODIC
      /* No communication for border of non periodic direction */
      if (!(PERIODIC(dir) || (boundary[2*dir+lr] == 0)))
        continue;
#endif
      n_comms += (node_grid[dir] == 1) ? 1 : 2;
    }
  }

  /* prepare communicator */
  CELL_TRACE(fprintf(stderr,"%d Create Communicator: prep_comm data_parts %d num %d\n",this_node,data_parts,n_comms));
  prepare_comm(comm, data_parts, n_comms);

  /* number of cells to communicate in a direction */
  const int n_comm_cells[3] = {
    dd.cell_grid[1] * dd.cell_grid[2],
    dd.cell_grid[2] * dd.ghost_cell_grid[0],
    dd.ghost_cell_grid[0] * dd.ghost_cell_grid[1]
  };

  /* direction loop: x, y, z */
  for (int dir = 0; dir < 3; dir++) {
    const int d1 = (dir + 1) % 3;
    const int d2 = (dir + 2) % 3;

    /* Range in the two transverse directions: inner cells only, widened
       by one layer on each side for directions already processed. */
    lo[d1] = 1 - widened[d1];
    lo[d2] = 1 - widened[d2];
    hi[d1] = dd.cell_grid[d1] + widened[d1];
    hi[d2] = dd.cell_grid[d2] + widened[d2];

    /* lr loop: left right */
    /* here we could in principle build in a one sided ghost
       communication, simply by taking the lr loop only over one
       value */
    for (int lr = 0; lr < 2; lr++) {
      const int face     = 2*dir + lr;        /* side data is sent towards */
      const int opposite = 2*dir + (1 - lr);  /* side data is received from */
      /* layer that is sent: first inner layer for lr = 0, last for lr = 1 */
      const int send_layer = 1 + lr*(dd.cell_grid[dir] - 1);
      /* layer that is filled: cell_grid + 1 for lr = 0, 0 for lr = 1 */
      const int recv_layer = (1 - lr)*(dd.cell_grid[dir] + 1);

      if (node_grid[dir] == 1) {
        /* just copy cells on a single node */
        int do_copy = 1;
#ifdef PARTIAL_PERIODIC
        do_copy = PERIODIC(dir) || (boundary[face] == 0);
#endif
        if (do_copy) {
          /* Buffer has to contain Send and Recv cells -> factor 2 */
          const int n_lists = 2*n_comm_cells[dir];

          comm->comm[slot].type = GHOST_LOCL;
          comm->comm[slot].node = this_node;
          comm->comm[slot].part_lists = malloc(n_lists*sizeof(ParticleList *));
          comm->comm[slot].n_part_lists = n_lists;

          /* prepare folding of ghost positions */
          if ((data_parts & GHOSTTRANS_POSSHFTD) && boundary[face] != 0)
            comm->comm[slot].shift[dir] = boundary[face]*box_l[dir];

          /* fill send comm cells */
          lo[dir] = hi[dir] = send_layer;
          dd_fill_comm_cell_lists(comm->comm[slot].part_lists, lo, hi);
          CELL_TRACE(fprintf(stderr,"%d: prep_comm %d copy to grid (%d,%d,%d)-(%d,%d,%d)\n",this_node,slot,
                             lo[0],lo[1],lo[2],hi[0],hi[1],hi[2]));

          /* fill recv comm cells, placed after the send cells */
          lo[dir] = hi[dir] = recv_layer;
          dd_fill_comm_cell_lists(comm->comm[slot].part_lists + n_comm_cells[dir], lo, hi);
          CELL_TRACE(fprintf(stderr,"%d: prep_comm %d copy from grid (%d,%d,%d)-(%d,%d,%d)\n",this_node,slot,lo[0],lo[1],lo[2],hi[0],hi[1],hi[2]));

          slot++;
        }
      }
      else {
        /* send/recv loop: the parity of node_pos[dir] + step decides
           whether the send or the receive is set up in this step */
        for (int step = 0; step < 2; step++) {
          int may_send = 1;
#ifdef PARTIAL_PERIODIC
          may_send = PERIODIC(dir) || (boundary[face] == 0);
#endif
          if (may_send && (node_pos[dir] + step) % 2 == 0) {
            comm->comm[slot].type = GHOST_SEND;
            comm->comm[slot].node = node_neighbors[face];
            comm->comm[slot].part_lists = malloc(n_comm_cells[dir]*sizeof(ParticleList *));
            comm->comm[slot].n_part_lists = n_comm_cells[dir];

            /* prepare folding of ghost positions */
            if ((data_parts & GHOSTTRANS_POSSHFTD) && boundary[face] != 0)
              comm->comm[slot].shift[dir] = boundary[face]*box_l[dir];

            lo[dir] = hi[dir] = send_layer;
            dd_fill_comm_cell_lists(comm->comm[slot].part_lists, lo, hi);
            CELL_TRACE(fprintf(stderr,"%d: prep_comm %d send to node %d grid (%d,%d,%d)-(%d,%d,%d)\n",this_node,slot,
                               comm->comm[slot].node,lo[0],lo[1],lo[2],hi[0],hi[1],hi[2]));

            slot++;
          }

          int may_recv = 1;
#ifdef PARTIAL_PERIODIC
          may_recv = PERIODIC(dir) || (boundary[opposite] == 0);
#endif
          if (may_recv && (node_pos[dir] + (1 - step)) % 2 == 0) {
            comm->comm[slot].type = GHOST_RECV;
            comm->comm[slot].node = node_neighbors[opposite];
            comm->comm[slot].part_lists = malloc(n_comm_cells[dir]*sizeof(ParticleList *));
            comm->comm[slot].n_part_lists = n_comm_cells[dir];

            lo[dir] = hi[dir] = recv_layer;
            dd_fill_comm_cell_lists(comm->comm[slot].part_lists, lo, hi);
            CELL_TRACE(fprintf(stderr,"%d: prep_comm %d recv from node %d grid (%d,%d,%d)-(%d,%d,%d)\n",this_node,slot,
                               comm->comm[slot].node,lo[0],lo[1],lo[2],hi[0],hi[1],hi[2]));

            slot++;
          }
        }
      }

      /* from now on the remaining directions also cover the outer
         layers of this direction */
      widened[dir] = 1;
    }
  }
}