/*
 * unpack_forces
 *
 * Adds the per-atom data stored in message buffer b onto the atoms of
 * cell (j,k) of cell_array.  Values are consumed starting at the cursor
 * b->n, which is advanced past everything that was read.
 *
 * Layout per atom (must match pack_forces):
 *   KRAFT x, KRAFT y, POTENG,
 *   PRESSTENS xx, yy, xy      (only with STRESS_TENS)
 *   NBANZ, cast to shortint   (only with NNBR)
 *
 * The buffer size is verified before the first element is read, so an
 * undersized buffer is reported without accessing memory past b->n_max.
 */
void unpack_forces( msgbuf *b, int j, int k )
{
  int  i;
  int  per_atom = 3;   /* KRAFT x, KRAFT y, POTENG */
  cell *to;

#ifdef STRESS_TENS
  per_atom += 3;       /* PRESSTENS xx, yy, xy */
#endif
#ifdef NNBR
  per_atom += 1;       /* NBANZ */
#endif

  to = PTR_2D_V(cell_array, j, k, cell_dim);

  /* refuse to run past the end of the buffer */
  if (b->n_max < b->n + to->n * per_atom) {
    error("Buffer overflow in unpack_forces.");
    return;            /* only reached if error() returns */
  }

  for (i=0; i<to->n; ++i) {
    KRAFT (to,i,X) += b->data[ b->n++ ];
    KRAFT (to,i,Y) += b->data[ b->n++ ];
    POTENG(to,i)   += b->data[ b->n++ ];
#ifdef STRESS_TENS
    PRESSTENS(to,i,xx) += b->data[ b->n++ ];
    PRESSTENS(to,i,yy) += b->data[ b->n++ ];
    PRESSTENS(to,i,xy) += b->data[ b->n++ ];
#endif
#ifdef NNBR
    NBANZ(to,i) += (shortint) b->data[ b->n++ ];
#endif
  }
}
/*
 * pack_forces
 *
 * Appends the per-atom data of cell (j,k) of cell_array to message
 * buffer b, starting at the cursor b->n, which is advanced past
 * everything that was written.
 *
 * Layout per atom (must match unpack_forces):
 *   KRAFT x, KRAFT y, POTENG,
 *   PRESSTENS xx, yy, xy      (only with STRESS_TENS)
 *   NBANZ, cast to real       (only with NNBR)
 *
 * The remaining capacity is verified before the first element is
 * written, so an undersized buffer is reported without writing past
 * b->n_max.
 */
void pack_forces( msgbuf *b, int j, int k )
{
  int  i;
  int  per_atom = 3;   /* KRAFT x, KRAFT y, POTENG */
  cell *from;

#ifdef STRESS_TENS
  per_atom += 3;       /* PRESSTENS xx, yy, xy */
#endif
#ifdef NNBR
  per_atom += 1;       /* NBANZ */
#endif

  from = PTR_2D_V(cell_array, j, k, cell_dim);

  /* refuse to write past the end of the buffer */
  if (b->n_max < b->n + from->n * per_atom) {
    error("Buffer overflow in pack_forces.");
    return;            /* only reached if error() returns */
  }

  for (i=0; i<from->n; ++i) {
    b->data[ b->n++ ] = KRAFT(from,i,X);
    b->data[ b->n++ ] = KRAFT(from,i,Y);
    b->data[ b->n++ ] = POTENG(from,i);
#ifdef STRESS_TENS
    b->data[ b->n++ ] = PRESSTENS(from,i,xx);
    b->data[ b->n++ ] = PRESSTENS(from,i,yy);
    b->data[ b->n++ ] = PRESSTENS(from,i,xy);
#endif
#ifdef NNBR
    b->data[ b->n++ ] = (real) NBANZ(from,i);
#endif
  }
}
void add_presstensors(void) { int k; for (k=0; k<NCELLS; k++) { int i; cell* p; p = CELLPTR(k); for (i=0; i<p->n; i++) { AVPRESSTENS(p,i,xx) += PRESSTENS(p,i,xx); AVPRESSTENS(p,i,yy) += PRESSTENS(p,i,yy); AVPRESSTENS(p,i,xy) += PRESSTENS(p,i,xy); #ifndef TWOD AVPRESSTENS(p,i,zz) += PRESSTENS(p,i,zz); AVPRESSTENS(p,i,zx) += PRESSTENS(p,i,zx); AVPRESSTENS(p,i,yz) += PRESSTENS(p,i,yz); #endif } } }
void clear_forces(void) { int k,i; tot_pot_energy = 0.0; virial = vir_xx = vir_yy = vir_xy = vir_zz = vir_yz = vir_zx = 0.0; for (k=0; k<nallcells; k++) { cell *p = cell_array + k; for (i=0; i<p->n; i++) { KRAFT(p,i,X) = 0.0; KRAFT(p,i,Y) = 0.0; KRAFT(p,i,Z) = 0.0; #if defined (STRESS_TENS) PRESSTENS(p,i,xx) = 0.0; PRESSTENS(p,i,yy) = 0.0; PRESSTENS(p,i,xy) = 0.0; PRESSTENS(p,i,zz) = 0.0; PRESSTENS(p,i,yz) = 0.0; PRESSTENS(p,i,zx) = 0.0; #endif POTENG(p,i) = 0.0; } } }
/*
 * add_forces
 *
 * Adds the per-atom data of cell (j,k) onto the atoms of cell (l,m),
 * atom by atom with matching index.  The loop runs over the atom count
 * of the destination cell.
 *
 * Added quantities: KRAFT x/y and POTENG; with STRESS_TENS also
 * PRESSTENS xx/yy/xy; with NNBR also NBANZ.
 */
void add_forces( int j, int k, int l, int m )
{
  cell *src = PTR_2D_V(cell_array, j, k, cell_dim);
  cell *dst = PTR_2D_V(cell_array, l, m, cell_dim);
  int  a;

  for (a = 0; a < dst->n; ++a) {
    KRAFT (dst,a,X) += KRAFT (src,a,X);
    KRAFT (dst,a,Y) += KRAFT (src,a,Y);
    POTENG(dst,a)   += POTENG(src,a);
#ifdef STRESS_TENS
    PRESSTENS(dst,a,xx) += PRESSTENS(src,a,xx);
    PRESSTENS(dst,a,yy) += PRESSTENS(src,a,yy);
    PRESSTENS(dst,a,xy) += PRESSTENS(src,a,xy);
#endif
#ifdef NNBR
    NBANZ(dst,a) += NBANZ(src,a);
#endif
  }
}
void calc_extpot(void) { int k, i, n; int isinx,isiny,isinz; real tmpvec1[4], tmpvec2[4]; vektor d,addforce,totaddforce; real dd,cc; real dn,ddn,ee; vektor force; real tmp_virial; #ifdef P_AXIAL vektor tmp_vir_vect; #endif real pot_zwi, pot_grad; int col, is_short=0; for (k=0; k<ep_n; k++) { ep_fext[k] = 0.0; ep_xmax[k] = 0.0; ep_ymax[k] = 0.0; ep_atomsincontact[k]=0; ep_xmin[k] = 1.e8; ep_ymin[k] = 1.e8; } if(ep_key == 0) { /* default: original harmonic potential */ for (k=0; k<NCELLS; ++k) { cell *p = CELLPTR(k); for (i=0; i<p->n; ++i) { for (n=0; n<ep_n; ++n) { isinx= ep_dir[n].x; isiny= ep_dir[n].y; isinz= ep_dir[n].z; d.x = ep_pos[n].x - ORT(p,i,X); d.y = ep_pos[n].y - ORT(p,i,Y); d.z = ep_pos[n].z - ORT(p,i,Z); dn = SPROD(d,ep_dir[n]); /* spherical indentor*/ if (n<ep_nind) { if (dn > -ep_rcut) { real d2 = SPROD(d,d); real d1 = SQRT(d2); dd = ep_rcut - d1; if (dd > 0.0) { real f = ep_a * dd * dd / d1; /* force on atoms and indentor */ KRAFT(p,i,X) -= f * d.x; KRAFT(p,i,Y) -= f * d.y; KRAFT(p,i,Z) -= f * d.z; ep_fext[n] += f * ABS(dn); /* normal force on indentor */ ep_atomsincontact[n]++; /* for determination of contact area */ if(isinz) { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,X) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Y) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,X) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Y) ); } else if(isiny) { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,X) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Z) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,X) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Z) ); } else { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,Y) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Z) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,Y) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Z) ); } } } } /* potential wall */ else { if (dn*dn < ep_rcut*ep_rcut) { real d1 = (dn>0) ? 
dn : -1*dn ; dd = ep_rcut - d1; if (dd > 0.0) { ep_atomsincontact[n]++; real f = ep_a * dd * dd / d1; /* force on atoms and indentor */ KRAFT(p,i,X) += f * ep_dir[n].x; KRAFT(p,i,Y) += f * ep_dir[n].y; KRAFT(p,i,Z) += f * ep_dir[n].z; ep_fext[n] += f; /* magnitude of force on wall */ } } } } } } } else if(ep_key == 1) /* Ju Li's spherical indenter, see PRB 67, 104105 */ { totaddforce.x=0.0; totaddforce.y=0.0; totaddforce.z=0.0; for (k=0; k<NCELLS; ++k) { cell *p = CELLPTR(k); for (i=0; i<p->n; ++i) { for (n=0; n<ep_n; ++n) { isinx= ep_dir[n].x; isiny= ep_dir[n].y; isinz= ep_dir[n].z; if (n<ep_nind) { d.x = ORT(p,i,X)-ep_pos[n].x; d.y = ORT(p,i,Y)-ep_pos[n].y; d.z = ORT(p,i,Z)-ep_pos[n].z; dn = SPROD(d,ep_dir[n]); dd = SPROD(d,d); if ( dd < ep_rcut*ep_rcut) { ep_atomsincontact[n]++; /* for the determination of the contact area */ if(isinz) { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,X) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Y) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,X) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Y) ); } else if(isiny) { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,X) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Z) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,X) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Z) ); } else { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,Y) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Z) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,Y) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Z) ); } if(have_extpotfile == 1){ PAIR_INT3(pot_zwi, pot_grad, ext_pot, n, ep_nind, dd, is_short); tot_pot_energy += pot_zwi; force.x = -1.0* pot_grad * d.x; force.y = -1.0* pot_grad * d.y; force.z = -1.0* pot_grad * d.z; KRAFT(p,i,X) += force.x; KRAFT(p,i,Y) += force.y; KRAFT(p,i,Z) += force.z; totaddforce.x += force.x; totaddforce.y += force.y; totaddforce.z += force.z; ep_fext[n] += -pot_grad * ABS(dn); /* normal force on indentor */ #ifdef P_AXIAL tmp_vir_vect.x -= d.x * force.x; tmp_vir_vect.y -= d.y * force.y; #ifndef TWOD tmp_vir_vect.z -= d.z * force.z; #endif #else tmp_virial -= dd * 
pot_grad; #endif #ifdef STRESS_TENS if (do_press_calc) { PRESSTENS(p,i,xx) -= d.x * force.x; PRESSTENS(p,i,yy) -= d.y * force.y; PRESSTENS(p,i,xy) -= d.x * force.y; #ifndef TWOD PRESSTENS(p,i,zz) -= d.z * force.z; PRESSTENS(p,i,yz) -= d.y * force.z; PRESSTENS(p,i,zx) -= d.z * force.x; #endif } #endif } /* old version of extpot, kept for downwards compatibility */ else{ ddn= sqrt(dd); cc = (ep_rcut - ddn)/ep_a; if (cc > UPPER_EXP) cc = UPPER_EXP; if (cc < LOWER_EXP) cc = LOWER_EXP; ee = exp(cc - 1.0/cc); tot_pot_energy += ee; POTENG(p,i) += ee; ee = ee / ep_a / ddn * (1.0 + 1.0 /(cc*cc)); KRAFT(p,i,X) += ee * d.x; KRAFT(p,i,Y) += ee * d.y; KRAFT(p,i,Z) += ee * d.z; totaddforce.x += ee * d.x; totaddforce.y += ee * d.y; totaddforce.z += ee * d.z; ep_fext[n] += ee * ABS(dn); /* normal force on indentor */ } } } } } } #ifdef MPI tmpvec1[0] = totaddforce.x ; tmpvec1[1] = totaddforce.y ; tmpvec1[2] = totaddforce.z ; // printf("before totaddforcereduce allreduce\n");fflush(stdout); MPI_Allreduce( tmpvec1, tmpvec2, 4, REAL, MPI_SUM, cpugrid); // printf("after totaddforce allreduce\n");fflush(stdout); totaddforce.x = tmpvec2[0]; totaddforce.y = tmpvec2[1]; totaddforce.z = tmpvec2[2]; #endif /* no need for a wall as the total additional impuls is substracted */ totaddforce.x *= 1.0/nactive_vect[0]; totaddforce.y *= 1.0/nactive_vect[1]; totaddforce.z *= 1.0/nactive_vect[2]; for (k=0; k<NCELLS; ++k) { cell *p = CELLPTR(k); for (i=0; i<p->n; ++i) { KRAFT(p,i,X) -= totaddforce.x; KRAFT(p,i,Y) -= totaddforce.y; KRAFT(p,i,Z) -= totaddforce.z; } } } else if(ep_key == 2) /* Ju Li's spherical indenter made flat, see PRB 67, 104105 with subtraction of total additional impulse works only with indentation directions parallel to box vectors*/ { // vektor d,addforce,totaddforce; //real dd,cc; //real dn,ddn,ee; totaddforce.x=0.0; totaddforce.y=0.0; totaddforce.z=0.0; for (k=0; k<NCELLS; ++k) { cell *p = CELLPTR(k); for (i=0; i<p->n; ++i) { for (n=0; n<ep_n; ++n) { isinx= ep_dir[n].x; isiny= 
ep_dir[n].y; isinz= ep_dir[n].z; // vektor d; // real dn; d.x = (ep_dir[n].x==0) ? 0 : (ORT(p,i,X)-ep_pos[n].x); d.y = (ep_dir[n].y==0) ? 0 : (ORT(p,i,Y)-ep_pos[n].y); d.z = (ep_dir[n].z==0) ? 0 : (ORT(p,i,Z)-ep_pos[n].z); dn = SPROD(d,ep_dir[n]); dd = SPROD(d,d); if ( dd < ep_rcut*ep_rcut) { /* for the determination of the contact area */ ep_atomsincontact[n]++; if(isinz) { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,X) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Y) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,X) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Y) ); } else if(isiny) { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,X) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Z) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,X) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Z) ); } else { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,Y) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Z) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,Y) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Z) ); } if(have_extpotfile == 1){ PAIR_INT3(pot_zwi, pot_grad, ext_pot, n, ep_nind, dd, is_short); tot_pot_energy += pot_zwi; force.x = -1.0* pot_grad * d.x; force.y = -1.0* pot_grad * d.y; force.z = -1.0* pot_grad * d.z; KRAFT(p,i,X) += force.x; KRAFT(p,i,Y) += force.y; KRAFT(p,i,Z) += force.z; totaddforce.x += force.x; totaddforce.y += force.y; totaddforce.z += force.z; ep_fext[n] += -pot_grad * ABS(dn); /* normal force on indentor */ #ifdef P_AXIAL tmp_vir_vect.x -= d.x * force.x; tmp_vir_vect.y -= d.y * force.y; #ifndef TWOD tmp_vir_vect.z -= d.z * force.z; #endif #else tmp_virial -= dd * pot_grad; #endif #ifdef STRESS_TENS if (do_press_calc) { PRESSTENS(p,i,xx) -= d.x * force.x; PRESSTENS(p,i,yy) -= d.y * force.y; PRESSTENS(p,i,xy) -= d.x * force.y; #ifndef TWOD PRESSTENS(p,i,zz) -= d.z * force.z; PRESSTENS(p,i,yz) -= d.y * force.z; PRESSTENS(p,i,zx) -= d.z * force.x; #endif } #endif } /* old version of extpot, kept for downwards compatibility */ else{ ddn= sqrt(dd); cc = (ep_rcut - ddn)/ep_a; if (cc > UPPER_EXP) cc = UPPER_EXP; if (cc < LOWER_EXP) cc = 
LOWER_EXP; ee = exp(cc - 1.0/cc); tot_pot_energy += ee; POTENG(p,i) += ee; ee = ee / ep_a / ddn * (1.0 + 1.0 /(cc*cc)); KRAFT(p,i,X) += ee * d.x; KRAFT(p,i,Y) += ee * d.y; KRAFT(p,i,Z) += ee * d.z; totaddforce.x += ee * d.x; totaddforce.y += ee * d.y; totaddforce.z += ee * d.z; ep_fext[n] += ee * ABS(dn); /* normal force on indentor */ } } } } } #ifdef MPI tmpvec1[0] = totaddforce.x ; tmpvec1[1] = totaddforce.y ; tmpvec1[2] = totaddforce.z ; // printf("before totaddforcereduce allreduce\n");fflush(stdout); MPI_Allreduce( tmpvec1, tmpvec2, 4, REAL, MPI_SUM, cpugrid); // printf("after totaddforce allreduce\n");fflush(stdout); totaddforce.x = tmpvec2[0]; totaddforce.y = tmpvec2[1]; totaddforce.z = tmpvec2[2]; #endif /* no need for a wall as the total additional impuls is substracted */ totaddforce.x *= 1.0/nactive_vect[0]; totaddforce.y *= 1.0/nactive_vect[1]; totaddforce.z *= 1.0/nactive_vect[2]; for (k=0; k<NCELLS; ++k) { cell *p = CELLPTR(k); for (i=0; i<p->n; ++i) { KRAFT(p,i,X) -= totaddforce.x; KRAFT(p,i,Y) -= totaddforce.y; KRAFT(p,i,Z) -= totaddforce.z; } } } else if(ep_key == 3) /* Ju Li's spherical indenter made flat, see PRB 67, 104105 without subtraction of the total additional impulse works only with indentation directions parallel to box vectors*/ { // vektor d,addforce,totaddforce; // real dd,cc; // real dn,ddn,ee; totaddforce.x=0.0; totaddforce.y=0.0; totaddforce.z=0.0; for (k=0; k<NCELLS; ++k) { cell *p = CELLPTR(k); for (i=0; i<p->n; ++i) { for (n=0; n<ep_n; ++n) { isinx= ep_dir[n].x; isiny= ep_dir[n].y; isinz= ep_dir[n].z; vektor d; real dn; d.x = (ep_dir[n].x==0) ? 0 : (ORT(p,i,X)-ep_pos[n].x) ; d.y = (ep_dir[n].y==0) ? 0 : (ORT(p,i,Y)-ep_pos[n].y); d.z = (ep_dir[n].z==0) ? 
0 : (ORT(p,i,Z)-ep_pos[n].z); dn = SPROD(d,ep_dir[n]); dd = SPROD(d,d); if ( dd < ep_rcut*ep_rcut) { /* for the determination of the contact area */ ep_atomsincontact[n]++; if(isinz) { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,X) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Y) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,X) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Y) ); } else if(isiny) { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,X) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Z) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,X) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Z) ); } else { ep_xmax[n] = MAX(ep_xmax[n], ORT(p,i,Y) ); ep_ymax[n] = MAX(ep_ymax[n], ORT(p,i,Z) ); ep_xmin[n] = MIN(ep_xmin[n], ORT(p,i,Y) ); ep_ymin[n] = MIN(ep_ymin[n], ORT(p,i,Z) ); } if(have_extpotfile == 1){ PAIR_INT3(pot_zwi, pot_grad, ext_pot, n, ep_nind, dd, is_short); tot_pot_energy += pot_zwi; force.x = - pot_grad * d.x; force.y = - pot_grad * d.y; force.z = - pot_grad * d.z; KRAFT(p,i,X) += force.x; KRAFT(p,i,Y) += force.y; KRAFT(p,i,Z) += force.z; totaddforce.x += force.x; totaddforce.y += force.y; totaddforce.z += force.z; ep_fext[n] += -pot_grad * ABS(dn); /* normal force on indentor */ #ifdef P_AXIAL tmp_vir_vect.x -= d.x * force.x; tmp_vir_vect.y -= d.y * force.y; #ifndef TWOD tmp_vir_vect.z -= d.z * force.z; #endif #else tmp_virial -= dd * pot_grad; #endif #ifdef STRESS_TENS if (do_press_calc) { PRESSTENS(p,i,xx) -= d.x * force.x; PRESSTENS(p,i,yy) -= d.y * force.y; PRESSTENS(p,i,xy) -= d.x * force.y; #ifndef TWOD PRESSTENS(p,i,zz) -= d.z * force.z; PRESSTENS(p,i,yz) -= d.y * force.z; PRESSTENS(p,i,zx) -= d.z * force.x; #endif } #endif } /* old version of extpot, kept for downwards compatibility */ else{ ddn= sqrt(dd); cc = (ep_rcut - ddn)/ep_a; if (cc > UPPER_EXP) cc = UPPER_EXP; if (cc < LOWER_EXP) cc = LOWER_EXP; ee = exp(cc - 1.0/cc); tot_pot_energy += ee; POTENG(p,i) += ee; ee = ee / ep_a / ddn * (1.0 + 1.0 /(cc*cc)); KRAFT(p,i,X) += ee * d.x; KRAFT(p,i,Y) += ee * d.y; KRAFT(p,i,Z) += ee * d.z; ep_fext[n] += 
ee * ABS(dn); /* normal force on indentor */ } } } } } } else { error("Error: external potential ep_key not defined.\n"); } #ifdef P_AXIAL vir_xx += tmp_vir_vect.x; vir_yy += tmp_vir_vect.y; virial += tmp_vir_vect.x; virial += tmp_vir_vect.y; #ifndef TWOD vir_zz += tmp_vir_vect.z; virial += tmp_vir_vect.z; #endif #else virial += tmp_virial; #endif }
void calc_forces(int steps) { int n, k; real tmpvec1[8], tmpvec2[8] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; /* fill the buffer cells */ if ((steps == steps_min) || (0 == steps % BUFSTEP)) setup_buffers(); send_cells(copy_cell,pack_cell,unpack_cell); /* clear global accumulation variables */ tot_pot_energy = 0.0; virial = 0.0; vir_xx = 0.0; vir_yy = 0.0; vir_zz = 0.0; vir_yz = 0.0; vir_zx = 0.0; vir_xy = 0.0; nfc++; /* clear per atom accumulation variables */ #ifdef _OPENMP #pragma omp parallel for #endif for (k=0; k<nallcells; ++k) { int i; cell *p; p = cell_array + k; for (i=0; i<p->n; ++i) { KRAFT(p,i,X) = 0.0; KRAFT(p,i,Y) = 0.0; KRAFT(p,i,Z) = 0.0; #ifdef UNIAX DREH_MOMENT(p,i,X) = 0.0; DREH_MOMENT(p,i,Y) = 0.0; DREH_MOMENT(p,i,Z) = 0.0; #endif #if defined(STRESS_TENS) PRESSTENS(p,i,xx) = 0.0; PRESSTENS(p,i,yy) = 0.0; PRESSTENS(p,i,zz) = 0.0; PRESSTENS(p,i,yz) = 0.0; PRESSTENS(p,i,zx) = 0.0; PRESSTENS(p,i,xy) = 0.0; #endif #ifndef MONOLJ POTENG(p,i) = 0.0; #endif #ifdef NNBR NBANZ(p,i) = 0; #endif #ifdef CNA if (cna) MARK(p,i) = 0; #endif #ifdef COVALENT NEIGH(p,i)->n = 0; #endif #ifdef EAM2 EAM_RHO(p,i) = 0.0; /* zero host electron density at atom site */ #ifdef EEAM EAM_P(p,i) = 0.0; /* zero host electron density at atom site */ #endif #endif } } #ifdef RIGID /* clear total forces */ if ( nsuperatoms>0 ) for(k=0; k<nsuperatoms; k++) { superforce[k].x = 0.0; superforce[k].y = 0.0; superforce[k].z = 0.0; } #endif /* What follows is the standard one-cpu force loop acting on our local data cells */ /* compute forces for all pairs of cells */ for (n=0; n<nlists; ++n) { #ifdef _OPENMP #pragma omp parallel for schedule(runtime) \ reduction(+:tot_pot_energy,virial,vir_xx,vir_yy,vir_zz,vir_yz,vir_zx,vir_xy) #endif for (k=0; k<npairs[n]; ++k) { vektor pbc; pair *P; P = pairs[n] + k; pbc.x = P->ipbc[0]*box_x.x + P->ipbc[1]*box_y.x + P->ipbc[2]*box_z.x; pbc.y = P->ipbc[0]*box_x.y + P->ipbc[1]*box_y.y + P->ipbc[2]*box_z.y; pbc.z = P->ipbc[0]*box_x.z + 
P->ipbc[1]*box_y.z + P->ipbc[2]*box_z.z; do_forces(cell_array + P->np, cell_array + P->nq, pbc, &tot_pot_energy, &virial, &vir_xx, &vir_yy, &vir_zz, &vir_yz, &vir_zx, &vir_xy); } } #ifdef COVALENT /* complete neighbor tables for remaining pairs of cells */ for (n=0; n<nlists; ++n) { #ifdef _OPENMP #pragma omp parallel for schedule(runtime) #endif for (k=npairs[n]; k<npairs2[n]; ++k) { vektor pbc; pair *P; P = pairs[n] + k; pbc.x = P->ipbc[0]*box_x.x + P->ipbc[1]*box_y.x + P->ipbc[2]*box_z.x; pbc.y = P->ipbc[0]*box_x.y + P->ipbc[1]*box_y.y + P->ipbc[2]*box_z.y; pbc.z = P->ipbc[0]*box_x.z + P->ipbc[1]*box_y.z + P->ipbc[2]*box_z.z; do_neightab(cell_array + P->np, cell_array + P->nq, pbc); } } #ifndef CNA /* second force loop for covalent systems */ /* does not work correctly - different threads may write to same variables #ifdef _OPENMP #pragma omp parallel for schedule(runtime) \ reduction(+:tot_pot_energy,virial,vir_xx,vir_yy,vir_zz,vir_yz,vir_zx,vir_xy) #endif */ for (k=0; k<ncells; ++k) { do_forces2(cell_array + CELLS(k), &tot_pot_energy, &virial, &vir_xx, &vir_yy, &vir_zz, &vir_yz, &vir_zx, &vir_xy); } #endif #endif /* COVALENT */ #ifndef AR /* If we don't use actio=reactio accross the cpus, we have do do the force loop also on the other half of the neighbours for the cells on the surface of the CPU */ /* compute forces for remaining pairs of cells */ for (n=0; n<nlists; ++n) { /* does not work correctly - different threads may write to same variables #ifdef _OPENMP #pragma omp parallel for schedule(runtime) #endif */ for (k=npairs[n]; k<npairs2[n]; ++k) { vektor pbc; pair *P; P = pairs[n] + k; pbc.x = P->ipbc[0]*box_x.x + P->ipbc[1]*box_y.x + P->ipbc[2]*box_z.x; pbc.y = P->ipbc[0]*box_x.y + P->ipbc[1]*box_y.y + P->ipbc[2]*box_z.y; pbc.z = P->ipbc[0]*box_x.z + P->ipbc[1]*box_y.z + P->ipbc[2]*box_z.z; /* potential energy and virial are already complete; */ /* to avoid double counting, we update only the dummy tmpvec2 */ do_forces(cell_array + P->np, cell_array + 
P->nq, pbc, tmpvec2, tmpvec2+1, tmpvec2+2, tmpvec2+3, tmpvec2+4, tmpvec2+5, tmpvec2+6, tmpvec2+7); } } #endif /* not AR */ #ifdef EAM2 #ifdef AR /* collect host electron density */ send_forces(add_rho,pack_rho,unpack_add_rho); #endif /* compute embedding energy and its derivative */ do_embedding_energy(); /* distribute derivative of embedding energy */ send_cells(copy_dF,pack_dF,unpack_dF); /* second EAM2 loop over all cells pairs */ for (n=0; n<nlists; ++n) { #ifdef _OPENMP #pragma omp parallel for schedule(runtime) \ reduction(+:virial,vir_xx,vir_yy,vir_zz,vir_yz,vir_zx,vir_xy) #endif for (k=0; k<npairs[n]; ++k) { vektor pbc; pair *P; P = pairs[n]+k; pbc.x = P->ipbc[0]*box_x.x + P->ipbc[1]*box_y.x + P->ipbc[2]*box_z.x; pbc.y = P->ipbc[0]*box_x.y + P->ipbc[1]*box_y.y + P->ipbc[2]*box_z.y; pbc.z = P->ipbc[0]*box_x.z + P->ipbc[1]*box_y.z + P->ipbc[2]*box_z.z; do_forces_eam2(cell_array + P->np, cell_array + P->nq, pbc, &virial, &vir_xx, &vir_yy, &vir_zz, &vir_yz, &vir_zx, &vir_xy); } } #ifndef AR /* If we don't use actio=reactio accross the cpus, we have do do the force loop also on the other half of the neighbours for the cells on the surface of the CPU */ /* compute forces for remaining pairs of cells */ for (n=0; n<nlists; ++n) { #ifdef _OPENMP #pragma omp parallel for schedule(runtime) #endif for (k=npairs[n]; k<npairs2[n]; ++k) { vektor pbc; pair *P; P = pairs[n]+k; pbc.x = P->ipbc[0]*box_x.x + P->ipbc[1]*box_y.x + P->ipbc[2]*box_z.x; pbc.y = P->ipbc[0]*box_x.y + P->ipbc[1]*box_y.y + P->ipbc[2]*box_z.y; pbc.z = P->ipbc[0]*box_x.z + P->ipbc[1]*box_y.z + P->ipbc[2]*box_z.z; /* potential energy and virial are already complete; */ /* to avoid double counting, we update only the dummy tmpvec2 */ do_forces_eam2(cell_array + P->np, cell_array + P->nq, pbc, tmpvec2, tmpvec2+1, tmpvec2+2, tmpvec2+3, tmpvec2+4, tmpvec2+5, tmpvec2+6, tmpvec2+7); } } #endif /* not AR */ #endif /* EAM2 */ /* sum up results of different CPUs */ tmpvec1[0] = tot_pot_energy; tmpvec1[1] = 
virial; tmpvec1[2] = vir_xx; tmpvec1[3] = vir_yy; tmpvec1[4] = vir_zz; tmpvec1[5] = vir_yz; tmpvec1[6] = vir_zx; tmpvec1[7] = vir_xy; MPI_Allreduce( tmpvec1, tmpvec2, 8, REAL, MPI_SUM, cpugrid); tot_pot_energy = tmpvec2[0]; virial = tmpvec2[1]; vir_xx = tmpvec2[2]; vir_yy = tmpvec2[3]; vir_zz = tmpvec2[4]; vir_yz = tmpvec2[5]; vir_zx = tmpvec2[6]; vir_xy = tmpvec2[7]; #ifdef AR send_forces(add_forces,pack_forces,unpack_forces); #endif }
void calc_forces(int steps) { int n, k; real tmpvec1[5], tmpvec2[8] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; /* fill the buffer cells */ if ((steps == steps_min) || (0 == steps % BUFSTEP)) setup_buffers(); send_cells(copy_cell,pack_cell,unpack_cell); /* clear global accumulation variables */ tot_pot_energy = 0.0; virial = 0.0; vir_xx = 0.0; vir_yy = 0.0; vir_xy = 0.0; nfc++; /* clear per atom accumulation variables */ #ifdef _OPENMP #pragma omp parallel for #endif for (k=0; k<nallcells; ++k) { int i; cell *p; p = cell_array + k; for (i=0; i<p->n; ++i) { KRAFT(p,i,X) = 0.0; KRAFT(p,i,Y) = 0.0; POTENG(p,i) = 0.0; #ifdef NNBR NBANZ(p,i) = 0; #endif #if defined(STRESS_TENS) PRESSTENS(p,i,xx) = 0.0; PRESSTENS(p,i,yy) = 0.0; PRESSTENS(p,i,xy) = 0.0; #endif } } #ifdef RIGID /* clear total forces */ if ( nsuperatoms>0 ) for(k=0; k<nsuperatoms; k++) { superforce[k].x = 0.0; superforce[k].y = 0.0; } #endif /* What follows is the standard one-cpu force loop acting on our local data cells */ /* compute forces for all pairs of cells */ for (n=0; n<nlists; ++n) { #ifdef _OPENMP #pragma omp parallel for schedule(runtime) \ reduction(+:tot_pot_energy,virial,vir_xx,vir_yy,vir_xy) #endif for (k=0; k<npairs[n]; ++k) { vektor pbc; pair *P; P = pairs[n] + k; pbc.x = P->ipbc[0] * box_x.x + P->ipbc[1] * box_y.x; pbc.y = P->ipbc[0] * box_x.y + P->ipbc[1] * box_y.y; do_forces(cell_array + P->np, cell_array + P->nq, pbc, &tot_pot_energy, &virial, &vir_xx, &vir_yy, &vir_zz, &vir_yz, &vir_zx, &vir_xy); } } #ifndef AR /* If we don't use actio=reactio accross the cpus, we have do do the force loop also on the other half of the neighbours for the cells on the surface of the CPU */ /* compute forces for remaining pairs of cells */ for (n=0; n<nlists; ++n) { #ifdef _OPENMP #pragma omp parallel for schedule(runtime) #endif for (k=npairs[n]; k<npairs2[n]; ++k) { vektor pbc; pair *P; P = pairs[n] + k; pbc.x = P->ipbc[0] * box_x.x + P->ipbc[1] * box_y.x; pbc.y = P->ipbc[0] * box_x.y + P->ipbc[1] * 
box_y.y; /* potential energy and virial are already complete; */ /* to avoid double counting, we update only the dummy tmpvec2 */ do_forces(cell_array + P->np, cell_array + P->nq, pbc, tmpvec2, tmpvec2+1, tmpvec2+2, tmpvec2+3, tmpvec2+4, tmpvec2+5, tmpvec2+6, tmpvec2+7); } } #endif /* AR */ /* sum up results of different CPUs */ tmpvec1[0] = tot_pot_energy; tmpvec1[1] = virial; tmpvec1[2] = vir_xx; tmpvec1[3] = vir_yy; tmpvec1[4] = vir_xy; MPI_Allreduce( tmpvec1, tmpvec2, 5, REAL, MPI_SUM, cpugrid); tot_pot_energy = tmpvec2[0]; virial = tmpvec2[1]; vir_xx = tmpvec2[2]; vir_yy = tmpvec2[3]; vir_xy = tmpvec2[4]; #ifdef AR send_forces(add_forces,pack_forces,unpack_forces); #endif }
void unpack_fcs(void) { fcs_float vir[9] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; FCSResult result; real pot1, pot2, e, c, sum=0.0, fac=0.5; int n, m, k, i; /* extract output and distribute it to cell array */ n=0; m=0; pot1=0.0; for (k=0; k<NCELLS; ++k) { cell *p = CELLPTR(k); for (i=0; i<p->n; ++i) { c = CHARGE(p,i) * coul_eng; KRAFT(p,i,X) += field[n++] * c; KRAFT(p,i,Y) += field[n++] * c; KRAFT(p,i,Z) += field[n++] * c; e = pot[m++] * c * fac; POTENG(p,i) += e; pot1 += e; } } /* unpack virial */ result = fcs_get_virial(handle, vir); ASSERT_FCS(result); #ifdef P_AXIAL vir_xx += vir[0]; vir_yy += vir[4]; vir_zz += vir[8]; #else virial += vir[0] + vir[4] + vir[8]; #endif #ifdef STRESS_TENS if (do_press_calc) { /* distribute virial tensor evenly on all atoms */ sym_tensor pp; pp.xx = vir[0] / natoms; pp.yy = vir[4] / natoms; pp.zz = vir[8] / natoms; pp.yz = (vir[5]+vir[7]) / (2*natoms); pp.zx = (vir[2]+vir[6]) / (2*natoms); pp.xy = (vir[1]+vir[3]) / (2*natoms); for (k=0; k<NCELLS; ++k) { cell *p = CELLPTR(k); for (i=0; i<p->n; ++i) { PRESSTENS(p,i,xx) += pp.xx; PRESSTENS(p,i,yy) += pp.yy; PRESSTENS(p,i,zz) += pp.zz; PRESSTENS(p,i,yz) += pp.yz; PRESSTENS(p,i,zx) += pp.zx; PRESSTENS(p,i,xy) += pp.xy; } } } #endif /* sum up potential energy */ #ifdef MPI MPI_Allreduce( &pot1, &pot2, 1, MPI_DOUBLE, MPI_SUM, cpugrid); tot_pot_energy += pot2; #else tot_pot_energy += pot1; #endif }
void calc_forces(int steps) { int n, k; /* clear global accumulation variables */ tot_pot_energy = 0.0; virial = 0.0; vir_xx = 0.0; vir_yy = 0.0; vir_zz = 0.0; vir_yz = 0.0; vir_zx = 0.0; vir_xy = 0.0; nfc++; /* clear per atom accumulation variables */ #ifdef _OPENMP #pragma omp parallel for #endif for (k=0; k<ncells; ++k) { int i; cell *p; p = cell_array + k; for (i=0; i<p->n; ++i) { KRAFT(p,i,X) = 0.0; KRAFT(p,i,Y) = 0.0; KRAFT(p,i,Z) = 0.0; #ifdef UNIAX DREH_MOMENT(p,i,X) = 0.0; DREH_MOMENT(p,i,Y) = 0.0; DREH_MOMENT(p,i,Z) = 0.0; #endif #if defined(STRESS_TENS) PRESSTENS(p,i,xx) = 0.0; PRESSTENS(p,i,yy) = 0.0; PRESSTENS(p,i,zz) = 0.0; PRESSTENS(p,i,yz) = 0.0; PRESSTENS(p,i,zx) = 0.0; PRESSTENS(p,i,xy) = 0.0; #endif #ifndef MONOLJ POTENG(p,i) = 0.0; #endif #ifdef CNA if (cna) MARK(p,i) = 0; #endif #ifdef NNBR NBANZ(p,i) = 0; #endif #ifdef COVALENT NEIGH(p,i)->n = 0; #endif #ifdef EAM2 EAM_RHO(p,i) = 0.0; /* zero host electron density at atom site */ #ifdef EEAM EAM_P(p,i) = 0.0; /* zero host electron density at atom site */ #endif #endif } } #ifdef RIGID /* clear total forces */ if ( nsuperatoms>0 ) for(k=0; k<nsuperatoms; k++) { superforce[k].x = 0.0; superforce[k].y = 0.0; superforce[k].z = 0.0; } #endif #ifdef EWALD if (steps==0) { ewald_time.total = 0.0; imd_start_timer( &ewald_time ); } #endif /* compute forces for all pairs of cells */ for (n=0; n<nlists; ++n) { #ifdef _OPENMP #pragma omp parallel for schedule(runtime) \ reduction(+:tot_pot_energy,virial,vir_xx,vir_yy,vir_zz,vir_yz,vir_zx,vir_xy) #endif for (k=0; k<npairs[n]; ++k) { vektor pbc; pair *P; P = pairs[n]+k; pbc.x = P->ipbc[0]*box_x.x + P->ipbc[1]*box_y.x + P->ipbc[2]*box_z.x; pbc.y = P->ipbc[0]*box_x.y + P->ipbc[1]*box_y.y + P->ipbc[2]*box_z.y; pbc.z = P->ipbc[0]*box_x.z + P->ipbc[1]*box_y.z + P->ipbc[2]*box_z.z; do_forces(cell_array + P->np, cell_array + P->nq, pbc, &tot_pot_energy, &virial, &vir_xx, &vir_yy, &vir_zz, &vir_yz, &vir_zx, &vir_xy); } } #ifdef EWALD if (steps==0) { imd_stop_timer( 
&ewald_time ); } #endif #ifdef EAM2 /* compute embedding energy and its derivative */ do_embedding_energy(); for (n=0; n<nlists; ++n) { #ifdef _OPENMP #pragma omp parallel for schedule(runtime) \ reduction(+:virial,vir_xx,vir_yy,vir_zz,vir_yz,vir_zx,vir_xy) #endif for (k=0; k<npairs[n]; ++k) { vektor pbc; pair *P; P = pairs[n]+k; pbc.x = P->ipbc[0]*box_x.x + P->ipbc[1]*box_y.x + P->ipbc[2]*box_z.x; pbc.y = P->ipbc[0]*box_x.y + P->ipbc[1]*box_y.y + P->ipbc[2]*box_z.y; pbc.z = P->ipbc[0]*box_x.z + P->ipbc[1]*box_y.z + P->ipbc[2]*box_z.z; do_forces_eam2(cell_array + P->np, cell_array + P->nq, pbc, &virial, &vir_xx, &vir_yy, &vir_zz, &vir_yz, &vir_zx, &vir_xy); } } #endif #if defined(COVALENT) && !defined(CNA) /* does not work correctly - different threads may write to same variables #ifdef _OPENMP #pragma omp parallel for schedule(runtime) \ reduction(+:tot_pot_energy,virial,vir_xx,vir_yy,vir_zz,vir_yz,vir_zx,vir_xy) #endif */ for (k=0; k<ncells; ++k) { do_forces2(cell_array+k, &tot_pot_energy, &virial, &vir_xx, &vir_yy, &vir_zz, &vir_yz, &vir_zx, &vir_xy); } #endif #ifdef EWALD do_forces_ewald(steps); #endif }