double calc_forces(double* xi_opt, double* forces, int flag) { double tmpsum = 0.0; double sum = 0.0; int first = 0; int col = 0; int ne = 0; int size = 0; int i = flag; double* xi = NULL; apot_table_t* apt = &g_pot.apot_table; double charge[g_param.ntypes]; double sum_charges; double dp_kappa; #if defined(DIPOLE) double dp_alpha[g_param.ntypes]; double dp_b[apt->number]; double dp_c[apt->number]; #endif // DIPOLE switch (g_pot.format_type) { case POTENTIAL_FORMAT_UNKNOWN: break; case POTENTIAL_FORMAT_ANALYTIC: xi = g_pot.calc_pot.table; break; case POTENTIAL_FORMAT_TABULATED_EQ_DIST: case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: xi = xi_opt; break; } ne = g_pot.apot_table.total_ne_par; size = apt->number; /* This is the start of an infinite loop */ while (1) { tmpsum = 0.0; /* sum of squares of local process */ #if defined(APOT) && !defined(MPI) if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC) { apot_check_params(xi_opt); update_calc_table(xi_opt, xi, 0); } #endif // APOT && !MPI #if defined(MPI) /* exchange potential and flag value */ #if !defined(APOT) MPI_Bcast(xi, g_pot.calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif // !APOT MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD); if (flag == 1) break; /* Exception: flag 1 means clean up */ #if defined(APOT) if (g_mpi.myid == 0) apot_check_params(xi_opt); MPI_Bcast(xi_opt, g_calc.ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD); if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC) update_calc_table(xi_opt, xi, 0); #else // APOT /* if flag==2 then the potential parameters have changed -> sync */ if (flag == 2) potsync(); #endif // APOT #endif // MPI /* local arrays for electrostatic parameters */ sum_charges = 0; for (i = 0; i < g_param.ntypes - 1; i++) { if (xi_opt[2 * size + ne + i]) { charge[i] = xi_opt[2 * size + ne + i]; sum_charges += apt->ratio[i] * charge[i]; } else { charge[i] = 0.0; } } apt->last_charge = -sum_charges / apt->ratio[g_param.ntypes - 1]; charge[g_param.ntypes - 1] = apt->last_charge; if (xi_opt[2 
* size + ne + g_param.ntypes - 1]) { dp_kappa = xi_opt[2 * size + ne + g_param.ntypes - 1]; } else { dp_kappa = 0.0; } #if defined(DIPOLE) for (i = 0; i < g_param.ntypes; i++) { if (xi_opt[2 * size + ne + g_param.ntypes + i]) { dp_alpha[i] = xi_opt[2 * size + ne + g_param.ntypes + i]; } else { dp_alpha[i] = 0.0; } } for (i = 0; i < size; i++) { if (xi_opt[2 * size + ne + 2 * g_param.ntypes + i]) { dp_b[i] = xi_opt[2 * size + ne + 2 * g_param.ntypes + i]; } else { dp_b[i] = 0.0; } if (xi_opt[3 * size + ne + 2 * g_param.ntypes + i]) { dp_c[i] = xi_opt[3 * size + ne + 2 * g_param.ntypes + i]; } else { dp_c[i] = 0.0; } } #endif // DIPOLE /* init second derivatives for splines */ for (col = 0; col < g_calc.paircol; col++) { first = g_pot.calc_pot.first[col]; switch (g_pot.format_type) { case POTENTIAL_FORMAT_UNKNOWN: error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__, __LINE__); case POTENTIAL_FORMAT_ANALYTIC: case POTENTIAL_FORMAT_TABULATED_EQ_DIST: { spline_ed(g_pot.calc_pot.step[col], xi + first, g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, g_pot.calc_pot.d2tab + first); break; } case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: { spline_ne(g_pot.calc_pot.xcoord + first, xi + first, g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, g_pot.calc_pot.d2tab + first); } } } #if !defined(MPI) g_mpi.myconf = g_config.nconf; #endif // MPI /* region containing loop over configurations, also OMP-parallelized region */ { int self; vector tmp_force; int h, j, type1, type2, uf; #if defined(STRESS) int us, stresses; #endif // STRESS int n_i, n_j; double fnval, grad, fnval_tail, grad_tail, grad_i, grad_j; #if defined(DIPOLE) double p_sr_tail; #endif // DIPOLE atom_t* atom; neigh_t* neigh; /* loop over configurations: M A I N LOOP CONTAINING ALL ATOM-LOOPS */ for (h = g_mpi.firstconf; h < g_mpi.firstconf + g_mpi.myconf; h++) { uf = g_config.conf_uf[h - g_mpi.firstconf]; #if defined(STRESS) us = g_config.conf_us[h - g_mpi.firstconf]; #endif // 
STRESS /* reset energies and stresses */ forces[g_calc.energy_p + h] = 0.0; #if defined(STRESS) stresses = g_calc.stress_p + 6 * h; for (i = 0; i < 6; i++) forces[stresses + i] = 0.0; #endif // STRESS #if defined(DIPOLE) /* reset dipoles and fields: LOOP Z E R O */ for (i = 0; i < g_config.inconf[h]; i++) { atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; atom->E_stat.x = 0.0; atom->E_stat.y = 0.0; atom->E_stat.z = 0.0; atom->p_sr.x = 0.0; atom->p_sr.y = 0.0; atom->p_sr.z = 0.0; } #endif // DIPOLE /* F I R S T LOOP OVER ATOMS: reset forces, dipoles */ for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ n_i = 3 * (g_config.cnfstart[h] + i); if (uf) { forces[n_i + 0] = -g_config.force_0[n_i + 0]; forces[n_i + 1] = -g_config.force_0[n_i + 1]; forces[n_i + 2] = -g_config.force_0[n_i + 2]; } else { forces[n_i + 0] = 0.0; forces[n_i + 1] = 0.0; forces[n_i + 2] = 0.0; } } /* end F I R S T LOOP */ /* S E C O N D loop: calculate short-range and monopole forces, calculate static field- and dipole-contributions */ for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; n_i = 3 * (g_config.cnfstart[h] + i); for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* updating tail-functions - only necessary with variing kappa */ if (!apt->sw_kappa) elstat_shift(neigh->r, dp_kappa, &neigh->fnval_el, &neigh->grad_el, &neigh->ggrad_el); /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + g_config.cnfstart[h]) ? 
1 : 0; /* calculate short-range forces */ if (neigh->r < g_pot.calc_pot.end[col]) { if (uf) { fnval = splint_comb_dir(&g_pot.calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0], &grad); } else { fnval = splint_dir(&g_pot.calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0]); } /* avoid double counting if atom is interacting with a copy of itself */ if (self) { fnval *= 0.5; grad *= 0.5; } forces[g_calc.energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad; tmp_force.y = neigh->dist_r.y * grad; tmp_force.z = neigh->dist_r.z * grad; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #if defined(STRESS) /* calculate pair stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif // STRESS } } /* calculate monopole forces */ if (neigh->r < g_config.dp_cut && (charge[type1] || charge[type2])) { fnval_tail = neigh->fnval_el; grad_tail = neigh->grad_el; grad_i = charge[type2] * grad_tail; if (type1 == type2) { grad_j = grad_i; } else { grad_j = charge[type1] * grad_tail; } fnval = charge[type1] * charge[type2] * fnval_tail; grad = charge[type1] * grad_i; if (self) { grad_i *= 0.5; grad_j *= 0.5; fnval *= 0.5; grad *= 0.5; } forces[g_calc.energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist.x * grad; tmp_force.y = neigh->dist.y * grad; tmp_force.z = neigh->dist.z * grad; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= 
tmp_force.y; forces[n_j + 2] -= tmp_force.z; #if defined(STRESS) /* calculate coulomb stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif // STRESS } #if defined(DIPOLE) /* calculate static field-contributions */ atom->E_stat.x += neigh->dist.x * grad_i; atom->E_stat.y += neigh->dist.y * grad_i; atom->E_stat.z += neigh->dist.z * grad_i; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.x -= neigh->dist.x * grad_j; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.y -= neigh->dist.y * grad_j; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.z -= neigh->dist.z * grad_j; /* calculate short-range dipoles */ if (dp_alpha[type1] && dp_b[col] && dp_c[col]) { p_sr_tail = grad_tail * neigh->r * shortrange_value(neigh->r, dp_alpha[type1], dp_b[col], dp_c[col]); atom->p_sr.x += charge[type2] * neigh->dist_r.x * p_sr_tail; atom->p_sr.y += charge[type2] * neigh->dist_r.y * p_sr_tail; atom->p_sr.z += charge[type2] * neigh->dist_r.z * p_sr_tail; } if (dp_alpha[type2] && dp_b[col] && dp_c[col] && !self) { p_sr_tail = grad_tail * neigh->r * shortrange_value(neigh->r, dp_alpha[type2], dp_b[col], dp_c[col]); g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.x -= charge[type1] * neigh->dist_r.x * p_sr_tail; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.y -= charge[type1] * neigh->dist_r.y * p_sr_tail; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.z -= charge[type1] * neigh->dist_r.z * p_sr_tail; } #endif // DIPOLE } } /* loop over neighbours */ } /* end S E C O N D loop over atoms */ #if defined(DIPOLE) /* T H I R D loop: calculate whole dipole moment for every atom */ double rp, dp_sum; int dp_converged = 0, dp_it = 0; double max_diff = 10; while 
(dp_converged == 0) { dp_sum = 0; for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; if (dp_alpha[type1]) { if (dp_it) { /* note: mixing parameter is different from that on in IMD */ atom->E_tot.x = (1 - g_config.dp_mix) * atom->E_ind.x + g_config.dp_mix * atom->E_old.x + atom->E_stat.x; atom->E_tot.y = (1 - g_config.dp_mix) * atom->E_ind.y + g_config.dp_mix * atom->E_old.y + atom->E_stat.y; atom->E_tot.z = (1 - g_config.dp_mix) * atom->E_ind.z + g_config.dp_mix * atom->E_old.z + atom->E_stat.z; } else { atom->E_tot.x = atom->E_ind.x + atom->E_stat.x; atom->E_tot.y = atom->E_ind.y + atom->E_stat.y; atom->E_tot.z = atom->E_ind.z + atom->E_stat.z; } atom->p_ind.x = dp_alpha[type1] * atom->E_tot.x + atom->p_sr.x; atom->p_ind.y = dp_alpha[type1] * atom->E_tot.y + atom->p_sr.y; atom->p_ind.z = dp_alpha[type1] * atom->E_tot.z + atom->p_sr.z; atom->E_old.x = atom->E_ind.x; atom->E_old.y = atom->E_ind.y; atom->E_old.z = atom->E_ind.z; atom->E_ind.x = 0.0; atom->E_ind.y = 0.0; atom->E_ind.z = 0.0; } } for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + g_config.cnfstart[h]) ? 
1 : 0; if (neigh->r < g_config.dp_cut && dp_alpha[type1] && dp_alpha[type2]) { rp = SPROD( g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind, neigh->dist_r); atom->E_ind.x += neigh->grad_el * (3 * rp * neigh->dist_r.x - g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.x); atom->E_ind.y += neigh->grad_el * (3 * rp * neigh->dist_r.y - g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.y); atom->E_ind.z += neigh->grad_el * (3 * rp * neigh->dist_r.z - g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.z); if (!self) { rp = SPROD(atom->p_ind, neigh->dist_r); g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.x += neigh->grad_el * (3 * rp * neigh->dist_r.x - atom->p_ind.x); g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.y += neigh->grad_el * (3 * rp * neigh->dist_r.y - atom->p_ind.y); g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.z += neigh->grad_el * (3 * rp * neigh->dist_r.z - atom->p_ind.z); } } } } for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; if (dp_alpha[type1]) { dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.x - atom->E_ind.x)); dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.y - atom->E_ind.y)); dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.z - atom->E_ind.z)); } } dp_sum /= 3 * g_config.inconf[h]; dp_sum = sqrt(dp_sum); if (dp_it) { if ((dp_sum > max_diff) || (dp_it > 50)) { dp_converged = 1; for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; if (dp_alpha[type1]) { atom->p_ind.x = dp_alpha[type1] * atom->E_stat.x + atom->p_sr.x; atom->p_ind.y = dp_alpha[type1] * atom->E_stat.y + atom->p_sr.y; atom->p_ind.z = dp_alpha[type1] * atom->E_stat.z + atom->p_sr.z; atom->E_ind.x = atom->E_stat.x; atom->E_ind.y = atom->E_stat.y; atom->E_ind.z = atom->E_stat.z; } } } } if (dp_sum < g_config.dp_tol) dp_converged = 1; dp_it++; } /* 
end T H I R D loop over atoms */ /* F O U R T H loop: calculate monopole-dipole and dipole-dipole forces */ double rp_i, rp_j, pp_ij, tmp_1, tmp_2; double grad_1, grad_2, srval, srgrad, srval_tail, srgrad_tail, fnval_sum, grad_sum; for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; n_i = 3 * (g_config.cnfstart[h] + i); for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0; if (neigh->r < g_config.dp_cut && (dp_alpha[type1] || dp_alpha[type2])) { fnval_tail = -neigh->grad_el; grad_tail = -neigh->ggrad_el; if (dp_b[col] && dp_c[col]) { shortrange_term(neigh->r, dp_b[col], dp_c[col], &srval_tail, &srgrad_tail); srval = fnval_tail * srval_tail; srgrad = fnval_tail * srgrad_tail + grad_tail * srval_tail; } if (self) { fnval_tail *= 0.5; grad_tail *= 0.5; } /* monopole-dipole contributions */ if (charge[type1] && dp_alpha[type2]) { if (dp_b[col] && dp_c[col]) { fnval_sum = fnval_tail + srval; grad_sum = grad_tail + srgrad; } else { fnval_sum = fnval_tail; grad_sum = grad_tail; } rp_j = SPROD( g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind, neigh->dist_r); fnval = charge[type1] * rp_j * fnval_sum * neigh->r; grad_1 = charge[type1] * rp_j * grad_sum * neigh->r2; grad_2 = charge[type1] * fnval_sum; forces[g_calc.energy_p + h] -= fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad_1 + g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.x * grad_2; tmp_force.y = neigh->dist_r.y * grad_1 + g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.y * grad_2; tmp_force.z = neigh->dist_r.z * grad_1 + g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.z * grad_2; forces[n_i + 0] -= tmp_force.x; forces[n_i + 1] -= tmp_force.y; forces[n_i + 2] -= tmp_force.z; /* actio = reactio */ n_j = 3 * 
neigh->nr; forces[n_j + 0] += tmp_force.x; forces[n_j + 1] += tmp_force.y; forces[n_j + 2] += tmp_force.z; #if defined(STRESS) /* calculate stresses */ if (us) { forces[stresses + 0] += neigh->dist.x * tmp_force.x; forces[stresses + 1] += neigh->dist.y * tmp_force.y; forces[stresses + 2] += neigh->dist.z * tmp_force.z; forces[stresses + 3] += neigh->dist.x * tmp_force.y; forces[stresses + 4] += neigh->dist.y * tmp_force.z; forces[stresses + 5] += neigh->dist.z * tmp_force.x; } #endif // STRESS } } /* dipole-monopole contributions */ if (dp_alpha[type1] && charge[type2]) { if (dp_b[col] && dp_c[col]) { fnval_sum = fnval_tail + srval; grad_sum = grad_tail + srgrad; } else { fnval_sum = fnval_tail; grad_sum = grad_tail; } rp_i = SPROD(atom->p_ind, neigh->dist_r); fnval = charge[type2] * rp_i * fnval_sum * neigh->r; grad_1 = charge[type2] * rp_i * grad_sum * neigh->r2; grad_2 = charge[type2] * fnval_sum; forces[g_calc.energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad_1 + atom->p_ind.x * grad_2; tmp_force.y = neigh->dist_r.y * grad_1 + atom->p_ind.y * grad_2; tmp_force.z = neigh->dist_r.z * grad_1 + atom->p_ind.z * grad_2; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #if defined(STRESS) /* calculate stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif // STRESS } } /* dipole-dipole contributions */ if (dp_alpha[type1] && dp_alpha[type2]) { pp_ij = SPROD( atom->p_ind, g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind); tmp_1 = 3 * rp_i * rp_j; tmp_2 = 3 * fnval_tail / 
neigh->r2; fnval = -(tmp_1 - pp_ij) * fnval_tail; grad_1 = (tmp_1 - pp_ij) * grad_tail; grad_2 = 2 * rp_i * rp_j; forces[g_calc.energy_p + h] += fnval; if (uf) { tmp_force.x = grad_1 * neigh->dist.x - tmp_2 * (grad_2 * neigh->dist.x - rp_i * neigh->r * g_config.conf_atoms[neigh->nr - g_mpi.firstatom] .p_ind.x - rp_j * neigh->r * atom->p_ind.x); tmp_force.y = grad_1 * neigh->dist.y - tmp_2 * (grad_2 * neigh->dist.y - rp_i * neigh->r * g_config.conf_atoms[neigh->nr - g_mpi.firstatom] .p_ind.y - rp_j * neigh->r * atom->p_ind.y); tmp_force.z = grad_1 * neigh->dist.z - tmp_2 * (grad_2 * neigh->dist.z - rp_i * neigh->r * g_config.conf_atoms[neigh->nr - g_mpi.firstatom] .p_ind.z - rp_j * neigh->r * atom->p_ind.z); forces[n_i + 0] -= tmp_force.x; forces[n_i + 1] -= tmp_force.y; forces[n_i + 2] -= tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] += tmp_force.x; forces[n_j + 1] += tmp_force.y; forces[n_j + 2] += tmp_force.z; #if defined(STRESS) /* calculate stresses */ if (us) { forces[stresses + 0] += neigh->dist.x * tmp_force.x; forces[stresses + 1] += neigh->dist.y * tmp_force.y; forces[stresses + 2] += neigh->dist.z * tmp_force.z; forces[stresses + 3] += neigh->dist.x * tmp_force.y; forces[stresses + 4] += neigh->dist.y * tmp_force.z; forces[stresses + 5] += neigh->dist.z * tmp_force.x; } #endif // STRESS } } } } /* loop over neighbours */ } /* end F O U R T H loop over atoms */ #endif // DIPOLE /* F I F T H loop: self energy contributions and sum-up force * contributions */ double qq; #if defined(DIPOLE) double pp; #endif // DIPOLE for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; n_i = 3 * (g_config.cnfstart[h] + i); /* self energy contributions */ if (charge[type1]) { qq = charge[type1] * charge[type1]; fnval = DP_EPS * dp_kappa * qq / sqrt(M_PI); forces[g_calc.energy_p + h] -= fnval; } #if defined(DIPOLE) if (dp_alpha[type1]) { pp = 
SPROD(atom->p_ind, atom->p_ind); fnval = pp / (2 * dp_alpha[type1]); forces[g_calc.energy_p + h] += fnval; } /* alternative dipole self energy including kappa-dependence */ // if (dp_alpha[type1]) { // pp = SPROD(atom->p_ind, atom->p_ind); // fnval = kkk * pp / sqrt(M_PI); // forces[energy_p + h] += fnval; //} #endif // DIPOLE /* sum-up: whole force contributions flow into tmpsum */ if (uf) { #if defined(FWEIGHT) /* Weigh by absolute value of force */ forces[n_i + 0] /= FORCE_EPS + atom->absforce; forces[n_i + 1] /= FORCE_EPS + atom->absforce; forces[n_i + 2] /= FORCE_EPS + atom->absforce; #endif // FWEIGHT #if defined(CONTRIB) if (atom->contrib) #endif // CONTRIB tmpsum += g_config.conf_weight[h] * (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2])); } } /* end F I F T H loop over atoms */ /* whole energy contributions flow into tmpsum */ forces[g_calc.energy_p + h] /= (double)g_config.inconf[h]; forces[g_calc.energy_p + h] -= g_config.force_0[g_calc.energy_p + h]; tmpsum += g_config.conf_weight[h] * g_param.eweight * dsquare(forces[g_calc.energy_p + h]); #if defined(STRESS) /* whole stress contributions flow into tmpsum */ if (uf && us) { for (i = 0; i < 6; i++) { forces[stresses + i] /= g_config.conf_vol[h - g_mpi.firstconf]; forces[stresses + i] -= g_config.force_0[stresses + i]; tmpsum += g_config.conf_weight[h] * g_param.sweight * dsquare(forces[stresses + i]); } } #endif // STRESS } /* end M A I N loop over configurations */ } /* parallel region */ /* dummy constraints (global) */ #if defined(APOT) /* add punishment for out of bounds (mostly for powell_lsq) */ if (g_mpi.myid == 0) { tmpsum += apot_punish(xi_opt, forces); } #endif // APOT #if defined(MPI) /* reduce global sum */ sum = 0.0; MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* gather forces, energies, stresses */ if (g_mpi.myid == 0) { /* root node already has data in place */ /* forces */ MPI_Gatherv(MPI_IN_PLACE, g_mpi.myatoms, 
g_mpi.MPI_VECTOR, forces, g_mpi.atom_len, g_mpi.atom_dist, g_mpi.MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, MPI_DOUBLE, forces + g_calc.energy_p, g_mpi.conf_len, g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); #if defined(STRESS) /* stresses */ MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, g_mpi.MPI_STENS, forces + g_calc.stress_p, g_mpi.conf_len, g_mpi.conf_dist, g_mpi.MPI_STENS, 0, MPI_COMM_WORLD); #endif // STRESS } else { /* forces */ MPI_Gatherv(forces + g_mpi.firstatom * 3, g_mpi.myatoms, g_mpi.MPI_VECTOR, forces, g_mpi.atom_len, g_mpi.atom_dist, g_mpi.MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(forces + g_calc.energy_p + g_mpi.firstconf, g_mpi.myconf, MPI_DOUBLE, forces + g_calc.energy_p, g_mpi.conf_len, g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); #if defined(STRESS) /* stresses */ MPI_Gatherv(forces + g_calc.stress_p + 6 * g_mpi.firstconf, g_mpi.myconf, g_mpi.MPI_STENS, forces + g_calc.stress_p, g_mpi.conf_len, g_mpi.conf_dist, g_mpi.MPI_STENS, 0, MPI_COMM_WORLD); #endif // STRESS } #else sum = tmpsum; /* global sum = local sum */ #endif // MPI /* root process exits this function now */ if (g_mpi.myid == 0) { g_calc.fcalls++; /* Increase function call counter */ if (isnan(sum)) { #if defined(DEBUG) printf("\n--> Force is nan! <--\n\n"); #endif // DEBUG return 10e10; } else return sum; } } /* once a non-root process arrives here, all is done. */ return -1.0; }
double calc_forces_eam(double *xi_opt, double *forces, int flag) { int first, col, i; double tmpsum = 0.0, sum = 0.0; double *xi = NULL; static double rho_sum_loc, rho_sum; rho_sum_loc = rho_sum = 0.0; switch (format) { case 0: xi = calc_pot.table; break; case 3: /* fall through */ case 4: xi = xi_opt; /* calc-table is opt-table */ break; case 5: xi = calc_pot.table; /* we need to update the calc-table */ } /* This is the start of an infinite loop */ while (1) { tmpsum = 0.0; /* sum of squares of local process */ rho_sum_loc = 0.0; #if defined APOT && !defined MPI if (0 == format) { apot_check_params(xi_opt); update_calc_table(xi_opt, xi, 0); } #endif /* APOT && !MPI */ #ifdef MPI #ifndef APOT /* exchange potential and flag value */ MPI_Bcast(xi, calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif /* APOT */ MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD); if (1 == flag) break; /* Exception: flag 1 means clean up */ #ifdef APOT if (0 == myid) apot_check_params(xi_opt); MPI_Bcast(xi_opt, ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD); update_calc_table(xi_opt, xi, 0); #else /* APOT */ /* if flag==2 then the potential parameters have changed -> sync */ if (2 == flag) potsync(); #endif /* APOT */ #endif /* MPI */ /* init second derivatives for splines */ /* [0, ..., paircol - 1] = pair potentials */ /* [paircol, ..., paircol + ntypes - 1] = transfer function */ for (col = 0; col < paircol + ntypes; col++) { first = calc_pot.first[col]; if (0 == format || 3 == format) spline_ed(calc_pot.step[col], xi + first, calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, calc_pot.d2tab + first); else /* format >= 4 ! 
*/ spline_ne(calc_pot.xcoord + first, xi + first, calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, calc_pot.d2tab + first); } /* [paircol + ntypes, ..., paircol + 2 * ntypes - 1] = embedding function */ #ifndef PARABOLA /* if we have parabolic interpolation, we don't need that */ for (col = paircol + ntypes; col < paircol + 2 * ntypes; col++) { first = calc_pot.first[col]; /* gradient at left boundary matched to square root function, when 0 not in domain(F), else natural spline */ if (0 == format || 3 == format) spline_ed(calc_pot.step[col], xi + first, calc_pot.last[col] - first + 1, #ifdef WZERO ((calc_pot.begin[col] <= 0.0) ? *(xi + first - 2) : 0.5 / xi[first]), ((calc_pot.end[col] >= 0.0) ? *(xi + first - 1) : -0.5 / xi[calc_pot.last[col]]), #else /* WZERO: F is natural spline in any case */ *(xi + first - 2), *(xi + first - 1), #endif /* WZERO */ calc_pot.d2tab + first); else /* format >= 4 ! */ spline_ne(calc_pot.xcoord + first, xi + first, calc_pot.last[col] - first + 1, #ifdef WZERO (calc_pot.begin[col] <= 0.0 ? *(xi + first - 2) : 0.5 / xi[first]), (calc_pot.end[col] >= 0.0 ? 
*(xi + first - 1) : -0.5 / xi[calc_pot.last[col]]), #else /* WZERO */ *(xi + first - 2), *(xi + first - 1), #endif /* WZERO */ calc_pot.d2tab + first); } #endif /* PARABOLA */ #ifndef MPI myconf = nconf; #endif /* MPI */ /* region containing loop over configurations */ { atom_t *atom; int h, j; int n_i, n_j; int self; int uf; #ifdef APOT double temp_eng; #endif /* APOT */ #ifdef STRESS int us, stresses; #endif /* STRESS */ /* pointer for neighbor table */ neigh_t *neigh; /* pair variables */ double phi_val, phi_grad; double r; vector tmp_force; /* eam variables */ int col_F; double eam_force; double rho_val, rho_grad, rho_grad_j; /* loop over configurations */ for (h = firstconf; h < firstconf + myconf; h++) { uf = conf_uf[h - firstconf]; #ifdef STRESS us = conf_us[h - firstconf]; #endif /* STRESS */ /* reset energies and stresses */ forces[energy_p + h] = 0.0; #ifdef STRESS stresses = stress_p + 6 * h; for (i = 0; i < 6; i++) forces[stresses + i] = 0.0; #endif /* STRESS */ /* set limiting constraints */ forces[limit_p + h] = -force_0[limit_p + h]; /* first loop over atoms: reset forces, densities */ for (i = 0; i < inconf[h]; i++) { n_i = 3 * (cnfstart[h] + i); if (uf) { forces[n_i + 0] = -force_0[n_i + 0]; forces[n_i + 1] = -force_0[n_i + 1]; forces[n_i + 2] = -force_0[n_i + 2]; } else { forces[n_i + 0] = 0.0; forces[n_i + 1] = 0.0; forces[n_i + 2] = 0.0; } /* reset atomic density */ conf_atoms[cnfstart[h] - firstatom + i].rho = 0.0; } /* end of first loop */ /* 2nd loop: calculate pair forces and energies, atomic densities. */ for (i = 0; i < inconf[h]; i++) { atom = conf_atoms + i + cnfstart[h] - firstatom; n_i = 3 * (cnfstart[h] + i); /* loop over neighbors */ for (j = 0; j < atom->num_neigh; j++) { neigh = atom->neigh + j; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + cnfstart[h]) ? 
1 : 0; /* pair potential part */ if (neigh->r < calc_pot.end[neigh->col[0]]) { /* fn value and grad are calculated in the same step */ if (uf) phi_val = splint_comb_dir(&calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0], &phi_grad); else phi_val = splint_dir(&calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0]); /* avoid double counting if atom is interacting with a copy of itself */ if (self) { phi_val *= 0.5; phi_grad *= 0.5; } /* add cohesive energy */ forces[energy_p + h] += phi_val; /* calculate forces */ if (uf) { tmp_force.x = neigh->dist_r.x * phi_grad; tmp_force.y = neigh->dist_r.y * phi_grad; tmp_force.z = neigh->dist_r.z * phi_grad; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #ifdef STRESS /* also calculate pair stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif /* STRESS */ } } /* neighbor in range */ /* calculate atomic densities */ if (atom->type == neigh->type) { /* then transfer(a->b)==transfer(b->a) */ if (neigh->r < calc_pot.end[neigh->col[1]]) { rho_val = splint_dir(&calc_pot, xi, neigh->slot[1], neigh->shift[1], neigh->step[1]); atom->rho += rho_val; /* avoid double counting if atom is interacting with a copy of itself */ if (!self) { conf_atoms[neigh->nr - firstatom].rho += rho_val; } } } else { /* transfer(a->b)!=transfer(b->a) */ if (neigh->r < calc_pot.end[neigh->col[1]]) { atom->rho += splint_dir(&calc_pot, xi, neigh->slot[1], neigh->shift[1], neigh->step[1]); } /* cannot use slot/shift to access splines */ if (neigh->r < 
calc_pot.end[paircol + atom->type]) conf_atoms[neigh->nr - firstatom].rho += splint(&calc_pot, xi, paircol + atom->type, neigh->r); } } /* loop over all neighbors */ col_F = paircol + ntypes + atom->type; /* column of F */ #ifndef NORESCALE if (atom->rho > calc_pot.end[col_F]) { /* then punish target function -> bad potential */ forces[limit_p + h] += DUMMY_WEIGHT * 10.0 * dsquare(atom->rho - calc_pot.end[col_F]); #ifndef PARABOLA /* then we use the final value, with PARABOLA: extrapolate */ atom->rho = calc_pot.end[col_F]; #endif /* PARABOLA */ } if (atom->rho < calc_pot.begin[col_F]) { /* then punish target function -> bad potential */ forces[limit_p + h] += DUMMY_WEIGHT * 10.0 * dsquare(calc_pot.begin[col_F] - atom->rho); #ifndef PARABOLA /* then we use the final value, with PARABOLA: extrapolate */ atom->rho = calc_pot.begin[col_F]; #endif /* PARABOLA */ } #endif /* !NORESCALE */ /* embedding energy, embedding gradient */ /* contribution to cohesive energy is F(n) */ #ifdef PARABOLA forces[energy_p + h] += parab_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF); #elif defined(NORESCALE) if (atom->rho < calc_pot.begin[col_F]) { #ifdef APOT /* calculate analytic value explicitly */ apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng); atom->gradF = apot_grad(atom->rho, xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]); forces[energy_p + h] += temp_eng; #else /* linear extrapolation left */ rho_val = splint_comb(&calc_pot, xi, col_F, calc_pot.begin[col_F], &atom->gradF); forces[energy_p + h] += rho_val + (atom->rho - calc_pot.begin[col_F]) * atom->gradF; #endif /* APOT */ } else if (atom->rho > calc_pot.end[col_F]) { #ifdef APOT /* calculate analytic value explicitly */ apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng); atom->gradF = apot_grad(atom->rho, xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]); forces[energy_p + h] += temp_eng; #else /* and right */ rho_val = splint_comb(&calc_pot, 
xi, col_F, calc_pot.end[col_F] - 0.5 * calc_pot.step[col_F], &atom->gradF); forces[energy_p + h] += rho_val + (atom->rho - calc_pot.end[col_F]) * atom->gradF; #endif /* APOT */ } /* and in-between */ else { #ifdef APOT /* calculate small values directly */ if (atom->rho < 0.1) { apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng); atom->gradF = apot_grad(atom->rho, xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]); forces[energy_p + h] += temp_eng; } else #endif forces[energy_p + h] += splint_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF); } #else forces[energy_p + h] += splint_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF); #endif /* NORESCALE */ /* sum up rho */ rho_sum_loc += atom->rho; } /* second loop over atoms */ /* 3rd loop over atom: EAM force */ if (uf) { /* only required if we calc forces */ for (i = 0; i < inconf[h]; i++) { atom = conf_atoms + i + cnfstart[h] - firstatom; n_i = 3 * (cnfstart[h] + i); for (j = 0; j < atom->num_neigh; j++) { /* loop over neighbors */ neigh = atom->neigh + j; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + cnfstart[h]) ? 1 : 0; col_F = paircol + ntypes + atom->type; /* column of F */ r = neigh->r; /* are we within reach? */ if ((r < calc_pot.end[neigh->col[1]]) || (r < calc_pot.end[col_F - ntypes])) { rho_grad = (r < calc_pot.end[neigh->col[1]]) ? splint_grad_dir(&calc_pot, xi, neigh->slot[1], neigh->shift[1], neigh->step[1]) : 0.0; if (atom->type == neigh->type) /* use actio = reactio */ rho_grad_j = rho_grad; else rho_grad_j = (r < calc_pot.end[col_F - ntypes]) ? 
splint_grad(&calc_pot, xi, col_F - ntypes, r) : 0.; /* now we know everything - calculate forces */ eam_force = (rho_grad * atom->gradF + rho_grad_j * conf_atoms[(neigh->nr) - firstatom].gradF); /* avoid double counting if atom is interacting with a copy of itself */ if (self) eam_force *= 0.5; tmp_force.x = neigh->dist_r.x * eam_force; tmp_force.y = neigh->dist_r.y * eam_force; tmp_force.z = neigh->dist_r.z * eam_force; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #ifdef STRESS /* and stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif /* STRESS */ } /* within reach */ } /* loop over neighbours */ #ifdef FWEIGHT /* Weigh by absolute value of force */ forces[n_i + 0] /= FORCE_EPS + atom->absforce; forces[n_i + 1] /= FORCE_EPS + atom->absforce; forces[n_i + 2] /= FORCE_EPS + atom->absforce; #endif /* FWEIGHT */ /* sum up forces */ #ifdef CONTRIB if (atom->contrib) #endif /* CONTRIB */ tmpsum += conf_weight[h] * (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2])); } /* third loop over atoms */ } /* use forces */ /* energy contributions */ forces[energy_p + h] /= (double)inconf[h]; forces[energy_p + h] -= force_0[energy_p + h]; tmpsum += conf_weight[h] * eweight * dsquare(forces[energy_p + h]); #ifdef STRESS /* stress contributions */ if (uf && us) { for (i = 0; i < 6; i++) { forces[stresses + i] /= conf_vol[h - firstconf]; forces[stresses + i] -= force_0[stresses + i]; tmpsum += conf_weight[h] * sweight * dsquare(forces[stresses + i]); } } #endif /* STRESS */ 
/* limiting constraints per configuration */ tmpsum += conf_weight[h] * dsquare(forces[limit_p + h]); } /* loop over configurations */ } /* parallel region */ #ifdef MPI /* Reduce rho_sum */ rho_sum = 0.0; MPI_Reduce(&rho_sum_loc, &rho_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); #else /* MPI */ rho_sum = rho_sum_loc; #endif /* MPI */ /* dummy constraints (global) */ #ifdef APOT /* add punishment for out of bounds (mostly for powell_lsq) */ if (0 == myid) { tmpsum += apot_punish(xi_opt, forces); } #endif /* APOT */ #ifndef NOPUNISH if (0 == myid) { int g; for (g = 0; g < ntypes; g++) { /* PARABOLA, WZERO, NORESC - different behaviour */ #ifdef PARABOLA /* constraints on U(n) */ forces[dummy_p + ntypes + g] = DUMMY_WEIGHT * parab(&calc_pot, xi, paircol + ntypes + g, 0.0) - force_0[dummy_p + ntypes + g]; /* constraints on U`(n) */ forces[dummy_p + g] = DUMMY_WEIGHT * parab_grad(&calc_pot, xi, paircol + ntypes + g, .5 * (calc_pot.begin[paircol + ntypes + g] + calc_pot.end[paircol + ntypes + g])) - force_0[dummy_p + g]; #elif defined(WZERO) if (calc_pot.begin[paircol + ntypes + g] <= 0.0) /* 0 in domain of U(n) */ /* constraints on U(n) */ forces[dummy_p + ntypes + g] = DUMMY_WEIGHT * splint(&calc_pot, xi, paircol + ntypes + g, 0.0) - force_0[dummy_p + ntypes + g]; else /* 0 not in domain of U(n) */ forces[dummy_p + ntypes + g] = 0.0; /* Free end... */ /* constraints on U`(n) */ forces[dummy_p + g] = DUMMY_WEIGHT * splint_grad(&calc_pot, xi, paircol + ntypes + g, 0.5 * (calc_pot.begin[paircol + ntypes + g] + calc_pot.end[paircol + ntypes + g])) - force_0[dummy_p + g]; #elif defined(NORESCALE) /* clear field */ forces[dummy_p + ntypes + g] = 0.0; /* Free end... */ /* NEW: Constraint on U': U'(1.)=0; */ forces[dummy_p + g] = DUMMY_WEIGHT * splint_grad(&calc_pot, xi, paircol + ntypes + g, 1.0); #else /* NOTHING */ forces[dummy_p + ntypes + g] = 0.0; /* Free end... 
*/ /* constraints on U`(n) */ forces[dummy_p + g] = DUMMY_WEIGHT * splint_grad(&calc_pot, xi, paircol + ntypes + g, 0.5 * (calc_pot.begin[paircol + ntypes + g] + calc_pot.end[paircol + ntypes + g])) - force_0[dummy_p + g]; #endif /* Dummy constraints */ tmpsum += dsquare(forces[dummy_p + ntypes + g]); tmpsum += dsquare(forces[dummy_p + g]); } /* loop over types */ #ifdef NORESCALE /* NEW: Constraint on n: <n>=1. ONE CONSTRAINT ONLY */ /* Calculate averages */ rho_sum /= (double)natoms; /* ATTN: if there are invariant potentials, things might be problematic */ forces[dummy_p + ntypes] = DUMMY_WEIGHT * (rho_sum - 1.0); tmpsum += dsquare(forces[dummy_p + ntypes]); #endif /* NORESCALE */ } /* only root process */ #endif /* !NOPUNISH */ #ifdef MPI /* reduce global sum */ sum = 0.0; MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* gather forces, energies, stresses */ if (0 == myid) { /* root node already has data in place */ /* forces */ MPI_Gatherv(MPI_IN_PLACE, myatoms, MPI_VECTOR, forces, atom_len, atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_DOUBLE, forces + natoms * 3, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* stresses */ MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_STENS, forces + natoms * 3 + nconf, conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD); /* punishment constraints */ MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_DOUBLE, forces + natoms * 3 + 7 * nconf, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); } else { /* forces */ MPI_Gatherv(forces + firstatom * 3, myatoms, MPI_VECTOR, forces, atom_len, atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(forces + natoms * 3 + firstconf, myconf, MPI_DOUBLE, forces + natoms * 3, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* stresses */ MPI_Gatherv(forces + natoms * 3 + nconf + 6 * firstconf, myconf, MPI_STENS, forces + natoms * 3 + nconf, conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD); /* punishment 
constraints */ MPI_Gatherv(forces + natoms * 3 + 7 * nconf + firstconf, myconf, MPI_DOUBLE, forces + natoms * 3 + 7 * nconf, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); } /* no need to pick up dummy constraints - are already @ root */ #else sum = tmpsum; /* global sum = local sum */ #endif /* MPI */ /* root process exits this function now */ if (0 == myid) { fcalls++; /* Increase function call counter */ if (isnan(sum)) { #ifdef DEBUG printf("\n--> Force is nan! <--\n\n"); #endif /* DEBUG */ return 10e10; } else return sum; } } /* end of infinite loop */ /* once a non-root process arrives here, all is done. */ return -1.0; }
double calc_forces_meam(double *xi_opt, double *forces, int flag) { int first, col, i; double *xi = NULL; /* Some useful temp variables */ static double tmpsum = 0.0, sum = 0.0; static double rho_sum = 0.0, rho_sum_loc = 0.0; switch (format) { case 0: xi = calc_pot.table; break; case 3: /* fall through */ case 4: xi = xi_opt; /* calc-table is opt-table */ break; case 5: xi = calc_pot.table; /* we need to update the calc-table */ } /* This is the start of an infinite loop */ while (1) { /* Reset tmpsum and rho_sum_loc tmpsum = Sum of all the forces, energies and constraints rho_sum_loc = Sum of density, rho, for all atoms */ tmpsum = 0.; rho_sum_loc = 0.; #if defined APOT && !defined MPI if (0 == format) { apot_check_params(xi_opt); update_calc_table(xi_opt, xi, 0); } #endif /* APOT && !MPI */ #ifdef MPI /* exchange potential and flag value */ #ifndef APOT MPI_Bcast(xi, calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif /* APOT */ MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD); if (1 == flag) break; /* Exception: flag 1 means clean up */ #ifdef APOT if (0 == myid) apot_check_params(xi_opt); MPI_Bcast(xi_opt, ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD); update_calc_table(xi_opt, xi, 0); #else /* if flag==2 then the potential parameters have changed -> sync */ if (2 == flag) potsync(); #endif /* APOT */ #endif /* MPI */ /* First step is to initialize 2nd derivatives for splines */ /* Pair potential (phi), density (rho), embedding funtion (F) where paircol is number of pair potential columns and ntypes is number of rho columns and ntypes is number of F columns */ for (col = 0; col < 2 * paircol + 3 * ntypes; col++) { /* Pointer to first entry */ first = calc_pot.first[col]; /* Initialize 2nd derivatives step = width of spline knots (known as h) xi+first = array with spline values calc_pot.last[col1] - first + 1 = num of spline pts *(xi + first - 2) = value of endpoint gradient (default: 1e30) *(xi + first - 1) = value of other endpoint gradient (default: phi=0.0, 
rho=0.0, F=1e30) calc_pot.d2tab + first = array to hold 2nd deriv */ spline_ed(calc_pot.step[col], xi + first, calc_pot.last[col] - first + 1, *(xi + first - 2), *(xi + first - 1), calc_pot.d2tab + first); } #ifndef MPI myconf = nconf; #endif /* MPI */ /* region containing loop over configurations */ { /* Temp variables */ atom_t *atom; /* atom pointer */ int h, j, k; int n_i, n_j, n_k; int uf; #ifdef APOT double temp_eng; #endif /* APOT */ #ifdef STRESS int us, stresses; #endif /* STRESS */ /* Some useful temp struct variable types */ /* neighbor pointers */ neigh_t *neigh_j, *neigh_k; /* Pair variables */ double phi_val, phi_grad; vector tmp_force; /* EAM variables */ int col_F; double eam_force; #if defined NORESCALE && !defined APOT double rho_val; #endif /* NORESCALE && !APOT */ /* MEAM variables */ double dV3j, dV3k, V3, vlj, vlk, vv3j, vv3k; vector dfj, dfk; angl *n_angl; /* Loop over configurations */ for (h = firstconf; h < firstconf + myconf; h++) { uf = conf_uf[h - firstconf]; #ifdef STRESS us = conf_us[h - firstconf]; #endif /* STRESS */ /* Reset energies */ forces[energy_p + h] = 0.0; #ifdef STRESS /* Reset stresses */ stresses = stress_p + 6 * h; for (i = 0; i < 6; ++i) forces[stresses + i] = 0.0; #endif /* STRESS */ /* Set limiting constraints */ forces[limit_p + h] = -force_0[limit_p + h]; /* FIRST LOOP: Reset forces and densities for each atom */ for (i = 0; i < inconf[h]; i++) { /* Skip every 3 spots in force array starting from position of first atom */ n_i = 3 * (cnfstart[h] + i); if (uf) { /* Set initial forces to negative of user given forces so we can take difference */ forces[n_i + 0] = -force_0[n_i + 0]; forces[n_i + 1] = -force_0[n_i + 1]; forces[n_i + 2] = -force_0[n_i + 2]; } else { /* Set initial forces to zero if not using forces */ forces[n_i + 0] = 0.0; forces[n_i + 1] = 0.0; forces[n_i + 2] = 0.0; } /* uf */ /* Reset the density for each atom */ conf_atoms[cnfstart[h] - firstatom + i].rho = 0.0; } /* i */ /* END OF FIRST LOOP */ /* 
SECOND LOOP: Calculate pair forces and energies, atomic densities */ for (i = 0; i < inconf[h]; i++) { /* Set pointer to temp atom pointer */ atom = conf_atoms + (cnfstart[h] - firstatom + i); /* Skip every 3 spots for force array */ n_i = 3 * (cnfstart[h] + i); /* Loop over neighbors */ for (j = 0; j < atom->num_neigh; j++) { /* Set pointer to temp neighbor pointer */ neigh_j = atom->neigh + j; /* Find the correct column in the potential table for pair potential: phi_ij For Binary Alloy: 0 = phi_AA, 1 = (phi_AB or phi_BA), 2 = phi_BB where typ = A = 0 and typ = B = 1 */ /* We need to check that neighbor atom exists inside pair potential's radius */ if (neigh_j->r < calc_pot.end[neigh_j->col[0]]) { /* Compute phi and phi' value given radial distance NOTE: slot = spline point index right below radial distance shift = % distance from 'slot' spline pt step = width of spline points (given as 'h' in books) 0 means the pair potential columns */ /* fn value and grad are calculated in the same step */ if (uf) phi_val = splint_comb_dir(&calc_pot, xi, neigh_j->slot[0], neigh_j->shift[0], neigh_j->step[0], &phi_grad); else phi_val = splint_dir(&calc_pot, xi, neigh_j->slot[0], neigh_j->shift[0], neigh_j->step[0]); /* Add in piece contributed by neighbor to energy */ forces[energy_p + h] += 0.5 * phi_val; if (uf) { /* Compute tmp force values */ tmp_force.x = neigh_j->dist_r.x * phi_grad; tmp_force.y = neigh_j->dist_r.y * phi_grad; tmp_force.z = neigh_j->dist_r.z * phi_grad; /* Add in force on atom i from atom j */ forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; #ifdef STRESS if (us) { /* also calculate pair stresses */ forces[stresses + 0] -= 0.5 * neigh_j->dist.x * tmp_force.x; forces[stresses + 1] -= 0.5 * neigh_j->dist.y * tmp_force.y; forces[stresses + 2] -= 0.5 * neigh_j->dist.z * tmp_force.z; forces[stresses + 3] -= 0.5 * neigh_j->dist.x * tmp_force.y; forces[stresses + 4] -= 0.5 * neigh_j->dist.y * tmp_force.z; 
forces[stresses + 5] -= 0.5 * neigh_j->dist.z * tmp_force.x; } #endif /* STRESS */ } } /* r < cutoff */ /* END IF STMNT: NEIGH LIES INSIDE CUTOFF FOR PAIR POTENTIAL */ /* Find the correct column in the potential table for atomic density, rho_ij paircol = number of pair potential columns Binary Alloy: paircol = 3 (3 pair potentials with index 0, 1, 2) index of densitiy functions: 3 = rho_A, 4 = rho_B where A, B are atom type for the neighbor */ /* Compute rho rho value and sum them up Need to play tricks so that rho values are put in the correct columns if alloy. If atom j is A or B, fn value needs to be in correct rho_A or rho_B respectively, it doesn't depend on atom i. */ /* Check that atom j lies inside rho_typ2 */ if (neigh_j->r < calc_pot.end[neigh_j->col[1]]) { /* Store gradient in the neighbor for the pair r_ij to be used in the future when computing forces and sum up rho for atom i */ atom->rho += splint_comb_dir(&calc_pot, xi, neigh_j->slot[1], neigh_j->shift[1], neigh_j->step[1], &neigh_j->drho); } else { /* If the pair distance does not lie inside rho_typ2 We set the grad to 0 so it doesn't sum into the net force */ neigh_j->drho = 0.0; } /* r < cutoff */ /* Compute the f_ij values and store the fn and grad in each neighbor struct for easy access later */ /* Find the correct column in the potential table for "f": f_ij For Binary Alloy: 0 = f_AA, 1 = f_AB, f_BA, 2 = f_BB where typ = A = 0 and typ = B = 1 Note: it is "paircol+2*ntypes" spots away in the array */ /* Check that atom j lies inside f_col2 */ if (neigh_j->r < calc_pot.end[neigh_j->col[2]]) { /* Store the f(r_ij) value and the gradient for future use */ neigh_j->f = splint_comb_dir(&calc_pot, xi, neigh_j->slot[2], neigh_j->shift[2], neigh_j->step[2], &neigh_j->df); } else { /* Store f and f' = 0 if doesn't lie in boundary to be used later when calculating forces */ neigh_j->f = 0.0; neigh_j->df = 0.0; } /* END LOOP OVER NEIGHBORS */ } /* Find the correct column in the potential table for angle 
part: g_ijk Binary Alloy: 0 = g_A, 1 = g_B where A, B are atom type for the main atom i Note: it is now "2*paircol+2*ntypes" from beginning column to account for phi(paircol)+rho(nytpes)+F(ntypes)+f(paircol) col2 = 2 * paircol + 2 * ntypes + typ1; */ /* Loop over every angle formed by neighbors N(N-1)/2 possible combinations Used in computing angular part g_ijk */ /* set n_angl pointer to angl_part of current atom */ n_angl = atom->angl_part; for (j = 0; j < atom->num_neigh - 1; j++) { /* Get pointer to neighbor jj */ neigh_j = atom->neigh + j; for (k = j + 1; k < atom->num_neigh; k++) { /* Get pointer to neighbor kk */ neigh_k = atom->neigh + k; /* The cos(theta) should always lie inside -1 ... 1 So store the g and g' without checking bounds */ n_angl->g = splint_comb_dir(&calc_pot, xi, n_angl->slot, n_angl->shift, n_angl->step, &n_angl->dg); /* Sum up rho piece for atom i caused by j and k f_ij * f_ik * m_ijk */ atom->rho += neigh_j->f * neigh_k->f * n_angl->g; /* Increase n_angl pointer */ n_angl++; } } /* Column for embedding function, F */ col_F = paircol + ntypes + atom->type; #ifndef NORESCALE /* Compute energy, gradient for embedding function F Check if rho lies short of inner cutoff of F(rho) */ if (atom->rho < calc_pot.begin[col_F]) { /* Punish this potential for having rho lie outside of F */ forces[limit_p + h] += DUMMY_WEIGHT * 10. * dsquare(calc_pot.begin[col_F] - atom->rho); /* Set the atomic density to the first rho in the spline F */ atom->rho = calc_pot.begin[col_F]; } else if (atom->rho > calc_pot.end[col_F]) { /* rho is to the right of the spline */ /* Punish this potential for having rho lie outside of F */ forces[limit_p + h] += DUMMY_WEIGHT * 10. 
* dsquare(atom->rho - calc_pot.end[col_F]); /* Set the atomic density to the last rho in the spline F */ atom->rho = calc_pot.end[col_F]; } /* Compute energy piece from F, and store the gradient for later use */ forces[energy_p + h] += splint_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF); #else /* Compute energy, gradient for embedding function F Check if rho lies short of inner cutoff of F(rho) */ if (atom->rho < calc_pot.begin[col_F]) { #ifdef APOT /* calculate analytic value explicitly */ apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng); atom->gradF = apot_grad(atom->rho, xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]); forces[energy_p + h] += temp_eng; #else /* Linear extrapolate values to left to get F_i(rho) This gets value and grad of initial spline point */ rho_val = splint_comb(&calc_pot, xi, col_F, calc_pot.begin[col_F], &atom->gradF); /* Sum this to the total energy for this configuration Linear extrapolate this energy */ forces[energy_p + h] += rho_val + (atom->rho - calc_pot.begin[col_F]) * atom->gradF; #endif /* APOT */ /* rho is to the right of the spline */ } else if (atom->rho > calc_pot.end[col_F]) { #ifdef APOT /* calculate analytic value explicitly */ apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng); atom->gradF = apot_grad(atom->rho, xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]); forces[energy_p + h] += temp_eng; #else /* Get value and grad at 1/2 the width from the final spline point */ rho_val = splint_comb(&calc_pot, xi, col_F, calc_pot.end[col_F] - .5 * calc_pot.step[col_F], &atom->gradF); /* Linear extrapolate to the right to get energy */ forces[energy_p + h] += rho_val + (atom->rho - calc_pot.end[col_F]) * atom->gradF; #endif /* APOT */ /* and in-between */ } else { #ifdef APOT /* calculate small values directly */ if (atom->rho < 0.1) { apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng); atom->gradF = apot_grad(atom->rho, 
xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]); forces[energy_p + h] += temp_eng; } else #endif /* Get energy value from within spline and store the grad */ forces[energy_p + h] += splint_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF); } #endif /* !NORESCALE */ /* Sum up rho for future MPI use */ rho_sum_loc += atom->rho; /* Calculate remaining forces from embedding function */ if (uf) { /* Loop over neighbors */ for (j = 0; j < atom->num_neigh; ++j) { /* Set pointer to temp neighbor pointer and record type */ neigh_j = atom->neigh + j; /* Check that radial distance between pair is within cutoff distance of either possible rho_A or rho_B for alloys, where A or B stands for atom i WARNING: Double check this!!! May not need this since drho will be 0 otherwise */ if (neigh_j->r < calc_pot.end[neigh_j->col[1]]) { /* Calculate eam force */ eam_force = neigh_j->drho * atom->gradF; /* Multiply the eamforce with x/r to get real force */ tmp_force.x = neigh_j->dist_r.x * eam_force; tmp_force.y = neigh_j->dist_r.y * eam_force; tmp_force.z = neigh_j->dist_r.z * eam_force; /* Sum up forces acting on atom i from atom j */ forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* Subtract off forces acting on atom j from atom i */ n_j = 3 * neigh_j->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #ifdef STRESS if (us) { forces[stresses + 0] -= neigh_j->dist.x * tmp_force.x; forces[stresses + 1] -= neigh_j->dist.y * tmp_force.y; forces[stresses + 2] -= neigh_j->dist.z * tmp_force.z; forces[stresses + 3] -= neigh_j->dist.x * tmp_force.y; forces[stresses + 4] -= neigh_j->dist.y * tmp_force.z; forces[stresses + 5] -= neigh_j->dist.z * tmp_force.x; } #endif /* STRESS */ } /* END IF STMT: Inside reach of rho cutoff */ } /* END LOOP OVER NEIGHBORS */ /* Compute MEAM Forces */ /********************************/ /* Loop over every angle formed by neighbors N(N-1)/2 possible 
combinations Used in computing angular part g_ijk */ /* set n_angl pointer to angl_part of current atom */ n_angl = atom->angl_part; for (j = 0; j < atom->num_neigh - 1; j++) { /* Get pointer to neighbor j */ neigh_j = atom->neigh + j; /* Force location for atom j */ n_j = 3 * neigh_j->nr; for (k = j + 1; k < atom->num_neigh; k++) { /* Get pointer to neighbor k */ neigh_k = atom->neigh + k; /* Force location for atom k */ n_k = 3 * neigh_k->nr; /* Some tmp variables to clean up force fn below */ dV3j = n_angl->g * neigh_j->df * neigh_k->f; dV3k = n_angl->g * neigh_j->f * neigh_k->df; V3 = neigh_j->f * neigh_k->f * n_angl->dg; vlj = V3 * neigh_j->inv_r; vlk = V3 * neigh_k->inv_r; vv3j = dV3j - vlj * n_angl->cos; vv3k = dV3k - vlk * n_angl->cos; dfj.x = vv3j * neigh_j->dist_r.x + vlj * neigh_k->dist_r.x; dfj.y = vv3j * neigh_j->dist_r.y + vlj * neigh_k->dist_r.y; dfj.z = vv3j * neigh_j->dist_r.z + vlj * neigh_k->dist_r.z; dfk.x = vv3k * neigh_k->dist_r.x + vlk * neigh_j->dist_r.x; dfk.y = vv3k * neigh_k->dist_r.y + vlk * neigh_j->dist_r.y; dfk.z = vv3k * neigh_k->dist_r.z + vlk * neigh_j->dist_r.z; /* Force on atom i from j and k */ forces[n_i + 0] += atom->gradF * (dfj.x + dfk.x); forces[n_i + 1] += atom->gradF * (dfj.y + dfk.y); forces[n_i + 2] += atom->gradF * (dfj.z + dfk.z); /* Reaction force on atom j from i and k */ forces[n_j + 0] -= atom->gradF * dfj.x; forces[n_j + 1] -= atom->gradF * dfj.y; forces[n_j + 2] -= atom->gradF * dfj.z; /* Reaction force on atom k from i and j */ forces[n_k + 0] -= atom->gradF * dfk.x; forces[n_k + 1] -= atom->gradF * dfk.y; forces[n_k + 2] -= atom->gradF * dfk.z; #ifdef STRESS if (us) { /* Force from j on atom i */ tmp_force.x = atom->gradF * dfj.x; tmp_force.y = atom->gradF * dfj.y; tmp_force.z = atom->gradF * dfj.z; forces[stresses + 0] -= neigh_j->dist.x * tmp_force.x; forces[stresses + 1] -= neigh_j->dist.y * tmp_force.y; forces[stresses + 2] -= neigh_j->dist.z * tmp_force.z; forces[stresses + 3] -= neigh_j->dist.x * 
tmp_force.y; forces[stresses + 4] -= neigh_j->dist.y * tmp_force.z; forces[stresses + 5] -= neigh_j->dist.z * tmp_force.x; /* Force from k on atom i */ tmp_force.x = atom->gradF * dfk.x; tmp_force.y = atom->gradF * dfk.y; tmp_force.z = atom->gradF * dfk.z; forces[stresses + 0] -= neigh_k->dist.x * tmp_force.x; forces[stresses + 1] -= neigh_k->dist.y * tmp_force.y; forces[stresses + 2] -= neigh_k->dist.z * tmp_force.z; forces[stresses + 3] -= neigh_k->dist.x * tmp_force.y; forces[stresses + 4] -= neigh_k->dist.y * tmp_force.z; forces[stresses + 5] -= neigh_k->dist.z * tmp_force.x; } #endif // STRESS /* Increase n_angl pointer */ n_angl++; } /* End inner loop over angles (neighbor atom k) */ } /* End outer loop over angles (neighbor atom j) */ } /* uf */ } /* END OF SECOND LOOP OVER ATOM i */ /* 3RD LOOP OVER ATOM i */ /* Sum up the square of the forces for each atom then multiply it by the weight for this config */ for (i = 0; i < inconf[h]; i++) { atom = conf_atoms + i + cnfstart[h] - firstatom; n_i = 3 * (cnfstart[h] + i); #ifdef FWEIGHT /* Weigh by absolute value of force */ forces[n_i + 0] /= FORCE_EPS + atom->absforce; forces[n_i + 1] /= FORCE_EPS + atom->absforce; forces[n_i + 2] /= FORCE_EPS + atom->absforce; #endif /* FWEIGHT */ #ifdef CONTRIB if (atom->contrib) #endif /* CONTRIB */ tmpsum += conf_weight[h] * (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2])); } /* END OF THIRD LOOP OVER ATOM i */ /* Add in the energy per atom and its weight to the sum */ /* First divide by num atoms */ forces[energy_p + h] /= (double)inconf[h]; /* Then subtract off the cohesive energy given to use by user */ forces[energy_p + h] -= force_0[energy_p + h]; /* Sum up square of this new energy term for each config multiplied by its respective weight */ tmpsum += conf_weight[h] * eweight * dsquare(forces[energy_p + h]); #ifdef STRESS /* LOOP OVER STRESSES */ for (i = 0; i < 6; ++i) { /* Multiply weight to stresses and divide by volume */ 
forces[stresses + i] /= conf_vol[h - firstconf]; /* Subtract off user supplied stresses */ forces[stresses + i] -= force_0[stresses + i]; /* Sum in the square of each stress component with config weight */ tmpsum += conf_weight[h] * sweight * dsquare(forces[stresses + i]); } #endif /* STRESS */ #ifndef NORESCALE /* Add in the square of the limiting constraints for each config */ /* This is punishment from going out of bounds for F(rho) if NORESCALE is not defined */ forces[limit_p + h] *= conf_weight[h]; tmpsum += dsquare(forces[limit_p + h]); #endif /* !NORESCALE */ } /* END MAIN LOOP OVER CONFIGURATIONS */ } #ifdef MPI /* Reduce the rho_sum into root node */ MPI_Reduce(&rho_sum_loc, &rho_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); #else rho_sum = rho_sum_loc; #endif // MPI #ifdef NORESCALE if (myid == 0) { /* Calculate the average rho_sum per atom NOTE: This gauge constraint exists for both EAM and MEAM */ rho_sum /= (double)natoms; /* Another constraint for the gauge conditions this sets the avg rho per atom to 1 Please read the other constraint on gauge conditions above. 
*/ forces[dummy_p + ntypes] = DUMMY_WEIGHT * (rho_sum - 1.); tmpsum += dsquare(forces[dummy_p + ntypes]); } #endif /* NORESCALE */ #ifdef MPI /* Reduce the global sum from all the tmpsum's */ sum = 0.0; MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* gather forces, energies, stresses */ if (myid == 0) { /* root node already has data in place */ /* forces */ MPI_Gatherv(MPI_IN_PLACE, myatoms, MPI_VECTOR, forces, atom_len, atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_DOUBLE, forces + natoms * 3, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* stresses */ MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_STENS, forces + natoms * 3 + nconf, conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD); /* punishment constraints */ MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_DOUBLE, forces + natoms * 3 + 7 * nconf, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); } else { /* forces */ MPI_Gatherv(forces + firstatom * 3, myatoms, MPI_VECTOR, forces, atom_len, atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(forces + natoms * 3 + firstconf, myconf, MPI_DOUBLE, forces + natoms * 3, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* stresses */ MPI_Gatherv(forces + natoms * 3 + nconf + 6 * firstconf, myconf, MPI_STENS, forces + natoms * 3 + nconf, conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD); /* punishment constraints */ MPI_Gatherv(forces + natoms * 3 + 7 * nconf + firstconf, myconf, MPI_DOUBLE, forces + natoms * 3 + 7 * nconf, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); } /* no need to pick up dummy constraints - are already @ root */ #else /* Set tmpsum to sum - only matters when not running MPI */ sum = tmpsum; #endif /* MPI */ /* Root process only */ if (myid == 0) { /* Increment function calls */ fcalls++; /* If total sum is NAN return large number instead */ if (isnan(sum)) { #ifdef DEBUG printf("\n--> Force is nan! 
<--\n\n"); #endif /* DEBUG */ return 10e10; } else return sum; } } /* END OF INFINITE LOOP */ /* Kill off other procs */ return -1.0; }
/*
 * rescale - re-grid and rescale the density (rho) and embedding (F) columns
 * of a tabulated potential so that the dominant extreme density maps to
 * `upper`.
 *
 * Column layout used below (taken from the loops in this function):
 *   [0, paircol)                            pair potentials
 *   [paircol, paircol + ntypes)             rho, one column per atom type
 *   [paircol + ntypes, paircol + 2*ntypes)  F,   one column per atom type
 * The two table entries directly in front of a column (first - 2, first - 1)
 * hold that column's boundary gradients.
 *
 * Steps:
 *   1. (re)initialise all splines and recompute atom->rho for every atom,
 *   2. collect the per-type minimum / maximum density,
 *   3. decide whether the F domains must be moved or the scaling factor
 *      lies outside [0.95, 1.05]; if neither holds and `flag` is 0 nothing
 *      else is changed,
 *   4. otherwise resample F on the new domain, scale rho by the factor `a`,
 *      rescale (and for negative `a` mirror) the F axis, and apply the
 *      gauge correction that removes the slope of F at the domain centre.
 *
 * Parameters:
 *   pt    - potential table that is modified in place
 *   upper - target value for the dominant density boundary after scaling
 *   flag  - non-zero forces the update even if the checks in step 3 pass
 *
 * Returns the applied scaling factor `a`, or 0.0 when no update was needed.
 *
 * Side effects: writes pt->table, pt->xcoord, pt->d2tab, pt->begin/end/step/
 * invstep, atom rho values in g_config.atoms and g_calc.lambda; prints the
 * scaling factor and the lambda values to stdout.
 */
double rescale(pot_table_t* pt, double upper, int flag) {
  /* initialised so they are well-defined even if no density beats the
     +-1e100 sentinels below (e.g. no atoms at all) */
  int mincol = 0, maxcol = 0;
  int col, col2, first, vals, h, i, j, typ1, typ2, sign;
  neigh_t* neigh = NULL;
  double fnval, pos, grad, a;
  double min = 1e100, max = -1e100;
  double result = 0.0; /* value handed back through the single exit below */
  double* xi = pt->table;

  const int n_types = g_param.ntypes;
  const int rho0 = g_calc.paircol;                     /* first rho column */
  const int embed0 = g_calc.paircol + g_param.ntypes;  /* first F column */
  const int eq_dist =
      (g_pot.format_type == POTENTIAL_FORMAT_TABULATED_EQ_DIST);

  /* size of the table range that follows the last rho column up to the end
     of the last F column */
  int dimneuxi = pt->last[embed0 + n_types - 1] - pt->last[embed0 - 1];

  /* scratch buffers; all of them are released at the `cleanup` label */
  double* neuxi = malloc(dimneuxi * sizeof *neuxi);
  double* neuord = malloc(dimneuxi * sizeof *neuord);
  double* neustep = malloc(n_types * sizeof *neustep);
  double* maxrho = malloc(n_types * sizeof *maxrho);
  double* minrho = malloc(n_types * sizeof *minrho);
  double* left = malloc(n_types * sizeof *left);
  double* right = malloc(n_types * sizeof *right);

  if (neuxi == NULL || neuord == NULL || neustep == NULL || maxrho == NULL ||
      minrho == NULL || left == NULL || right == NULL)
    error(1, "Error allocating memory for rescale function\n");

  for (i = 0; i < n_types; i++) {
    maxrho[i] = -1e100;
    minrho[i] = 1e100;
  }

  /* ---- find max/min rho ------------------------------------------------ */

  /* init second derivatives for all splines first - better safe than sorry */
  for (col = 0; col < rho0; col++) { /* pair potentials */
    first = pt->first[col];
    if (eq_dist)
      spline_ed(pt->step[col], pt->table + first, pt->last[col] - first + 1,
                *(pt->table + first - 2), 0.0, pt->d2tab + first);
    else /* non-equidistant tabulated format */
      spline_ne(pt->xcoord + first, pt->table + first,
                pt->last[col] - first + 1, *(pt->table + first - 2), 0.0,
                pt->d2tab + first);
  }

  for (col = rho0; col < embed0; col++) { /* rho */
    first = pt->first[col];
    if (eq_dist)
      spline_ed(pt->step[col], xi + first, pt->last[col] - first + 1,
                *(xi + first - 2), 0.0, pt->d2tab + first);
    else
      spline_ne(pt->xcoord + first, xi + first, pt->last[col] - first + 1,
                *(xi + first - 2), 0.0, pt->d2tab + first);
  }

  for (col = embed0; col < embed0 + n_types; col++) { /* F */
    first = pt->first[col];
    /* F uses both stored boundary gradients */
    if (eq_dist)
      spline_ed(pt->step[col], xi + first, pt->last[col] - first + 1,
                *(xi + first - 2), *(xi + first - 1), pt->d2tab + first);
    else
      spline_ne(pt->xcoord + first, xi + first, pt->last[col] - first + 1,
                *(xi + first - 2), *(xi + first - 1), pt->d2tab + first);
  }

  /* re-calculate atom rho (might be a waste...) */
  for (h = 0; h < g_config.nconf; h++) {
    for (i = 0; i < g_config.inconf[h]; i++)
      g_config.atoms[g_config.cnfstart[h] + i].rho = 0.0;

    for (i = 0; i < g_config.inconf[h]; i++) {
      atom_t* atom = g_config.atoms + i + g_config.cnfstart[h];
      typ1 = atom->type;

      for (j = 0; j < atom->num_neigh; j++) {
        neigh = atom->neigh + j;
        /* every pair is handled once, from the atom with the lower index */
        if (neigh->nr > i + g_config.cnfstart[h]) {
          typ2 = neigh->type;
          col2 = rho0 + typ2;
          if (typ2 == typ1) {
            /* same type: one spline evaluation serves both atoms */
            if (neigh->r < pt->end[col2]) {
              fnval = splint_dir(pt, xi, neigh->slot[1], neigh->shift[1],
                                 neigh->step[1]);
              atom->rho += fnval;
              g_config.atoms[neigh->nr].rho += fnval;
            }
          } else {
            col = rho0 + typ1;
            if (neigh->r < pt->end[col2]) {
              atom->rho += splint_dir(pt, xi, neigh->slot[1], neigh->shift[1],
                                      neigh->step[1]);
            }
            if (neigh->r < pt->end[col])
              g_config.atoms[neigh->nr].rho +=
                  (*g_splint)(pt, xi, col, neigh->r);
          }
        }
      }

      /* contributions from lower-indexed atoms were added in their own
         iterations, so rho of this atom is complete here */
      maxrho[typ1] = MAX(maxrho[typ1], atom->rho);
      minrho[typ1] = MIN(minrho[typ1], atom->rho);
    }
  }

  for (i = 0; i < n_types; i++) {
    if (maxrho[i] > max) {
      max = maxrho[i];
      maxcol = i;
    }
    if (minrho[i] < min) {
      min = minrho[i];
      mincol = i;
    }
  }

  /* determine dominant side */
  sign = (max >= -min) ? 1 : -1;

  /* new left and right boundary: pad by 0.3 of the current F step */
  for (i = 0; i < n_types; i++) {
    j = embed0 + i;
    left[i] = minrho[i] - 0.3 * pt->step[j];
    right[i] = maxrho[i] + 0.3 * pt->step[j];
    /* expansion is necessary if a density lies outside the tabulated domain
       or more than 0.95 of a step inside it */
    if (flag || minrho[i] - pt->begin[j] < 0.0 ||
        minrho[i] - pt->begin[j] > 0.95 * pt->step[j] ||
        maxrho[i] - pt->end[j] > 0 ||
        maxrho[i] - pt->end[j] < -0.95 * pt->step[j])
      flag = 1;
  }

  /* determine scaling factor */
  a = (sign == 1) ? upper / right[maxcol] : upper / left[mincol];

  if (flag || fabs(a) > 1.05 || fabs(a) < 0.95)
    flag = 1;

  /* update needed? If not, leave through the common exit so that the
     scratch buffers are released (a plain `return` here leaked them). */
  if (!flag)
    goto cleanup;

  /* ---- expand potential ------------------------------------------------ */
  h = 0;
  for (i = 0; i < n_types; i++) {
    col = embed0 + i;
    /* resample the embedding function on [left, right] with the same
       number of intervals as before */
    vals = pt->last[col] - pt->first[col];
    neustep[i] = (right[i] - left[i]) / (double)vals;
    pos = left[i];
    for (j = 0; j <= vals; j++) {
      neuxi[h] = splint_ne(pt, xi, col, pos); /* inter- or extrapolation */
      neuord[h] = pos;
      h++;
      pos += neustep[i];
    }
    /* correct the stored boundary gradients; entries >= 1e30 are left alone
       (presumably the natural-spline sentinel - TODO confirm) */
    if (*(xi + pt->first[col] - 2) < 1.e30)
      *(xi + pt->first[col] - 2) = splint_grad_ne(pt, xi, col, left[i]);
    if (*(xi + pt->first[col] - 1) < 1.e30)
      *(xi + pt->first[col] - 1) = splint_grad_ne(pt, xi, col, right[i]);
  }

  /* write back values */
  col = 0; /* running index into neuxi / neuord */
  for (j = embed0; j < embed0 + n_types; j++)
    for (i = pt->first[j]; i <= pt->last[j]; i++) {
      xi[i] = neuxi[col];
      pt->xcoord[i] = neuord[col];
      col++;
    }

  printf("Scaling factor %f\n", a);

  /* scale rho values (and their stored left gradient) by a */
  for (i = rho0; i < embed0; i++) {
    for (j = pt->first[i]; j <= pt->last[i]; j++) {
      pt->table[j] *= a;
    }
    if (*(xi + pt->first[i] - 2) < 1.e30)
      *(xi + pt->first[i] - 2) *= a;
  }

  /* rescale the axis of all embedding functions by a */
  if (sign == 1) {
    j = 0;
    for (i = embed0; i < embed0 + n_types; i++) {
      pt->begin[i] = a * left[j];
      pt->end[i] = a * right[j];
      pt->step[i] = a * neustep[j];
      pt->invstep[i] = 1.0 / pt->step[i];
      /* gradient correction: stretching the axis by a divides slopes by a */
      if (xi[pt->first[i] - 2] < 1.e30)
        xi[pt->first[i] - 2] /= a;
      if (xi[pt->first[i] - 1] < 1.e30)
        xi[pt->first[i] - 1] /= a;
      pos = pt->begin[i];
      for (h = pt->first[i]; h <= pt->last[i]; h++) {
        pt->xcoord[h] = pos;
        pos += pt->step[i];
      }
      j++;
    }
  } else { /* reverse - a is negative */
    j = 0;
    for (i = embed0; i < embed0 + n_types; i++) {
      pt->begin[i] = a * right[j];
      pt->end[i] = a * left[j];
      pt->step[i] = -a * neustep[j];
      pt->invstep[i] = 1.0 / pt->step[i];
      /* gradient correction and exchange of left/right boundary slopes */
      if (xi[pt->first[i] - 2] < 1.e30)
        grad = -xi[pt->first[i] - 2] / a;
      else
        grad = 1.e30;
      if (xi[pt->first[i] - 1] < 1.e30)
        xi[pt->first[i] - 2] = -xi[pt->first[i] - 1] / a;
      else
        xi[pt->first[i] - 2] = 1.e30;
      xi[pt->first[i] - 1] = grad;
      pos = pt->begin[i];
      for (h = pt->first[i]; h <= pt->last[i]; h++) {
        pt->xcoord[h] = pos;
        pos += pt->step[i];
      }
      j++;
    }

    /* collect the F values of each column in reverse order ... */
    h = 0;
    for (i = 0; i < n_types; i++) {
      col = embed0 + i;
      for (j = pt->last[col]; j >= pt->first[col]; j--) {
        neuxi[h] = xi[j];
        h++;
      }
    }
    /* ... and write them back */
    col = 0;
    for (j = embed0; j < embed0 + n_types; j++)
      for (i = pt->first[j]; i <= pt->last[j]; i++) {
        xi[i] = neuxi[col];
        col++;
      }
  }

  /* ---- re-initialise splines ------------------------------------------- */
  for (col = rho0; col < embed0; col++) { /* rho */
    first = pt->first[col];
    if (eq_dist)
      spline_ed(pt->step[col], xi + first, pt->last[col] - first + 1,
                *(xi + first - 2), 0.0, pt->d2tab + first);
    else
      spline_ne(pt->xcoord + first, xi + first, pt->last[col] - first + 1,
                *(xi + first - 2), 0.0, pt->d2tab + first);
  }

  for (col = embed0; col < embed0 + n_types; col++) { /* F */
    first = pt->first[col];
    if (eq_dist)
      spline_ed(pt->step[col], xi + first, pt->last[col] - first + 1,
                *(xi + first - 2), *(xi + first - 1), pt->d2tab + first);
    else
      spline_ne(pt->xcoord + first, xi + first, pt->last[col] - first + 1,
                *(xi + first - 2), *(xi + first - 1), pt->d2tab + first);
  }

  /* ---- correct gauge: U'(n_mean) = 0 ----------------------------------- */
  /* lambda[i] is the slope of F_i at the centre of its domain.
     NOTE(review): the gradient is evaluated with &g_pot.opt_pot rather than
     pt - confirm that callers always pass pt == &g_pot.opt_pot. */
  for (i = 0; i < n_types; i++) {
    g_calc.lambda[i] = (*g_splint_grad)(
        &g_pot.opt_pot, pt->table, embed0 + i,
        0.5 * (pt->begin[embed0 + i] + pt->end[embed0 + i]));
  }

  for (i = 0; i < n_types; i++)
    printf("lambda[%d] = %f\n", i, g_calc.lambda[i]);

  /* add lambda-weighted densities to the pair potentials; i walks the pair
     columns in the order (col, col2) with col <= col2 */
  i = 0;
  for (col = 0; col < n_types; col++)
    for (col2 = col; col2 < n_types; col2++) {
      for (j = pt->first[i]; j <= pt->last[i]; j++)
        pt->table[j] +=
            (pt->xcoord[j] < pt->end[rho0 + col2]
                 ? g_calc.lambda[col] *
                       splint_ne(pt, pt->table, rho0 + col2, pt->xcoord[j])
                 : 0.0) +
            (pt->xcoord[j] < pt->end[rho0 + col]
                 ? g_calc.lambda[col2] *
                       splint_ne(pt, pt->table, rho0 + col, pt->xcoord[j])
                 : 0.0);

      /* boundary gradients: only explicitly stored slopes are adjusted;
         entries >= 1e29 are skipped (presumably the natural-spline
         sentinel - TODO confirm) */
      if (pt->table[pt->first[i] - 2] < 1e29)
        pt->table[pt->first[i] - 2] +=
            (pt->begin[i] < pt->end[rho0 + col2]
                 ? g_calc.lambda[col] * (*g_splint_grad)(pt, pt->table,
                                                         rho0 + col2,
                                                         pt->begin[i])
                 : 0.0) +
            (pt->begin[i] < pt->end[rho0 + col]
                 ? g_calc.lambda[col2] * (*g_splint_grad)(pt, pt->table,
                                                          rho0 + col,
                                                          pt->begin[i])
                 : 0.0);

      if (pt->table[pt->first[i] - 1] < 1e29)
        pt->table[pt->first[i] - 1] +=
            (pt->end[i] < pt->end[rho0 + col2]
                 ? g_calc.lambda[col] * (*g_splint_grad)(pt, pt->table,
                                                         rho0 + col2,
                                                         pt->end[i])
                 : 0.0) +
            (pt->end[i] < pt->end[rho0 + col]
                 ? g_calc.lambda[col2] * (*g_splint_grad)(pt, pt->table,
                                                          rho0 + col,
                                                          pt->end[i])
                 : 0.0);
      i++;
    }

  /* subtract the linear term lambda * rho from every embedding function */
  for (i = 0; i < n_types; i++) {
    for (j = pt->first[embed0 + i]; j <= pt->last[embed0 + i]; j++)
      pt->table[j] -= pt->xcoord[j] * g_calc.lambda[i];

    /* stored boundary gradients shift by the same constant slope */
    if (pt->table[pt->first[embed0 + i] - 2] < 1e29)
      pt->table[pt->first[embed0 + i] - 2] -= g_calc.lambda[i];
    if (pt->table[pt->first[embed0 + i] - 1] < 1e29)
      pt->table[pt->first[embed0 + i] - 1] -= g_calc.lambda[i];

    g_calc.lambda[i] = 0.0;
  }

  /* init second derivatives of the modified pair potentials */
  for (col = 0; col < rho0; col++) {
    first = pt->first[col];
    if (eq_dist)
      spline_ed(pt->step[col], pt->table + first, pt->last[col] - first + 1,
                *(pt->table + first - 2), 0.0, pt->d2tab + first);
    else
      spline_ne(pt->xcoord + first, pt->table + first,
                pt->last[col] - first + 1, *(pt->table + first - 2), 0.0,
                pt->d2tab + first);
  }

  /* report the applied factor */
  result = a;

cleanup:
  free(neuxi);
  free(neuord);
  free(neustep);
  free(maxrho);
  free(minrho);
  free(left);
  free(right);

  return result;
}
double calc_forces(double* xi_opt, double* forces, int flag) { double* xi = NULL; switch (g_pot.format_type) { case POTENTIAL_FORMAT_UNKNOWN: error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__, __LINE__); case POTENTIAL_FORMAT_ANALYTIC: xi = g_pot.calc_pot.table; break; case POTENTIAL_FORMAT_TABULATED_EQ_DIST: case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: xi = xi_opt; break; case POTENTIAL_FORMAT_KIM: error(1, "KIM format is not supported by pair force routine!"); break; } #if !defined(MPI) g_mpi.myconf = g_config.nconf; #endif // !MPI // This is the start of an infinite loop while (1) { // sum of squares of local process double error_sum = 0.0; #if defined(APOT) && !defined(MPI) if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC) { apot_check_params(xi_opt); update_calc_table(xi_opt, xi, 0); } #endif // APOT && !MPI #if defined(MPI) #if !defined(APOT) // exchange potential and flag value MPI_Bcast(xi, g_pot.calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif // !APOT MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD); if (flag == 1) break; // Exception: flag 1 means clean up #if defined(APOT) if (g_mpi.myid == 0) apot_check_params(xi_opt); MPI_Bcast(xi_opt, g_calc.ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD); update_calc_table(xi_opt, xi, 0); #else // APOT // if flag == 2 then the potential parameters have changed -> sync if (flag == 2) potsync(); #endif // APOT #endif // MPI // init second derivatives for splines // pair potential // [0, ..., paircol - 1] update_splines(xi, 0, g_calc.paircol, 1); // loop over configurations for (int config_idx = g_mpi.firstconf; config_idx < g_mpi.firstconf + g_mpi.myconf; config_idx++) { int uf = g_config.conf_uf[config_idx - g_mpi.firstconf]; #if defined(STRESS) int us = g_config.conf_us[config_idx - g_mpi.firstconf]; #endif // STRESS // reset energies and stresses forces[g_calc.energy_p + config_idx] = 0.0; #if defined(STRESS) int stress_idx = g_calc.stress_p + 6 * config_idx; memset(forces + stress_idx, 0, 6 * 
sizeof(double)); #endif // STRESS #if defined(APOT) if (g_param.enable_cp) forces[g_calc.energy_p + config_idx] += chemical_potential( g_param.ntypes, g_config.na_type[config_idx], xi_opt + g_pot.cp_start); #endif // APOT // first loop: reset forces for (int atom_idx = 0; atom_idx < g_config.inconf[config_idx]; atom_idx++) { int n_i = 3 * (g_config.cnfstart[config_idx] + atom_idx); if (uf) { forces[n_i + 0] = -g_config.force_0[n_i + 0]; forces[n_i + 1] = -g_config.force_0[n_i + 1]; forces[n_i + 2] = -g_config.force_0[n_i + 2]; } else { memset(forces + n_i, 0, 3 * sizeof(double)); } } // second loop: calculate pair forces and energies for (int atom_idx = 0; atom_idx < g_config.inconf[config_idx]; atom_idx++) { atom_t* atom = g_config.conf_atoms + atom_idx + g_config.cnfstart[config_idx] - g_mpi.firstatom; int n_i = 3 * (g_config.cnfstart[config_idx] + atom_idx); // loop over all neighbors for (int neigh_idx = 0; neigh_idx < atom->num_neigh; neigh_idx++) { neigh_t* neigh = atom->neigh + neigh_idx; // In small cells, an atom might interact with itself int self = (neigh->nr == atom_idx + g_config.cnfstart[config_idx]) ? 
1 : 0; // pair potential part if (neigh->r < g_pot.calc_pot.end[neigh->col[0]]) { double phi_val = 0.0; double phi_grad = 0.0; // potential value and gradient are calculated in the same step if (uf) phi_val = splint_comb_dir(&g_pot.calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0], &phi_grad); else phi_val = splint_dir(&g_pot.calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0]); // avoid double counting if atom is interacting with itself if (self) { phi_val *= 0.5; phi_grad *= 0.5; } // add cohesive energy forces[g_calc.energy_p + config_idx] += phi_val; // calculate forces if (uf) { vector tmp_force; tmp_force.x = neigh->dist_r.x * phi_grad; tmp_force.y = neigh->dist_r.y * phi_grad; tmp_force.z = neigh->dist_r.z * phi_grad; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; // actio = reactio int n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #if defined(STRESS) /* also calculate pair stresses */ if (us) { forces[stress_idx + 0] -= neigh->dist.x * tmp_force.x; forces[stress_idx + 1] -= neigh->dist.y * tmp_force.y; forces[stress_idx + 2] -= neigh->dist.z * tmp_force.z; forces[stress_idx + 3] -= neigh->dist.x * tmp_force.y; forces[stress_idx + 4] -= neigh->dist.y * tmp_force.z; forces[stress_idx + 5] -= neigh->dist.z * tmp_force.x; } #endif // STRESS } } // neighbors in range } // loop over all neighbors // calculate contribution of forces right away if (uf) { #if defined(FWEIGHT) // weigh by absolute value of force forces[n_i + 0] /= FORCE_EPS + atom->absforce; forces[n_i + 1] /= FORCE_EPS + atom->absforce; forces[n_i + 2] /= FORCE_EPS + atom->absforce; #endif // FWEIGHT // sum up forces #if defined(CONTRIB) if (atom->contrib) #endif // CONTRIB error_sum += g_config.conf_weight[config_idx] * (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2])); } } // second loop over atoms // energy contributions 
forces[g_calc.energy_p + config_idx] /= (double)g_config.inconf[config_idx]; forces[g_calc.energy_p + config_idx] -= g_config.force_0[g_calc.energy_p + config_idx]; error_sum += g_config.conf_weight[config_idx] * g_param.eweight * dsquare(forces[g_calc.energy_p + config_idx]); #if defined(STRESS) // stress contributions if (uf && us) { for (int i = 0; i < 6; i++) { forces[stress_idx + i] /= g_config.conf_vol[config_idx - g_mpi.firstconf]; forces[stress_idx + i] -= g_config.force_0[stress_idx + i]; error_sum += g_config.conf_weight[config_idx] * g_param.sweight * dsquare(forces[stress_idx + i]); } } #endif // STRESS } // loop over configurations // dummy constraints (global) #if defined(APOT) // add punishment for out of bounds (mostly for powell_lsq) if (g_mpi.myid == 0) error_sum += apot_punish(xi_opt, forces); #endif // APOT gather_forces(&error_sum, forces); // root process exits this function now if (g_mpi.myid == 0) { // Increase function call counter g_calc.fcalls++; if (isnan(error_sum)) { #if defined(DEBUG) printf("\n--> Force is nan! <--\n\n"); #endif // DEBUG return 10e10; } else return error_sum; } } // end of infinite loop // once a non-root process arrives here, all is done return -1.0; }
double calc_forces_elstat(double *xi_opt, double *forces, int flag) { double tmpsum, sum = 0.; int first, col, ne, size, i; double *xi = NULL; apot_table_t *apt = &apot_table; double charge[ntypes]; double sum_charges; double dp_kappa; #ifdef DIPOLE double dp_alpha[ntypes]; double dp_b[apt->number]; double dp_c[apt->number]; #endif /* DIPOLE */ switch (format) { case 0: xi = calc_pot.table; break; case 3: /* fall through */ case 4: xi = xi_opt; /* calc-table is opt-table */ break; case 5: xi = calc_pot.table; /* we need to update the calc-table */ } ne = apot_table.total_ne_par; size = apt->number; /* This is the start of an infinite loop */ while (1) { tmpsum = 0.; /* sum of squares of local process */ #if defined APOT && !defined MPI if (format == 0) { apot_check_params(xi_opt); update_calc_table(xi_opt, xi, 0); } #endif /* APOT && !MPI */ #ifdef MPI /* exchange potential and flag value */ #ifndef APOT MPI_Bcast(xi, calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif /* APOT */ MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD); if (flag == 1) break; /* Exception: flag 1 means clean up */ #ifdef APOT if (myid == 0) apot_check_params(xi_opt); MPI_Bcast(xi_opt, ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD); if (format == 0) update_calc_table(xi_opt, xi, 0); #else /* APOT */ /* if flag==2 then the potential parameters have changed -> sync */ if (flag == 2) potsync(); #endif /* APOT */ #endif /* MPI */ /* local arrays for electrostatic parameters */ sum_charges = 0; for (i = 0; i < ntypes - 1; i++) { if (xi_opt[2 * size + ne + i]) { charge[i] = xi_opt[2 * size + ne + i]; sum_charges += apt->ratio[i] * charge[i]; } else { charge[i] = 0.; } } apt->last_charge = -sum_charges / apt->ratio[ntypes - 1]; charge[ntypes - 1] = apt->last_charge; if (xi_opt[2 * size + ne + ntypes - 1]) { dp_kappa = xi_opt[2 * size + ne + ntypes - 1]; } else { dp_kappa = 0.; } #ifdef DIPOLE for (i = 0; i < ntypes; i++) { if (xi_opt[2 * size + ne + ntypes + i]) { dp_alpha[i] = xi_opt[2 * size + ne + 
ntypes + i]; } else { dp_alpha[i] = 0.; } } for (i = 0; i < size; i++) { if (xi_opt[2 * size + ne + 2 * ntypes + i]) { dp_b[i] = xi_opt[2 * size + ne + 2 * ntypes + i]; } else { dp_b[i] = 0.; } if (xi_opt[3 * size + ne + 2 * ntypes + i]) { dp_c[i] = xi_opt[3 * size + ne + 2 * ntypes + i]; } else { dp_c[i] = 0.; } } #endif /* DIPOLE */ /* init second derivatives for splines */ for (col = 0; col < paircol; col++) { first = calc_pot.first[col]; if (format == 3 || format == 0) { spline_ed(calc_pot.step[col], xi + first, calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, calc_pot.d2tab + first); } else { /* format >= 4 ! */ spline_ne(calc_pot.xcoord + first, xi + first, calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, calc_pot.d2tab + first); } } #ifndef MPI myconf = nconf; #endif /* MPI */ /* region containing loop over configurations, also OMP-parallelized region */ { int self; vector tmp_force; int h, j, type1, type2, uf, us, stresses; int n_i, n_j; double fnval, grad, fnval_tail, grad_tail, grad_i, grad_j; #ifdef DIPOLE double p_sr_tail; #endif /* DIPOLE */ atom_t *atom; neigh_t *neigh; /* loop over configurations: M A I N LOOP CONTAINING ALL ATOM-LOOPS */ for (h = firstconf; h < firstconf + myconf; h++) { uf = conf_uf[h - firstconf]; #ifdef STRESS us = conf_us[h - firstconf]; #endif /* STRESS */ /* reset energies and stresses */ forces[energy_p + h] = 0.0; #ifdef STRESS stresses = stress_p + 6 * h; for (i = 0; i < 6; i++) forces[stresses + i] = 0.0; #endif /* STRESS */ #ifdef DIPOLE /* reset dipoles and fields: LOOP Z E R O */ for (i = 0; i < inconf[h]; i++) { atom = conf_atoms + i + cnfstart[h] - firstatom; atom->E_stat.x = 0.0; atom->E_stat.y = 0.0; atom->E_stat.z = 0.0; atom->p_sr.x = 0.0; atom->p_sr.y = 0.0; atom->p_sr.z = 0.0; } #endif /* DIPOLE */ /* F I R S T LOOP OVER ATOMS: reset forces, dipoles */ for (i = 0; i < inconf[h]; i++) { /* atoms */ n_i = 3 * (cnfstart[h] + i); if (uf) { forces[n_i + 0] = -force_0[n_i + 0]; forces[n_i + 1] = 
-force_0[n_i + 1]; forces[n_i + 2] = -force_0[n_i + 2]; } else { forces[n_i + 0] = 0.0; forces[n_i + 1] = 0.0; forces[n_i + 2] = 0.0; } } /* end F I R S T LOOP */ /* S E C O N D loop: calculate short-range and monopole forces, calculate static field- and dipole-contributions */ for (i = 0; i < inconf[h]; i++) { /* atoms */ atom = conf_atoms + i + cnfstart[h] - firstatom; type1 = atom->type; n_i = 3 * (cnfstart[h] + i); for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* updating tail-functions - only necessary with variing kappa */ if (!apt->sw_kappa) elstat_shift(neigh->r, dp_kappa, &neigh->fnval_el, &neigh->grad_el, &neigh->ggrad_el); /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + cnfstart[h]) ? 1 : 0; /* calculate short-range forces */ if (neigh->r < calc_pot.end[col]) { if (uf) { fnval = splint_comb_dir(&calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0], &grad); } else { fnval = splint_dir(&calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0]); } /* avoid double counting if atom is interacting with a copy of itself */ if (self) { fnval *= 0.5; grad *= 0.5; } forces[energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad; tmp_force.y = neigh->dist_r.y * grad; tmp_force.z = neigh->dist_r.z * grad; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #ifdef STRESS /* calculate pair stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif /* STRESS */ } } /* 
calculate monopole forces */ if (neigh->r < dp_cut && (charge[type1] || charge[type2])) { fnval_tail = neigh->fnval_el; grad_tail = neigh->grad_el; grad_i = charge[type2] * grad_tail; if (type1 == type2) { grad_j = grad_i; } else { grad_j = charge[type1] * grad_tail; } fnval = charge[type1] * charge[type2] * fnval_tail; grad = charge[type1] * grad_i; if (self) { grad_i *= 0.5; grad_j *= 0.5; fnval *= 0.5; grad *= 0.5; } forces[energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist.x * grad; tmp_force.y = neigh->dist.y * grad; tmp_force.z = neigh->dist.z * grad; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #ifdef STRESS /* calculate coulomb stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif /* STRESS */ } #ifdef DIPOLE /* calculate static field-contributions */ atom->E_stat.x += neigh->dist.x * grad_i; atom->E_stat.y += neigh->dist.y * grad_i; atom->E_stat.z += neigh->dist.z * grad_i; conf_atoms[neigh->nr - firstatom].E_stat.x -= neigh->dist.x * grad_j; conf_atoms[neigh->nr - firstatom].E_stat.y -= neigh->dist.y * grad_j; conf_atoms[neigh->nr - firstatom].E_stat.z -= neigh->dist.z * grad_j; /* calculate short-range dipoles */ if (dp_alpha[type1] && dp_b[col] && dp_c[col]) { p_sr_tail = grad_tail * neigh->r * shortrange_value(neigh->r, dp_alpha[type1], dp_b[col], dp_c[col]); atom->p_sr.x += charge[type2] * neigh->dist_r.x * p_sr_tail; atom->p_sr.y += charge[type2] * neigh->dist_r.y * p_sr_tail; atom->p_sr.z += charge[type2] * neigh->dist_r.z * p_sr_tail; } if (dp_alpha[type2] && 
dp_b[col] && dp_c[col] && !self) { p_sr_tail = grad_tail * neigh->r * shortrange_value(neigh->r, dp_alpha[type2], dp_b[col], dp_c[col]); conf_atoms[neigh->nr - firstatom].p_sr.x -= charge[type1] * neigh->dist_r.x * p_sr_tail; conf_atoms[neigh->nr - firstatom].p_sr.y -= charge[type1] * neigh->dist_r.y * p_sr_tail; conf_atoms[neigh->nr - firstatom].p_sr.z -= charge[type1] * neigh->dist_r.z * p_sr_tail; } #endif /* DIPOLE */ } } /* loop over neighbours */ } /* end S E C O N D loop over atoms */ #ifdef DIPOLE /* T H I R D loop: calculate whole dipole moment for every atom */ double rp, dp_sum; int dp_converged = 0, dp_it = 0; double max_diff = 10; while (dp_converged == 0) { dp_sum = 0; for (i = 0; i < inconf[h]; i++) { /* atoms */ atom = conf_atoms + i + cnfstart[h] - firstatom; type1 = atom->type; if (dp_alpha[type1]) { if (dp_it) { /* note: mixing parameter is different from that on in IMD */ atom->E_tot.x = (1 - dp_mix) * atom->E_ind.x + dp_mix * atom->E_old.x + atom->E_stat.x; atom->E_tot.y = (1 - dp_mix) * atom->E_ind.y + dp_mix * atom->E_old.y + atom->E_stat.y; atom->E_tot.z = (1 - dp_mix) * atom->E_ind.z + dp_mix * atom->E_old.z + atom->E_stat.z; } else { atom->E_tot.x = atom->E_ind.x + atom->E_stat.x; atom->E_tot.y = atom->E_ind.y + atom->E_stat.y; atom->E_tot.z = atom->E_ind.z + atom->E_stat.z; } atom->p_ind.x = dp_alpha[type1] * atom->E_tot.x + atom->p_sr.x; atom->p_ind.y = dp_alpha[type1] * atom->E_tot.y + atom->p_sr.y; atom->p_ind.z = dp_alpha[type1] * atom->E_tot.z + atom->p_sr.z; atom->E_old.x = atom->E_ind.x; atom->E_old.y = atom->E_ind.y; atom->E_old.z = atom->E_ind.z; atom->E_ind.x = 0.; atom->E_ind.y = 0.; atom->E_ind.z = 0.; } } for (i = 0; i < inconf[h]; i++) { /* atoms */ atom = conf_atoms + i + cnfstart[h] - firstatom; type1 = atom->type; for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + 
cnfstart[h]) ? 1 : 0; if (neigh->r < dp_cut && dp_alpha[type1] && dp_alpha[type2]) { rp = SPROD(conf_atoms[neigh->nr - firstatom].p_ind, neigh->dist_r); atom->E_ind.x += neigh->grad_el * (3 * rp * neigh->dist_r.x - conf_atoms[neigh->nr - firstatom].p_ind.x); atom->E_ind.y += neigh->grad_el * (3 * rp * neigh->dist_r.y - conf_atoms[neigh->nr - firstatom].p_ind.y); atom->E_ind.z += neigh->grad_el * (3 * rp * neigh->dist_r.z - conf_atoms[neigh->nr - firstatom].p_ind.z); if (!self) { rp = SPROD(atom->p_ind, neigh->dist_r); conf_atoms[neigh->nr - firstatom].E_ind.x += neigh->grad_el * (3 * rp * neigh->dist_r.x - atom->p_ind.x); conf_atoms[neigh->nr - firstatom].E_ind.y += neigh->grad_el * (3 * rp * neigh->dist_r.y - atom->p_ind.y); conf_atoms[neigh->nr - firstatom].E_ind.z += neigh->grad_el * (3 * rp * neigh->dist_r.z - atom->p_ind.z); } } } } for (i = 0; i < inconf[h]; i++) { /* atoms */ atom = conf_atoms + i + cnfstart[h] - firstatom; type1 = atom->type; if (dp_alpha[type1]) { dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.x - atom->E_ind.x)); dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.y - atom->E_ind.y)); dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.z - atom->E_ind.z)); } } dp_sum /= 3 * inconf[h]; dp_sum = sqrt(dp_sum); if (dp_it) { if ((dp_sum > max_diff) || (dp_it > 50)) { dp_converged = 1; for (i = 0; i < inconf[h]; i++) { /* atoms */ atom = conf_atoms + i + cnfstart[h] - firstatom; type1 = atom->type; if (dp_alpha[type1]) { atom->p_ind.x = dp_alpha[type1] * atom->E_stat.x + atom->p_sr.x; atom->p_ind.y = dp_alpha[type1] * atom->E_stat.y + atom->p_sr.y; atom->p_ind.z = dp_alpha[type1] * atom->E_stat.z + atom->p_sr.z; atom->E_ind.x = atom->E_stat.x; atom->E_ind.y = atom->E_stat.y; atom->E_ind.z = atom->E_stat.z; } } } } if (dp_sum < dp_tol) { dp_converged = 1; } dp_it++; } /* end T H I R D loop over atoms */ /* F O U R T H loop: calculate monopole-dipole and dipole-dipole forces */ double rp_i, rp_j, pp_ij, tmp_1, tmp_2; double grad_1, grad_2, srval, 
srgrad, srval_tail, srgrad_tail, fnval_sum, grad_sum; for (i = 0; i < inconf[h]; i++) { /* atoms */ atom = conf_atoms + i + cnfstart[h] - firstatom; type1 = atom->type; n_i = 3 * (cnfstart[h] + i); for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + cnfstart[h]) ? 1 : 0; if (neigh->r < dp_cut && (dp_alpha[type1] || dp_alpha[type2])) { fnval_tail = -neigh->grad_el; grad_tail = -neigh->ggrad_el; if (dp_b[col] && dp_c[col]) { shortrange_term(neigh->r, dp_b[col], dp_c[col], &srval_tail, &srgrad_tail); srval = fnval_tail * srval_tail; srgrad = fnval_tail * srgrad_tail + grad_tail * srval_tail; } if (self) { fnval_tail *= 0.5; grad_tail *= 0.5; } /* monopole-dipole contributions */ if (charge[type1] && dp_alpha[type2]) { if (dp_b[col] && dp_c[col]) { fnval_sum = fnval_tail + srval; grad_sum = grad_tail + srgrad; } else { fnval_sum = fnval_tail; grad_sum = grad_tail; } rp_j = SPROD(conf_atoms[neigh->nr - firstatom].p_ind, neigh->dist_r); fnval = charge[type1] * rp_j * fnval_sum * neigh->r; grad_1 = charge[type1] * rp_j * grad_sum * neigh->r2; grad_2 = charge[type1] * fnval_sum; forces[energy_p + h] -= fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad_1 + conf_atoms[neigh->nr - firstatom].p_ind.x * grad_2; tmp_force.y = neigh->dist_r.y * grad_1 + conf_atoms[neigh->nr - firstatom].p_ind.y * grad_2; tmp_force.z = neigh->dist_r.z * grad_1 + conf_atoms[neigh->nr - firstatom].p_ind.z * grad_2; forces[n_i + 0] -= tmp_force.x; forces[n_i + 1] -= tmp_force.y; forces[n_i + 2] -= tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] += tmp_force.x; forces[n_j + 1] += tmp_force.y; forces[n_j + 2] += tmp_force.z; #ifdef STRESS /* calculate stresses */ if (us) { forces[stresses + 0] += neigh->dist.x * tmp_force.x; forces[stresses + 1] += neigh->dist.y * tmp_force.y; forces[stresses + 2] += neigh->dist.z * 
tmp_force.z; forces[stresses + 3] += neigh->dist.x * tmp_force.y; forces[stresses + 4] += neigh->dist.y * tmp_force.z; forces[stresses + 5] += neigh->dist.z * tmp_force.x; } #endif /* STRESS */ } } /* dipole-monopole contributions */ if (dp_alpha[type2] && charge[type2]) { if (dp_b[col] && dp_c[col]) { fnval_sum = fnval_tail + srval; grad_sum = grad_tail + srgrad; } else { fnval_sum = fnval_tail; grad_sum = grad_tail; } rp_i = SPROD(atom->p_ind, neigh->dist_r); fnval = charge[type2] * rp_i * fnval_sum * neigh->r; grad_1 = charge[type2] * rp_i * grad_sum * neigh->r2; grad_2 = charge[type2] * fnval_sum; forces[energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad_1 + atom->p_ind.x * grad_2; tmp_force.y = neigh->dist_r.y * grad_1 + atom->p_ind.y * grad_2; tmp_force.z = neigh->dist_r.z * grad_1 + atom->p_ind.z * grad_2; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #ifdef STRESS /* calculate stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif /* STRESS */ } } /* dipole-dipole contributions */ if (dp_alpha[type1] && dp_alpha[type2]) { pp_ij = SPROD(atom->p_ind, conf_atoms[neigh->nr - firstatom].p_ind); tmp_1 = 3 * rp_i * rp_j; tmp_2 = 3 * fnval_tail / neigh->r2; fnval = -(tmp_1 - pp_ij) * fnval_tail; grad_1 = (tmp_1 - pp_ij) * grad_tail; grad_2 = 2 * rp_i * rp_j; forces[energy_p + h] += fnval; if (uf) { tmp_force.x = grad_1 * neigh->dist.x - tmp_2 * (grad_2 * neigh->dist.x - rp_i * neigh->r * conf_atoms[neigh->nr - firstatom].p_ind.x - rp_j * neigh->r * atom->p_ind.x); 
tmp_force.y = grad_1 * neigh->dist.y - tmp_2 * (grad_2 * neigh->dist.y - rp_i * neigh->r * conf_atoms[neigh->nr - firstatom].p_ind.y - rp_j * neigh->r * atom->p_ind.y); tmp_force.z = grad_1 * neigh->dist.z - tmp_2 * (grad_2 * neigh->dist.z - rp_i * neigh->r * conf_atoms[neigh->nr - firstatom].p_ind.z - rp_j * neigh->r * atom->p_ind.z); forces[n_i + 0] -= tmp_force.x; forces[n_i + 1] -= tmp_force.y; forces[n_i + 2] -= tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] += tmp_force.x; forces[n_j + 1] += tmp_force.y; forces[n_j + 2] += tmp_force.z; #ifdef STRESS /* calculate stresses */ if (us) { forces[stresses + 0] += neigh->dist.x * tmp_force.x; forces[stresses + 1] += neigh->dist.y * tmp_force.y; forces[stresses + 2] += neigh->dist.z * tmp_force.z; forces[stresses + 3] += neigh->dist.x * tmp_force.y; forces[stresses + 4] += neigh->dist.y * tmp_force.z; forces[stresses + 5] += neigh->dist.z * tmp_force.x; } #endif /* STRESS */ } } } } /* loop over neighbours */ } /* end F O U R T H loop over atoms */ #endif /* DIPOLE */ /* F I F T H loop: self energy contributions and sum-up force contributions */ double qq; #ifdef DIPOLE double pp; #endif /* DIPOLE */ for (i = 0; i < inconf[h]; i++) { /* atoms */ atom = conf_atoms + i + cnfstart[h] - firstatom; type1 = atom->type; n_i = 3 * (cnfstart[h] + i); /* self energy contributions */ if (charge[type1]) { qq = charge[type1] * charge[type1]; fnval = dp_eps * dp_kappa * qq / sqrt(M_PI); forces[energy_p + h] -= fnval; } #ifdef DIPOLE if (dp_alpha[type1]) { pp = SPROD(atom->p_ind, atom->p_ind); fnval = pp / (2 * dp_alpha[type1]); forces[energy_p + h] += fnval; } /* alternative dipole self energy including kappa-dependence */ //if (dp_alpha[type1]) { // pp = SPROD(atom->p_ind, atom->p_ind); // fnval = kkk * pp / sqrt(M_PI); // forces[energy_p + h] += fnval; //} #endif /* DIPOLE */ /* sum-up: whole force contributions flow into tmpsum */ if (uf) { #ifdef FWEIGHT /* Weigh by absolute value of force */ 
forces[n_i + 0] /= FORCE_EPS + atom->absforce; forces[n_i + 1] /= FORCE_EPS + atom->absforce; forces[n_i + 2] /= FORCE_EPS + atom->absforce; #endif /* FWEIGHT */ #ifdef CONTRIB if (atom->contrib) #endif /* CONTRIB */ tmpsum += conf_weight[h] * (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2])); } } /* end F I F T H loop over atoms */ /* whole energy contributions flow into tmpsum */ forces[energy_p + h] /= (double)inconf[h]; forces[energy_p + h] -= force_0[energy_p + h]; tmpsum += conf_weight[h] * eweight * dsquare(forces[energy_p + h]); #ifdef STRESS /* whole stress contributions flow into tmpsum */ if (uf && us) { for (i = 0; i < 6; i++) { forces[stresses + i] /= conf_vol[h - firstconf]; forces[stresses + i] -= force_0[stresses + i]; tmpsum += conf_weight[h] * sweight * dsquare(forces[stresses + i]); } } #endif /* STRESS */ } /* end M A I N loop over configurations */ } /* parallel region */ /* dummy constraints (global) */ #ifdef APOT /* add punishment for out of bounds (mostly for powell_lsq) */ if (myid == 0) { tmpsum += apot_punish(xi_opt, forces); } #endif /* APOT */ sum = tmpsum; /* global sum = local sum */ #ifdef MPI /* reduce global sum */ sum = 0.; MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* gather forces, energies, stresses */ if (myid == 0) { /* root node already has data in place */ /* forces */ MPI_Gatherv(MPI_IN_PLACE, myatoms, MPI_VECTOR, forces, atom_len, atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_DOUBLE, forces + natoms * 3, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* stresses */ MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_STENS, forces + natoms * 3 + nconf, conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD); } else { /* forces */ MPI_Gatherv(forces + firstatom * 3, myatoms, MPI_VECTOR, forces, atom_len, atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(forces + natoms * 3 + firstconf, myconf, MPI_DOUBLE, 
forces + natoms * 3, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* stresses */ MPI_Gatherv(forces + natoms * 3 + nconf + 6 * firstconf, myconf, MPI_STENS, forces + natoms * 3 + nconf, conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD); } #endif /* MPI */ /* root process exits this function now */ if (myid == 0) { fcalls++; /* Increase function call counter */ if (isnan(sum)) { #ifdef DEBUG printf("\n--> Force is nan! <--\n\n"); #endif /* DEBUG */ return 10e10; } else return sum; } } /* once a non-root process arrives here, all is done. */ return -1.; }
double calc_forces(double* xi_opt, double* forces, int flag) { double tmpsum, sum = 0.0; int first, col, ne, size, i = flag; double* xi = NULL; apot_table_t* apt = &g_pot.apot_table; double charge[g_param.ntypes]; double sum_charges; double dp_kappa; #if defined(DIPOLE) double dp_alpha[g_param.ntypes]; double dp_b[g_calc.paircol]; double dp_c[g_calc.paircol]; #endif // DIPOLE static double rho_sum_loc, rho_sum; rho_sum_loc = rho_sum = 0.0; switch (g_pot.format_type) { case POTENTIAL_FORMAT_UNKNOWN: error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__, __LINE__); case POTENTIAL_FORMAT_ANALYTIC: xi = g_pot.calc_pot.table; break; case POTENTIAL_FORMAT_TABULATED_EQ_DIST: case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: xi = xi_opt; break; case POTENTIAL_FORMAT_KIM: error(1, "KIM format is not supported by EAM elstat force routine!"); } #if !defined(MPI) g_mpi.myconf = g_config.nconf; #endif // MPI ne = g_pot.apot_table.total_ne_par; size = apt->number; /* This is the start of an infinite loop */ while (1) { tmpsum = 0.0; /* sum of squares of local process */ rho_sum_loc = 0.0; #if defined APOT && !defined MPI if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC) { apot_check_params(xi_opt); update_calc_table(xi_opt, xi, 0); } #endif // APOT && !MPI #if defined(MPI) /* exchange potential and flag value */ #if !defined(APOT) MPI_Bcast(xi, g_pot.calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif // APOT MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD); if (flag == 1) break; /* Exception: flag 1 means clean up */ #if defined(APOT) if (g_mpi.myid == 0) apot_check_params(xi_opt); MPI_Bcast(xi_opt, g_calc.ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD); if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC) update_calc_table(xi_opt, xi, 0); #else /* APOT */ /* if flag==2 then the potential parameters have changed -> sync */ if (flag == 2) potsync(); #endif // APOT #endif // MPI /* local arrays for electrostatic parameters */ sum_charges = 0; for (i = 0; i < g_param.ntypes - 1; 
i++) { if (xi_opt[2 * size + ne + i]) { charge[i] = xi_opt[2 * size + ne + i]; sum_charges += apt->ratio[i] * charge[i]; } else { charge[i] = 0.0; } } apt->last_charge = -sum_charges / apt->ratio[g_param.ntypes - 1]; charge[g_param.ntypes - 1] = apt->last_charge; if (xi_opt[2 * size + ne + g_param.ntypes - 1]) { dp_kappa = xi_opt[2 * size + ne + g_param.ntypes - 1]; } else { dp_kappa = 0.0; } #if defined(DIPOLE) for (i = 0; i < g_param.ntypes; i++) { if (xi_opt[2 * size + ne + g_param.ntypes + i]) { dp_alpha[i] = xi_opt[2 * size + ne + g_param.ntypes + i]; } else { dp_alpha[i] = 0.0; } } for (i = 0; i < g_calc.paircol; i++) { if (xi_opt[2 * size + ne + 2 * g_param.ntypes + i]) { dp_b[i] = xi_opt[2 * size + ne + 2 * g_param.ntypes + i]; } else { dp_b[i] = 0.0; } if (xi_opt[2 * size + ne + 2 * g_param.ntypes + g_calc.paircol + i]) { dp_c[i] = xi_opt[2 * size + ne + 2 * g_param.ntypes + g_calc.paircol + i]; } else { dp_c[i] = 0.0; } } #endif // DIPOLE /* init second derivatives for splines */ /* pair potentials & rho */ for (col = 0; col < g_calc.paircol + g_param.ntypes; col++) { first = g_pot.calc_pot.first[col]; switch (g_pot.format_type) { case POTENTIAL_FORMAT_UNKNOWN: error(1, "Unknown potential format detected! 
(%s:%d)\n", __FILE__, __LINE__); case POTENTIAL_FORMAT_ANALYTIC: case POTENTIAL_FORMAT_TABULATED_EQ_DIST: { spline_ed(g_pot.calc_pot.step[col], xi + first, g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, g_pot.calc_pot.d2tab + first); break; } case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: { spline_ne(g_pot.calc_pot.xcoord + first, xi + first, g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, g_pot.calc_pot.d2tab + first); } case POTENTIAL_FORMAT_KIM: error(1, "KIM format is not supported by EAM elstat force routine!"); } } /* F */ for (col = g_calc.paircol + g_param.ntypes; col < g_calc.paircol + 2 * g_param.ntypes; col++) { first = g_pot.calc_pot.first[col]; /* gradient at left boundary matched to square root function, when 0 not in domain(F), else natural spline */ switch (g_pot.format_type) { case POTENTIAL_FORMAT_UNKNOWN: error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__, __LINE__); case POTENTIAL_FORMAT_ANALYTIC: case POTENTIAL_FORMAT_TABULATED_EQ_DIST: { spline_ed(g_pot.calc_pot.step[col], xi + first, g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2), *(xi + first - 1), g_pot.calc_pot.d2tab + first); break; } case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: { spline_ne(g_pot.calc_pot.xcoord + first, xi + first, g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2), *(xi + first - 1), g_pot.calc_pot.d2tab + first); } case POTENTIAL_FORMAT_KIM: error(1, "KIM format is not supported by EAM elstat force routine!"); } } /* region containing loop over configurations */ { int self; vector tmp_force; int h, j, type1, type2, uf; #if defined(STRESS) int us = 0; int stresses = 0; #endif int n_i, n_j; double fnval, grad, fnval_tail, grad_tail, grad_i, grad_j; #if defined(DIPOLE) double p_sr_tail = 0.0; #endif atom_t* atom; neigh_t* neigh; double r; int col_F; double eam_force; double rho_val, rho_grad, rho_grad_j; /* loop over configurations: M A I N LOOP CONTAINING ALL ATOM-LOOPS */ for (h = g_mpi.firstconf; h < 
g_mpi.firstconf + g_mpi.myconf; h++) { uf = g_config.conf_uf[h - g_mpi.firstconf]; #if defined(STRESS) us = g_config.conf_us[h - g_mpi.firstconf]; #endif // STRESS /* reset energies and stresses */ forces[g_calc.energy_p + h] = 0.0; #if defined(STRESS) stresses = g_calc.stress_p + 6 * h; for (i = 0; i < 6; i++) forces[stresses + i] = 0.0; #endif // STRESS /* set limiting constraints */ forces[g_calc.limit_p + h] = -g_config.force_0[g_calc.limit_p + h]; #if defined(DIPOLE) /* reset dipoles and fields: LOOP Z E R O */ for (i = 0; i < g_config.inconf[h]; i++) { atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; atom->E_stat.x = 0.0; atom->E_stat.y = 0.0; atom->E_stat.z = 0.0; atom->p_sr.x = 0.0; atom->p_sr.y = 0.0; atom->p_sr.z = 0.0; } #endif // DIPOLE /* F I R S T LOOP OVER ATOMS: reset forces, dipoles */ for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ n_i = 3 * (g_config.cnfstart[h] + i); if (uf) { forces[n_i + 0] = -g_config.force_0[n_i + 0]; forces[n_i + 1] = -g_config.force_0[n_i + 1]; forces[n_i + 2] = -g_config.force_0[n_i + 2]; } else { forces[n_i + 0] = 0.0; forces[n_i + 1] = 0.0; forces[n_i + 2] = 0.0; } /* reset atomic density */ g_config.conf_atoms[g_config.cnfstart[h] - g_mpi.firstatom + i].rho = 0.0; } /* end F I R S T LOOP */ /* S E C O N D loop: calculate short-range and monopole forces, calculate static field- and dipole-contributions, calculate atomic densities */ for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; n_i = 3 * (g_config.cnfstart[h] + i); for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* updating tail-functions - only necessary with variing kappa */ if (!apt->sw_kappa) #if defined(DSF) elstat_dsf(neigh->r, dp_kappa, &neigh->fnval_el, &neigh->grad_el, &neigh->ggrad_el); #else elstat_shift(neigh->r, dp_kappa, &neigh->fnval_el, 
&neigh->grad_el, &neigh->ggrad_el); #endif // DSF /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0; /* calculate short-range forces */ if (neigh->r < g_pot.calc_pot.end[col]) { if (uf) { fnval = splint_comb_dir(&g_pot.calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0], &grad); } else { fnval = splint_dir(&g_pot.calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0]); } /* avoid double counting if atom is interacting with a copy of * itself */ if (self) { fnval *= 0.5; grad *= 0.5; } forces[g_calc.energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad; tmp_force.y = neigh->dist_r.y * grad; tmp_force.z = neigh->dist_r.z * grad; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #if defined(STRESS) /* calculate pair stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif // STRESS } } /* calculate monopole forces */ if (neigh->r < g_config.dp_cut && (charge[type1] || charge[type2])) { fnval_tail = neigh->fnval_el; grad_tail = neigh->grad_el; grad_i = charge[type2] * grad_tail; if (type1 == type2) { grad_j = grad_i; } else { grad_j = charge[type1] * grad_tail; } fnval = charge[type1] * charge[type2] * fnval_tail; grad = charge[type1] * grad_i; if (self) { grad_i *= 0.5; grad_j *= 0.5; fnval *= 0.5; grad *= 0.5; } forces[g_calc.energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist.x * grad; tmp_force.y = neigh->dist.y * grad; tmp_force.z = neigh->dist.z * grad; forces[n_i + 0] += tmp_force.x; 
forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #if defined(STRESS) /* calculate coulomb stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif // STRESS } #if defined(DIPOLE) /* calculate static field-contributions */ atom->E_stat.x += neigh->dist.x * grad_i; atom->E_stat.y += neigh->dist.y * grad_i; atom->E_stat.z += neigh->dist.z * grad_i; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.x -= neigh->dist.x * grad_j; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.y -= neigh->dist.y * grad_j; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.z -= neigh->dist.z * grad_j; /* calculate short-range dipoles */ if (dp_alpha[type1] && dp_b[col] && dp_c[col]) { p_sr_tail = grad_tail * neigh->r * shortrange_value(neigh->r, dp_alpha[type1], dp_b[col], dp_c[col]); atom->p_sr.x += charge[type2] * neigh->dist_r.x * p_sr_tail; atom->p_sr.y += charge[type2] * neigh->dist_r.y * p_sr_tail; atom->p_sr.z += charge[type2] * neigh->dist_r.z * p_sr_tail; } if (dp_alpha[type2] && dp_b[col] && dp_c[col] && !self) { p_sr_tail = grad_tail * neigh->r * shortrange_value(neigh->r, dp_alpha[type2], dp_b[col], dp_c[col]); g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.x -= charge[type1] * neigh->dist_r.x * p_sr_tail; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.y -= charge[type1] * neigh->dist_r.y * p_sr_tail; g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.z -= charge[type1] * neigh->dist_r.z * p_sr_tail; } #endif // DIPOLE } /* calculate atomic densities */ if (atom->type == neigh->type) { /* then 
transfer(a->b)==transfer(b->a) */ if (neigh->r < g_pot.calc_pot.end[neigh->col[1]]) { rho_val = splint_dir(&g_pot.calc_pot, xi, neigh->slot[1], neigh->shift[1], neigh->step[1]); atom->rho += rho_val; /* avoid double counting if atom is interacting with a copy of itself */ if (!self) { g_config.conf_atoms[neigh->nr - g_mpi.firstatom].rho += rho_val; } } } else { /* transfer(a->b)!=transfer(b->a) */ if (neigh->r < g_pot.calc_pot.end[neigh->col[1]]) { atom->rho += splint_dir(&g_pot.calc_pot, xi, neigh->slot[1], neigh->shift[1], neigh->step[1]); } /* cannot use slot/shift to access splines */ if (neigh->r < g_pot.calc_pot.end[g_calc.paircol + atom->type]) g_config.conf_atoms[neigh->nr - g_mpi.firstatom].rho += (*g_splint)(&g_pot.calc_pot, xi, g_calc.paircol + atom->type, neigh->r); } } /* loop over neighbours */ col_F = g_calc.paircol + g_param.ntypes + atom->type; /* column of F */ if (atom->rho > g_pot.calc_pot.end[col_F]) { /* then punish target function -> bad potential */ forces[g_calc.limit_p + h] += DUMMY_WEIGHT * 10.0 * dsquare(atom->rho - g_pot.calc_pot.end[col_F]); atom->rho = g_pot.calc_pot.end[col_F]; } if (atom->rho < g_pot.calc_pot.begin[col_F]) { /* then punish target function -> bad potential */ forces[g_calc.limit_p + h] += DUMMY_WEIGHT * 10.0 * dsquare(g_pot.calc_pot.begin[col_F] - atom->rho); atom->rho = g_pot.calc_pot.begin[col_F]; } /* embedding energy, embedding gradient */ /* contribution to cohesive energy is F(n) */ #if defined(NORESCALE) if (atom->rho < g_pot.calc_pot.begin[col_F]) { /* linear extrapolation left */ rho_val = splint_comb(&calc_pot, xi, col_F, g_pot.calc_pot.begin[col_F], &atom->gradF); forces[energy_p + h] += rho_val + (atom->rho - g_pot.calc_pot.begin[col_F]) * atom->gradF; #if defined(APOT) forces[limit_p + h] += DUMMY_WEIGHT * 10.0 * dsquare(calc_pot.begin[col_F] - atom->rho); #endif // APOT } else if (atom->rho > g_pot.calc_pot.end[col_F]) { /* and right */ rho_val = splint_comb( &calc_pot, xi, col_F, 
g_pot.calc_pot.end[col_F] - 0.5 * g_pot.calc_pot.step[col_F], &atom->gradF); forces[energy_p + h] += rho_val + (atom->rho - g_pot.calc_pot.end[col_F]) * atom->gradF; #if defined(APOT) forces[limit_p + h] += DUMMY_WEIGHT * 10.0 * dsquare(atom->rho - g_pot.calc_pot.end[col_F]); #endif // APOT } /* and in-between */ else { forces[energy_p + h] += splint_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF); } #else forces[g_calc.energy_p + h] += (*g_splint_comb)( &g_pot.calc_pot, xi, col_F, atom->rho, &atom->gradF); #endif // NORESCALE /* sum up rho */ rho_sum_loc += atom->rho; } /* end S E C O N D loop over atoms */ #if defined(DIPOLE) /* T H I R D loop: calculate whole dipole moment for every atom */ double rp, dp_sum; int dp_converged = 0, dp_it = 0; double max_diff = 10; while (dp_converged == 0) { dp_sum = 0; for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; if (dp_alpha[type1]) { if (dp_it) { /* note: mixing parameter is different from that on in IMD */ atom->E_tot.x = (1 - g_config.dp_mix) * atom->E_ind.x + g_config.dp_mix * atom->E_old.x + atom->E_stat.x; atom->E_tot.y = (1 - g_config.dp_mix) * atom->E_ind.y + g_config.dp_mix * atom->E_old.y + atom->E_stat.y; atom->E_tot.z = (1 - g_config.dp_mix) * atom->E_ind.z + g_config.dp_mix * atom->E_old.z + atom->E_stat.z; } else { atom->E_tot.x = atom->E_ind.x + atom->E_stat.x; atom->E_tot.y = atom->E_ind.y + atom->E_stat.y; atom->E_tot.z = atom->E_ind.z + atom->E_stat.z; } atom->p_ind.x = dp_alpha[type1] * atom->E_tot.x + atom->p_sr.x; atom->p_ind.y = dp_alpha[type1] * atom->E_tot.y + atom->p_sr.y; atom->p_ind.z = dp_alpha[type1] * atom->E_tot.z + atom->p_sr.z; atom->E_old.x = atom->E_ind.x; atom->E_old.y = atom->E_ind.y; atom->E_old.z = atom->E_ind.z; atom->E_ind.x = 0.0; atom->E_ind.y = 0.0; atom->E_ind.z = 0.0; } } for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + 
g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0; if (neigh->r < g_config.dp_cut && dp_alpha[type1] && dp_alpha[type2]) { rp = SPROD( g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind, neigh->dist_r); atom->E_ind.x += neigh->grad_el * (3 * rp * neigh->dist_r.x - g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.x); atom->E_ind.y += neigh->grad_el * (3 * rp * neigh->dist_r.y - g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.y); atom->E_ind.z += neigh->grad_el * (3 * rp * neigh->dist_r.z - g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.z); if (!self) { rp = SPROD(atom->p_ind, neigh->dist_r); g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.x += neigh->grad_el * (3 * rp * neigh->dist_r.x - atom->p_ind.x); g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.y += neigh->grad_el * (3 * rp * neigh->dist_r.y - atom->p_ind.y); g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.z += neigh->grad_el * (3 * rp * neigh->dist_r.z - atom->p_ind.z); } } } } for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; if (dp_alpha[type1]) { dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.x - atom->E_ind.x)); dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.y - atom->E_ind.y)); dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.z - atom->E_ind.z)); } } dp_sum /= 3 * g_config.inconf[h]; dp_sum = sqrt(dp_sum); if (dp_it) { if ((dp_sum > max_diff) || (dp_it > 50)) { dp_converged = 1; for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; if (dp_alpha[type1]) { atom->p_ind.x = dp_alpha[type1] * atom->E_stat.x + atom->p_sr.x; 
atom->p_ind.y = dp_alpha[type1] * atom->E_stat.y + atom->p_sr.y; atom->p_ind.z = dp_alpha[type1] * atom->E_stat.z + atom->p_sr.z; atom->E_ind.x = atom->E_stat.x; atom->E_ind.y = atom->E_stat.y; atom->E_ind.z = atom->E_stat.z; } } } } if (dp_sum < g_config.dp_tol) dp_converged = 1; dp_it++; } /* end T H I R D loop over atoms */ /* F O U R T H loop: calculate monopole-dipole and dipole-dipole forces */ double rp_i, rp_j, pp_ij, tmp_1, tmp_2; double grad_1, grad_2, srval, srgrad, srval_tail, srgrad_tail, fnval_sum, grad_sum; for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; n_i = 3 * (g_config.cnfstart[h] + i); for (j = 0; j < atom->num_neigh; j++) { /* neighbors */ neigh = atom->neigh + j; type2 = neigh->type; col = neigh->col[0]; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0; if (neigh->r < g_config.dp_cut && (dp_alpha[type1] || dp_alpha[type2])) { fnval_tail = -neigh->grad_el; grad_tail = -neigh->ggrad_el; if (dp_b[col] && dp_c[col]) { shortrange_term(neigh->r, dp_b[col], dp_c[col], &srval_tail, &srgrad_tail); srval = fnval_tail * srval_tail; srgrad = fnval_tail * srgrad_tail + grad_tail * srval_tail; } if (self) { fnval_tail *= 0.5; grad_tail *= 0.5; } /* monopole-dipole contributions */ if (charge[type1] && dp_alpha[type2]) { if (dp_b[col] && dp_c[col]) { fnval_sum = fnval_tail + srval; grad_sum = grad_tail + srgrad; } else { fnval_sum = fnval_tail; grad_sum = grad_tail; } rp_j = SPROD( g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind, neigh->dist_r); fnval = charge[type1] * rp_j * fnval_sum * neigh->r; grad_1 = charge[type1] * rp_j * grad_sum * neigh->r2; grad_2 = charge[type1] * fnval_sum; forces[g_calc.energy_p + h] -= fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad_1 + g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.x * grad_2; tmp_force.y = neigh->dist_r.y * grad_1 + 
g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.y * grad_2; tmp_force.z = neigh->dist_r.z * grad_1 + g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.z * grad_2; forces[n_i + 0] -= tmp_force.x; forces[n_i + 1] -= tmp_force.y; forces[n_i + 2] -= tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] += tmp_force.x; forces[n_j + 1] += tmp_force.y; forces[n_j + 2] += tmp_force.z; #if defined(STRESS) /* calculate stresses */ if (us) { forces[stresses + 0] += neigh->dist.x * tmp_force.x; forces[stresses + 1] += neigh->dist.y * tmp_force.y; forces[stresses + 2] += neigh->dist.z * tmp_force.z; forces[stresses + 3] += neigh->dist.x * tmp_force.y; forces[stresses + 4] += neigh->dist.y * tmp_force.z; forces[stresses + 5] += neigh->dist.z * tmp_force.x; } #endif // STRESS } } /* dipole-monopole contributions */ if (dp_alpha[type2] && charge[type2]) { if (dp_b[col] && dp_c[col]) { fnval_sum = fnval_tail + srval; grad_sum = grad_tail + srgrad; } else { fnval_sum = fnval_tail; grad_sum = grad_tail; } rp_i = SPROD(atom->p_ind, neigh->dist_r); fnval = charge[type2] * rp_i * fnval_sum * neigh->r; grad_1 = charge[type2] * rp_i * grad_sum * neigh->r2; grad_2 = charge[type2] * fnval_sum; forces[g_calc.energy_p + h] += fnval; if (uf) { tmp_force.x = neigh->dist_r.x * grad_1 + atom->p_ind.x * grad_2; tmp_force.y = neigh->dist_r.y * grad_1 + atom->p_ind.y * grad_2; tmp_force.z = neigh->dist_r.z * grad_1 + atom->p_ind.z * grad_2; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #if defined(STRESS) /* calculate stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y 
* tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif // STRESS } } /* dipole-dipole contributions */ if (dp_alpha[type1] && dp_alpha[type2]) { pp_ij = SPROD( atom->p_ind, g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind); tmp_1 = 3 * rp_i * rp_j; tmp_2 = 3 * fnval_tail / neigh->r2; fnval = -(tmp_1 - pp_ij) * fnval_tail; grad_1 = (tmp_1 - pp_ij) * grad_tail; grad_2 = 2 * rp_i * rp_j; forces[g_calc.energy_p + h] += fnval; if (uf) { tmp_force.x = grad_1 * neigh->dist.x - tmp_2 * (grad_2 * neigh->dist.x - rp_i * neigh->r * g_config.conf_atoms[neigh->nr - g_mpi.firstatom] .p_ind.x - rp_j * neigh->r * atom->p_ind.x); tmp_force.y = grad_1 * neigh->dist.y - tmp_2 * (grad_2 * neigh->dist.y - rp_i * neigh->r * g_config.conf_atoms[neigh->nr - g_mpi.firstatom] .p_ind.y - rp_j * neigh->r * atom->p_ind.y); tmp_force.z = grad_1 * neigh->dist.z - tmp_2 * (grad_2 * neigh->dist.z - rp_i * neigh->r * g_config.conf_atoms[neigh->nr - g_mpi.firstatom] .p_ind.z - rp_j * neigh->r * atom->p_ind.z); forces[n_i + 0] -= tmp_force.x; forces[n_i + 1] -= tmp_force.y; forces[n_i + 2] -= tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] += tmp_force.x; forces[n_j + 1] += tmp_force.y; forces[n_j + 2] += tmp_force.z; #if defined(STRESS) /* calculate stresses */ if (us) { forces[stresses + 0] += neigh->dist.x * tmp_force.x; forces[stresses + 1] += neigh->dist.y * tmp_force.y; forces[stresses + 2] += neigh->dist.z * tmp_force.z; forces[stresses + 3] += neigh->dist.x * tmp_force.y; forces[stresses + 4] += neigh->dist.y * tmp_force.z; forces[stresses + 5] += neigh->dist.z * tmp_force.x; } #endif // STRESS } } } } /* loop over neighbours */ } /* end F O U R T H loop over atoms */ #endif // DIPOLE /* F I F T H loop: self energy contributions and sum-up force * contributions */ double qq; #if defined(DSF) double fnval_cut, gtail_cut, ggrad_cut; elstat_value(g_config.dp_cut, dp_kappa, &fnval_cut, >ail_cut, &ggrad_cut); #endif // DSF for (i = 0; i < 
g_config.inconf[h]; i++) { /* atoms */ atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; type1 = atom->type; n_i = 3 * (g_config.cnfstart[h] + i); /* self energy contributions */ if (charge[type1]) { qq = charge[type1] * charge[type1]; #if defined(DSF) fnval = qq * ( DP_EPS * dp_kappa / sqrt(M_PI) + (fnval_cut - gtail_cut * g_config.dp_cut * g_config.dp_cut )*0.5 ); #else fnval = DP_EPS * dp_kappa * qq / sqrt(M_PI); #endif // DSF forces[g_calc.energy_p + h] -= fnval; } #if defined(DIPOLE) double pp; if (dp_alpha[type1]) { pp = SPROD(atom->p_ind, atom->p_ind); fnval = pp / (2 * dp_alpha[type1]); forces[g_calc.energy_p + h] += fnval; } /* alternative dipole self energy including kappa-dependence */ // if (dp_alpha[type1]) { // pp = SPROD(atom->p_ind, atom->p_ind); // fnval = kkk * pp / sqrt(M_PI); // forces[energy_p + h] += fnval; //} #endif // DIPOLE /* sum-up: whole force contributions flow into tmpsum */ /* if (uf) {*/ /*#ifdef FWEIGHT*/ /* Weigh by absolute value of force */ /* forces[k] /= FORCE_EPS + atom->absforce;*/ /* forces[k + 1] /= FORCE_EPS + atom->absforce;*/ /* forces[k + 2] /= FORCE_EPS + atom->absforce;*/ /*#endif |+ FWEIGHT +|*/ /*#ifdef CONTRIB*/ /* if (atom->contrib)*/ /*#endif |+ CONTRIB +|*/ /* tmpsum +=*/ /* conf_weight[h] * (dsquare(forces[k]) + * dsquare(forces[k + 1]) + dsquare(forces[k + 2]));*/ /* printf("tmpsum = %f (forces)\n",tmpsum);*/ /* }*/ } /* end F I F T H loop over atoms */ /* S I X T H loop: EAM force */ if (uf) { /* only required if we calc forces */ for (i = 0; i < g_config.inconf[h]; i++) { atom = g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom; n_i = 3 * (g_config.cnfstart[h] + i); for (j = 0; j < atom->num_neigh; j++) { /* loop over neighbors */ neigh = atom->neigh + j; /* In small cells, an atom might interact with itself */ self = (neigh->nr == i + g_config.cnfstart[h]) ? 
1 : 0; col_F = g_calc.paircol + g_param.ntypes + atom->type; /* column of F */ r = neigh->r; /* are we within reach? */ if ((r < g_pot.calc_pot.end[neigh->col[1]]) || (r < g_pot.calc_pot.end[col_F - g_param.ntypes])) { rho_grad = (r < g_pot.calc_pot.end[neigh->col[1]]) ? splint_grad_dir(&g_pot.calc_pot, xi, neigh->slot[1], neigh->shift[1], neigh->step[1]) : 0.0; if (atom->type == neigh->type) /* use actio = reactio */ rho_grad_j = rho_grad; else rho_grad_j = (r < g_pot.calc_pot.end[col_F - g_param.ntypes]) ? (*g_splint_grad)(&g_pot.calc_pot, xi, col_F - g_param.ntypes, r) : 0.0; /* now we know everything - calculate forces */ eam_force = (rho_grad * atom->gradF + rho_grad_j * g_config.conf_atoms[(neigh->nr) - g_mpi.firstatom] .gradF); /* avoid double counting if atom is interacting with a copy of itself */ if (self) eam_force *= 0.5; tmp_force.x = neigh->dist_r.x * eam_force; tmp_force.y = neigh->dist_r.y * eam_force; tmp_force.z = neigh->dist_r.z * eam_force; forces[n_i + 0] += tmp_force.x; forces[n_i + 1] += tmp_force.y; forces[n_i + 2] += tmp_force.z; /* actio = reactio */ n_j = 3 * neigh->nr; forces[n_j + 0] -= tmp_force.x; forces[n_j + 1] -= tmp_force.y; forces[n_j + 2] -= tmp_force.z; #if defined(STRESS) /* and stresses */ if (us) { forces[stresses + 0] -= neigh->dist.x * tmp_force.x; forces[stresses + 1] -= neigh->dist.y * tmp_force.y; forces[stresses + 2] -= neigh->dist.z * tmp_force.z; forces[stresses + 3] -= neigh->dist.x * tmp_force.y; forces[stresses + 4] -= neigh->dist.y * tmp_force.z; forces[stresses + 5] -= neigh->dist.z * tmp_force.x; } #endif // STRESS } /* within reach */ } /* loop over neighbours */ #if defined(FWEIGHT) /* Weigh by absolute value of force */ forces[n_i + 0] /= FORCE_EPS + atom->absforce; forces[n_i + 1] /= FORCE_EPS + atom->absforce; forces[n_i + 2] /= FORCE_EPS + atom->absforce; #endif // FWEIGHT /* sum up forces */ #if defined(CONTRIB) if (atom->contrib) #endif // CONTRIB tmpsum += g_config.conf_weight[h] * (dsquare(forces[n_i 
+ 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2])); } } /* end S I X T H loop over atoms */ /* whole energy contributions flow into tmpsum */ forces[g_calc.energy_p + h] /= (double)g_config.inconf[h]; forces[g_calc.energy_p + h] -= g_config.force_0[g_calc.energy_p + h]; tmpsum += g_config.conf_weight[h] * g_param.eweight * dsquare(forces[g_calc.energy_p + h]); #if defined(STRESS) /* whole stress contributions flow into tmpsum */ if (uf && us) { for (i = 0; i < 6; i++) { forces[stresses + i] /= g_config.conf_vol[h - g_mpi.firstconf]; forces[stresses + i] -= g_config.force_0[stresses + i]; tmpsum += g_config.conf_weight[h] * g_param.sweight * dsquare(forces[stresses + i]); } } #endif // STRESS /* limiting constraints per configuration */ tmpsum += g_config.conf_weight[h] * dsquare(forces[g_calc.limit_p + h]); } /* end M A I N loop over configurations */ } /* parallel region */ #if defined(MPI) /* Reduce rho_sum */ MPI_Reduce(&rho_sum_loc, &rho_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); #else /* MPI */ rho_sum = rho_sum_loc; #endif // MPI /* dummy constraints (global) */ #if defined(APOT) /* add punishment for out of bounds (mostly for powell_lsq) */ if (g_mpi.myid == 0) { tmpsum += apot_punish(xi_opt, forces); } #endif // APOT #if !defined(NOPUNISH) if (g_mpi.myid == 0) { int g; for (g = 0; g < g_param.ntypes; g++) { #if defined(NORESCALE) /* clear field */ forces[g_calc.dummy_p + g_param.ntypes + g] = 0.0; /* Free end... */ /* NEW: Constraint on U': U'(1.0)=0.0; */ forces[g_calc.dummy_p + g] = DUMMY_WEIGHT * splint_grad(&calc_pot, xi, paircol + g_param.ntypes + g, 1.0); #else /* NOTHING */ forces[g_calc.dummy_p + g_param.ntypes + g] = 0.0; /* Free end... 
*/ /* constraints on U`(n) */ forces[g_calc.dummy_p + g] = DUMMY_WEIGHT * (*g_splint_grad)( &g_pot.calc_pot, xi, g_calc.paircol + g_param.ntypes + g, 0.5 * (g_pot.calc_pot .begin[g_calc.paircol + g_param.ntypes + g] + g_pot.calc_pot .end[g_calc.paircol + g_param.ntypes + g])) - g_config.force_0[g_calc.dummy_p + g]; #endif // NORESCALE tmpsum += dsquare(forces[g_calc.dummy_p + g_param.ntypes + g]); tmpsum += dsquare(forces[g_calc.dummy_p + g]); } /* loop over types */ #if defined(NORESCALE) /* NEW: Constraint on n: <n>=1.0 ONE CONSTRAINT ONLY */ /* Calculate averages */ rho_sum /= (double)natoms; /* ATTN: if there are invariant potentials, things might be problematic */ forces[dummy_p + g_param.ntypes] = DUMMY_WEIGHT * (rho_sum - 1.0); tmpsum += dsquare(forces[dummy_p + g_param.ntypes]); #endif // NORESCALE } #endif // NOPUNISH #if defined(MPI) /* reduce global sum */ sum = 0.0; MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* gather forces, energies, stresses */ if (g_mpi.myid == 0) { /* root node already has data in place */ /* forces */ MPI_Gatherv(MPI_IN_PLACE, g_mpi.myatoms, g_mpi.MPI_VECTOR, forces, g_mpi.atom_len, g_mpi.atom_dist, g_mpi.MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, MPI_DOUBLE, forces + g_calc.energy_p, g_mpi.conf_len, g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); #if defined(STRESS) /* stresses */ MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, g_mpi.MPI_STENS, forces + g_calc.stress_p, g_mpi.conf_len, g_mpi.conf_dist, g_mpi.MPI_STENS, 0, MPI_COMM_WORLD); #endif // STRESS #if !defined(NORESCALE) /* punishment constraints */ MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, MPI_DOUBLE, forces + g_calc.limit_p, g_mpi.conf_len, g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif // !NORESCALE } else { /* forces */ MPI_Gatherv(forces + g_mpi.firstatom * 3, g_mpi.myatoms, g_mpi.MPI_VECTOR, forces, g_mpi.atom_len, g_mpi.atom_dist, g_mpi.MPI_VECTOR, 0, MPI_COMM_WORLD); /* energies */ 
MPI_Gatherv(forces + g_calc.energy_p + g_mpi.firstconf, g_mpi.myconf, MPI_DOUBLE, forces + g_calc.energy_p, g_mpi.conf_len, g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); #if defined(STRESS) /* stresses */ MPI_Gatherv(forces + g_calc.stress_p + 6 * g_mpi.firstconf, g_mpi.myconf, g_mpi.MPI_STENS, forces + g_calc.stress_p, g_mpi.conf_len, g_mpi.conf_dist, g_mpi.MPI_STENS, 0, MPI_COMM_WORLD); #endif // STRESS #if !defined(NORESCALE) /* punishment constraints */ MPI_Gatherv(forces + g_calc.limit_p + g_mpi.firstconf, g_mpi.myconf, MPI_DOUBLE, forces + g_calc.limit_p, g_mpi.conf_len, g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif // !NORESCALE } /* no need to pick up dummy constraints - they are already @ root */ #else sum = tmpsum; /* global sum = local sum */ #endif // MPI /* root process exits this function now */ if (g_mpi.myid == 0) { g_calc.fcalls++; /* Increase function call counter */ if (isnan(sum)) { #if defined(DEBUG) printf("\n--> Force is nan! <--\n\n"); #endif // DEBUG return 10e10; } else return sum; } } /* once a non-root process arrives here, all is done. */ return -1.0; }