Beispiel #1
0
double calc_forces_eam(double *xi_opt, double *forces, int flag)
{
  int   first, col, i;
  double tmpsum = 0.0, sum = 0.0;
  double *xi = NULL;

  static double rho_sum_loc, rho_sum;
  rho_sum_loc = rho_sum = 0.0;

  switch (format) {
      case 0:
	xi = calc_pot.table;
	break;
      case 3:			/* fall through */
      case 4:
	xi = xi_opt;		/* calc-table is opt-table */
	break;
      case 5:
	xi = calc_pot.table;	/* we need to update the calc-table */
  }

  /* This is the start of an infinite loop */
  while (1) {
    tmpsum = 0.0;		/* sum of squares of local process */
    rho_sum_loc = 0.0;

#if defined APOT && !defined MPI
    if (0 == format) {
      apot_check_params(xi_opt);
      update_calc_table(xi_opt, xi, 0);
    }
#endif /* APOT && !MPI */

#ifdef MPI
#ifndef APOT
    /* exchange potential and flag value */
    MPI_Bcast(xi, calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif /* APOT */
    MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (1 == flag)
      break;			/* Exception: flag 1 means clean up */

#ifdef APOT
    if (0 == myid)
      apot_check_params(xi_opt);
    MPI_Bcast(xi_opt, ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    update_calc_table(xi_opt, xi, 0);
#else /* APOT */
    /* if flag==2 then the potential parameters have changed -> sync */
    if (2 == flag)
      potsync();
#endif /* APOT */
#endif /* MPI */

    /* init second derivatives for splines */

    /* [0, ...,  paircol - 1] = pair potentials */
    /* [paircol, ..., paircol + ntypes - 1] = transfer function */
    for (col = 0; col < paircol + ntypes; col++) {
      first = calc_pot.first[col];
      if (0 == format || 3 == format)
	spline_ed(calc_pot.step[col], xi + first,
	  calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, calc_pot.d2tab + first);
      else			/* format >= 4 ! */
	spline_ne(calc_pot.xcoord + first, xi + first,
	  calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, calc_pot.d2tab + first);
    }

    /* [paircol + ntypes, ..., paircol + 2 * ntypes - 1] = embedding function */
#ifndef PARABOLA
    /* if we have parabolic interpolation, we don't need that */
    for (col = paircol + ntypes; col < paircol + 2 * ntypes; col++) {
      first = calc_pot.first[col];
      /* gradient at left boundary matched to square root function,
         when 0 not in domain(F), else natural spline */
      if (0 == format || 3 == format)
	spline_ed(calc_pot.step[col], xi + first, calc_pot.last[col] - first + 1,
#ifdef WZERO
	  ((calc_pot.begin[col] <= 0.0) ? *(xi + first - 2)
	    : 0.5 / xi[first]), ((calc_pot.end[col] >= 0.0) ? *(xi + first - 1)
	    : -0.5 / xi[calc_pot.last[col]]),
#else /* WZERO: F is natural spline in any case */
	  *(xi + first - 2), *(xi + first - 1),
#endif /* WZERO */
	  calc_pot.d2tab + first);
      else			/* format >= 4 ! */
	spline_ne(calc_pot.xcoord + first, xi + first, calc_pot.last[col] - first + 1,
#ifdef WZERO
	  (calc_pot.begin[col] <= 0.0 ? *(xi + first - 2)
	    : 0.5 / xi[first]), (calc_pot.end[col] >= 0.0 ? *(xi + first - 1)
	    : -0.5 / xi[calc_pot.last[col]]),
#else /* WZERO */
	  *(xi + first - 2), *(xi + first - 1),
#endif /* WZERO */
	  calc_pot.d2tab + first);
    }
#endif /* PARABOLA */

#ifndef MPI
    myconf = nconf;
#endif /* MPI */

    /* region containing loop over configurations */
    {
      atom_t *atom;
      int   h, j;
      int   n_i, n_j;
      int   self;
      int   uf;
#ifdef APOT
      double temp_eng;
#endif /* APOT */
#ifdef STRESS
      int   us, stresses;
#endif /* STRESS */

      /* pointer for neighbor table */
      neigh_t *neigh;

      /* pair variables */
      double phi_val, phi_grad;
      double r;
      vector tmp_force;

      /* eam variables */
      int   col_F;
      double eam_force;
      double rho_val, rho_grad, rho_grad_j;

      /* loop over configurations */
      for (h = firstconf; h < firstconf + myconf; h++) {
	uf = conf_uf[h - firstconf];
#ifdef STRESS
	us = conf_us[h - firstconf];
#endif /* STRESS */
	/* reset energies and stresses */
	forces[energy_p + h] = 0.0;
#ifdef STRESS
	stresses = stress_p + 6 * h;
	for (i = 0; i < 6; i++)
	  forces[stresses + i] = 0.0;
#endif /* STRESS */

	/* set limiting constraints */
	forces[limit_p + h] = -force_0[limit_p + h];

	/* first loop over atoms: reset forces, densities */
	for (i = 0; i < inconf[h]; i++) {
	  n_i = 3 * (cnfstart[h] + i);
	  if (uf) {
	    forces[n_i + 0] = -force_0[n_i + 0];
	    forces[n_i + 1] = -force_0[n_i + 1];
	    forces[n_i + 2] = -force_0[n_i + 2];
	  } else {
	    forces[n_i + 0] = 0.0;
	    forces[n_i + 1] = 0.0;
	    forces[n_i + 2] = 0.0;
	  }
	  /* reset atomic density */
	  conf_atoms[cnfstart[h] - firstatom + i].rho = 0.0;
	}
	/* end of first loop */

	/* 2nd loop: calculate pair forces and energies, atomic densities. */
	for (i = 0; i < inconf[h]; i++) {
	  atom = conf_atoms + i + cnfstart[h] - firstatom;
	  n_i = 3 * (cnfstart[h] + i);
	  /* loop over neighbors */
	  for (j = 0; j < atom->num_neigh; j++) {
	    neigh = atom->neigh + j;
	    /* In small cells, an atom might interact with itself */
	    self = (neigh->nr == i + cnfstart[h]) ? 1 : 0;

	    /* pair potential part */
	    if (neigh->r < calc_pot.end[neigh->col[0]]) {
	      /* fn value and grad are calculated in the same step */
	      if (uf)
		phi_val =
		  splint_comb_dir(&calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0], &phi_grad);
	      else
		phi_val = splint_dir(&calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0]);
	      /* avoid double counting if atom is interacting with a copy of itself */
	      if (self) {
		phi_val *= 0.5;
		phi_grad *= 0.5;
	      }

	      /* add cohesive energy */
	      forces[energy_p + h] += phi_val;

	      /* calculate forces */
	      if (uf) {
		tmp_force.x = neigh->dist_r.x * phi_grad;
		tmp_force.y = neigh->dist_r.y * phi_grad;
		tmp_force.z = neigh->dist_r.z * phi_grad;
		forces[n_i + 0] += tmp_force.x;
		forces[n_i + 1] += tmp_force.y;
		forces[n_i + 2] += tmp_force.z;
		/* actio = reactio */
		n_j = 3 * neigh->nr;
		forces[n_j + 0] -= tmp_force.x;
		forces[n_j + 1] -= tmp_force.y;
		forces[n_j + 2] -= tmp_force.z;
#ifdef STRESS
		/* also calculate pair stresses */
		if (us) {
		  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
		  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
		  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
		  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
		  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
		  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
		}
#endif /* STRESS */
	      }
	    }

	    /* neighbor in range */
	    /* calculate atomic densities */
	    if (atom->type == neigh->type) {
	      /* then transfer(a->b)==transfer(b->a) */
	      if (neigh->r < calc_pot.end[neigh->col[1]]) {
		rho_val = splint_dir(&calc_pot, xi, neigh->slot[1], neigh->shift[1], neigh->step[1]);
		atom->rho += rho_val;
		/* avoid double counting if atom is interacting with a
		   copy of itself */
		if (!self) {
		  conf_atoms[neigh->nr - firstatom].rho += rho_val;
		}
	      }
	    } else {
	      /* transfer(a->b)!=transfer(b->a) */
	      if (neigh->r < calc_pot.end[neigh->col[1]]) {
		atom->rho += splint_dir(&calc_pot, xi, neigh->slot[1], neigh->shift[1], neigh->step[1]);
	      }
	      /* cannot use slot/shift to access splines */
	      if (neigh->r < calc_pot.end[paircol + atom->type])
		conf_atoms[neigh->nr - firstatom].rho +=
		  splint(&calc_pot, xi, paircol + atom->type, neigh->r);
	    }
	  }			/* loop over all neighbors */

	  col_F = paircol + ntypes + atom->type;	/* column of F */
#ifndef NORESCALE
	  if (atom->rho > calc_pot.end[col_F]) {
	    /* then punish target function -> bad potential */
	    forces[limit_p + h] += DUMMY_WEIGHT * 10.0 * dsquare(atom->rho - calc_pot.end[col_F]);
#ifndef PARABOLA
	    /* then we use the final value, with PARABOLA: extrapolate */
	    atom->rho = calc_pot.end[col_F];
#endif /* PARABOLA */
	  }

	  if (atom->rho < calc_pot.begin[col_F]) {
	    /* then punish target function -> bad potential */
	    forces[limit_p + h] += DUMMY_WEIGHT * 10.0 * dsquare(calc_pot.begin[col_F] - atom->rho);
#ifndef PARABOLA
	    /* then we use the final value, with PARABOLA: extrapolate */
	    atom->rho = calc_pot.begin[col_F];
#endif /* PARABOLA */
	  }
#endif /* !NORESCALE */

	  /* embedding energy, embedding gradient */
	  /* contribution to cohesive energy is F(n) */

#ifdef PARABOLA
	  forces[energy_p + h] += parab_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF);
#elif defined(NORESCALE)
	  if (atom->rho < calc_pot.begin[col_F]) {
#ifdef APOT
	    /* calculate analytic value explicitly */
	    apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng);
	    atom->gradF = apot_grad(atom->rho, xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]);
	    forces[energy_p + h] += temp_eng;
#else
	    /* linear extrapolation left */
	    rho_val = splint_comb(&calc_pot, xi, col_F, calc_pot.begin[col_F], &atom->gradF);
	    forces[energy_p + h] += rho_val + (atom->rho - calc_pot.begin[col_F]) * atom->gradF;
#endif /* APOT */
	  } else if (atom->rho > calc_pot.end[col_F]) {
#ifdef APOT
	    /* calculate analytic value explicitly */
	    apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng);
	    atom->gradF = apot_grad(atom->rho, xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]);
	    forces[energy_p + h] += temp_eng;
#else
	    /* and right */
	    rho_val =
	      splint_comb(&calc_pot, xi, col_F, calc_pot.end[col_F] - 0.5 * calc_pot.step[col_F],
	      &atom->gradF);
	    forces[energy_p + h] += rho_val + (atom->rho - calc_pot.end[col_F]) * atom->gradF;
#endif /* APOT */
	  }
	  /* and in-between */
	  else {
#ifdef APOT
	    /* calculate small values directly */
	    if (atom->rho < 0.1) {
	      apot_table.fvalue[col_F] (atom->rho, xi_opt + opt_pot.first[col_F], &temp_eng);
	      atom->gradF = apot_grad(atom->rho, xi_opt + opt_pot.first[col_F], apot_table.fvalue[col_F]);
	      forces[energy_p + h] += temp_eng;
	    } else
#endif
	      forces[energy_p + h] += splint_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF);
	  }
#else
	  forces[energy_p + h] += splint_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF);
#endif /* NORESCALE */
	  /* sum up rho */
	  rho_sum_loc += atom->rho;
	}			/* second loop over atoms */

	/* 3rd loop over atom: EAM force */
	if (uf) {		/* only required if we calc forces */
	  for (i = 0; i < inconf[h]; i++) {
	    atom = conf_atoms + i + cnfstart[h] - firstatom;
	    n_i = 3 * (cnfstart[h] + i);
	    for (j = 0; j < atom->num_neigh; j++) {
	      /* loop over neighbors */
	      neigh = atom->neigh + j;
	      /* In small cells, an atom might interact with itself */
	      self = (neigh->nr == i + cnfstart[h]) ? 1 : 0;
	      col_F = paircol + ntypes + atom->type;	/* column of F */
	      r = neigh->r;
	      /* are we within reach? */
	      if ((r < calc_pot.end[neigh->col[1]]) || (r < calc_pot.end[col_F - ntypes])) {
		rho_grad =
		  (r < calc_pot.end[neigh->col[1]]) ? splint_grad_dir(&calc_pot, xi, neigh->slot[1],
		  neigh->shift[1], neigh->step[1]) : 0.0;
		if (atom->type == neigh->type)	/* use actio = reactio */
		  rho_grad_j = rho_grad;
		else
		  rho_grad_j =
		    (r < calc_pot.end[col_F - ntypes]) ? splint_grad(&calc_pot, xi, col_F - ntypes, r) : 0.;
		/* now we know everything - calculate forces */
		eam_force = (rho_grad * atom->gradF + rho_grad_j * conf_atoms[(neigh->nr) - firstatom].gradF);
		/* avoid double counting if atom is interacting with a copy of itself */
		if (self)
		  eam_force *= 0.5;
		tmp_force.x = neigh->dist_r.x * eam_force;
		tmp_force.y = neigh->dist_r.y * eam_force;
		tmp_force.z = neigh->dist_r.z * eam_force;
		forces[n_i + 0] += tmp_force.x;
		forces[n_i + 1] += tmp_force.y;
		forces[n_i + 2] += tmp_force.z;
		/* actio = reactio */
		n_j = 3 * neigh->nr;
		forces[n_j + 0] -= tmp_force.x;
		forces[n_j + 1] -= tmp_force.y;
		forces[n_j + 2] -= tmp_force.z;
#ifdef STRESS
		/* and stresses */
		if (us) {
		  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
		  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
		  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
		  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
		  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
		  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
		}
#endif /* STRESS */
	      }			/* within reach */
	    }			/* loop over neighbours */

#ifdef FWEIGHT
	    /* Weigh by absolute value of force */
	    forces[n_i + 0] /= FORCE_EPS + atom->absforce;
	    forces[n_i + 1] /= FORCE_EPS + atom->absforce;
	    forces[n_i + 2] /= FORCE_EPS + atom->absforce;
#endif /* FWEIGHT */

	    /* sum up forces  */
#ifdef CONTRIB
	    if (atom->contrib)
#endif /* CONTRIB */
	      tmpsum += conf_weight[h] *
		(dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2]));
	  }			/* third loop over atoms */
	}

	/* use forces */
	/* energy contributions */
	forces[energy_p + h] /= (double)inconf[h];
	forces[energy_p + h] -= force_0[energy_p + h];
	tmpsum += conf_weight[h] * eweight * dsquare(forces[energy_p + h]);

#ifdef STRESS
	/* stress contributions */
	if (uf && us) {
	  for (i = 0; i < 6; i++) {
	    forces[stresses + i] /= conf_vol[h - firstconf];
	    forces[stresses + i] -= force_0[stresses + i];
	    tmpsum += conf_weight[h] * sweight * dsquare(forces[stresses + i]);
	  }
	}
#endif /* STRESS */
	/* limiting constraints per configuration */
	tmpsum += conf_weight[h] * dsquare(forces[limit_p + h]);
      }				/* loop over configurations */
    }				/* parallel region */
#ifdef MPI
    /* Reduce rho_sum */
    rho_sum = 0.0;
    MPI_Reduce(&rho_sum_loc, &rho_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
#else /* MPI */
    rho_sum = rho_sum_loc;
#endif /* MPI */

    /* dummy constraints (global) */
#ifdef APOT
    /* add punishment for out of bounds (mostly for powell_lsq) */
    if (0 == myid) {
      tmpsum += apot_punish(xi_opt, forces);
    }
#endif /* APOT */

#ifndef NOPUNISH
    if (0 == myid) {
      int   g;
      for (g = 0; g < ntypes; g++) {
	/* PARABOLA, WZERO, NORESC - different behaviour */
#ifdef PARABOLA
/* constraints on U(n) */
	forces[dummy_p + ntypes + g] = DUMMY_WEIGHT * parab(&calc_pot, xi, paircol + ntypes + g, 0.0)
	  - force_0[dummy_p + ntypes + g];
/* constraints on U`(n) */
	forces[dummy_p + g] =
	  DUMMY_WEIGHT * parab_grad(&calc_pot, xi, paircol + ntypes + g,
	  .5 * (calc_pot.begin[paircol + ntypes + g] + calc_pot.end[paircol + ntypes + g])) -
	  force_0[dummy_p + g];
#elif defined(WZERO)
	if (calc_pot.begin[paircol + ntypes + g] <= 0.0)
	  /* 0 in domain of U(n) */
/* constraints on U(n) */
	  forces[dummy_p + ntypes + g] = DUMMY_WEIGHT * splint(&calc_pot, xi, paircol + ntypes + g, 0.0)
	    - force_0[dummy_p + ntypes + g];
	else
	  /* 0 not in domain of U(n) */
	  forces[dummy_p + ntypes + g] = 0.0;	/* Free end... */
/* constraints on U`(n) */
	forces[dummy_p + g] =
	  DUMMY_WEIGHT * splint_grad(&calc_pot, xi, paircol + ntypes + g,
	  0.5 * (calc_pot.begin[paircol + ntypes + g] + calc_pot.end[paircol + ntypes + g]))
	  - force_0[dummy_p + g];
#elif defined(NORESCALE)
	/* clear field */
	forces[dummy_p + ntypes + g] = 0.0;	/* Free end... */
	/* NEW: Constraint on U': U'(1.)=0; */
	forces[dummy_p + g] = DUMMY_WEIGHT * splint_grad(&calc_pot, xi, paircol + ntypes + g, 1.0);
#else /* NOTHING */
	forces[dummy_p + ntypes + g] = 0.0;	/* Free end... */
/* constraints on U`(n) */
	forces[dummy_p + g] =
	  DUMMY_WEIGHT * splint_grad(&calc_pot, xi, paircol + ntypes + g,
	  0.5 * (calc_pot.begin[paircol + ntypes + g] + calc_pot.end[paircol + ntypes + g]))
	  - force_0[dummy_p + g];
#endif /* Dummy constraints */
	tmpsum += dsquare(forces[dummy_p + ntypes + g]);
	tmpsum += dsquare(forces[dummy_p + g]);
      }				/* loop over types */
#ifdef NORESCALE
      /* NEW: Constraint on n: <n>=1. ONE CONSTRAINT ONLY */
      /* Calculate averages */
      rho_sum /= (double)natoms;
      /* ATTN: if there are invariant potentials, things might be problematic */
      forces[dummy_p + ntypes] = DUMMY_WEIGHT * (rho_sum - 1.0);
      tmpsum += dsquare(forces[dummy_p + ntypes]);
#endif /* NORESCALE */
    }				/* only root process */
#endif /* !NOPUNISH */

#ifdef MPI
    /* reduce global sum */
    sum = 0.0;
    MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    /* gather forces, energies, stresses */
    if (0 == myid) {		/* root node already has data in place */
      /* forces */
      MPI_Gatherv(MPI_IN_PLACE, myatoms, MPI_VECTOR, forces, atom_len,
	atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD);
      /* energies */
      MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_DOUBLE, forces + natoms * 3,
	conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      /* stresses */
      MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_STENS, forces + natoms * 3 + nconf,
	conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD);
      /* punishment constraints */
      MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_DOUBLE, forces + natoms * 3 + 7 * nconf,
	conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    } else {
      /* forces */
      MPI_Gatherv(forces + firstatom * 3, myatoms, MPI_VECTOR, forces, atom_len,
	atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD);
      /* energies */
      MPI_Gatherv(forces + natoms * 3 + firstconf, myconf, MPI_DOUBLE,
	forces + natoms * 3, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      /* stresses */
      MPI_Gatherv(forces + natoms * 3 + nconf + 6 * firstconf, myconf, MPI_STENS,
	forces + natoms * 3 + nconf, conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD);
      /* punishment constraints */
      MPI_Gatherv(forces + natoms * 3 + 7 * nconf + firstconf, myconf, MPI_DOUBLE,
	forces + natoms * 3 + 7 * nconf, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    }
    /* no need to pick up dummy constraints - are already @ root */
#else
    sum = tmpsum;		/* global sum = local sum  */
#endif /* MPI */

    /* root process exits this function now */
    if (0 == myid) {
      fcalls++;			/* Increase function call counter */
      if (isnan(sum)) {
#ifdef DEBUG
	printf("\n--> Force is nan! <--\n\n");
#endif /* DEBUG */
	return 10e10;
      } else
	return sum;
    }

  }				/* end of infinite loop */

  /* once a non-root process arrives here, all is done. */
  return -1.0;
}
Beispiel #2
0
double calc_forces(double* xi_opt, double* forces, int flag)
{
  double tmpsum = 0.0;
  double sum = 0.0;
  int first = 0;
  int col = 0;
  int ne = 0;
  int size = 0;
  int i = flag;
  double* xi = NULL;
  apot_table_t* apt = &g_pot.apot_table;
  double charge[g_param.ntypes];
  double sum_charges;
  double dp_kappa;

#if defined(DIPOLE)
  double dp_alpha[g_param.ntypes];
  double dp_b[apt->number];
  double dp_c[apt->number];
#endif  // DIPOLE

  switch (g_pot.format_type) {
    case POTENTIAL_FORMAT_UNKNOWN:
      break;
    case POTENTIAL_FORMAT_ANALYTIC:
      xi = g_pot.calc_pot.table;
      break;
    case POTENTIAL_FORMAT_TABULATED_EQ_DIST:
    case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST:
      xi = xi_opt;
      break;
  }

  ne = g_pot.apot_table.total_ne_par;
  size = apt->number;

  /* This is the start of an infinite loop */
  while (1) {
    tmpsum = 0.0; /* sum of squares of local process */

#if defined(APOT) && !defined(MPI)
    if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC) {
      apot_check_params(xi_opt);
      update_calc_table(xi_opt, xi, 0);
    }
#endif  // APOT && !MPI

#if defined(MPI)
/* exchange potential and flag value */
#if !defined(APOT)
    MPI_Bcast(xi, g_pot.calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif  // !APOT
    MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (flag == 1)
      break; /* Exception: flag 1 means clean up */

#if defined(APOT)
    if (g_mpi.myid == 0)
      apot_check_params(xi_opt);
    MPI_Bcast(xi_opt, g_calc.ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC)
      update_calc_table(xi_opt, xi, 0);
#else   // APOT
    /* if flag==2 then the potential parameters have changed -> sync */
    if (flag == 2)
      potsync();
#endif  // APOT
#endif  // MPI

    /* local arrays for electrostatic parameters */
    sum_charges = 0;
    for (i = 0; i < g_param.ntypes - 1; i++) {
      if (xi_opt[2 * size + ne + i]) {
        charge[i] = xi_opt[2 * size + ne + i];
        sum_charges += apt->ratio[i] * charge[i];
      } else {
        charge[i] = 0.0;
      }
    }
    apt->last_charge = -sum_charges / apt->ratio[g_param.ntypes - 1];
    charge[g_param.ntypes - 1] = apt->last_charge;
    if (xi_opt[2 * size + ne + g_param.ntypes - 1]) {
      dp_kappa = xi_opt[2 * size + ne + g_param.ntypes - 1];
    } else {
      dp_kappa = 0.0;
    }

#if defined(DIPOLE)
    for (i = 0; i < g_param.ntypes; i++) {
      if (xi_opt[2 * size + ne + g_param.ntypes + i]) {
        dp_alpha[i] = xi_opt[2 * size + ne + g_param.ntypes + i];
      } else {
        dp_alpha[i] = 0.0;
      }
    }
    for (i = 0; i < size; i++) {
      if (xi_opt[2 * size + ne + 2 * g_param.ntypes + i]) {
        dp_b[i] = xi_opt[2 * size + ne + 2 * g_param.ntypes + i];
      } else {
        dp_b[i] = 0.0;
      }
      if (xi_opt[3 * size + ne + 2 * g_param.ntypes + i]) {
        dp_c[i] = xi_opt[3 * size + ne + 2 * g_param.ntypes + i];
      } else {
        dp_c[i] = 0.0;
      }
    }
#endif  // DIPOLE

    /* init second derivatives for splines */
    for (col = 0; col < g_calc.paircol; col++) {
      first = g_pot.calc_pot.first[col];
      switch (g_pot.format_type) {
        case POTENTIAL_FORMAT_UNKNOWN:
          error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__,
                __LINE__);
        case POTENTIAL_FORMAT_ANALYTIC:
        case POTENTIAL_FORMAT_TABULATED_EQ_DIST: {
          spline_ed(g_pot.calc_pot.step[col], xi + first,
                    g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2),
                    0.0, g_pot.calc_pot.d2tab + first);
          break;
        }
        case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: {
          spline_ne(g_pot.calc_pot.xcoord + first, xi + first,
                    g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2),
                    0.0, g_pot.calc_pot.d2tab + first);
        }
      }
    }

#if !defined(MPI)
    g_mpi.myconf = g_config.nconf;
#endif  // MPI

    /* region containing loop over configurations,
       also OMP-parallelized region */
    {
      int self;
      vector tmp_force;
      int h, j, type1, type2, uf;
#if defined(STRESS)
      int us, stresses;
#endif  // STRESS
      int n_i, n_j;
      double fnval, grad, fnval_tail, grad_tail, grad_i, grad_j;
#if defined(DIPOLE)
      double p_sr_tail;
#endif  // DIPOLE
      atom_t* atom;
      neigh_t* neigh;

      /* loop over configurations: M A I N LOOP CONTAINING ALL ATOM-LOOPS */
      for (h = g_mpi.firstconf; h < g_mpi.firstconf + g_mpi.myconf; h++) {
        uf = g_config.conf_uf[h - g_mpi.firstconf];
#if defined(STRESS)
        us = g_config.conf_us[h - g_mpi.firstconf];
#endif  // STRESS
        /* reset energies and stresses */
        forces[g_calc.energy_p + h] = 0.0;
#if defined(STRESS)
        stresses = g_calc.stress_p + 6 * h;
        for (i = 0; i < 6; i++)
          forces[stresses + i] = 0.0;
#endif  // STRESS

#if defined(DIPOLE)
        /* reset dipoles and fields: LOOP Z E R O */
        for (i = 0; i < g_config.inconf[h]; i++) {
          atom =
              g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom;
          atom->E_stat.x = 0.0;
          atom->E_stat.y = 0.0;
          atom->E_stat.z = 0.0;
          atom->p_sr.x = 0.0;
          atom->p_sr.y = 0.0;
          atom->p_sr.z = 0.0;
        }
#endif  // DIPOLE

        /* F I R S T LOOP OVER ATOMS: reset forces, dipoles */
        for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
          n_i = 3 * (g_config.cnfstart[h] + i);
          if (uf) {
            forces[n_i + 0] = -g_config.force_0[n_i + 0];
            forces[n_i + 1] = -g_config.force_0[n_i + 1];
            forces[n_i + 2] = -g_config.force_0[n_i + 2];
          } else {
            forces[n_i + 0] = 0.0;
            forces[n_i + 1] = 0.0;
            forces[n_i + 2] = 0.0;
          }
        } /* end F I R S T LOOP */

        /* S E C O N D loop: calculate short-range and monopole forces,
           calculate static field- and dipole-contributions */
        for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
          atom =
              g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom;
          type1 = atom->type;
          n_i = 3 * (g_config.cnfstart[h] + i);
          for (j = 0; j < atom->num_neigh; j++) { /* neighbors */
            neigh = atom->neigh + j;
            type2 = neigh->type;
            col = neigh->col[0];

            /* updating tail-functions - only necessary with variing kappa */
            if (!apt->sw_kappa)
              elstat_shift(neigh->r, dp_kappa, &neigh->fnval_el,
                           &neigh->grad_el, &neigh->ggrad_el);

            /* In small cells, an atom might interact with itself */
            self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0;

            /* calculate short-range forces */
            if (neigh->r < g_pot.calc_pot.end[col]) {
              if (uf) {
                fnval = splint_comb_dir(&g_pot.calc_pot, xi, neigh->slot[0],
                                        neigh->shift[0], neigh->step[0], &grad);
              } else {
                fnval = splint_dir(&g_pot.calc_pot, xi, neigh->slot[0],
                                   neigh->shift[0], neigh->step[0]);
              }

              /* avoid double counting if atom is interacting with a
                 copy of itself */
              if (self) {
                fnval *= 0.5;
                grad *= 0.5;
              }
              forces[g_calc.energy_p + h] += fnval;

              if (uf) {
                tmp_force.x = neigh->dist_r.x * grad;
                tmp_force.y = neigh->dist_r.y * grad;
                tmp_force.z = neigh->dist_r.z * grad;
                forces[n_i + 0] += tmp_force.x;
                forces[n_i + 1] += tmp_force.y;
                forces[n_i + 2] += tmp_force.z;
                /* actio = reactio */
                n_j = 3 * neigh->nr;
                forces[n_j + 0] -= tmp_force.x;
                forces[n_j + 1] -= tmp_force.y;
                forces[n_j + 2] -= tmp_force.z;

#if defined(STRESS)
                /* calculate pair stresses */
                if (us) {
                  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
                  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
                  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
                  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
                  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
                  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
                }
#endif  // STRESS
              }
            }

            /* calculate monopole forces */
            if (neigh->r < g_config.dp_cut &&
                (charge[type1] || charge[type2])) {
              fnval_tail = neigh->fnval_el;
              grad_tail = neigh->grad_el;

              grad_i = charge[type2] * grad_tail;
              if (type1 == type2) {
                grad_j = grad_i;
              } else {
                grad_j = charge[type1] * grad_tail;
              }
              fnval = charge[type1] * charge[type2] * fnval_tail;
              grad = charge[type1] * grad_i;

              if (self) {
                grad_i *= 0.5;
                grad_j *= 0.5;
                fnval *= 0.5;
                grad *= 0.5;
              }

              forces[g_calc.energy_p + h] += fnval;

              if (uf) {
                tmp_force.x = neigh->dist.x * grad;
                tmp_force.y = neigh->dist.y * grad;
                tmp_force.z = neigh->dist.z * grad;
                forces[n_i + 0] += tmp_force.x;
                forces[n_i + 1] += tmp_force.y;
                forces[n_i + 2] += tmp_force.z;
                /* actio = reactio */
                n_j = 3 * neigh->nr;
                forces[n_j + 0] -= tmp_force.x;
                forces[n_j + 1] -= tmp_force.y;
                forces[n_j + 2] -= tmp_force.z;
#if defined(STRESS)
                /* calculate coulomb stresses */
                if (us) {
                  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
                  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
                  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
                  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
                  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
                  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
                }
#endif  // STRESS
              }
#if defined(DIPOLE)
              /* calculate static field-contributions */
              atom->E_stat.x += neigh->dist.x * grad_i;
              atom->E_stat.y += neigh->dist.y * grad_i;
              atom->E_stat.z += neigh->dist.z * grad_i;

              g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.x -=
                  neigh->dist.x * grad_j;
              g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.y -=
                  neigh->dist.y * grad_j;
              g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.z -=
                  neigh->dist.z * grad_j;

              /* calculate short-range dipoles */
              if (dp_alpha[type1] && dp_b[col] && dp_c[col]) {
                p_sr_tail = grad_tail * neigh->r *
                            shortrange_value(neigh->r, dp_alpha[type1],
                                             dp_b[col], dp_c[col]);
                atom->p_sr.x += charge[type2] * neigh->dist_r.x * p_sr_tail;
                atom->p_sr.y += charge[type2] * neigh->dist_r.y * p_sr_tail;
                atom->p_sr.z += charge[type2] * neigh->dist_r.z * p_sr_tail;
              }
              if (dp_alpha[type2] && dp_b[col] && dp_c[col] && !self) {
                p_sr_tail = grad_tail * neigh->r *
                            shortrange_value(neigh->r, dp_alpha[type2],
                                             dp_b[col], dp_c[col]);
                g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.x -=
                    charge[type1] * neigh->dist_r.x * p_sr_tail;
                g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.y -=
                    charge[type1] * neigh->dist_r.y * p_sr_tail;
                g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.z -=
                    charge[type1] * neigh->dist_r.z * p_sr_tail;
              }
#endif  // DIPOLE
            }

          } /* loop over neighbours */
        }   /* end S E C O N D loop over atoms */

#if defined(DIPOLE)
        /* T H I R D loop: calculate whole dipole moment for every atom */
        double rp, dp_sum;
        int dp_converged = 0, dp_it = 0;
        double max_diff = 10;

        while (dp_converged == 0) {
          dp_sum = 0;
          for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
            atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                   g_mpi.firstatom;
            type1 = atom->type;
            if (dp_alpha[type1]) {
              if (dp_it) {
                /* note: mixing parameter is different from that on in IMD */
                atom->E_tot.x = (1 - g_config.dp_mix) * atom->E_ind.x +
                                g_config.dp_mix * atom->E_old.x +
                                atom->E_stat.x;
                atom->E_tot.y = (1 - g_config.dp_mix) * atom->E_ind.y +
                                g_config.dp_mix * atom->E_old.y +
                                atom->E_stat.y;
                atom->E_tot.z = (1 - g_config.dp_mix) * atom->E_ind.z +
                                g_config.dp_mix * atom->E_old.z +
                                atom->E_stat.z;
              } else {
                atom->E_tot.x = atom->E_ind.x + atom->E_stat.x;
                atom->E_tot.y = atom->E_ind.y + atom->E_stat.y;
                atom->E_tot.z = atom->E_ind.z + atom->E_stat.z;
              }

              atom->p_ind.x = dp_alpha[type1] * atom->E_tot.x + atom->p_sr.x;
              atom->p_ind.y = dp_alpha[type1] * atom->E_tot.y + atom->p_sr.y;
              atom->p_ind.z = dp_alpha[type1] * atom->E_tot.z + atom->p_sr.z;

              atom->E_old.x = atom->E_ind.x;
              atom->E_old.y = atom->E_ind.y;
              atom->E_old.z = atom->E_ind.z;

              atom->E_ind.x = 0.0;
              atom->E_ind.y = 0.0;
              atom->E_ind.z = 0.0;
            }
          }

          for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
            atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                   g_mpi.firstatom;
            type1 = atom->type;
            for (j = 0; j < atom->num_neigh; j++) { /* neighbors */
              neigh = atom->neigh + j;
              type2 = neigh->type;
              col = neigh->col[0];
              /* In small cells, an atom might interact with itself */
              self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0;

              if (neigh->r < g_config.dp_cut && dp_alpha[type1] &&
                  dp_alpha[type2]) {
                rp = SPROD(
                    g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind,
                    neigh->dist_r);
                atom->E_ind.x +=
                    neigh->grad_el *
                    (3 * rp * neigh->dist_r.x -
                     g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.x);
                atom->E_ind.y +=
                    neigh->grad_el *
                    (3 * rp * neigh->dist_r.y -
                     g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.y);
                atom->E_ind.z +=
                    neigh->grad_el *
                    (3 * rp * neigh->dist_r.z -
                     g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.z);

                if (!self) {
                  rp = SPROD(atom->p_ind, neigh->dist_r);
                  g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.x +=
                      neigh->grad_el *
                      (3 * rp * neigh->dist_r.x - atom->p_ind.x);
                  g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.y +=
                      neigh->grad_el *
                      (3 * rp * neigh->dist_r.y - atom->p_ind.y);
                  g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.z +=
                      neigh->grad_el *
                      (3 * rp * neigh->dist_r.z - atom->p_ind.z);
                }
              }
            }
          }

          for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
            atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                   g_mpi.firstatom;
            type1 = atom->type;
            if (dp_alpha[type1]) {
              dp_sum +=
                  dsquare(dp_alpha[type1] * (atom->E_old.x - atom->E_ind.x));
              dp_sum +=
                  dsquare(dp_alpha[type1] * (atom->E_old.y - atom->E_ind.y));
              dp_sum +=
                  dsquare(dp_alpha[type1] * (atom->E_old.z - atom->E_ind.z));
            }
          }

          dp_sum /= 3 * g_config.inconf[h];
          dp_sum = sqrt(dp_sum);

          if (dp_it) {
            if ((dp_sum > max_diff) || (dp_it > 50)) {
              dp_converged = 1;
              for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
                atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                       g_mpi.firstatom;
                type1 = atom->type;
                if (dp_alpha[type1]) {
                  atom->p_ind.x =
                      dp_alpha[type1] * atom->E_stat.x + atom->p_sr.x;
                  atom->p_ind.y =
                      dp_alpha[type1] * atom->E_stat.y + atom->p_sr.y;
                  atom->p_ind.z =
                      dp_alpha[type1] * atom->E_stat.z + atom->p_sr.z;
                  atom->E_ind.x = atom->E_stat.x;
                  atom->E_ind.y = atom->E_stat.y;
                  atom->E_ind.z = atom->E_stat.z;
                }
              }
            }
          }

          if (dp_sum < g_config.dp_tol)
            dp_converged = 1;

          dp_it++;
        } /* end T H I R D loop over atoms */

        /* F O U R T H  loop: calculate monopole-dipole and dipole-dipole forces
         */
        double rp_i, rp_j, pp_ij, tmp_1, tmp_2;
        double grad_1, grad_2, srval, srgrad, srval_tail, srgrad_tail,
            fnval_sum, grad_sum;
        for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
          atom =
              g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom;
          type1 = atom->type;
          n_i = 3 * (g_config.cnfstart[h] + i);
          for (j = 0; j < atom->num_neigh; j++) { /* neighbors */
            neigh = atom->neigh + j;
            type2 = neigh->type;
            col = neigh->col[0];

            /* In small cells, an atom might interact with itself */
            self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0;
            if (neigh->r < g_config.dp_cut &&
                (dp_alpha[type1] || dp_alpha[type2])) {
              fnval_tail = -neigh->grad_el;
              grad_tail = -neigh->ggrad_el;

              if (dp_b[col] && dp_c[col]) {
                shortrange_term(neigh->r, dp_b[col], dp_c[col], &srval_tail,
                                &srgrad_tail);
                srval = fnval_tail * srval_tail;
                srgrad = fnval_tail * srgrad_tail + grad_tail * srval_tail;
              }

              if (self) {
                fnval_tail *= 0.5;
                grad_tail *= 0.5;
              }

              /* monopole-dipole contributions */
              if (charge[type1] && dp_alpha[type2]) {
                if (dp_b[col] && dp_c[col]) {
                  fnval_sum = fnval_tail + srval;
                  grad_sum = grad_tail + srgrad;
                } else {
                  fnval_sum = fnval_tail;
                  grad_sum = grad_tail;
                }

                rp_j = SPROD(
                    g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind,
                    neigh->dist_r);
                fnval = charge[type1] * rp_j * fnval_sum * neigh->r;
                grad_1 = charge[type1] * rp_j * grad_sum * neigh->r2;
                grad_2 = charge[type1] * fnval_sum;

                forces[g_calc.energy_p + h] -= fnval;

                if (uf) {
                  tmp_force.x =
                      neigh->dist_r.x * grad_1 +
                      g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.x *
                          grad_2;
                  tmp_force.y =
                      neigh->dist_r.y * grad_1 +
                      g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.y *
                          grad_2;
                  tmp_force.z =
                      neigh->dist_r.z * grad_1 +
                      g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.z *
                          grad_2;
                  forces[n_i + 0] -= tmp_force.x;
                  forces[n_i + 1] -= tmp_force.y;
                  forces[n_i + 2] -= tmp_force.z;
                  /* actio = reactio */
                  n_j = 3 * neigh->nr;
                  forces[n_j + 0] += tmp_force.x;
                  forces[n_j + 1] += tmp_force.y;
                  forces[n_j + 2] += tmp_force.z;
#if defined(STRESS)
                  /* calculate stresses */
                  if (us) {
                    forces[stresses + 0] += neigh->dist.x * tmp_force.x;
                    forces[stresses + 1] += neigh->dist.y * tmp_force.y;
                    forces[stresses + 2] += neigh->dist.z * tmp_force.z;
                    forces[stresses + 3] += neigh->dist.x * tmp_force.y;
                    forces[stresses + 4] += neigh->dist.y * tmp_force.z;
                    forces[stresses + 5] += neigh->dist.z * tmp_force.x;
                  }
#endif  // STRESS
                }
              }

              /* dipole-monopole contributions */
              if (dp_alpha[type1] && charge[type2]) {
                if (dp_b[col] && dp_c[col]) {
                  fnval_sum = fnval_tail + srval;
                  grad_sum = grad_tail + srgrad;
                } else {
                  fnval_sum = fnval_tail;
                  grad_sum = grad_tail;
                }

                rp_i = SPROD(atom->p_ind, neigh->dist_r);
                fnval = charge[type2] * rp_i * fnval_sum * neigh->r;
                grad_1 = charge[type2] * rp_i * grad_sum * neigh->r2;
                grad_2 = charge[type2] * fnval_sum;

                forces[g_calc.energy_p + h] += fnval;

                if (uf) {
                  tmp_force.x =
                      neigh->dist_r.x * grad_1 + atom->p_ind.x * grad_2;
                  tmp_force.y =
                      neigh->dist_r.y * grad_1 + atom->p_ind.y * grad_2;
                  tmp_force.z =
                      neigh->dist_r.z * grad_1 + atom->p_ind.z * grad_2;
                  forces[n_i + 0] += tmp_force.x;
                  forces[n_i + 1] += tmp_force.y;
                  forces[n_i + 2] += tmp_force.z;
                  /* actio = reactio */
                  n_j = 3 * neigh->nr;
                  forces[n_j + 0] -= tmp_force.x;
                  forces[n_j + 1] -= tmp_force.y;
                  forces[n_j + 2] -= tmp_force.z;
#if defined(STRESS)
                  /* calculate stresses */
                  if (us) {
                    forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
                    forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
                    forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
                    forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
                    forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
                    forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
                  }
#endif  // STRESS
                }
              }

              /* dipole-dipole contributions */
              if (dp_alpha[type1] && dp_alpha[type2]) {
                pp_ij = SPROD(
                    atom->p_ind,
                    g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind);
                tmp_1 = 3 * rp_i * rp_j;
                tmp_2 = 3 * fnval_tail / neigh->r2;

                fnval = -(tmp_1 - pp_ij) * fnval_tail;
                grad_1 = (tmp_1 - pp_ij) * grad_tail;
                grad_2 = 2 * rp_i * rp_j;

                forces[g_calc.energy_p + h] += fnval;

                if (uf) {
                  tmp_force.x =
                      grad_1 * neigh->dist.x -
                      tmp_2 *
                          (grad_2 * neigh->dist.x -
                           rp_i * neigh->r *
                               g_config.conf_atoms[neigh->nr - g_mpi.firstatom]
                                   .p_ind.x -
                           rp_j * neigh->r * atom->p_ind.x);
                  tmp_force.y =
                      grad_1 * neigh->dist.y -
                      tmp_2 *
                          (grad_2 * neigh->dist.y -
                           rp_i * neigh->r *
                               g_config.conf_atoms[neigh->nr - g_mpi.firstatom]
                                   .p_ind.y -
                           rp_j * neigh->r * atom->p_ind.y);
                  tmp_force.z =
                      grad_1 * neigh->dist.z -
                      tmp_2 *
                          (grad_2 * neigh->dist.z -
                           rp_i * neigh->r *
                               g_config.conf_atoms[neigh->nr - g_mpi.firstatom]
                                   .p_ind.z -
                           rp_j * neigh->r * atom->p_ind.z);
                  forces[n_i + 0] -= tmp_force.x;
                  forces[n_i + 1] -= tmp_force.y;
                  forces[n_i + 2] -= tmp_force.z;
                  /* actio = reactio */
                  n_j = 3 * neigh->nr;
                  forces[n_j + 0] += tmp_force.x;
                  forces[n_j + 1] += tmp_force.y;
                  forces[n_j + 2] += tmp_force.z;

#if defined(STRESS)
                  /* calculate stresses */
                  if (us) {
                    forces[stresses + 0] += neigh->dist.x * tmp_force.x;
                    forces[stresses + 1] += neigh->dist.y * tmp_force.y;
                    forces[stresses + 2] += neigh->dist.z * tmp_force.z;
                    forces[stresses + 3] += neigh->dist.x * tmp_force.y;
                    forces[stresses + 4] += neigh->dist.y * tmp_force.z;
                    forces[stresses + 5] += neigh->dist.z * tmp_force.x;
                  }
#endif  // STRESS
                }
              }
            }
          } /* loop over neighbours */
        }   /* end F O U R T H loop over atoms */
#endif      // DIPOLE

        /* F I F T H  loop: self energy contributions and sum-up force
         * contributions */
        double qq;
#if defined(DIPOLE)
        double pp;
#endif                                             // DIPOLE
        for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
          atom =
              g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom;
          type1 = atom->type;
          n_i = 3 * (g_config.cnfstart[h] + i);

          /* self energy contributions */
          if (charge[type1]) {
            qq = charge[type1] * charge[type1];
            fnval = DP_EPS * dp_kappa * qq / sqrt(M_PI);
            forces[g_calc.energy_p + h] -= fnval;
          }
#if defined(DIPOLE)
          if (dp_alpha[type1]) {
            pp = SPROD(atom->p_ind, atom->p_ind);
            fnval = pp / (2 * dp_alpha[type1]);
            forces[g_calc.energy_p + h] += fnval;
          }
/* alternative dipole self energy including kappa-dependence */
// if (dp_alpha[type1]) {
// pp = SPROD(atom->p_ind, atom->p_ind);
// fnval = kkk * pp / sqrt(M_PI);
// forces[energy_p + h] += fnval;
//}
#endif  // DIPOLE

          /* sum-up: whole force contributions flow into tmpsum */
          if (uf) {
#if defined(FWEIGHT)
            /* Weigh by absolute value of force */
            forces[n_i + 0] /= FORCE_EPS + atom->absforce;
            forces[n_i + 1] /= FORCE_EPS + atom->absforce;
            forces[n_i + 2] /= FORCE_EPS + atom->absforce;
#endif  // FWEIGHT
#if defined(CONTRIB)
            if (atom->contrib)
#endif  // CONTRIB
              tmpsum += g_config.conf_weight[h] *
                        (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) +
                         dsquare(forces[n_i + 2]));
          }
        } /* end F I F T H loop over atoms */

        /* whole energy contributions flow into tmpsum */
        forces[g_calc.energy_p + h] /= (double)g_config.inconf[h];
        forces[g_calc.energy_p + h] -= g_config.force_0[g_calc.energy_p + h];
        tmpsum += g_config.conf_weight[h] * g_param.eweight *
                  dsquare(forces[g_calc.energy_p + h]);

#if defined(STRESS)
        /* whole stress contributions flow into tmpsum */
        if (uf && us) {
          for (i = 0; i < 6; i++) {
            forces[stresses + i] /= g_config.conf_vol[h - g_mpi.firstconf];
            forces[stresses + i] -= g_config.force_0[stresses + i];
            tmpsum += g_config.conf_weight[h] * g_param.sweight *
                      dsquare(forces[stresses + i]);
          }
        }
#endif  // STRESS
      } /* end M A I N loop over configurations */
    }   /* parallel region */

/* dummy constraints (global) */
#if defined(APOT)
    /* add punishment for out of bounds (mostly for powell_lsq) */
    if (g_mpi.myid == 0) {
      tmpsum += apot_punish(xi_opt, forces);
    }
#endif  // APOT

#if defined(MPI)
    /* reduce global sum */
    sum = 0.0;
    MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    /* gather forces, energies, stresses */
    if (g_mpi.myid == 0) { /* root node already has data in place */
      /* forces */
      MPI_Gatherv(MPI_IN_PLACE, g_mpi.myatoms, g_mpi.MPI_VECTOR, forces,
                  g_mpi.atom_len, g_mpi.atom_dist, g_mpi.MPI_VECTOR, 0,
                  MPI_COMM_WORLD);
      /* energies */
      MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, MPI_DOUBLE,
                  forces + g_calc.energy_p, g_mpi.conf_len, g_mpi.conf_dist,
                  MPI_DOUBLE, 0, MPI_COMM_WORLD);
#if defined(STRESS)
      /* stresses */
      MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, g_mpi.MPI_STENS,
                  forces + g_calc.stress_p, g_mpi.conf_len, g_mpi.conf_dist,
                  g_mpi.MPI_STENS, 0, MPI_COMM_WORLD);
#endif  // STRESS
    } else {
      /* forces */
      MPI_Gatherv(forces + g_mpi.firstatom * 3, g_mpi.myatoms, g_mpi.MPI_VECTOR,
                  forces, g_mpi.atom_len, g_mpi.atom_dist, g_mpi.MPI_VECTOR, 0,
                  MPI_COMM_WORLD);
      /* energies */
      MPI_Gatherv(forces + g_calc.energy_p + g_mpi.firstconf, g_mpi.myconf,
                  MPI_DOUBLE, forces + g_calc.energy_p, g_mpi.conf_len,
                  g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#if defined(STRESS)
      /* stresses */
      MPI_Gatherv(forces + g_calc.stress_p + 6 * g_mpi.firstconf, g_mpi.myconf,
                  g_mpi.MPI_STENS, forces + g_calc.stress_p, g_mpi.conf_len,
                  g_mpi.conf_dist, g_mpi.MPI_STENS, 0, MPI_COMM_WORLD);
#endif  // STRESS
    }
#else
    sum = tmpsum; /* global sum = local sum  */
#endif  // MPI

    /* root process exits this function now */
    if (g_mpi.myid == 0) {
      g_calc.fcalls++; /* Increase function call counter */
      if (isnan(sum)) {
#if defined(DEBUG)
        printf("\n--> Force is nan! <--\n\n");
#endif  // DEBUG
        return 10e10;
      } else
        return sum;
    }
  }

  /* once a non-root process arrives here, all is done. */
  return -1.0;
}
Beispiel #3
0
double calc_forces(double* xi_opt, double* forces, int flag)
{
  double* xi = NULL;

  switch (g_pot.format_type) {
    case POTENTIAL_FORMAT_UNKNOWN:
      error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__, __LINE__);
    case POTENTIAL_FORMAT_ANALYTIC:
      xi = g_pot.calc_pot.table;
      break;
    case POTENTIAL_FORMAT_TABULATED_EQ_DIST:
    case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST:
      xi = xi_opt;
      break;
    case POTENTIAL_FORMAT_KIM:
      error(1, "KIM format is not supported by pair force routine!");
      break;
  }

#if !defined(MPI)
  g_mpi.myconf = g_config.nconf;
#endif  // !MPI

  // This is the start of an infinite loop

  while (1) {
    // sum of squares of local process
    double error_sum = 0.0;

#if defined(APOT) && !defined(MPI)
    if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC) {
      apot_check_params(xi_opt);
      update_calc_table(xi_opt, xi, 0);
    }
#endif  // APOT && !MPI

#if defined(MPI)
#if !defined(APOT)
    // exchange potential and flag value
    MPI_Bcast(xi, g_pot.calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif  // !APOT
    MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (flag == 1)
      break; // Exception: flag 1 means clean up

#if defined(APOT)
    if (g_mpi.myid == 0)
      apot_check_params(xi_opt);
    MPI_Bcast(xi_opt, g_calc.ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    update_calc_table(xi_opt, xi, 0);
#else   // APOT
    // if flag == 2 then the potential parameters have changed -> sync
    if (flag == 2)
      potsync();
#endif  // APOT
#endif  // MPI

    // init second derivatives for splines

    // pair potential
    //   [0, ...,  paircol - 1]
    update_splines(xi, 0, g_calc.paircol, 1);

    // loop over configurations
    for (int config_idx = g_mpi.firstconf; config_idx < g_mpi.firstconf + g_mpi.myconf; config_idx++) {
      int uf = g_config.conf_uf[config_idx - g_mpi.firstconf];
#if defined(STRESS)
      int us = g_config.conf_us[config_idx - g_mpi.firstconf];
#endif  // STRESS
      // reset energies and stresses
      forces[g_calc.energy_p + config_idx] = 0.0;
#if defined(STRESS)
      int stress_idx = g_calc.stress_p + 6 * config_idx;
      memset(forces + stress_idx, 0, 6 * sizeof(double));
#endif  // STRESS

#if defined(APOT)
      if (g_param.enable_cp)
        forces[g_calc.energy_p + config_idx] += chemical_potential(
            g_param.ntypes, g_config.na_type[config_idx], xi_opt + g_pot.cp_start);
#endif  // APOT

      // first loop: reset forces
      for (int atom_idx = 0; atom_idx < g_config.inconf[config_idx]; atom_idx++) {
        int n_i = 3 * (g_config.cnfstart[config_idx] + atom_idx);
        if (uf) {
          forces[n_i + 0] = -g_config.force_0[n_i + 0];
          forces[n_i + 1] = -g_config.force_0[n_i + 1];
          forces[n_i + 2] = -g_config.force_0[n_i + 2];
        } else {
          memset(forces + n_i, 0, 3 * sizeof(double));
        }
      }

      // second loop: calculate pair forces and energies
      for (int atom_idx = 0; atom_idx < g_config.inconf[config_idx]; atom_idx++) {
        atom_t* atom = g_config.conf_atoms + atom_idx + g_config.cnfstart[config_idx] - g_mpi.firstatom;
        int n_i = 3 * (g_config.cnfstart[config_idx] + atom_idx);
        // loop over all neighbors
        for (int neigh_idx = 0; neigh_idx < atom->num_neigh; neigh_idx++) {
          neigh_t* neigh = atom->neigh + neigh_idx;
          // In small cells, an atom might interact with itself
          int self = (neigh->nr == atom_idx + g_config.cnfstart[config_idx]) ? 1 : 0;

          // pair potential part
          if (neigh->r < g_pot.calc_pot.end[neigh->col[0]]) {
            double phi_val = 0.0;
            double phi_grad = 0.0;
            // potential value and gradient are calculated in the same step
            if (uf)
              phi_val = splint_comb_dir(&g_pot.calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0], &phi_grad);
            else
              phi_val = splint_dir(&g_pot.calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0]);

            // avoid double counting if atom is interacting with itself
            if (self) {
              phi_val *= 0.5;
              phi_grad *= 0.5;
            }

            // add cohesive energy
            forces[g_calc.energy_p + config_idx] += phi_val;

            // calculate forces
            if (uf) {
              vector tmp_force;
              tmp_force.x = neigh->dist_r.x * phi_grad;
              tmp_force.y = neigh->dist_r.y * phi_grad;
              tmp_force.z = neigh->dist_r.z * phi_grad;
              forces[n_i + 0] += tmp_force.x;
              forces[n_i + 1] += tmp_force.y;
              forces[n_i + 2] += tmp_force.z;
              // actio = reactio
              int n_j = 3 * neigh->nr;
              forces[n_j + 0] -= tmp_force.x;
              forces[n_j + 1] -= tmp_force.y;
              forces[n_j + 2] -= tmp_force.z;
#if defined(STRESS)
              /* also calculate pair stresses */
              if (us) {
                forces[stress_idx + 0] -= neigh->dist.x * tmp_force.x;
                forces[stress_idx + 1] -= neigh->dist.y * tmp_force.y;
                forces[stress_idx + 2] -= neigh->dist.z * tmp_force.z;
                forces[stress_idx + 3] -= neigh->dist.x * tmp_force.y;
                forces[stress_idx + 4] -= neigh->dist.y * tmp_force.z;
                forces[stress_idx + 5] -= neigh->dist.z * tmp_force.x;
              }
#endif  // STRESS
            }
          } // neighbors in range
        }   // loop over all neighbors

        // calculate contribution of forces right away
        if (uf) {
#if defined(FWEIGHT)
          // weigh by absolute value of force
          forces[n_i + 0] /= FORCE_EPS + atom->absforce;
          forces[n_i + 1] /= FORCE_EPS + atom->absforce;
          forces[n_i + 2] /= FORCE_EPS + atom->absforce;
#endif  // FWEIGHT

          // sum up forces
#if defined(CONTRIB)
          if (atom->contrib)
#endif  // CONTRIB
            error_sum += g_config.conf_weight[config_idx] * (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2]));
        }
      } // second loop over atoms

      // energy contributions
      forces[g_calc.energy_p + config_idx] /= (double)g_config.inconf[config_idx];
      forces[g_calc.energy_p + config_idx] -= g_config.force_0[g_calc.energy_p + config_idx];
      error_sum += g_config.conf_weight[config_idx] * g_param.eweight * dsquare(forces[g_calc.energy_p + config_idx]);

#if defined(STRESS)
      // stress contributions
      if (uf && us) {
        for (int i = 0; i < 6; i++) {
          forces[stress_idx + i] /= g_config.conf_vol[config_idx - g_mpi.firstconf];
          forces[stress_idx + i] -= g_config.force_0[stress_idx + i];
          error_sum += g_config.conf_weight[config_idx] * g_param.sweight * dsquare(forces[stress_idx + i]);
        }
      }
#endif  // STRESS

    } // loop over configurations

    // dummy constraints (global)
#if defined(APOT)
    // add punishment for out of bounds (mostly for powell_lsq)
    if (g_mpi.myid == 0)
      error_sum += apot_punish(xi_opt, forces);
#endif  // APOT

    gather_forces(&error_sum, forces);

    // root process exits this function now
    if (g_mpi.myid == 0) {
      // Increase function call counter
      g_calc.fcalls++;
      if (isnan(error_sum)) {
#if defined(DEBUG)
        printf("\n--> Force is nan! <--\n\n");
#endif  // DEBUG
        return 10e10;
      } else
        return error_sum;
    }
  } // end of infinite loop

  // once a non-root process arrives here, all is done
  return -1.0;
}
Beispiel #4
0
double calc_forces_elstat(double *xi_opt, double *forces, int flag)
{
  double tmpsum, sum = 0.;
  int   first, col, ne, size, i;
  double *xi = NULL;
  apot_table_t *apt = &apot_table;
  double charge[ntypes];
  double sum_charges;
  double dp_kappa;

#ifdef DIPOLE
  double dp_alpha[ntypes];
  double dp_b[apt->number];
  double dp_c[apt->number];
#endif /* DIPOLE */

  switch (format) {
      case 0:
	xi = calc_pot.table;
	break;
      case 3:			/* fall through */
      case 4:
	xi = xi_opt;		/* calc-table is opt-table */
	break;
      case 5:
	xi = calc_pot.table;	/* we need to update the calc-table */
  }

  ne = apot_table.total_ne_par;
  size = apt->number;

  /* This is the start of an infinite loop */
  while (1) {
    tmpsum = 0.;		/* sum of squares of local process */

#if defined APOT && !defined MPI
    if (format == 0) {
      apot_check_params(xi_opt);
      update_calc_table(xi_opt, xi, 0);
    }
#endif /* APOT && !MPI */

#ifdef MPI
    /* exchange potential and flag value */
#ifndef APOT
    MPI_Bcast(xi, calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif /* APOT */
    MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (flag == 1)
      break;			/* Exception: flag 1 means clean up */

#ifdef APOT
    if (myid == 0)
      apot_check_params(xi_opt);
    MPI_Bcast(xi_opt, ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    if (format == 0)
      update_calc_table(xi_opt, xi, 0);
#else /* APOT */
    /* if flag==2 then the potential parameters have changed -> sync */
    if (flag == 2)
      potsync();
#endif /* APOT */
#endif /* MPI */

    /* local arrays for electrostatic parameters */
    sum_charges = 0;
    for (i = 0; i < ntypes - 1; i++) {
      if (xi_opt[2 * size + ne + i]) {
	charge[i] = xi_opt[2 * size + ne + i];
	sum_charges += apt->ratio[i] * charge[i];
      } else {
	charge[i] = 0.;
      }
    }
    apt->last_charge = -sum_charges / apt->ratio[ntypes - 1];
    charge[ntypes - 1] = apt->last_charge;
    if (xi_opt[2 * size + ne + ntypes - 1]) {
      dp_kappa = xi_opt[2 * size + ne + ntypes - 1];
    } else {
      dp_kappa = 0.;
    }

#ifdef DIPOLE
    for (i = 0; i < ntypes; i++) {
      if (xi_opt[2 * size + ne + ntypes + i]) {
	dp_alpha[i] = xi_opt[2 * size + ne + ntypes + i];
      } else {
	dp_alpha[i] = 0.;
      }
    }
    for (i = 0; i < size; i++) {
      if (xi_opt[2 * size + ne + 2 * ntypes + i]) {
	dp_b[i] = xi_opt[2 * size + ne + 2 * ntypes + i];
      } else {
	dp_b[i] = 0.;
      }
      if (xi_opt[3 * size + ne + 2 * ntypes + i]) {
	dp_c[i] = xi_opt[3 * size + ne + 2 * ntypes + i];
      } else {
	dp_c[i] = 0.;
      }
    }
#endif /* DIPOLE */

    /* init second derivatives for splines */
    for (col = 0; col < paircol; col++) {
      first = calc_pot.first[col];
      if (format == 3 || format == 0) {
	spline_ed(calc_pot.step[col], xi + first,
	  calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, calc_pot.d2tab + first);
      } else {			/* format >= 4 ! */
	spline_ne(calc_pot.xcoord + first, xi + first,
	  calc_pot.last[col] - first + 1, *(xi + first - 2), 0.0, calc_pot.d2tab + first);
      }
    }

#ifndef MPI
    myconf = nconf;
#endif /* MPI */

    /* region containing loop over configurations,
       also OMP-parallelized region */
    {
      int   self;
      vector tmp_force;
      int   h, j, type1, type2, uf, us, stresses;
      int   n_i, n_j;
      double fnval, grad, fnval_tail, grad_tail, grad_i, grad_j;
#ifdef DIPOLE
      double p_sr_tail;
#endif /* DIPOLE */
      atom_t *atom;
      neigh_t *neigh;

      /* loop over configurations: M A I N LOOP CONTAINING ALL ATOM-LOOPS */
      for (h = firstconf; h < firstconf + myconf; h++) {
	uf = conf_uf[h - firstconf];
#ifdef STRESS
	us = conf_us[h - firstconf];
#endif /* STRESS */
	/* reset energies and stresses */
	forces[energy_p + h] = 0.0;
#ifdef STRESS
	stresses = stress_p + 6 * h;
	for (i = 0; i < 6; i++)
	  forces[stresses + i] = 0.0;
#endif /* STRESS */

#ifdef DIPOLE
	/* reset dipoles and fields: LOOP Z E R O */
	for (i = 0; i < inconf[h]; i++) {
	  atom = conf_atoms + i + cnfstart[h] - firstatom;
	  atom->E_stat.x = 0.0;
	  atom->E_stat.y = 0.0;
	  atom->E_stat.z = 0.0;
	  atom->p_sr.x = 0.0;
	  atom->p_sr.y = 0.0;
	  atom->p_sr.z = 0.0;
	}
#endif /* DIPOLE */

	/* F I R S T LOOP OVER ATOMS: reset forces, dipoles */
	for (i = 0; i < inconf[h]; i++) {	/* atoms */
	  n_i = 3 * (cnfstart[h] + i);
	  if (uf) {
	    forces[n_i + 0] = -force_0[n_i + 0];
	    forces[n_i + 1] = -force_0[n_i + 1];
	    forces[n_i + 2] = -force_0[n_i + 2];
	  } else {
	    forces[n_i + 0] = 0.0;
	    forces[n_i + 1] = 0.0;
	    forces[n_i + 2] = 0.0;
	  }
	}			/* end F I R S T LOOP */

	/* S E C O N D loop: calculate short-range and monopole forces,
	   calculate static field- and dipole-contributions */
	for (i = 0; i < inconf[h]; i++) {	/* atoms */
	  atom = conf_atoms + i + cnfstart[h] - firstatom;
	  type1 = atom->type;
	  n_i = 3 * (cnfstart[h] + i);
	  for (j = 0; j < atom->num_neigh; j++) {	/* neighbors */
	    neigh = atom->neigh + j;
	    type2 = neigh->type;
	    col = neigh->col[0];

	    /* updating tail-functions - only necessary with variing kappa */
	    if (!apt->sw_kappa)
	      elstat_shift(neigh->r, dp_kappa, &neigh->fnval_el, &neigh->grad_el, &neigh->ggrad_el);

	    /* In small cells, an atom might interact with itself */
	    self = (neigh->nr == i + cnfstart[h]) ? 1 : 0;

	    /* calculate short-range forces */
	    if (neigh->r < calc_pot.end[col]) {

	      if (uf) {
		fnval =
		  splint_comb_dir(&calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0], &grad);
	      } else {
		fnval = splint_dir(&calc_pot, xi, neigh->slot[0], neigh->shift[0], neigh->step[0]);
	      }

	      /* avoid double counting if atom is interacting with a
	         copy of itself */
	      if (self) {
		fnval *= 0.5;
		grad *= 0.5;
	      }
	      forces[energy_p + h] += fnval;

	      if (uf) {
		tmp_force.x = neigh->dist_r.x * grad;
		tmp_force.y = neigh->dist_r.y * grad;
		tmp_force.z = neigh->dist_r.z * grad;
		forces[n_i + 0] += tmp_force.x;
		forces[n_i + 1] += tmp_force.y;
		forces[n_i + 2] += tmp_force.z;
		/* actio = reactio */
		n_j = 3 * neigh->nr;
		forces[n_j + 0] -= tmp_force.x;
		forces[n_j + 1] -= tmp_force.y;
		forces[n_j + 2] -= tmp_force.z;

#ifdef STRESS
		/* calculate pair stresses */
		if (us) {
		  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
		  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
		  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
		  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
		  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
		  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
		}
#endif /* STRESS */
	      }
	    }

	    /* calculate monopole forces */
	    if (neigh->r < dp_cut && (charge[type1] || charge[type2])) {

	      fnval_tail = neigh->fnval_el;
	      grad_tail = neigh->grad_el;

	      grad_i = charge[type2] * grad_tail;
	      if (type1 == type2) {
		grad_j = grad_i;
	      } else {
		grad_j = charge[type1] * grad_tail;
	      }
	      fnval = charge[type1] * charge[type2] * fnval_tail;
	      grad = charge[type1] * grad_i;

	      if (self) {
		grad_i *= 0.5;
		grad_j *= 0.5;
		fnval *= 0.5;
		grad *= 0.5;
	      }

	      forces[energy_p + h] += fnval;

	      if (uf) {
		tmp_force.x = neigh->dist.x * grad;
		tmp_force.y = neigh->dist.y * grad;
		tmp_force.z = neigh->dist.z * grad;
		forces[n_i + 0] += tmp_force.x;
		forces[n_i + 1] += tmp_force.y;
		forces[n_i + 2] += tmp_force.z;
		/* actio = reactio */
		n_j = 3 * neigh->nr;
		forces[n_j + 0] -= tmp_force.x;
		forces[n_j + 1] -= tmp_force.y;
		forces[n_j + 2] -= tmp_force.z;
#ifdef STRESS
		/* calculate coulomb stresses */
		if (us) {
		  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
		  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
		  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
		  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
		  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
		  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
		}
#endif /* STRESS */
	      }
#ifdef DIPOLE
	      /* calculate static field-contributions */
	      atom->E_stat.x += neigh->dist.x * grad_i;
	      atom->E_stat.y += neigh->dist.y * grad_i;
	      atom->E_stat.z += neigh->dist.z * grad_i;

	      conf_atoms[neigh->nr - firstatom].E_stat.x -= neigh->dist.x * grad_j;
	      conf_atoms[neigh->nr - firstatom].E_stat.y -= neigh->dist.y * grad_j;
	      conf_atoms[neigh->nr - firstatom].E_stat.z -= neigh->dist.z * grad_j;

	      /* calculate short-range dipoles */
	      if (dp_alpha[type1] && dp_b[col] && dp_c[col]) {
		p_sr_tail =
		  grad_tail * neigh->r * shortrange_value(neigh->r, dp_alpha[type1], dp_b[col], dp_c[col]);
		atom->p_sr.x += charge[type2] * neigh->dist_r.x * p_sr_tail;
		atom->p_sr.y += charge[type2] * neigh->dist_r.y * p_sr_tail;
		atom->p_sr.z += charge[type2] * neigh->dist_r.z * p_sr_tail;
	      }
	      if (dp_alpha[type2] && dp_b[col] && dp_c[col] && !self) {
		p_sr_tail =
		  grad_tail * neigh->r * shortrange_value(neigh->r, dp_alpha[type2], dp_b[col], dp_c[col]);
		conf_atoms[neigh->nr - firstatom].p_sr.x -= charge[type1] * neigh->dist_r.x * p_sr_tail;
		conf_atoms[neigh->nr - firstatom].p_sr.y -= charge[type1] * neigh->dist_r.y * p_sr_tail;
		conf_atoms[neigh->nr - firstatom].p_sr.z -= charge[type1] * neigh->dist_r.z * p_sr_tail;
	      }
#endif /* DIPOLE */

	    }

	  }			/* loop over neighbours */
	}			/* end S E C O N D loop over atoms */

#ifdef DIPOLE
	/* T H I R D loop: calculate whole dipole moment for every atom */
	double rp, dp_sum;
	int   dp_converged = 0, dp_it = 0;
	double max_diff = 10;

	while (dp_converged == 0) {
	  dp_sum = 0;
	  for (i = 0; i < inconf[h]; i++) {	/* atoms */
	    atom = conf_atoms + i + cnfstart[h] - firstatom;
	    type1 = atom->type;
	    if (dp_alpha[type1]) {

	      if (dp_it) {
		/* note: mixing parameter is different from that on in IMD */
		atom->E_tot.x = (1 - dp_mix) * atom->E_ind.x + dp_mix * atom->E_old.x + atom->E_stat.x;
		atom->E_tot.y = (1 - dp_mix) * atom->E_ind.y + dp_mix * atom->E_old.y + atom->E_stat.y;
		atom->E_tot.z = (1 - dp_mix) * atom->E_ind.z + dp_mix * atom->E_old.z + atom->E_stat.z;
	      } else {
		atom->E_tot.x = atom->E_ind.x + atom->E_stat.x;
		atom->E_tot.y = atom->E_ind.y + atom->E_stat.y;
		atom->E_tot.z = atom->E_ind.z + atom->E_stat.z;
	      }

	      atom->p_ind.x = dp_alpha[type1] * atom->E_tot.x + atom->p_sr.x;
	      atom->p_ind.y = dp_alpha[type1] * atom->E_tot.y + atom->p_sr.y;
	      atom->p_ind.z = dp_alpha[type1] * atom->E_tot.z + atom->p_sr.z;

	      atom->E_old.x = atom->E_ind.x;
	      atom->E_old.y = atom->E_ind.y;
	      atom->E_old.z = atom->E_ind.z;

	      atom->E_ind.x = 0.;
	      atom->E_ind.y = 0.;
	      atom->E_ind.z = 0.;
	    }
	  }

	  for (i = 0; i < inconf[h]; i++) {	/* atoms */
	    atom = conf_atoms + i + cnfstart[h] - firstatom;
	    type1 = atom->type;
	    for (j = 0; j < atom->num_neigh; j++) {	/* neighbors */
	      neigh = atom->neigh + j;
	      type2 = neigh->type;
	      col = neigh->col[0];
	      /* In small cells, an atom might interact with itself */
	      self = (neigh->nr == i + cnfstart[h]) ? 1 : 0;

	      if (neigh->r < dp_cut && dp_alpha[type1] && dp_alpha[type2]) {

		rp = SPROD(conf_atoms[neigh->nr - firstatom].p_ind, neigh->dist_r);
		atom->E_ind.x +=
		  neigh->grad_el * (3 * rp * neigh->dist_r.x - conf_atoms[neigh->nr - firstatom].p_ind.x);
		atom->E_ind.y +=
		  neigh->grad_el * (3 * rp * neigh->dist_r.y - conf_atoms[neigh->nr - firstatom].p_ind.y);
		atom->E_ind.z +=
		  neigh->grad_el * (3 * rp * neigh->dist_r.z - conf_atoms[neigh->nr - firstatom].p_ind.z);

		if (!self) {
		  rp = SPROD(atom->p_ind, neigh->dist_r);
		  conf_atoms[neigh->nr - firstatom].E_ind.x +=
		    neigh->grad_el * (3 * rp * neigh->dist_r.x - atom->p_ind.x);
		  conf_atoms[neigh->nr - firstatom].E_ind.y +=
		    neigh->grad_el * (3 * rp * neigh->dist_r.y - atom->p_ind.y);
		  conf_atoms[neigh->nr - firstatom].E_ind.z +=
		    neigh->grad_el * (3 * rp * neigh->dist_r.z - atom->p_ind.z);
		}
	      }
	    }
	  }

	  for (i = 0; i < inconf[h]; i++) {	/* atoms */
	    atom = conf_atoms + i + cnfstart[h] - firstatom;
	    type1 = atom->type;
	    if (dp_alpha[type1]) {
	      dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.x - atom->E_ind.x));
	      dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.y - atom->E_ind.y));
	      dp_sum += dsquare(dp_alpha[type1] * (atom->E_old.z - atom->E_ind.z));
	    }
	  }

	  dp_sum /= 3 * inconf[h];
	  dp_sum = sqrt(dp_sum);

	  if (dp_it) {
	    if ((dp_sum > max_diff) || (dp_it > 50)) {
	      dp_converged = 1;
	      for (i = 0; i < inconf[h]; i++) {	/* atoms */
		atom = conf_atoms + i + cnfstart[h] - firstatom;
		type1 = atom->type;
		if (dp_alpha[type1]) {
		  atom->p_ind.x = dp_alpha[type1] * atom->E_stat.x + atom->p_sr.x;
		  atom->p_ind.y = dp_alpha[type1] * atom->E_stat.y + atom->p_sr.y;
		  atom->p_ind.z = dp_alpha[type1] * atom->E_stat.z + atom->p_sr.z;
		  atom->E_ind.x = atom->E_stat.x;
		  atom->E_ind.y = atom->E_stat.y;
		  atom->E_ind.z = atom->E_stat.z;
		}
	      }
	    }
	  }

	  if (dp_sum < dp_tol) {
	    dp_converged = 1;
	  }

	  dp_it++;
	}			/* end T H I R D loop over atoms */


	/* F O U R T H  loop: calculate monopole-dipole and dipole-dipole forces */
	double rp_i, rp_j, pp_ij, tmp_1, tmp_2;
	double grad_1, grad_2, srval, srgrad, srval_tail, srgrad_tail, fnval_sum, grad_sum;
	for (i = 0; i < inconf[h]; i++) {	/* atoms */
	  atom = conf_atoms + i + cnfstart[h] - firstatom;
	  type1 = atom->type;
	  n_i = 3 * (cnfstart[h] + i);
	  for (j = 0; j < atom->num_neigh; j++) {	/* neighbors */
	    neigh = atom->neigh + j;
	    type2 = neigh->type;
	    col = neigh->col[0];

	    /* In small cells, an atom might interact with itself */
	    self = (neigh->nr == i + cnfstart[h]) ? 1 : 0;
	    if (neigh->r < dp_cut && (dp_alpha[type1] || dp_alpha[type2])) {

	      fnval_tail = -neigh->grad_el;
	      grad_tail = -neigh->ggrad_el;

	      if (dp_b[col] && dp_c[col]) {
		shortrange_term(neigh->r, dp_b[col], dp_c[col], &srval_tail, &srgrad_tail);
		srval = fnval_tail * srval_tail;
		srgrad = fnval_tail * srgrad_tail + grad_tail * srval_tail;
	      }

	      if (self) {
		fnval_tail *= 0.5;
		grad_tail *= 0.5;
	      }

	      /* monopole-dipole contributions */
	      if (charge[type1] && dp_alpha[type2]) {

		if (dp_b[col] && dp_c[col]) {
		  fnval_sum = fnval_tail + srval;
		  grad_sum = grad_tail + srgrad;
		} else {
		  fnval_sum = fnval_tail;
		  grad_sum = grad_tail;
		}

		rp_j = SPROD(conf_atoms[neigh->nr - firstatom].p_ind, neigh->dist_r);
		fnval = charge[type1] * rp_j * fnval_sum * neigh->r;
		grad_1 = charge[type1] * rp_j * grad_sum * neigh->r2;
		grad_2 = charge[type1] * fnval_sum;

		forces[energy_p + h] -= fnval;

		if (uf) {
		  tmp_force.x = neigh->dist_r.x * grad_1 + conf_atoms[neigh->nr - firstatom].p_ind.x * grad_2;
		  tmp_force.y = neigh->dist_r.y * grad_1 + conf_atoms[neigh->nr - firstatom].p_ind.y * grad_2;
		  tmp_force.z = neigh->dist_r.z * grad_1 + conf_atoms[neigh->nr - firstatom].p_ind.z * grad_2;
		  forces[n_i + 0] -= tmp_force.x;
		  forces[n_i + 1] -= tmp_force.y;
		  forces[n_i + 2] -= tmp_force.z;
		  /* actio = reactio */
		  n_j = 3 * neigh->nr;
		  forces[n_j + 0] += tmp_force.x;
		  forces[n_j + 1] += tmp_force.y;
		  forces[n_j + 2] += tmp_force.z;
#ifdef STRESS
		  /* calculate stresses */
		  if (us) {
		    forces[stresses + 0] += neigh->dist.x * tmp_force.x;
		    forces[stresses + 1] += neigh->dist.y * tmp_force.y;
		    forces[stresses + 2] += neigh->dist.z * tmp_force.z;
		    forces[stresses + 3] += neigh->dist.x * tmp_force.y;
		    forces[stresses + 4] += neigh->dist.y * tmp_force.z;
		    forces[stresses + 5] += neigh->dist.z * tmp_force.x;
		  }
#endif /* STRESS */
		}
	      }

	      /* dipole-monopole contributions */
	      if (dp_alpha[type2] && charge[type2]) {

		if (dp_b[col] && dp_c[col]) {
		  fnval_sum = fnval_tail + srval;
		  grad_sum = grad_tail + srgrad;
		} else {
		  fnval_sum = fnval_tail;
		  grad_sum = grad_tail;
		}

		rp_i = SPROD(atom->p_ind, neigh->dist_r);
		fnval = charge[type2] * rp_i * fnval_sum * neigh->r;
		grad_1 = charge[type2] * rp_i * grad_sum * neigh->r2;
		grad_2 = charge[type2] * fnval_sum;

		forces[energy_p + h] += fnval;

		if (uf) {
		  tmp_force.x = neigh->dist_r.x * grad_1 + atom->p_ind.x * grad_2;
		  tmp_force.y = neigh->dist_r.y * grad_1 + atom->p_ind.y * grad_2;
		  tmp_force.z = neigh->dist_r.z * grad_1 + atom->p_ind.z * grad_2;
		  forces[n_i + 0] += tmp_force.x;
		  forces[n_i + 1] += tmp_force.y;
		  forces[n_i + 2] += tmp_force.z;
		  /* actio = reactio */
		  n_j = 3 * neigh->nr;
		  forces[n_j + 0] -= tmp_force.x;
		  forces[n_j + 1] -= tmp_force.y;
		  forces[n_j + 2] -= tmp_force.z;
#ifdef STRESS
		  /* calculate stresses */
		  if (us) {
		    forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
		    forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
		    forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
		    forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
		    forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
		    forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
		  }
#endif /* STRESS */
		}
	      }


	      /* dipole-dipole contributions */
	      if (dp_alpha[type1] && dp_alpha[type2]) {

		pp_ij = SPROD(atom->p_ind, conf_atoms[neigh->nr - firstatom].p_ind);
		tmp_1 = 3 * rp_i * rp_j;
		tmp_2 = 3 * fnval_tail / neigh->r2;

		fnval = -(tmp_1 - pp_ij) * fnval_tail;
		grad_1 = (tmp_1 - pp_ij) * grad_tail;
		grad_2 = 2 * rp_i * rp_j;

		forces[energy_p + h] += fnval;

		if (uf) {
		  tmp_force.x =
		    grad_1 * neigh->dist.x -
		    tmp_2 * (grad_2 * neigh->dist.x -
		    rp_i * neigh->r * conf_atoms[neigh->nr - firstatom].p_ind.x -
		    rp_j * neigh->r * atom->p_ind.x);
		  tmp_force.y =
		    grad_1 * neigh->dist.y - tmp_2 * (grad_2 * neigh->dist.y -
		    rp_i * neigh->r * conf_atoms[neigh->nr - firstatom].p_ind.y -
		    rp_j * neigh->r * atom->p_ind.y);
		  tmp_force.z =
		    grad_1 * neigh->dist.z - tmp_2 * (grad_2 * neigh->dist.z -
		    rp_i * neigh->r * conf_atoms[neigh->nr - firstatom].p_ind.z -
		    rp_j * neigh->r * atom->p_ind.z);
		  forces[n_i + 0] -= tmp_force.x;
		  forces[n_i + 1] -= tmp_force.y;
		  forces[n_i + 2] -= tmp_force.z;
		  /* actio = reactio */
		  n_j = 3 * neigh->nr;
		  forces[n_j + 0] += tmp_force.x;
		  forces[n_j + 1] += tmp_force.y;
		  forces[n_j + 2] += tmp_force.z;

#ifdef STRESS
		  /* calculate stresses */
		  if (us) {
		    forces[stresses + 0] += neigh->dist.x * tmp_force.x;
		    forces[stresses + 1] += neigh->dist.y * tmp_force.y;
		    forces[stresses + 2] += neigh->dist.z * tmp_force.z;
		    forces[stresses + 3] += neigh->dist.x * tmp_force.y;
		    forces[stresses + 4] += neigh->dist.y * tmp_force.z;
		    forces[stresses + 5] += neigh->dist.z * tmp_force.x;
		  }
#endif /* STRESS */
		}
	      }
	    }
	  }			/* loop over neighbours */
	}			/* end F O U R T H loop over atoms */
#endif /* DIPOLE */


	/* F I F T H  loop: self energy contributions and sum-up force contributions */
	double qq;
#ifdef DIPOLE
	double pp;
#endif /* DIPOLE */
	for (i = 0; i < inconf[h]; i++) {	/* atoms */
	  atom = conf_atoms + i + cnfstart[h] - firstatom;
	  type1 = atom->type;
	  n_i = 3 * (cnfstart[h] + i);

	  /* self energy contributions */
	  if (charge[type1]) {
	    qq = charge[type1] * charge[type1];
	    fnval = dp_eps * dp_kappa * qq / sqrt(M_PI);
	    forces[energy_p + h] -= fnval;
	  }
#ifdef DIPOLE
	  if (dp_alpha[type1]) {
	    pp = SPROD(atom->p_ind, atom->p_ind);
	    fnval = pp / (2 * dp_alpha[type1]);
	    forces[energy_p + h] += fnval;
	  }
	  /* alternative dipole self energy including kappa-dependence */
	  //if (dp_alpha[type1]) {
	  // pp = SPROD(atom->p_ind, atom->p_ind);
	  // fnval = kkk * pp / sqrt(M_PI);
	  // forces[energy_p + h] += fnval;
	  //}
#endif /* DIPOLE */


	  /* sum-up: whole force contributions flow into tmpsum */
	  if (uf) {
#ifdef FWEIGHT
	    /* Weigh by absolute value of force */
	    forces[n_i + 0] /= FORCE_EPS + atom->absforce;
	    forces[n_i + 1] /= FORCE_EPS + atom->absforce;
	    forces[n_i + 2] /= FORCE_EPS + atom->absforce;
#endif /* FWEIGHT */
#ifdef CONTRIB
	    if (atom->contrib)
#endif /* CONTRIB */
	      tmpsum +=
		conf_weight[h] * (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i +
		    2]));
	  }
	}			/* end F I F T H loop over atoms */


	/* whole energy contributions flow into tmpsum */
	forces[energy_p + h] /= (double)inconf[h];
	forces[energy_p + h] -= force_0[energy_p + h];
	tmpsum += conf_weight[h] * eweight * dsquare(forces[energy_p + h]);

#ifdef STRESS
	/* whole stress contributions flow into tmpsum */
	if (uf && us) {
	  for (i = 0; i < 6; i++) {
	    forces[stresses + i] /= conf_vol[h - firstconf];
	    forces[stresses + i] -= force_0[stresses + i];
	    tmpsum += conf_weight[h] * sweight * dsquare(forces[stresses + i]);
	  }
	}
#endif /* STRESS */
      }				/* end M A I N loop over configurations */
    }				/* parallel region */

    /* dummy constraints (global) */
#ifdef APOT
    /* add punishment for out of bounds (mostly for powell_lsq) */
    if (myid == 0) {
      tmpsum += apot_punish(xi_opt, forces);
    }
#endif /* APOT */

    sum = tmpsum;		/* global sum = local sum  */

#ifdef MPI
    /* reduce global sum */
    sum = 0.;
    MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    /* gather forces, energies, stresses */
    if (myid == 0) {		/* root node already has data in place */
      /* forces */
      MPI_Gatherv(MPI_IN_PLACE, myatoms, MPI_VECTOR, forces, atom_len,
	atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD);
      /* energies */
      MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_DOUBLE, forces + natoms * 3,
	conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      /* stresses */
      MPI_Gatherv(MPI_IN_PLACE, myconf, MPI_STENS, forces + natoms * 3 + nconf,
	conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD);
    } else {
      /* forces */
      MPI_Gatherv(forces + firstatom * 3, myatoms, MPI_VECTOR, forces, atom_len,
	atom_dist, MPI_VECTOR, 0, MPI_COMM_WORLD);
      /* energies */
      MPI_Gatherv(forces + natoms * 3 + firstconf, myconf, MPI_DOUBLE,
	forces + natoms * 3, conf_len, conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      /* stresses */
      MPI_Gatherv(forces + natoms * 3 + nconf + 6 * firstconf, myconf, MPI_STENS,
	forces + natoms * 3 + nconf, conf_len, conf_dist, MPI_STENS, 0, MPI_COMM_WORLD);
    }
#endif /* MPI */

    /* root process exits this function now */
    if (myid == 0) {
      fcalls++;			/* Increase function call counter */
      if (isnan(sum)) {
#ifdef DEBUG
	printf("\n--> Force is nan! <--\n\n");
#endif /* DEBUG */
	return 10e10;
      } else
	return sum;
    }

  }

  /* once a non-root process arrives here, all is done. */
  return -1.;
}
Beispiel #5
0
double calc_forces(double* xi_opt, double* forces, int flag)
{
  double tmpsum, sum = 0.0;
  int first, col, ne, size, i = flag;
  double* xi = NULL;
  apot_table_t* apt = &g_pot.apot_table;
  double charge[g_param.ntypes];
  double sum_charges;
  double dp_kappa;

#if defined(DIPOLE)
  double dp_alpha[g_param.ntypes];
  double dp_b[g_calc.paircol];
  double dp_c[g_calc.paircol];
#endif  // DIPOLE

  static double rho_sum_loc, rho_sum;
  rho_sum_loc = rho_sum = 0.0;

  switch (g_pot.format_type) {
    case POTENTIAL_FORMAT_UNKNOWN:
      error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__, __LINE__);
    case POTENTIAL_FORMAT_ANALYTIC:
      xi = g_pot.calc_pot.table;
      break;
    case POTENTIAL_FORMAT_TABULATED_EQ_DIST:
    case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST:
      xi = xi_opt;
      break;
    case POTENTIAL_FORMAT_KIM:
      error(1, "KIM format is not supported by EAM elstat force routine!");
  }

#if !defined(MPI)
  g_mpi.myconf = g_config.nconf;
#endif  // MPI

  ne = g_pot.apot_table.total_ne_par;
  size = apt->number;

  /* This is the start of an infinite loop */
  while (1) {
    tmpsum = 0.0; /* sum of squares of local process */
    rho_sum_loc = 0.0;

#if defined APOT && !defined MPI
    if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC) {
      apot_check_params(xi_opt);
      update_calc_table(xi_opt, xi, 0);
    }
#endif  // APOT && !MPI

#if defined(MPI)
/* exchange potential and flag value */
#if !defined(APOT)
    MPI_Bcast(xi, g_pot.calc_pot.len, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif  // APOT
    MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (flag == 1)
      break; /* Exception: flag 1 means clean up */

#if defined(APOT)
    if (g_mpi.myid == 0)
      apot_check_params(xi_opt);
    MPI_Bcast(xi_opt, g_calc.ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    if (g_pot.format_type == POTENTIAL_FORMAT_ANALYTIC)
      update_calc_table(xi_opt, xi, 0);
#else   /* APOT */
    /* if flag==2 then the potential parameters have changed -> sync */
    if (flag == 2)
      potsync();
#endif  // APOT
#endif  // MPI

    /* local arrays for electrostatic parameters */
    sum_charges = 0;
    for (i = 0; i < g_param.ntypes - 1; i++) {
      if (xi_opt[2 * size + ne + i]) {
        charge[i] = xi_opt[2 * size + ne + i];
        sum_charges += apt->ratio[i] * charge[i];
      } else {
        charge[i] = 0.0;
      }
    }
    apt->last_charge = -sum_charges / apt->ratio[g_param.ntypes - 1];
    charge[g_param.ntypes - 1] = apt->last_charge;
    if (xi_opt[2 * size + ne + g_param.ntypes - 1]) {
      dp_kappa = xi_opt[2 * size + ne + g_param.ntypes - 1];
    } else {
      dp_kappa = 0.0;
    }

#if defined(DIPOLE)
    for (i = 0; i < g_param.ntypes; i++) {
      if (xi_opt[2 * size + ne + g_param.ntypes + i]) {
        dp_alpha[i] = xi_opt[2 * size + ne + g_param.ntypes + i];
      } else {
        dp_alpha[i] = 0.0;
      }
    }
    for (i = 0; i < g_calc.paircol; i++) {
      if (xi_opt[2 * size + ne + 2 * g_param.ntypes + i]) {
        dp_b[i] = xi_opt[2 * size + ne + 2 * g_param.ntypes + i];
      } else {
        dp_b[i] = 0.0;
      }
      if (xi_opt[2 * size + ne + 2 * g_param.ntypes + g_calc.paircol + i]) {
        dp_c[i] =
            xi_opt[2 * size + ne + 2 * g_param.ntypes + g_calc.paircol + i];
      } else {
        dp_c[i] = 0.0;
      }
    }
#endif  // DIPOLE

    /* init second derivatives for splines */

    /* pair potentials & rho */
    for (col = 0; col < g_calc.paircol + g_param.ntypes; col++) {
      first = g_pot.calc_pot.first[col];

      switch (g_pot.format_type) {
        case POTENTIAL_FORMAT_UNKNOWN:
          error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__,
                __LINE__);
        case POTENTIAL_FORMAT_ANALYTIC:
        case POTENTIAL_FORMAT_TABULATED_EQ_DIST: {
          spline_ed(g_pot.calc_pot.step[col], xi + first,
                    g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2),
                    0.0, g_pot.calc_pot.d2tab + first);
          break;
        }
        case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: {
          spline_ne(g_pot.calc_pot.xcoord + first, xi + first,
                    g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2),
                    0.0, g_pot.calc_pot.d2tab + first);
        }
        case POTENTIAL_FORMAT_KIM:
          error(1, "KIM format is not supported by EAM elstat force routine!");
      }
    }

    /* F */
    for (col = g_calc.paircol + g_param.ntypes;
         col < g_calc.paircol + 2 * g_param.ntypes; col++) {
      first = g_pot.calc_pot.first[col];
      /* gradient at left boundary matched to square root function,
         when 0 not in domain(F), else natural spline */
      switch (g_pot.format_type) {
        case POTENTIAL_FORMAT_UNKNOWN:
          error(1, "Unknown potential format detected! (%s:%d)\n", __FILE__,
                __LINE__);
        case POTENTIAL_FORMAT_ANALYTIC:
        case POTENTIAL_FORMAT_TABULATED_EQ_DIST: {
          spline_ed(g_pot.calc_pot.step[col], xi + first,
                    g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2),
                    *(xi + first - 1), g_pot.calc_pot.d2tab + first);
          break;
        }
        case POTENTIAL_FORMAT_TABULATED_NON_EQ_DIST: {
          spline_ne(g_pot.calc_pot.xcoord + first, xi + first,
                    g_pot.calc_pot.last[col] - first + 1, *(xi + first - 2),
                    *(xi + first - 1), g_pot.calc_pot.d2tab + first);
        }
        case POTENTIAL_FORMAT_KIM:
          error(1, "KIM format is not supported by EAM elstat force routine!");
      }
    }

    /* region containing loop over configurations */
    {
      int self;
      vector tmp_force;
      int h, j, type1, type2, uf;
#if defined(STRESS)
      int us = 0;
      int stresses = 0;
#endif
      int n_i, n_j;
      double fnval, grad, fnval_tail, grad_tail, grad_i, grad_j;
#if defined(DIPOLE)
      double p_sr_tail = 0.0;
#endif
      atom_t* atom;
      neigh_t* neigh;
      double r;
      int col_F;
      double eam_force;
      double rho_val, rho_grad, rho_grad_j;

      /* loop over configurations: M A I N LOOP CONTAINING ALL ATOM-LOOPS */
      for (h = g_mpi.firstconf; h < g_mpi.firstconf + g_mpi.myconf; h++) {
        uf = g_config.conf_uf[h - g_mpi.firstconf];
#if defined(STRESS)
        us = g_config.conf_us[h - g_mpi.firstconf];
#endif  // STRESS
        /* reset energies and stresses */
        forces[g_calc.energy_p + h] = 0.0;
#if defined(STRESS)
        stresses = g_calc.stress_p + 6 * h;
        for (i = 0; i < 6; i++)
          forces[stresses + i] = 0.0;
#endif  // STRESS

        /* set limiting constraints */
        forces[g_calc.limit_p + h] = -g_config.force_0[g_calc.limit_p + h];

#if defined(DIPOLE)
        /* reset dipoles and fields: LOOP Z E R O */
        for (i = 0; i < g_config.inconf[h]; i++) {
          atom =
              g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom;
          atom->E_stat.x = 0.0;
          atom->E_stat.y = 0.0;
          atom->E_stat.z = 0.0;
          atom->p_sr.x = 0.0;
          atom->p_sr.y = 0.0;
          atom->p_sr.z = 0.0;
        }
#endif  // DIPOLE

        /* F I R S T LOOP OVER ATOMS: reset forces, dipoles */
        for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
          n_i = 3 * (g_config.cnfstart[h] + i);
          if (uf) {
            forces[n_i + 0] = -g_config.force_0[n_i + 0];
            forces[n_i + 1] = -g_config.force_0[n_i + 1];
            forces[n_i + 2] = -g_config.force_0[n_i + 2];
          } else {
            forces[n_i + 0] = 0.0;
            forces[n_i + 1] = 0.0;
            forces[n_i + 2] = 0.0;
          }
          /* reset atomic density */
          g_config.conf_atoms[g_config.cnfstart[h] - g_mpi.firstatom + i].rho =
              0.0;
        } /* end F I R S T LOOP */

        /* S E C O N D loop: calculate short-range and monopole forces,
           calculate static field- and dipole-contributions,
           calculate atomic densities */
        for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
          atom =
              g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom;
          type1 = atom->type;
          n_i = 3 * (g_config.cnfstart[h] + i);
          for (j = 0; j < atom->num_neigh; j++) { /* neighbors */
            neigh = atom->neigh + j;
            type2 = neigh->type;
            col = neigh->col[0];

            /* updating tail-functions - only necessary with variing kappa */
            if (!apt->sw_kappa)
#if defined(DSF)
              elstat_dsf(neigh->r, dp_kappa, &neigh->fnval_el,
                           &neigh->grad_el, &neigh->ggrad_el);
#else
              elstat_shift(neigh->r, dp_kappa, &neigh->fnval_el,
                           &neigh->grad_el, &neigh->ggrad_el);
#endif // DSF

            /* In small cells, an atom might interact with itself */
            self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0;

            /* calculate short-range forces */
            if (neigh->r < g_pot.calc_pot.end[col]) {
              if (uf) {
                fnval = splint_comb_dir(&g_pot.calc_pot, xi, neigh->slot[0],
                                        neigh->shift[0], neigh->step[0], &grad);
              } else {
                fnval = splint_dir(&g_pot.calc_pot, xi, neigh->slot[0],
                                   neigh->shift[0], neigh->step[0]);
              }

              /* avoid double counting if atom is interacting with a copy of
               * itself */
              if (self) {
                fnval *= 0.5;
                grad *= 0.5;
              }
              forces[g_calc.energy_p + h] += fnval;

              if (uf) {
                tmp_force.x = neigh->dist_r.x * grad;
                tmp_force.y = neigh->dist_r.y * grad;
                tmp_force.z = neigh->dist_r.z * grad;
                forces[n_i + 0] += tmp_force.x;
                forces[n_i + 1] += tmp_force.y;
                forces[n_i + 2] += tmp_force.z;
                /* actio = reactio */
                n_j = 3 * neigh->nr;
                forces[n_j + 0] -= tmp_force.x;
                forces[n_j + 1] -= tmp_force.y;
                forces[n_j + 2] -= tmp_force.z;
#if defined(STRESS)
                /* calculate pair stresses */
                if (us) {
                  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
                  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
                  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
                  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
                  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
                  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
                }
#endif  // STRESS
              }
            }

            /* calculate monopole forces */
            if (neigh->r < g_config.dp_cut &&
                (charge[type1] || charge[type2])) {
              fnval_tail = neigh->fnval_el;
              grad_tail = neigh->grad_el;

              grad_i = charge[type2] * grad_tail;
              if (type1 == type2) {
                grad_j = grad_i;
              } else {
                grad_j = charge[type1] * grad_tail;
              }
              fnval = charge[type1] * charge[type2] * fnval_tail;
              grad = charge[type1] * grad_i;

              if (self) {
                grad_i *= 0.5;
                grad_j *= 0.5;
                fnval *= 0.5;
                grad *= 0.5;
              }

              forces[g_calc.energy_p + h] += fnval;

              if (uf) {
                tmp_force.x = neigh->dist.x * grad;
                tmp_force.y = neigh->dist.y * grad;
                tmp_force.z = neigh->dist.z * grad;
                forces[n_i + 0] += tmp_force.x;
                forces[n_i + 1] += tmp_force.y;
                forces[n_i + 2] += tmp_force.z;
                /* actio = reactio */
                n_j = 3 * neigh->nr;
                forces[n_j + 0] -= tmp_force.x;
                forces[n_j + 1] -= tmp_force.y;
                forces[n_j + 2] -= tmp_force.z;
#if defined(STRESS)
                /* calculate coulomb stresses */
                if (us) {
                  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
                  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
                  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
                  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
                  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
                  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
                }
#endif  // STRESS
              }
#if defined(DIPOLE)
              /* calculate static field-contributions */
              atom->E_stat.x += neigh->dist.x * grad_i;
              atom->E_stat.y += neigh->dist.y * grad_i;
              atom->E_stat.z += neigh->dist.z * grad_i;

              g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.x -=
                  neigh->dist.x * grad_j;
              g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.y -=
                  neigh->dist.y * grad_j;
              g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_stat.z -=
                  neigh->dist.z * grad_j;

              /* calculate short-range dipoles */
              if (dp_alpha[type1] && dp_b[col] && dp_c[col]) {
                p_sr_tail = grad_tail * neigh->r *
                            shortrange_value(neigh->r, dp_alpha[type1],
                                             dp_b[col], dp_c[col]);
                atom->p_sr.x += charge[type2] * neigh->dist_r.x * p_sr_tail;
                atom->p_sr.y += charge[type2] * neigh->dist_r.y * p_sr_tail;
                atom->p_sr.z += charge[type2] * neigh->dist_r.z * p_sr_tail;
              }
              if (dp_alpha[type2] && dp_b[col] && dp_c[col] && !self) {
                p_sr_tail = grad_tail * neigh->r *
                            shortrange_value(neigh->r, dp_alpha[type2],
                                             dp_b[col], dp_c[col]);
                g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.x -=
                    charge[type1] * neigh->dist_r.x * p_sr_tail;
                g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.y -=
                    charge[type1] * neigh->dist_r.y * p_sr_tail;
                g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_sr.z -=
                    charge[type1] * neigh->dist_r.z * p_sr_tail;
              }
#endif  // DIPOLE
            }

            /* calculate atomic densities */
            if (atom->type == neigh->type) {
              /* then transfer(a->b)==transfer(b->a) */
              if (neigh->r < g_pot.calc_pot.end[neigh->col[1]]) {
                rho_val = splint_dir(&g_pot.calc_pot, xi, neigh->slot[1],
                                     neigh->shift[1], neigh->step[1]);
                atom->rho += rho_val;
                /* avoid double counting if atom is interacting with a
                   copy of itself */
                if (!self) {
                  g_config.conf_atoms[neigh->nr - g_mpi.firstatom].rho +=
                      rho_val;
                }
              }
            } else {
              /* transfer(a->b)!=transfer(b->a) */
              if (neigh->r < g_pot.calc_pot.end[neigh->col[1]]) {
                atom->rho += splint_dir(&g_pot.calc_pot, xi, neigh->slot[1],
                                        neigh->shift[1], neigh->step[1]);
              }
              /* cannot use slot/shift to access splines */
              if (neigh->r < g_pot.calc_pot.end[g_calc.paircol + atom->type])
                g_config.conf_atoms[neigh->nr - g_mpi.firstatom].rho +=
                    (*g_splint)(&g_pot.calc_pot, xi,
                                g_calc.paircol + atom->type, neigh->r);
            }

          } /* loop over neighbours */

          col_F =
              g_calc.paircol + g_param.ntypes + atom->type; /* column of F */
          if (atom->rho > g_pot.calc_pot.end[col_F]) {
            /* then punish target function -> bad potential */
            forces[g_calc.limit_p + h] +=
                DUMMY_WEIGHT * 10.0 *
                dsquare(atom->rho - g_pot.calc_pot.end[col_F]);
            atom->rho = g_pot.calc_pot.end[col_F];
          }

          if (atom->rho < g_pot.calc_pot.begin[col_F]) {
            /* then punish target function -> bad potential */
            forces[g_calc.limit_p + h] +=
                DUMMY_WEIGHT * 10.0 *
                dsquare(g_pot.calc_pot.begin[col_F] - atom->rho);
            atom->rho = g_pot.calc_pot.begin[col_F];
          }

/* embedding energy, embedding gradient */
/* contribution to cohesive energy is F(n) */
#if defined(NORESCALE)
          if (atom->rho < g_pot.calc_pot.begin[col_F]) {
            /* linear extrapolation left */
            rho_val = splint_comb(&calc_pot, xi, col_F,
                                  g_pot.calc_pot.begin[col_F], &atom->gradF);
            forces[energy_p + h] +=
                rho_val +
                (atom->rho - g_pot.calc_pot.begin[col_F]) * atom->gradF;
#if defined(APOT)
            forces[limit_p + h] += DUMMY_WEIGHT * 10.0 *
                                   dsquare(calc_pot.begin[col_F] - atom->rho);
#endif  // APOT
          } else if (atom->rho > g_pot.calc_pot.end[col_F]) {
            /* and right */
            rho_val = splint_comb(
                &calc_pot, xi, col_F,
                g_pot.calc_pot.end[col_F] - 0.5 * g_pot.calc_pot.step[col_F],
                &atom->gradF);
            forces[energy_p + h] +=
                rho_val + (atom->rho - g_pot.calc_pot.end[col_F]) * atom->gradF;
#if defined(APOT)
            forces[limit_p + h] +=
                DUMMY_WEIGHT * 10.0 *
                dsquare(atom->rho - g_pot.calc_pot.end[col_F]);
#endif  // APOT
          }
          /* and in-between */
          else {
            forces[energy_p + h] +=
                splint_comb(&calc_pot, xi, col_F, atom->rho, &atom->gradF);
          }
#else
          forces[g_calc.energy_p + h] += (*g_splint_comb)(
              &g_pot.calc_pot, xi, col_F, atom->rho, &atom->gradF);
#endif  // NORESCALE
          /* sum up rho */
          rho_sum_loc += atom->rho;

        } /* end S E C O N D loop over atoms */

#if defined(DIPOLE)
        /* T H I R D loop: calculate whole dipole moment for every atom */
        double rp, dp_sum;
        int dp_converged = 0, dp_it = 0;
        double max_diff = 10;

        while (dp_converged == 0) {
          dp_sum = 0;
          for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
            atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                   g_mpi.firstatom;
            type1 = atom->type;
            if (dp_alpha[type1]) {
              if (dp_it) {
                /* note: mixing parameter is different from that on in IMD */
                atom->E_tot.x = (1 - g_config.dp_mix) * atom->E_ind.x +
                                g_config.dp_mix * atom->E_old.x +
                                atom->E_stat.x;
                atom->E_tot.y = (1 - g_config.dp_mix) * atom->E_ind.y +
                                g_config.dp_mix * atom->E_old.y +
                                atom->E_stat.y;
                atom->E_tot.z = (1 - g_config.dp_mix) * atom->E_ind.z +
                                g_config.dp_mix * atom->E_old.z +
                                atom->E_stat.z;
              } else {
                atom->E_tot.x = atom->E_ind.x + atom->E_stat.x;
                atom->E_tot.y = atom->E_ind.y + atom->E_stat.y;
                atom->E_tot.z = atom->E_ind.z + atom->E_stat.z;
              }

              atom->p_ind.x = dp_alpha[type1] * atom->E_tot.x + atom->p_sr.x;
              atom->p_ind.y = dp_alpha[type1] * atom->E_tot.y + atom->p_sr.y;
              atom->p_ind.z = dp_alpha[type1] * atom->E_tot.z + atom->p_sr.z;

              atom->E_old.x = atom->E_ind.x;
              atom->E_old.y = atom->E_ind.y;
              atom->E_old.z = atom->E_ind.z;

              atom->E_ind.x = 0.0;
              atom->E_ind.y = 0.0;
              atom->E_ind.z = 0.0;
            }
          }

          for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
            atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                   g_mpi.firstatom;
            type1 = atom->type;
            for (j = 0; j < atom->num_neigh; j++) { /* neighbors */
              neigh = atom->neigh + j;
              type2 = neigh->type;
              col = neigh->col[0];
              /* In small cells, an atom might interact with itself */
              self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0;

              if (neigh->r < g_config.dp_cut && dp_alpha[type1] &&
                  dp_alpha[type2]) {
                rp = SPROD(
                    g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind,
                    neigh->dist_r);
                atom->E_ind.x +=
                    neigh->grad_el *
                    (3 * rp * neigh->dist_r.x -
                     g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.x);
                atom->E_ind.y +=
                    neigh->grad_el *
                    (3 * rp * neigh->dist_r.y -
                     g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.y);
                atom->E_ind.z +=
                    neigh->grad_el *
                    (3 * rp * neigh->dist_r.z -
                     g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.z);

                if (!self) {
                  rp = SPROD(atom->p_ind, neigh->dist_r);
                  g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.x +=
                      neigh->grad_el *
                      (3 * rp * neigh->dist_r.x - atom->p_ind.x);
                  g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.y +=
                      neigh->grad_el *
                      (3 * rp * neigh->dist_r.y - atom->p_ind.y);
                  g_config.conf_atoms[neigh->nr - g_mpi.firstatom].E_ind.z +=
                      neigh->grad_el *
                      (3 * rp * neigh->dist_r.z - atom->p_ind.z);
                }
              }
            }
          }

          for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
            atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                   g_mpi.firstatom;
            type1 = atom->type;
            if (dp_alpha[type1]) {
              dp_sum +=
                  dsquare(dp_alpha[type1] * (atom->E_old.x - atom->E_ind.x));
              dp_sum +=
                  dsquare(dp_alpha[type1] * (atom->E_old.y - atom->E_ind.y));
              dp_sum +=
                  dsquare(dp_alpha[type1] * (atom->E_old.z - atom->E_ind.z));
            }
          }

          dp_sum /= 3 * g_config.inconf[h];
          dp_sum = sqrt(dp_sum);

          if (dp_it) {
            if ((dp_sum > max_diff) || (dp_it > 50)) {
              dp_converged = 1;
              for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
                atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                       g_mpi.firstatom;
                type1 = atom->type;
                if (dp_alpha[type1]) {
                  atom->p_ind.x =
                      dp_alpha[type1] * atom->E_stat.x + atom->p_sr.x;
                  atom->p_ind.y =
                      dp_alpha[type1] * atom->E_stat.y + atom->p_sr.y;
                  atom->p_ind.z =
                      dp_alpha[type1] * atom->E_stat.z + atom->p_sr.z;
                  atom->E_ind.x = atom->E_stat.x;
                  atom->E_ind.y = atom->E_stat.y;
                  atom->E_ind.z = atom->E_stat.z;
                }
              }
            }
          }

          if (dp_sum < g_config.dp_tol)
            dp_converged = 1;

          dp_it++;
        } /* end T H I R D loop over atoms */

        /* F O U R T H  loop: calculate monopole-dipole and dipole-dipole forces
         */
        double rp_i, rp_j, pp_ij, tmp_1, tmp_2;
        double grad_1, grad_2, srval, srgrad, srval_tail, srgrad_tail,
            fnval_sum, grad_sum;

        for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
          atom =
              g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom;
          type1 = atom->type;
          n_i = 3 * (g_config.cnfstart[h] + i);
          for (j = 0; j < atom->num_neigh; j++) { /* neighbors */
            neigh = atom->neigh + j;
            type2 = neigh->type;
            col = neigh->col[0];

            /* In small cells, an atom might interact with itself */
            self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0;
            if (neigh->r < g_config.dp_cut &&
                (dp_alpha[type1] || dp_alpha[type2])) {
              fnval_tail = -neigh->grad_el;
              grad_tail = -neigh->ggrad_el;

              if (dp_b[col] && dp_c[col]) {
                shortrange_term(neigh->r, dp_b[col], dp_c[col], &srval_tail,
                                &srgrad_tail);
                srval = fnval_tail * srval_tail;
                srgrad = fnval_tail * srgrad_tail + grad_tail * srval_tail;
              }

              if (self) {
                fnval_tail *= 0.5;
                grad_tail *= 0.5;
              }

              /* monopole-dipole contributions */
              if (charge[type1] && dp_alpha[type2]) {
                if (dp_b[col] && dp_c[col]) {
                  fnval_sum = fnval_tail + srval;
                  grad_sum = grad_tail + srgrad;
                } else {
                  fnval_sum = fnval_tail;
                  grad_sum = grad_tail;
                }

                rp_j = SPROD(
                    g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind,
                    neigh->dist_r);
                fnval = charge[type1] * rp_j * fnval_sum * neigh->r;
                grad_1 = charge[type1] * rp_j * grad_sum * neigh->r2;
                grad_2 = charge[type1] * fnval_sum;

                forces[g_calc.energy_p + h] -= fnval;

                if (uf) {
                  tmp_force.x =
                      neigh->dist_r.x * grad_1 +
                      g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.x *
                          grad_2;
                  tmp_force.y =
                      neigh->dist_r.y * grad_1 +
                      g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.y *
                          grad_2;
                  tmp_force.z =
                      neigh->dist_r.z * grad_1 +
                      g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind.z *
                          grad_2;
                  forces[n_i + 0] -= tmp_force.x;
                  forces[n_i + 1] -= tmp_force.y;
                  forces[n_i + 2] -= tmp_force.z;
                  /* actio = reactio */
                  n_j = 3 * neigh->nr;
                  forces[n_j + 0] += tmp_force.x;
                  forces[n_j + 1] += tmp_force.y;
                  forces[n_j + 2] += tmp_force.z;

#if defined(STRESS)
                  /* calculate stresses */
                  if (us) {
                    forces[stresses + 0] += neigh->dist.x * tmp_force.x;
                    forces[stresses + 1] += neigh->dist.y * tmp_force.y;
                    forces[stresses + 2] += neigh->dist.z * tmp_force.z;
                    forces[stresses + 3] += neigh->dist.x * tmp_force.y;
                    forces[stresses + 4] += neigh->dist.y * tmp_force.z;
                    forces[stresses + 5] += neigh->dist.z * tmp_force.x;
                  }
#endif  // STRESS
                }
              }

              /* dipole-monopole contributions */
              if (dp_alpha[type2] && charge[type2]) {
                if (dp_b[col] && dp_c[col]) {
                  fnval_sum = fnval_tail + srval;
                  grad_sum = grad_tail + srgrad;
                } else {
                  fnval_sum = fnval_tail;
                  grad_sum = grad_tail;
                }

                rp_i = SPROD(atom->p_ind, neigh->dist_r);
                fnval = charge[type2] * rp_i * fnval_sum * neigh->r;
                grad_1 = charge[type2] * rp_i * grad_sum * neigh->r2;
                grad_2 = charge[type2] * fnval_sum;

                forces[g_calc.energy_p + h] += fnval;

                if (uf) {
                  tmp_force.x =
                      neigh->dist_r.x * grad_1 + atom->p_ind.x * grad_2;
                  tmp_force.y =
                      neigh->dist_r.y * grad_1 + atom->p_ind.y * grad_2;
                  tmp_force.z =
                      neigh->dist_r.z * grad_1 + atom->p_ind.z * grad_2;
                  forces[n_i + 0] += tmp_force.x;
                  forces[n_i + 1] += tmp_force.y;
                  forces[n_i + 2] += tmp_force.z;
                  /* actio = reactio */
                  n_j = 3 * neigh->nr;
                  forces[n_j + 0] -= tmp_force.x;
                  forces[n_j + 1] -= tmp_force.y;
                  forces[n_j + 2] -= tmp_force.z;

#if defined(STRESS)
                  /* calculate stresses */
                  if (us) {
                    forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
                    forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
                    forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
                    forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
                    forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
                    forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
                  }
#endif  // STRESS
                }
              }

              /* dipole-dipole contributions */
              if (dp_alpha[type1] && dp_alpha[type2]) {
                pp_ij = SPROD(
                    atom->p_ind,
                    g_config.conf_atoms[neigh->nr - g_mpi.firstatom].p_ind);
                tmp_1 = 3 * rp_i * rp_j;
                tmp_2 = 3 * fnval_tail / neigh->r2;

                fnval = -(tmp_1 - pp_ij) * fnval_tail;
                grad_1 = (tmp_1 - pp_ij) * grad_tail;
                grad_2 = 2 * rp_i * rp_j;

                forces[g_calc.energy_p + h] += fnval;

                if (uf) {
                  tmp_force.x =
                      grad_1 * neigh->dist.x -
                      tmp_2 *
                          (grad_2 * neigh->dist.x -
                           rp_i * neigh->r *
                               g_config.conf_atoms[neigh->nr - g_mpi.firstatom]
                                   .p_ind.x -
                           rp_j * neigh->r * atom->p_ind.x);
                  tmp_force.y =
                      grad_1 * neigh->dist.y -
                      tmp_2 *
                          (grad_2 * neigh->dist.y -
                           rp_i * neigh->r *
                               g_config.conf_atoms[neigh->nr - g_mpi.firstatom]
                                   .p_ind.y -
                           rp_j * neigh->r * atom->p_ind.y);
                  tmp_force.z =
                      grad_1 * neigh->dist.z -
                      tmp_2 *
                          (grad_2 * neigh->dist.z -
                           rp_i * neigh->r *
                               g_config.conf_atoms[neigh->nr - g_mpi.firstatom]
                                   .p_ind.z -
                           rp_j * neigh->r * atom->p_ind.z);
                  forces[n_i + 0] -= tmp_force.x;
                  forces[n_i + 1] -= tmp_force.y;
                  forces[n_i + 2] -= tmp_force.z;
                  /* actio = reactio */
                  n_j = 3 * neigh->nr;
                  forces[n_j + 0] += tmp_force.x;
                  forces[n_j + 1] += tmp_force.y;
                  forces[n_j + 2] += tmp_force.z;

#if defined(STRESS)
                  /* calculate stresses */
                  if (us) {
                    forces[stresses + 0] += neigh->dist.x * tmp_force.x;
                    forces[stresses + 1] += neigh->dist.y * tmp_force.y;
                    forces[stresses + 2] += neigh->dist.z * tmp_force.z;
                    forces[stresses + 3] += neigh->dist.x * tmp_force.y;
                    forces[stresses + 4] += neigh->dist.y * tmp_force.z;
                    forces[stresses + 5] += neigh->dist.z * tmp_force.x;
                  }
#endif  // STRESS
                }
              }
            }
          } /* loop over neighbours */
        }   /* end F O U R T H loop over atoms */
#endif      // DIPOLE

        /* F I F T H  loop: self energy contributions and sum-up force
         * contributions */
        double qq;
#if defined(DSF)
       double fnval_cut, gtail_cut, ggrad_cut;
        elstat_value(g_config.dp_cut, dp_kappa, &fnval_cut, &gtail_cut, &ggrad_cut);
#endif // DSF
        for (i = 0; i < g_config.inconf[h]; i++) { /* atoms */
          atom =
              g_config.conf_atoms + i + g_config.cnfstart[h] - g_mpi.firstatom;
          type1 = atom->type;
          n_i = 3 * (g_config.cnfstart[h] + i);

          /* self energy contributions */
          if (charge[type1]) {
            qq = charge[type1] * charge[type1];
#if defined(DSF)
           fnval = qq * ( DP_EPS * dp_kappa / sqrt(M_PI) +
              (fnval_cut - gtail_cut * g_config.dp_cut * g_config.dp_cut )*0.5 );
#else
             fnval = DP_EPS * dp_kappa * qq / sqrt(M_PI);
#endif // DSF
            forces[g_calc.energy_p + h] -= fnval;
          }
#if defined(DIPOLE)
          double pp;
          if (dp_alpha[type1]) {
            pp = SPROD(atom->p_ind, atom->p_ind);
            fnval = pp / (2 * dp_alpha[type1]);
            forces[g_calc.energy_p + h] += fnval;
          }
/* alternative dipole self energy including kappa-dependence */
// if (dp_alpha[type1]) {
// pp = SPROD(atom->p_ind, atom->p_ind);
// fnval = kkk * pp / sqrt(M_PI);
// forces[energy_p + h] += fnval;
//}
#endif  // DIPOLE

          /* sum-up: whole force contributions flow into tmpsum */
          /*          if (uf) {*/
          /*#ifdef FWEIGHT*/
          /*             Weigh by absolute value of force */
          /*            forces[k] /= FORCE_EPS + atom->absforce;*/
          /*            forces[k + 1] /= FORCE_EPS + atom->absforce;*/
          /*            forces[k + 2] /= FORCE_EPS + atom->absforce;*/
          /*#endif |+ FWEIGHT +|*/
          /*#ifdef CONTRIB*/
          /*            if (atom->contrib)*/
          /*#endif |+ CONTRIB +|*/
          /*              tmpsum +=*/
          /*                conf_weight[h] * (dsquare(forces[k]) +
           * dsquare(forces[k + 1]) + dsquare(forces[k + 2]));*/
          /*            printf("tmpsum = %f (forces)\n",tmpsum);*/
          /*          }*/

        } /* end F I F T H loop over atoms */

        /* S I X T H  loop: EAM force */
        if (uf) { /* only required if we calc forces */
          for (i = 0; i < g_config.inconf[h]; i++) {
            atom = g_config.conf_atoms + i + g_config.cnfstart[h] -
                   g_mpi.firstatom;
            n_i = 3 * (g_config.cnfstart[h] + i);
            for (j = 0; j < atom->num_neigh; j++) {
              /* loop over neighbors */
              neigh = atom->neigh + j;
              /* In small cells, an atom might interact with itself */
              self = (neigh->nr == i + g_config.cnfstart[h]) ? 1 : 0;
              col_F = g_calc.paircol + g_param.ntypes +
                      atom->type; /* column of F */
              r = neigh->r;
              /* are we within reach? */
              if ((r < g_pot.calc_pot.end[neigh->col[1]]) ||
                  (r < g_pot.calc_pot.end[col_F - g_param.ntypes])) {
                rho_grad =
                    (r < g_pot.calc_pot.end[neigh->col[1]])
                        ? splint_grad_dir(&g_pot.calc_pot, xi, neigh->slot[1],
                                          neigh->shift[1], neigh->step[1])
                        : 0.0;
                if (atom->type == neigh->type) /* use actio = reactio */
                  rho_grad_j = rho_grad;
                else
                  rho_grad_j = (r < g_pot.calc_pot.end[col_F - g_param.ntypes])
                                   ? (*g_splint_grad)(&g_pot.calc_pot, xi,
                                                      col_F - g_param.ntypes, r)
                                   : 0.0;
                /* now we know everything - calculate forces */
                eam_force =
                    (rho_grad * atom->gradF +
                     rho_grad_j *
                         g_config.conf_atoms[(neigh->nr) - g_mpi.firstatom]
                             .gradF);
                /* avoid double counting if atom is interacting with a
                   copy of itself */
                if (self)
                  eam_force *= 0.5;
                tmp_force.x = neigh->dist_r.x * eam_force;
                tmp_force.y = neigh->dist_r.y * eam_force;
                tmp_force.z = neigh->dist_r.z * eam_force;
                forces[n_i + 0] += tmp_force.x;
                forces[n_i + 1] += tmp_force.y;
                forces[n_i + 2] += tmp_force.z;
                /* actio = reactio */
                n_j = 3 * neigh->nr;
                forces[n_j + 0] -= tmp_force.x;
                forces[n_j + 1] -= tmp_force.y;
                forces[n_j + 2] -= tmp_force.z;
#if defined(STRESS)
                /* and stresses */
                if (us) {
                  forces[stresses + 0] -= neigh->dist.x * tmp_force.x;
                  forces[stresses + 1] -= neigh->dist.y * tmp_force.y;
                  forces[stresses + 2] -= neigh->dist.z * tmp_force.z;
                  forces[stresses + 3] -= neigh->dist.x * tmp_force.y;
                  forces[stresses + 4] -= neigh->dist.y * tmp_force.z;
                  forces[stresses + 5] -= neigh->dist.z * tmp_force.x;
                }
#endif          // STRESS
              } /* within reach */
            }   /* loop over neighbours */
#if defined(FWEIGHT)
            /* Weigh by absolute value of force */
            forces[n_i + 0] /= FORCE_EPS + atom->absforce;
            forces[n_i + 1] /= FORCE_EPS + atom->absforce;
            forces[n_i + 2] /= FORCE_EPS + atom->absforce;
#endif  // FWEIGHT
        /* sum up forces  */
#if defined(CONTRIB)
            if (atom->contrib)
#endif  // CONTRIB
              tmpsum += g_config.conf_weight[h] *
                        (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) +
                         dsquare(forces[n_i + 2]));
          }
        }

        /* end S I X T H loop over atoms */
        /* whole energy contributions flow into tmpsum */
        forces[g_calc.energy_p + h] /= (double)g_config.inconf[h];
        forces[g_calc.energy_p + h] -= g_config.force_0[g_calc.energy_p + h];
        tmpsum += g_config.conf_weight[h] * g_param.eweight *
                  dsquare(forces[g_calc.energy_p + h]);

#if defined(STRESS)
        /* whole stress contributions flow into tmpsum */
        if (uf && us) {
          for (i = 0; i < 6; i++) {
            forces[stresses + i] /= g_config.conf_vol[h - g_mpi.firstconf];
            forces[stresses + i] -= g_config.force_0[stresses + i];
            tmpsum += g_config.conf_weight[h] * g_param.sweight *
                      dsquare(forces[stresses + i]);
          }
        }
#endif  // STRESS
        /* limiting constraints per configuration */
        tmpsum += g_config.conf_weight[h] * dsquare(forces[g_calc.limit_p + h]);
      } /* end M A I N loop over configurations */
    }   /* parallel region */
#if defined(MPI)
    /* Reduce rho_sum */
    MPI_Reduce(&rho_sum_loc, &rho_sum, 1, MPI_DOUBLE, MPI_SUM, 0,
               MPI_COMM_WORLD);
#else   /* MPI */
    rho_sum = rho_sum_loc;
#endif  // MPI

/* dummy constraints (global) */
#if defined(APOT)
    /* add punishment for out of bounds (mostly for powell_lsq) */
    if (g_mpi.myid == 0) {
      tmpsum += apot_punish(xi_opt, forces);
    }
#endif  // APOT

#if !defined(NOPUNISH)
    if (g_mpi.myid == 0) {
      int g;
      for (g = 0; g < g_param.ntypes; g++) {
#if defined(NORESCALE)
        /* clear field */
        forces[g_calc.dummy_p + g_param.ntypes + g] = 0.0; /* Free end... */
        /* NEW: Constraint on U': U'(1.0)=0.0; */
        forces[g_calc.dummy_p + g] =
            DUMMY_WEIGHT *
            splint_grad(&calc_pot, xi, paircol + g_param.ntypes + g, 1.0);
#else   /* NOTHING */
        forces[g_calc.dummy_p + g_param.ntypes + g] = 0.0; /* Free end... */
        /* constraints on U`(n) */
        forces[g_calc.dummy_p + g] =
            DUMMY_WEIGHT *
                (*g_splint_grad)(
                    &g_pot.calc_pot, xi, g_calc.paircol + g_param.ntypes + g,
                    0.5 * (g_pot.calc_pot
                               .begin[g_calc.paircol + g_param.ntypes + g] +
                           g_pot.calc_pot
                               .end[g_calc.paircol + g_param.ntypes + g])) -
            g_config.force_0[g_calc.dummy_p + g];
#endif  // NORESCALE
        tmpsum += dsquare(forces[g_calc.dummy_p + g_param.ntypes + g]);
        tmpsum += dsquare(forces[g_calc.dummy_p + g]);
      } /* loop over types */
#if defined(NORESCALE)
      /* NEW: Constraint on n: <n>=1.0 ONE CONSTRAINT ONLY */
      /* Calculate averages */
      rho_sum /= (double)natoms;
      /* ATTN: if there are invariant potentials, things might be problematic */
      forces[dummy_p + g_param.ntypes] = DUMMY_WEIGHT * (rho_sum - 1.0);
      tmpsum += dsquare(forces[dummy_p + g_param.ntypes]);
#endif  // NORESCALE
    }
#endif  // NOPUNISH

#if defined(MPI)
    /* reduce global sum */
    sum = 0.0;
    MPI_Reduce(&tmpsum, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    /* gather forces, energies, stresses */
    if (g_mpi.myid == 0) { /* root node already has data in place */
      /* forces */
      MPI_Gatherv(MPI_IN_PLACE, g_mpi.myatoms, g_mpi.MPI_VECTOR, forces,
                  g_mpi.atom_len, g_mpi.atom_dist, g_mpi.MPI_VECTOR, 0,
                  MPI_COMM_WORLD);
      /* energies */
      MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, MPI_DOUBLE,
                  forces + g_calc.energy_p, g_mpi.conf_len, g_mpi.conf_dist,
                  MPI_DOUBLE, 0, MPI_COMM_WORLD);
#if defined(STRESS)
      /* stresses */
      MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, g_mpi.MPI_STENS,
                  forces + g_calc.stress_p, g_mpi.conf_len, g_mpi.conf_dist,
                  g_mpi.MPI_STENS, 0, MPI_COMM_WORLD);
#endif  // STRESS
#if !defined(NORESCALE)
      /* punishment constraints */
      MPI_Gatherv(MPI_IN_PLACE, g_mpi.myconf, MPI_DOUBLE,
                  forces + g_calc.limit_p, g_mpi.conf_len, g_mpi.conf_dist,
                  MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif  // !NORESCALE
    } else {
      /* forces */
      MPI_Gatherv(forces + g_mpi.firstatom * 3, g_mpi.myatoms, g_mpi.MPI_VECTOR,
                  forces, g_mpi.atom_len, g_mpi.atom_dist, g_mpi.MPI_VECTOR, 0,
                  MPI_COMM_WORLD);
      /* energies */
      MPI_Gatherv(forces + g_calc.energy_p + g_mpi.firstconf, g_mpi.myconf,
                  MPI_DOUBLE, forces + g_calc.energy_p, g_mpi.conf_len,
                  g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#if defined(STRESS)
      /* stresses */
      MPI_Gatherv(forces + g_calc.stress_p + 6 * g_mpi.firstconf, g_mpi.myconf,
                  g_mpi.MPI_STENS, forces + g_calc.stress_p, g_mpi.conf_len,
                  g_mpi.conf_dist, g_mpi.MPI_STENS, 0, MPI_COMM_WORLD);
#endif  // STRESS
#if !defined(NORESCALE)
      /* punishment constraints */
      MPI_Gatherv(forces + g_calc.limit_p + g_mpi.firstconf, g_mpi.myconf,
                  MPI_DOUBLE, forces + g_calc.limit_p, g_mpi.conf_len,
                  g_mpi.conf_dist, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif  // !NORESCALE
    }
/* no need to pick up dummy constraints - they are already @ root */
#else
    sum = tmpsum; /* global sum = local sum  */
#endif  // MPI

    /* root process exits this function now */
    if (g_mpi.myid == 0) {
      g_calc.fcalls++; /* Increase function call counter */
      if (isnan(sum)) {
#if defined(DEBUG)
        printf("\n--> Force is nan! <--\n\n");
#endif  // DEBUG
        return 10e10;
      } else
        return sum;
    }
  }

  /* once a non-root process arrives here, all is done. */
  return -1.0;
}
Beispiel #6
0
double calc_forces(double* xi_opt, double* forces, int flag)
{
  const tersoff_t* ters = &g_pot.apot_table.tersoff;

#if !defined(MPI)
  g_mpi.myconf = g_config.nconf;
#endif  // !MPI

  // This is the start of an infinite loop
  while (1) {
    // sum of squares of local process
    double error_sum = 0.0;

#if defined(MPI)
    MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (flag == 1)
      break; // Exception: flag 1 means clean up

    if (g_mpi.myid == 0)
      apot_check_params(xi_opt);
    MPI_Bcast(xi_opt, g_calc.ndimtot, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#else
    apot_check_params(xi_opt);
#endif  // MPI

    update_tersoff_pointers(xi_opt);

    // loop over configurations
    for (int config_idx = g_mpi.firstconf; config_idx < g_mpi.firstconf + g_mpi.myconf; config_idx++) {
      int uf = g_config.conf_uf[config_idx - g_mpi.firstconf];

      // reset energies and stresses
      forces[g_calc.energy_p + config_idx] = 0.0;

#if defined(STRESS)
      int us = g_config.conf_us[config_idx - g_mpi.firstconf];
      int stress_idx = g_calc.stress_p + 6 * config_idx;
      memset(forces + stress_idx, 0, 6 * sizeof(double));
#endif  // STRESS

      // first loop over all atoms: reset forces
      for (int atom_idx = 0; atom_idx < g_config.inconf[config_idx]; atom_idx++) {
        int n_i = 3 * (g_config.cnfstart[config_idx] + atom_idx);
        if (uf) {
          forces[n_i + 0] = -g_config.force_0[n_i + 0];
          forces[n_i + 1] = -g_config.force_0[n_i + 1];
          forces[n_i + 2] = -g_config.force_0[n_i + 2];
        } else {
          memset(forces + n_i, 0, 3 * sizeof(double));
        }
      }

      // second loop: calculate cutoff function f_c for all neighbors
      for (int atom_idx = 0; atom_idx < g_config.inconf[config_idx]; atom_idx++) {
        atom_t* atom = g_config.conf_atoms + atom_idx + g_config.cnfstart[config_idx] - g_mpi.firstatom;
        int n_i = 3 * (g_config.cnfstart[config_idx] + atom_idx);

        // loop over neighbors
        // calculate pair potential part: f*A*exp(-lambda*r)
        for (int neigh_idx = 0; neigh_idx < atom->num_neigh; neigh_idx++) {
          neigh_t* neigh_j = atom->neigh + neigh_idx;
          int col_j = neigh_j->col[0];
          // check if we are within the cutoff range
          if (neigh_j->r < ters->R2[col_j][0]) {
            int self = (neigh_j->nr == atom_idx + g_config.cnfstart[config_idx]) ? 1 : 0;

            // calculate cutoff function f_c and store it for every neighbor
            double tmp_1 = M_PI / (ters->R2[col_j][0] - ters->R1[col_j][0]);
            double tmp_2 = tmp_1 * (neigh_j->r - ters->R1[col_j][0]);
            if (neigh_j->r < ters->R1[col_j][0]) {
              neigh_j->f = 1.0;
              neigh_j->df = 0.0;
            } else {
              neigh_j->f = 0.5 * (1.0 + 1.125 * cos(tmp_2) - 0.125 * cos(3.0 * tmp_2));
              neigh_j->df = -0.5 * tmp_1 * (1.125 * sin(tmp_2) - 0.375 * sin(3.0 * tmp_2));
            }

            // calculate pair part f_c*A*exp(-lambda*r) and the derivative
            tmp_1 = exp(-ters->lambda[col_j][0] * neigh_j->r);
            double phi_val = neigh_j->f * ters->A[col_j][0] * tmp_1;
            double phi_grad = neigh_j->df - ters->lambda[col_j][0] * neigh_j->f;
            phi_grad *= ters->A[col_j][0] * tmp_1;

            // avoid double counting if atom is interacting with itself
            if (self) {
              phi_val *= 0.5;
              phi_grad *= 0.5;
            }

            // only half cohesive energy because we have a full neighbor list
            forces[g_calc.energy_p + config_idx] += 0.5 * phi_val;

            if (uf) {
              // calculate pair forces
              vector tmp_force;
              tmp_force.x = neigh_j->dist_r.x * phi_grad;
              tmp_force.y = neigh_j->dist_r.y * phi_grad;
              tmp_force.z = neigh_j->dist_r.z * phi_grad;
              forces[n_i + 0] += tmp_force.x;
              forces[n_i + 1] += tmp_force.y;
              forces[n_i + 2] += tmp_force.z;
#if defined(STRESS)
              // also calculate pair stresses
              if (us) {
                forces[stress_idx + 0] -= 0.5 * neigh_j->dist.x * tmp_force.x;
                forces[stress_idx + 1] -= 0.5 * neigh_j->dist.y * tmp_force.y;
                forces[stress_idx + 2] -= 0.5 * neigh_j->dist.z * tmp_force.z;
                forces[stress_idx + 3] -= 0.5 * neigh_j->dist.x * tmp_force.y;
                forces[stress_idx + 4] -= 0.5 * neigh_j->dist.y * tmp_force.z;
                forces[stress_idx + 5] -= 0.5 * neigh_j->dist.z * tmp_force.x;
              }
#endif  // STRESS
            }
          } else {
            neigh_j->f = 0.0;
            neigh_j->df = 0.0;
          }
        } // loop over neighbors

        // loop over neighbors
        // calculate threebody part
        for (int neigh_j_idx = 0; neigh_j_idx < atom->num_neigh; neigh_j_idx++) {
          neigh_t* neigh_j = atom->neigh + neigh_j_idx;
          int col_j = neigh_j->col[0];
          // check if we are within the cutoff range
          if (neigh_j->r < ters->R2[col_j][0]) {
            int ijk = neigh_j->ijk_start;
            int n_j = 3 * neigh_j->nr;

            // skip neighbor if coefficient is zero
            if (ters->B[col_j][0] == 0.0)
              continue;

            // reset variables for each neighbor
            double zeta = 0.0;
            double dzeta_ij = 0.0;
            vector dzeta_i;
            dzeta_i.x = 0.0;
            dzeta_i.y = 0.0;
            dzeta_i.z = 0.0;
            vector dzeta_j;
            dzeta_j.x = 0.0;
            dzeta_j.y = 0.0;
            dzeta_j.z = 0.0;

            // inner loop over neighbors
            for (int neigh_k_idx = 0; neigh_k_idx < atom->num_neigh; neigh_k_idx++) {
              if (neigh_j_idx == neigh_k_idx)
                continue;
              neigh_t* neigh_k = atom->neigh + neigh_k_idx;
              int col_k = neigh_k->col[0];
              angle_t* angle = atom->angle_part + ijk++;

              if (neigh_k->r < ters->R2[col_k][0]) {
                vector dcos_j;
                dcos_j.x = (neigh_k->dist_r.x - neigh_j->dist_r.x * angle->cos) / neigh_j->r;
                dcos_j.y = (neigh_k->dist_r.y - neigh_j->dist_r.y * angle->cos) / neigh_j->r;
                dcos_j.z = (neigh_k->dist_r.z - neigh_j->dist_r.z * angle->cos) / neigh_j->r;
                vector dcos_k;
                dcos_k.x = (neigh_j->dist_r.x - neigh_k->dist_r.x * angle->cos) / neigh_k->r;
                dcos_k.y = (neigh_j->dist_r.y - neigh_k->dist_r.y * angle->cos) / neigh_k->r;
                dcos_k.z = (neigh_j->dist_r.z - neigh_k->dist_r.z * angle->cos) / neigh_k->r;

                // g(theta)
                double tmp_1 = ters->h[col_j][0] - angle->cos;
                double tmp_2 = 1.0 / (ters->c3[col_j][0] + tmp_1 * tmp_1);
                double tmp_3 = ters->c4[col_j][0] * exp(-ters->c5[col_j][0] * tmp_1 * tmp_1);

                double g_theta = ters->c1[col_j][0] + ters->c2[col_j][0] * tmp_1 * tmp_1 * tmp_2 * (1.0 + tmp_3);
                double dg_theta = 2.0 * ters->c2[col_j][0] * tmp_1 * tmp_2 * (ters->c5[col_j][0] * tmp_1 * tmp_1 * tmp_3 - ters->c3[col_j][0] * tmp_2 * (1.0 + tmp_3));

                tmp_1 = neigh_j->r - neigh_k->r;
                tmp_2 = ters->alpha[col_j][0] * ters->beta[col_j][0] * pow(tmp_1, ters->beta[col_j][0] - 1.0);
                tmp_3 = exp(ters->alpha[col_j][0] * pow(tmp_1, ters->beta[col_j][0]));

                double dzeta_ik = (neigh_k->df - neigh_k->f * tmp_2) * g_theta * tmp_3;

                // zeta
                double tmp_4 = neigh_k->f * g_theta * tmp_3;

                zeta += tmp_4;
                dzeta_ij += tmp_4 * tmp_2;
                double dzeta_cos = neigh_k->f * dg_theta * tmp_3;

                neigh_k->dzeta.x = dzeta_cos * dcos_k.x + dzeta_ik * neigh_k->dist_r.x;
                neigh_k->dzeta.y = dzeta_cos * dcos_k.y + dzeta_ik * neigh_k->dist_r.y;
                neigh_k->dzeta.z = dzeta_cos * dcos_k.z + dzeta_ik * neigh_k->dist_r.z;

                dzeta_i.x -= neigh_k->dzeta.x;
                dzeta_i.y -= neigh_k->dzeta.y;
                dzeta_i.z -= neigh_k->dzeta.z;

                dzeta_j.x += dzeta_cos * dcos_j.x;
                dzeta_j.y += dzeta_cos * dcos_j.y;
                dzeta_j.z += dzeta_cos * dcos_j.z;
              }
            } // neigh_k_idx

            dzeta_j.x += dzeta_ij * neigh_j->dist_r.x;
            dzeta_j.y += dzeta_ij * neigh_j->dist_r.y;
            dzeta_j.z += dzeta_ij * neigh_j->dist_r.z;

            double tmp_1 = pow(zeta, ters->eta[col_j][0]);
            double b = pow(1.0 + tmp_1, -ters->delta[col_j][0]);

            double tmp_2 = 0.5 * b * ters->B[col_j][0] * exp(-ters->mu[col_j][0] * neigh_j->r);

            double tmp_3 = 0.0;
            if (zeta != 0.0)
              tmp_3 = tmp_2 * neigh_j->f * ters->eta[col_j][0] * ters->delta[col_j][0] * tmp_1 / ((1.0 + tmp_1) * zeta);

            double phi_val = -tmp_2;
            double tmp_4 = -tmp_2 * (neigh_j->df - *(ters->mu[col_j]) * neigh_j->f);

            forces[g_calc.energy_p + config_idx] += neigh_j->f * phi_val;

            vector force_j;
            force_j.x = -tmp_4 * neigh_j->dist_r.x - tmp_3 * dzeta_j.x;
            force_j.y = -tmp_4 * neigh_j->dist_r.y - tmp_3 * dzeta_j.y;
            force_j.z = -tmp_4 * neigh_j->dist_r.z - tmp_3 * dzeta_j.z;

            // update force on particle j
            forces[n_j + 0] += force_j.x;
            forces[n_j + 1] += force_j.y;
            forces[n_j + 2] += force_j.z;

            // update force on particle i
            forces[n_i + 0] -= tmp_3 * dzeta_i.x + force_j.x;
            forces[n_i + 1] -= tmp_3 * dzeta_i.y + force_j.y;
            forces[n_i + 2] -= tmp_3 * dzeta_i.z + force_j.z;

#if defined(STRESS)
            if (us) {
              // Distribute stress among atoms
              double tmp = neigh_j->dist.x * force_j.x;
              forces[stress_idx + 0] += tmp;
              tmp = neigh_j->dist.y * force_j.y;
              forces[stress_idx + 1] += tmp;
              tmp = neigh_j->dist.z * force_j.z;
              forces[stress_idx + 2] += tmp;
              tmp = 0.5 * (neigh_j->dist.x * force_j.y + neigh_j->dist.y * force_j.x);
              forces[stress_idx + 3] += tmp;
              tmp = 0.5 * (neigh_j->dist.y * force_j.z + neigh_j->dist.z * force_j.y);
              forces[stress_idx + 4] += tmp;
              tmp = 0.5 * (neigh_j->dist.z * force_j.x + neigh_j->dist.x * force_j.z);
              forces[stress_idx + 5] += tmp;
            }
#endif  // STRESS

            for (int neigh_k_idx = 0; neigh_k_idx < atom->num_neigh; neigh_k_idx++) {
              if (neigh_k_idx != neigh_j_idx) {
                neigh_t* neigh_k = atom->neigh + neigh_k_idx;
                int col_k = neigh_k->col[0];
                if (neigh_k->r < ters->R2[col_k][0]) {
                  int n_k = 3 * neigh_k->nr;
                  // update force on particle k
                  forces[n_k + 0] -= tmp_3 * neigh_k->dzeta.x;
                  forces[n_k + 1] -= tmp_3 * neigh_k->dzeta.y;
                  forces[n_k + 2] -= tmp_3 * neigh_k->dzeta.z;

#if defined(STRESS)
                  if (us) {
                    // Distribute stress among atoms
                    double tmp = neigh_k->dist.x * tmp_3 * neigh_k->dzeta.x;
                    forces[stress_idx + 0] -= tmp;
                    tmp = neigh_k->dist.y * tmp_3 * neigh_k->dzeta.y;
                    forces[stress_idx + 1] -= tmp;
                    tmp = neigh_k->dist.z * tmp_3 * neigh_k->dzeta.z;
                    forces[stress_idx + 2] -= tmp;
                    tmp = 0.5 * tmp_3 * (neigh_k->dist.x * neigh_k->dzeta.y + neigh_k->dist.y * neigh_k->dzeta.x);
                    forces[stress_idx + 3] -= tmp;
                    tmp = 0.5 * tmp_3 * (neigh_k->dist.y * neigh_k->dzeta.z + neigh_k->dist.z * neigh_k->dzeta.y);
                    forces[stress_idx + 4] -= tmp;
                    tmp = 0.5 * tmp_3 * (neigh_k->dist.z * neigh_k->dzeta.x + neigh_k->dist.x * neigh_k->dzeta.z);
                    forces[stress_idx + 5] -= tmp;
                  }
#endif  // STRESS
                }
              } // neigh_k_idx != neigh_j_idx
            }   // neigh_k_idx loop
          }     // neigh_f_idx
        }
      } // end second loop over all atoms

      // third loop over all atoms, sum up forces
      if (uf) {
        for (int atom_idx = 0; atom_idx < g_config.inconf[config_idx]; atom_idx++) {
          atom_t* atom = g_config.conf_atoms + atom_idx + g_config.cnfstart[config_idx] - g_mpi.firstatom;
          int n_i = 3 * (g_config.cnfstart[config_idx] + atom_idx);
#if defined(FWEIGHT)
          // Weigh by absolute value of force
          forces[n_i + 0] /= FORCE_EPS + atom->absforce;
          forces[n_i + 1] /= FORCE_EPS + atom->absforce;
          forces[n_i + 2] /= FORCE_EPS + atom->absforce;
#endif  // FWEIGHT

// sum up forces
#if defined(CONTRIB)
          if (atom->contrib)
#endif  // CONTRIB
            error_sum += g_config.conf_weight[config_idx] * (dsquare(forces[n_i + 0]) + dsquare(forces[n_i + 1]) + dsquare(forces[n_i + 2]));
        }
      } // end third loop over all atoms

      // energy contributions
      forces[g_calc.energy_p + config_idx] /= (double)g_config.inconf[config_idx];
      forces[g_calc.energy_p + config_idx] -= g_config.force_0[g_calc.energy_p + config_idx];
      error_sum += g_config.conf_weight[config_idx] * g_param.eweight *
                dsquare(forces[g_calc.energy_p + config_idx]);

#if defined(STRESS)
      // stress contributions
      if (uf && us) {
        for (int i = 0; i < 6; i++) {
          forces[stress_idx + i] /= g_config.conf_vol[config_idx - g_mpi.firstconf];
          forces[stress_idx + i] -= g_config.force_0[stress_idx + i];
          error_sum += g_config.conf_weight[config_idx] * g_param.sweight * dsquare(forces[stress_idx + i]);
        }
      }
#endif  // STRESS
    } // loop over configurations

    // add punishment for out of bounds (mostly for powell_lsq)
    if (g_mpi.myid == 0)
      error_sum += apot_punish(xi_opt, forces);

    gather_forces(&error_sum, forces);

    // root process exits this function now
    if (g_mpi.myid == 0) {
      // Increase function call counter
      g_calc.fcalls++;
      if (isnan(error_sum)) {
#if defined(DEBUG)
        printf("\n--> Force is nan! <--\n\n");
#endif  // DEBUG
        return 10e10;
      } else
        return error_sum;
    }
  } // end of infinite loop

  // once a non-root process arrives here, all is done
  return -1.0;
}