Ejemplo n.º 1
0
void ComputeNonbondedUtil::select(void)
{
  if ( CkMyRank() ) return;

  // These defaults die cleanly if nothing appropriate is assigned.
  ComputeNonbondedUtil::calcPair = calc_error;
  ComputeNonbondedUtil::calcPairEnergy = calc_error;
  ComputeNonbondedUtil::calcSelf = calc_error;
  ComputeNonbondedUtil::calcSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcFullPair = calc_error;
  ComputeNonbondedUtil::calcFullPairEnergy = calc_error;
  ComputeNonbondedUtil::calcFullSelf = calc_error;
  ComputeNonbondedUtil::calcFullSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcMergePair = calc_error;
  ComputeNonbondedUtil::calcMergePairEnergy = calc_error;
  ComputeNonbondedUtil::calcMergeSelf = calc_error;
  ComputeNonbondedUtil::calcMergeSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcSlowPair = calc_error;
  ComputeNonbondedUtil::calcSlowPairEnergy = calc_error;
  ComputeNonbondedUtil::calcSlowSelf = calc_error;
  ComputeNonbondedUtil::calcSlowSelfEnergy = calc_error;

  SimParameters * simParams = Node::Object()->simParameters;
  Parameters * params = Node::Object()->parameters;

  table_ener = params->table_ener;
  rowsize = params->rowsize;
  columnsize = params->columnsize;

  commOnly = simParams->commOnly;
  fixedAtomsOn = ( simParams->fixedAtomsOn && ! simParams->fixedAtomsForces );

  cutoff = simParams->cutoff;
  cutoff2 = cutoff*cutoff;

//fepb
  alchFepOn = simParams->alchFepOn;
  Fep_WCA_repuOn = simParams->alchFepWCARepuOn;
  Fep_WCA_dispOn = simParams->alchFepWCADispOn;
  alchThermIntOn = simParams->alchThermIntOn;
  alchLambda = alchLambda2 = 0;
  lesOn = simParams->lesOn;
  lesScaling = lesFactor = 0;
  Bool tabulatedEnergies = simParams->tabulatedEnergies;
  alchVdwShiftCoeff = simParams->alchVdwShiftCoeff;
  WCA_rcut1 = simParams->alchFepWCArcut1;
  WCA_rcut2 = simParams->alchFepWCArcut2;
  alchVdwLambdaEnd = simParams->alchVdwLambdaEnd;
  alchElecLambdaStart = simParams->alchElecLambdaStart;

  alchDecouple = simParams->alchDecouple;

  delete [] lambda_table;
  lambda_table = 0;

  pairInteractionOn = simParams->pairInteractionOn;
  pairInteractionSelf = simParams->pairInteractionSelf;
  pressureProfileOn = simParams->pressureProfileOn;

  // Ported by JLai -- Original JE - Go
  goForcesOn = simParams->goForcesOn;
  goMethod = simParams->goMethod; 
  // End of port

  accelMDOn = simParams->accelMDOn;

  drudeNbthole = simParams->drudeOn && (simParams->drudeNbtholeCut > 0.0);

  if ( drudeNbthole ) {
#ifdef NAMD_CUDA
    NAMD_die("drudeNbthole is not supported in CUDA version");
#endif
    if ( alchFepOn )
      NAMD_die("drudeNbthole is not supported with alchemical free-energy perturbation");
    if ( alchThermIntOn )
      NAMD_die("drudeNbthole is not supported with alchemical thermodynamic integration");
    if ( lesOn )
      NAMD_die("drudeNbthole is not supported with locally enhanced sampling");
    if ( pairInteractionOn )
      NAMD_die("drudeNbthole is not supported with pair interaction calculation");
    if ( pressureProfileOn )
      NAMD_die("drudeNbthole is not supported with pressure profile calculation");
  }

  if ( alchFepOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Alchemical free-energy perturbation is not supported in CUDA version");
#endif
    alchLambda = simParams->alchLambda;
    alchLambda2 = simParams->alchLambda2;
    ComputeNonbondedUtil::calcPair = calc_pair_energy_fep;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_fep;
    ComputeNonbondedUtil::calcSelf = calc_self_energy_fep;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_fep;
    ComputeNonbondedUtil::calcFullPair = calc_pair_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullSelf = calc_self_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_fep;
    ComputeNonbondedUtil::calcMergePair = calc_pair_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_fep;
  }  else if ( alchThermIntOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Alchemical thermodynamic integration is not supported in CUDA version");
#endif
    alchLambda = simParams->alchLambda;
    ComputeNonbondedUtil::calcPair = calc_pair_ti;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_ti;
    ComputeNonbondedUtil::calcSelf = calc_self_ti;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_ti;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_ti;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_ti;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_ti;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_ti;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_ti;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_ti;
  } else if ( lesOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Locally enhanced sampling is not supported in CUDA version");
#endif
    lesFactor = simParams->lesFactor;
    lesScaling = 1.0 / (double)lesFactor;
    lambda_table = new BigReal[(lesFactor+1)*(lesFactor+1)];
    for ( int ip=0; ip<=lesFactor; ++ip ) {
      for ( int jp=0; jp<=lesFactor; ++jp ) {
        BigReal lambda_pair = 1.0;
        if (ip || jp ) {
          if (ip && jp && ip != jp) {
            lambda_pair = 0.0;
          } else {
            lambda_pair = lesScaling;
          }
        }
        lambda_table[(lesFactor+1)*ip+jp] = lambda_pair;
      }
    }
    ComputeNonbondedUtil::calcPair = calc_pair_les;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_les;
    ComputeNonbondedUtil::calcSelf = calc_self_les;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_les;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_les;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_les;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_les;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_les;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_les;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_les;
  } else if ( pressureProfileOn) {
#ifdef NAMD_CUDA
    NAMD_die("Pressure profile calculation is not supported in CUDA version");
#endif
    pressureProfileSlabs = simParams->pressureProfileSlabs;
    pressureProfileAtomTypes = simParams->pressureProfileAtomTypes;

    ComputeNonbondedUtil::calcPair = calc_pair_pprof;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_pprof;
    ComputeNonbondedUtil::calcSelf = calc_self_pprof;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_pprof;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_pprof;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_pprof;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_pprof;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_pprof;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_pprof;
  } else if ( pairInteractionOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Pair interaction calculation is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_int;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_int;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_int;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_int;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_int;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_int;
  } else if ( tabulatedEnergies ) {
#ifdef NAMD_CUDA
    NAMD_die("Tabulated energies is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPair = calc_pair_tabener;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_tabener;
    ComputeNonbondedUtil::calcSelf = calc_self_tabener;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_tabener;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_tabener;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_tabener;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_tabener;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_tabener;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_tabener;
  } else if ( goForcesOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Go forces is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPair = calc_pair_go;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_go;
    ComputeNonbondedUtil::calcSelf = calc_self_go;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_go;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_go;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_go;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_go;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_go;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_go;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_go;
  } else {
    ComputeNonbondedUtil::calcPair = calc_pair;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy;
    ComputeNonbondedUtil::calcSelf = calc_self;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect;
  }

//fepe

  dielectric_1 = 1.0/simParams->dielectric;
  if ( ! ljTable ) ljTable = new LJTable;
  mol = Node::Object()->molecule;
  scaling = simParams->nonbondedScaling;
  if ( simParams->exclude == SCALED14 )
  {
    scale14 = simParams->scale14;
  }
  else
  {
    scale14 = 1.;
  }
  if ( simParams->switchingActive )
  {
    switchOn = simParams->switchingDist;
    switchOn_1 = 1.0/switchOn;
    // d0 = 1.0/(cutoff-switchOn);
    switchOn2 = switchOn*switchOn;
    c0 = 1.0/(cutoff2-switchOn2);

    if ( simParams->vdwForceSwitching ) {
      double switchOn3 = switchOn * switchOn2;
      double cutoff3 = cutoff * cutoff2;
      double switchOn6 = switchOn3 * switchOn3;
      double cutoff6 = cutoff3 * cutoff3;
      v_vdwa = -1. / ( switchOn6 * cutoff6 );
      v_vdwb = -1. / ( switchOn3 * cutoff3 );
      k_vdwa = cutoff6 / ( cutoff6 - switchOn6 );
      k_vdwb = cutoff3 / ( cutoff3 - switchOn3 );
      cutoff_3 = 1. / cutoff3;
      cutoff_6 = 1. / cutoff6;
    }
  }
  else
  {
    switchOn = cutoff;
    switchOn_1 = 1.0/switchOn;
    // d0 = 0.;  // avoid division by zero
    switchOn2 = switchOn*switchOn;
    c0 = 0.;  // avoid division by zero
  }
  c1 = c0*c0*c0;
  c3 = 3.0 * (cutoff2 - switchOn2);
  c5 = 0;
  c6 = 0;
  c7 = 0;
  c8 = 0;

  const int PMEOn = simParams->PMEOn;
  const int MSMOn = simParams->MSMOn;
  const int MSMSplit = simParams->MSMSplit;

  if ( PMEOn ) {
    ewaldcof = simParams->PMEEwaldCoefficient;
    BigReal TwoBySqrtPi = 1.12837916709551;
    pi_ewaldcof = TwoBySqrtPi * ewaldcof;
  }

  int splitType = SPLIT_NONE;
  if ( simParams->switchingActive ) splitType = SPLIT_SHIFT;
  if ( simParams->martiniSwitching ) splitType = SPLIT_MARTINI;
  if ( simParams->fullDirectOn || simParams->FMAOn || PMEOn || MSMOn ) {
    switch ( simParams->longSplitting ) {
      case C2:
      splitType = SPLIT_C2;
      break;

      case C1:
      splitType = SPLIT_C1;
      break;

      case XPLOR:
      NAMD_die("Sorry, XPLOR splitting not supported.");
      break;

      case SHARP:
      NAMD_die("Sorry, SHARP splitting not supported.");
      break;

      default:
      NAMD_die("Unknown splitting type found!");

    }
  }

  BigReal r2_tol = 0.1;
  
  r2_delta = 1.0;
  r2_delta_exp = 0;
  while ( r2_delta > r2_tol ) { r2_delta /= 2.0; r2_delta_exp += 1; }
  r2_delta_1 = 1.0 / r2_delta;

  if ( ! CkMyPe() ) {
    iout << iINFO << "NONBONDED TABLE R-SQUARED SPACING: " <<
				r2_delta << "\n" << endi;
  }

  BigReal r2_tmp = 1.0;
  int cutoff2_exp = 0;
  while ( (cutoff2 + r2_delta) > r2_tmp ) { r2_tmp *= 2.0; cutoff2_exp += 1; }

  int i;
  int n = (r2_delta_exp + cutoff2_exp) * 64 + 1;

  if ( ! CkMyPe() ) {
    iout << iINFO << "NONBONDED TABLE SIZE: " <<
				n << " POINTS\n" << endi;
  }

  if ( table_alloc ) delete [] table_alloc;
  table_alloc = new BigReal[61*n+16];
  BigReal *table_align = table_alloc;
  while ( ((long)table_align) % 128 ) ++table_align;
  table_noshort = table_align;
  table_short = table_align + 16*n;
  slow_table = table_align + 32*n;
  fast_table = table_align + 36*n;
  scor_table = table_align + 40*n;
  corr_table = table_align + 44*n;
  full_table = table_align + 48*n;
  vdwa_table = table_align + 52*n;
  vdwb_table = table_align + 56*n;
  r2_table = table_align + 60*n;
  BigReal *fast_i = fast_table + 4;
  BigReal *scor_i = scor_table + 4;
  BigReal *slow_i = slow_table + 4;
  BigReal *vdwa_i = vdwa_table + 4;
  BigReal *vdwb_i = vdwb_table + 4;
  BigReal *r2_i = r2_table;  *(r2_i++) = r2_delta;
  BigReal r2_limit = simParams->limitDist * simParams->limitDist;
  if ( r2_limit < r2_delta ) r2_limit = r2_delta;
  int r2_delta_i = 0;  // entry for r2 == r2_delta

  // fill in the table, fix up i==0 (r2==0) below
  for ( i=1; i<n; ++i ) {

    const BigReal r2_base = r2_delta * ( 1 << (i/64) );
    const BigReal r2_del = r2_base / 64.0;
    const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);

    if ( r2 <= r2_limit ) r2_delta_i = i;

    const BigReal r = sqrt(r2);
    const BigReal r_1 = 1.0/r;
    const BigReal r_2 = 1.0/r2;

    // fast_ is defined as (full_ - slow_)
    // corr_ and fast_ are both zero at the cutoff, full_ is not
    // all three are approx 1/r at short distances

    // for actual interpolation, we use fast_ for fast forces and
    // scor_ = slow_ + corr_ - full_ and slow_ for slow forces
    // since these last two are of small magnitude

    BigReal fast_energy, fast_gradient;
    BigReal scor_energy, scor_gradient;
    BigReal slow_energy, slow_gradient;

    // corr_ is PME direct sum, or similar correction term
    // corr_energy is multiplied by r until later
    // corr_gradient is multiplied by -r^2 until later
    BigReal corr_energy, corr_gradient;

    
    if ( PMEOn ) {
      BigReal tmp_a = r * ewaldcof;
      BigReal tmp_b = erfc(tmp_a);
      corr_energy = tmp_b;
      corr_gradient = pi_ewaldcof*exp(-(tmp_a*tmp_a))*r + tmp_b;
    } else if ( MSMOn ) {
      BigReal a_1 = 1.0/cutoff;
      BigReal r_a = r * a_1;
      BigReal g, dg;
      SPOLY(&g, &dg, r_a, MSMSplit);
      corr_energy = 1 - r_a * g;
      corr_gradient = 1 + r_a*r_a * dg;
    } else {
      corr_energy = corr_gradient = 0;
    }

    switch(splitType) {
      case SPLIT_NONE:
        fast_energy = 1.0/r;
        fast_gradient = -1.0/r2;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
	break;
      case SPLIT_SHIFT: {
	BigReal shiftVal = r2/cutoff2 - 1.0;
	shiftVal *= shiftVal;
	BigReal dShiftVal = 2.0 * (r2/cutoff2 - 1.0) * 2.0*r/cutoff2;
        fast_energy = shiftVal/r;
        fast_gradient = dShiftVal/r - shiftVal/r2;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
        } 
	break;
      case SPLIT_MARTINI: { 
        // in Martini, the Coulomb switching distance is zero
        const BigReal COUL_SWITCH = 0.;
        // Gromacs shifting function
        const BigReal p1 = 1.;
        BigReal A1 = p1 * ((p1+1)*COUL_SWITCH-(p1+4)*cutoff)/(pow(cutoff,p1+2)*pow(cutoff-COUL_SWITCH,2));
        BigReal B1 = -p1 * ((p1+1)*COUL_SWITCH-(p1+3)*cutoff)/(pow(cutoff,p1+2)*pow(cutoff-COUL_SWITCH,3));
        BigReal X1 = 1.0/pow(cutoff,p1)-A1/3.0*pow(cutoff-COUL_SWITCH,3)-B1/4.0*pow(cutoff-COUL_SWITCH,4);
        BigReal r12 = (r-COUL_SWITCH)*(r-COUL_SWITCH);
        BigReal r13 = (r-COUL_SWITCH)*(r-COUL_SWITCH)*(r-COUL_SWITCH);
        BigReal shiftVal = -(A1/3.0)*r13 - (B1/4.0)*r12*r12 - X1;
        BigReal dShiftVal = -A1*r12 - B1*r13;
        fast_energy = (1/r) + shiftVal;
        fast_gradient = -1/(r2) + dShiftVal;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
        } 
	break;
      case SPLIT_C1:
	// calculate actual energy and gradient
	slow_energy = 0.5/cutoff * (3.0 - (r2/cutoff2));
	slow_gradient = -1.0/cutoff2 * (r/cutoff);
	// calculate scor from slow and corr
	scor_energy = slow_energy + (corr_energy - 1.0)/r;
	scor_gradient = slow_gradient - (corr_gradient - 1.0)/r2;
	// calculate fast from slow
	fast_energy = 1.0/r - slow_energy;
	fast_gradient = -1.0/r2 - slow_gradient;
	break;
      case SPLIT_C2:
        //
        // Quintic splitting function contributed by
        // Bruce Berne, Ruhong Zhou, and Joe Morrone
        //
	// calculate actual energy and gradient
        slow_energy = r2/(cutoff*cutoff2) * (6.0 * (r2/cutoff2)
            - 15.0*(r/cutoff) + 10.0);
        slow_gradient = r/(cutoff*cutoff2) * (24.0 * (r2/cutoff2)
            - 45.0 *(r/cutoff) + 20.0);
	// calculate scor from slow and corr
        scor_energy = slow_energy + (corr_energy - 1.0)/r;
        scor_gradient = slow_gradient - (corr_gradient - 1.0)/r2;
	// calculate fast from slow
	fast_energy = 1.0/r - slow_energy;
	fast_gradient = -1.0/r2 - slow_gradient;
	break;
    }

    // foo_gradient is calculated as ( d foo_energy / d r )
    // and now divided by 2r to get ( d foo_energy / d r2 )

    fast_gradient *= 0.5 * r_1;
    scor_gradient *= 0.5 * r_1;
    slow_gradient *= 0.5 * r_1;

    // let modf be 1 if excluded, 1-scale14 if modified, 0 otherwise,
    // add scor_ - modf * slow_ to slow terms and
    // add fast_ - modf * fast_ to fast terms.

    BigReal vdwa_energy, vdwa_gradient;
    BigReal vdwb_energy, vdwb_gradient;

    const BigReal r_6 = r_2*r_2*r_2;
    const BigReal r_12 = r_6*r_6;

    // Lennard-Jones switching function
  if ( simParams->vdwForceSwitching ) {  // switch force
    // from Steinbach & Brooks, JCC 15, pgs 667-683, 1994, eqns 10-13
    if ( r2 > switchOn2 ) {
      BigReal tmpa = r_6 - cutoff_6;
      vdwa_energy = k_vdwa * tmpa * tmpa;
      BigReal tmpb = r_1 * r_2 - cutoff_3;
      vdwb_energy = k_vdwb * tmpb * tmpb;
      vdwa_gradient = -6.0 * k_vdwa * tmpa * r_2 * r_6;
      vdwb_gradient = -3.0 * k_vdwb * tmpb * r_2 * r_2 * r_1;
    } else {
      vdwa_energy = r_12 + v_vdwa;
      vdwb_energy = r_6 + v_vdwb;
      vdwa_gradient = -6.0 * r_2 * r_12;
      vdwb_gradient = -3.0 * r_2 * r_6;
    }
  } else if ( simParams->martiniSwitching ) { // switching fxn for Martini RBCG

    BigReal r12 = (r-switchOn)*(r-switchOn);        BigReal r13 = (r-switchOn)*(r-switchOn)*(r-switchOn);

    BigReal p6 = 6;
    BigReal A6 = p6 * ((p6+1)*switchOn-(p6+4)*cutoff)/(pow(cutoff,p6+2)*pow(cutoff-switchOn,2));
    BigReal B6 = -p6 * ((p6+1)*switchOn-(p6+3)*cutoff)/(pow(cutoff,p6+2)*pow(cutoff-switchOn,3));        
    BigReal C6 = 1.0/pow(cutoff,p6)-A6/3.0*pow(cutoff-switchOn,3)-B6/4.0*pow(cutoff-switchOn,4);

    BigReal p12 = 12;
    BigReal A12 = p12 * ((p12+1)*switchOn-(p12+4)*cutoff)/(pow(cutoff,p12+2)*pow(cutoff-switchOn,2));
    BigReal B12 = -p12 * ((p12+1)*switchOn-(p12+3)*cutoff)/(pow(cutoff,p12+2)*pow(cutoff-switchOn,3));
    BigReal C12 = 1.0/pow(cutoff,p12)-A12/3.0*pow(cutoff-switchOn,3)-B12/4.0*pow(cutoff-switchOn,4);

    BigReal LJshifttempA = -(A12/3)*r13 - (B12/4)*r12*r12 - C12;
    BigReal LJshifttempB = -(A6/3)*r13 - (B6/4)*r12*r12 - C6;
    const BigReal shiftValA =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJshifttempA : -C12);
    const BigReal shiftValB =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJshifttempB : -C6);

    BigReal LJdshifttempA = -A12*r12 - B12*r13;
    BigReal LJdshifttempB = -A6*r12 - B6*r13;
    const BigReal dshiftValA =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJdshifttempA*0.5*r_1 : 0 );
    const BigReal dshiftValB =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJdshifttempB*0.5*r_1 : 0 );




    //have not addressed r > cutoff

    //  dshiftValA*= 0.5*r_1;
    //  dshiftValB*= 0.5*r_1;

    vdwa_energy = r_12 + shiftValA;
    vdwb_energy = r_6 + shiftValB;
   
    vdwa_gradient = -6/pow(r,14) + dshiftValA ;
    vdwb_gradient = -3/pow(r,8) + dshiftValB;

  } else {  // switch energy
    const BigReal c2 = cutoff2-r2;
    const BigReal c4 = c2*(c3-2.0*c2);
    const BigReal switchVal =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? c2*c4*c1 : 1.0 );
    const BigReal dSwitchVal =        // d switchVal / d r2
                        ( r2 > switchOn2 ? 2*c1*(c2*c2-c4) : 0.0 );

    vdwa_energy = switchVal * r_12;
    vdwb_energy = switchVal * r_6;

    vdwa_gradient = ( dSwitchVal - 6.0 * switchVal * r_2 ) * r_12;
    vdwb_gradient = ( dSwitchVal - 3.0 * switchVal * r_2 ) * r_6;
  }


    *(fast_i++) = fast_energy;
    *(fast_i++) = fast_gradient;
    *(fast_i++) = 0;
    *(fast_i++) = 0;
    *(scor_i++) = scor_energy;
    *(scor_i++) = scor_gradient;
    *(scor_i++) = 0;
    *(scor_i++) = 0;
    *(slow_i++) = slow_energy;
    *(slow_i++) = slow_gradient;
    *(slow_i++) = 0;
    *(slow_i++) = 0;
    *(vdwa_i++) = vdwa_energy;
    *(vdwa_i++) = vdwa_gradient;
    *(vdwa_i++) = 0;
    *(vdwa_i++) = 0;
    *(vdwb_i++) = vdwb_energy;
    *(vdwb_i++) = vdwb_gradient;
    *(vdwb_i++) = 0;
    *(vdwb_i++) = 0;
    *(r2_i++) = r2 + r2_delta;

  }

  if ( ! r2_delta_i ) {
    NAMD_bug("Failed to find table entry for r2 == r2_limit\n");
  }
  if ( r2_table[r2_delta_i] > r2_limit + r2_delta ) {
    NAMD_bug("Found bad table entry for r2 == r2_limit\n");
  }

  int j;
  const char *table_name = "XXXX";
  int smooth_short = 0;
  for ( j=0; j<5; ++j ) {
    BigReal *t0 = 0;
    switch (j) {
      case 0: 
        t0 = fast_table;
        table_name = "FAST";
        smooth_short = 1;
      break;
      case 1: 
        t0 = scor_table;
        table_name = "SCOR";
        smooth_short = 0;
      break;
      case 2: 
        t0 = slow_table;
        table_name = "SLOW";
        smooth_short = 0;
      break;
      case 3: 
        t0 = vdwa_table;
        table_name = "VDWA";
        smooth_short = 1;
      break;
      case 4: 
        t0 = vdwb_table;
        table_name = "VDWB";
        smooth_short = 1;
      break;
    }
    // patch up data for i=0
    t0[0] = t0[4] - t0[5] * ( r2_delta / 64.0 );  // energy
    t0[1] = t0[5];  // gradient
    t0[2] = 0;
    t0[3] = 0;
    if ( smooth_short ) {
      BigReal energy0 = t0[4*r2_delta_i];
      BigReal gradient0 = t0[4*r2_delta_i+1];
      BigReal r20 = r2_table[r2_delta_i];
      t0[0] = energy0 - gradient0 * (r20 - r2_table[0]);  // energy
      t0[1] = gradient0;  // gradient
    }
    BigReal *t;
    for ( i=0,t=t0; i<(n-1); ++i,t+=4 ) {
      BigReal x = ( r2_delta * ( 1 << (i/64) ) ) / 64.0;
      if ( r2_table[i+1] != r2_table[i] + x ) {
        NAMD_bug("Bad table delta calculation.\n");
      }
      if ( smooth_short && i+1 < r2_delta_i ) {
        BigReal energy0 = t0[4*r2_delta_i];
        BigReal gradient0 = t0[4*r2_delta_i+1];
        BigReal r20 = r2_table[r2_delta_i];
        t[4] = energy0 - gradient0 * (r20 - r2_table[i+1]);  // energy
        t[5] = gradient0;  // gradient
      }
      BigReal v1 = t[0];
      BigReal g1 = t[1];
      BigReal v2 = t[4];
      BigReal g2 = t[5];
      // explicit formulas for v1 + g1 x + c x^2 + d x^3
      BigReal c = ( 3.0 * (v2 - v1) - x * (2.0 * g1 + g2) ) / ( x * x );
      BigReal d = ( -2.0 * (v2 - v1) + x * (g1 + g2) ) / ( x * x * x );
      // since v2 - v1 is imprecise, we refine c and d numerically
      // important because we need accurate forces (more than energies!)
      for ( int k=0; k < 2; ++k ) {
        BigReal dv = (v1 - v2) + ( ( d * x + c ) * x + g1 ) * x;
        BigReal dg = (g1 - g2) + ( 3.0 * d * x + 2.0 * c ) * x;
        c -= ( 3.0 * dv - x * dg ) / ( x * x );
        d -= ( -2.0 * dv + x * dg ) / ( x * x * x );
      }
      // store in the array;
      t[2] = c;  t[3] = d;
    }

    if ( ! CkMyPe() ) {
    BigReal dvmax = 0;
    BigReal dgmax = 0;
    BigReal dvmax_r = 0;
    BigReal dgmax_r = 0;
    BigReal fdvmax = 0;
    BigReal fdgmax = 0;
    BigReal fdvmax_r = 0;
    BigReal fdgmax_r = 0;
    BigReal dgcdamax = 0;
    BigReal dgcdimax = 0;
    BigReal dgcaimax = 0;
    BigReal dgcdamax_r = 0;
    BigReal dgcdimax_r = 0;
    BigReal dgcaimax_r = 0;
    BigReal fdgcdamax = 0;
    BigReal fdgcdimax = 0;
    BigReal fdgcaimax = 0;
    BigReal fdgcdamax_r = 0;
    BigReal fdgcdimax_r = 0;
    BigReal fdgcaimax_r = 0;
    BigReal gcm = fabs(t0[1]);  // gradient magnitude running average
    for ( i=0,t=t0; i<(n-1); ++i,t+=4 ) {
      const BigReal r2_base = r2_delta * ( 1 << (i/64) );
      const BigReal r2_del = r2_base / 64.0;
      const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);
      const BigReal r = sqrt(r2);
      if ( r > cutoff ) break;
      BigReal x = r2_del;
      BigReal dv = ( ( t[3] * x + t[2] ) * x + t[1] ) * x + t[0] - t[4];
      BigReal dg = ( 3.0 * t[3] * x + 2.0 * t[2] ) * x + t[1] - t[5];
      if ( t[4] != 0. && fabs(dv/t[4]) > fdvmax ) {
        fdvmax = fabs(dv/t[4]); fdvmax_r = r;
      }
      if ( fabs(dv) > dvmax ) {
        dvmax = fabs(dv); dvmax_r = r;
      }
      if ( t[5] != 0. && fabs(dg/t[5]) > fdgmax ) {
        fdgmax = fabs(dg/t[5]); fdgmax_r = r;
      }
      if ( fabs(dg) > dgmax ) {
        dgmax = fabs(dg); dgmax_r = r;
      }
      BigReal gcd = (t[4] - t[0]) / x;  // centered difference gradient
      BigReal gcd_prec = (fabs(t[0]) + fabs(t[4])) * 1.e-15 / x;  // roundoff
      gcm = 0.9 * gcm + 0.1 * fabs(t[5]);  // magnitude running average
      BigReal gca = 0.5  * (t[1] + t[5]);  // centered average gradient
      BigReal gci = ( 0.75 * t[3] * x + t[2] ) * x + t[1];  // interpolated
      BigReal rc = sqrt(r2 + 0.5 * x);
      BigReal dgcda = gcd - gca;
      if ( dgcda != 0. && fabs(dgcda) < gcd_prec ) {
        // CkPrintf("ERROR %g < PREC %g AT %g AVG VAL %g\n", dgcda, gcd_prec, rc, gca);
        dgcda = 0.;
      }
      BigReal dgcdi = gcd - gci;
      if ( dgcdi != 0. && fabs(dgcdi) < gcd_prec ) {
        // CkPrintf("ERROR %g < PREC %g AT %g INT VAL %g\n", dgcdi, gcd_prec, rc, gci);
        dgcdi = 0.;
      }
      BigReal dgcai = gca - gci;
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcda/gcm) > fdgcdamax ) {
        fdgcdamax = fabs(dgcda/gcm); fdgcdamax_r = rc;
      }
      if ( fabs(dgcda) > fdgcdamax ) {
        dgcdamax = fabs(dgcda); dgcdamax_r = rc;
      }
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcdi/gcm) > fdgcdimax ) {
        fdgcdimax = fabs(dgcdi/gcm); fdgcdimax_r = rc;
      }
      if ( fabs(dgcdi) > fdgcdimax ) {
        dgcdimax = fabs(dgcdi); dgcdimax_r = rc;
      }
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcai/gcm) > fdgcaimax ) {
        fdgcaimax = fabs(dgcai/gcm); fdgcaimax_r = rc;
      }
      if ( fabs(dgcai) > fdgcaimax ) {
        dgcaimax = fabs(dgcai); dgcaimax_r = rc;
      }
#if 0
      CkPrintf("TABLE %s %g %g %g %g\n",table_name,rc,dgcda/gcm,dgcda,gci);
      if (dv != 0.) CkPrintf("TABLE %d ENERGY ERROR %g AT %g (%d)\n",j,dv,r,i);
      if (dg != 0.) CkPrintf("TABLE %d FORCE ERROR %g AT %g (%d)\n",j,dg,r,i);
#endif
    }
    if ( dvmax != 0.0 ) {
      iout << iINFO << "ABSOLUTE IMPRECISION IN " << table_name <<
        " TABLE ENERGY: " << dvmax << " AT " << dvmax_r << "\n" << endi;
    }
    if ( fdvmax != 0.0 ) {
      iout << iINFO << "RELATIVE IMPRECISION IN " << table_name <<
        " TABLE ENERGY: " << fdvmax << " AT " << fdvmax_r << "\n" << endi;
    }
    if ( dgmax != 0.0 ) {
      iout << iINFO << "ABSOLUTE IMPRECISION IN " << table_name <<
        " TABLE FORCE: " << dgmax << " AT " << dgmax_r << "\n" << endi;
    }
    if ( fdgmax != 0.0 ) {
      iout << iINFO << "RELATIVE IMPRECISION IN " << table_name <<
        " TABLE FORCE: " << fdgmax << " AT " << fdgmax_r << "\n" << endi;
    }
    if (fdgcdamax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE ENERGY VS FORCE: " << fdgcdamax << " AT " << fdgcdamax_r << "\n" << endi;
      if ( fdgcdamax > 0.1 ) {
        iout << iERROR << "\n";
        iout << iERROR << "CALCULATED " << table_name <<
          " FORCE MAY NOT MATCH ENERGY! POSSIBLE BUG!\n";
        iout << iERROR << "\n";
      }
    }
    if (0 && fdgcdimax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE ENERGY VS FORCE: " << fdgcdimax << " AT " << fdgcdimax_r << "\n" << endi;
    }
    if ( 0 && fdgcaimax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE AVG VS INT FORCE: " << fdgcaimax << " AT " << fdgcaimax_r << "\n" << endi;
    }
    }

  }

  for ( i=0; i<4*n; ++i ) {
    corr_table[i] = fast_table[i] + scor_table[i];
    full_table[i] = fast_table[i] + slow_table[i];
  }

#if 0  
  for ( i=0; i<n; ++i ) {
   for ( int j=0; j<4; ++j ) {
    table_short[16*i+6-2*j] = table_noshort[16*i+6-2*j] = vdwa_table[4*i+j];
    table_short[16*i+7-2*j] = table_noshort[16*i+7-2*j] = vdwb_table[4*i+j];
    table_short[16*i+8+3-j] = fast_table[4*i+j];
    table_short[16*i+12+3-j] = scor_table[4*i+j];
    table_noshort[16*i+8+3-j] = corr_table[4*i+j];
    table_noshort[16*i+12+3-j] = full_table[4*i+j];
   }
  }
#endif 

  for ( i=0; i<n; ++i ) {
    table_short[16*i+ 0] = table_noshort[16*i+0] = -6.*vdwa_table[4*i+3];
    table_short[16*i+ 2] = table_noshort[16*i+2] = -6.*vdwb_table[4*i+3];
    table_short[16*i+ 4] = table_noshort[16*i+4] = -2.*vdwa_table[4*i+1];
    table_short[16*i+ 6] = table_noshort[16*i+6] = -2.*vdwb_table[4*i+1];
    
    table_short[16*i+1] = table_noshort[16*i+1] = -4.*vdwa_table[4*i+2];
    table_short[16*i+3] = table_noshort[16*i+3] = -4.*vdwb_table[4*i+2];
    table_short[16*i+5] = table_noshort[16*i+5] = -1.*vdwa_table[4*i+0];
    table_short[16*i+7] = table_noshort[16*i+7] = -1.*vdwb_table[4*i+0];
    
    table_short[16*i+8]  = -6.*fast_table[4*i+3];
    table_short[16*i+9]  = -4.*fast_table[4*i+2];
    table_short[16*i+10] = -2.*fast_table[4*i+1];
    table_short[16*i+11] = -1.*fast_table[4*i+0];

    table_noshort[16*i+8]  = -6.*corr_table[4*i+3];
    table_noshort[16*i+9]  = -4.*corr_table[4*i+2];
    table_noshort[16*i+10] = -2.*corr_table[4*i+1];
    table_noshort[16*i+11] = -1.*corr_table[4*i+0];

    table_short[16*i+12] = -6.*scor_table[4*i+3];
    table_short[16*i+13] = -4.*scor_table[4*i+2];
    table_short[16*i+14] = -2.*scor_table[4*i+1];
    table_short[16*i+15] = -1.*scor_table[4*i+0];

    table_noshort[16*i+12] = -6.*full_table[4*i+3];
    table_noshort[16*i+13] = -4.*full_table[4*i+2];
    table_noshort[16*i+14] = -2.*full_table[4*i+1];
    table_noshort[16*i+15] = -1.*full_table[4*i+0];
  }

#if 0
  char fname[100];
  sprintf(fname,"/tmp/namd.table.pe%d.dat",CkMyPe());
  FILE *f = fopen(fname,"w");
  for ( i=0; i<(n-1); ++i ) {
    const BigReal r2_base = r2_delta * ( 1 << (i/64) );
    const BigReal r2_del = r2_base / 64.0;
    const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);
    BigReal *t;
    if ( r2 + r2_delta != r2_table[i] ) fprintf(f,"r2 error! ");
    fprintf(f,"%g",r2);
    t = fast_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = scor_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = slow_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = corr_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = full_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = vdwa_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = vdwb_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    fprintf(f,"\n");
  }
  fclose(f);
#endif

#ifdef NAMD_CUDA
  send_build_cuda_force_table();
#endif

}
Ejemplo n.º 2
0
/*
 * Set up the hierarchy of MSM lattices stored in msm.
 *
 * Steps performed:
 *  1. Compute the index range of the finest lattice so that it covers both
 *     the msm->[xyz]min..[xyz]max bounds and the epotmap extent, padded by
 *     nu lattice points on every side.
 *  2. Grow the qh/eh/gc lattice-pointer arrays (and create the new lattice
 *     objects) if more levels may be needed than are currently allocated.
 *  3. Size the qh and eh lattices level by level, halving the index range
 *     (rounded outward, then padded by nu) for each coarser level, until a
 *     level holds no more than max(sqrt(N), omega^3) points, where N is the
 *     finest-level point count.
 *  4. Fill the gc lattices with the lattice cutoff weights for every level.
 *
 * Returns OK on success; otherwise the error code is passed through ERROR().
 */
int setup_hierarchy(Msmpot *msm) {
  const int nu = INTERP_PARAMS[msm->interp].nu;
  const int omega = INTERP_PARAMS[msm->interp].omega;
  const int split = msm->split;
  int level, maxlevels;
  int err = 0;

  const float a = msm->a;
  const float hx = msm->hx;
  const float hy = msm->hy;
  const float hz = msm->hz;

  /* maximum extent of epotmap */
  float xm1 = msm->xm0 + msm->dx * (msm->mx - 1);
  float ym1 = msm->ym0 + msm->dy * (msm->my - 1);
  float zm1 = msm->zm0 + msm->dz * (msm->mz - 1);

  /* smallest possible extent of finest spaced MSM lattice */
  float xlo = (msm->xmin < msm->xm0 ? msm->xmin : msm->xm0);
  float ylo = (msm->ymin < msm->ym0 ? msm->ymin : msm->ym0);
  float zlo = (msm->zmin < msm->zm0 ? msm->zmin : msm->zm0);
  float xhi = (msm->xmax > xm1 ? msm->xmax : xm1);
  float yhi = (msm->ymax > ym1 ? msm->ymax : ym1);
  float zhi = (msm->zmax > zm1 ? msm->zmax : zm1);

  /* indexes for MSM lattice, measured from (xm0,ym0,zm0) in units of h
   * and widened by nu points below and nu+1 points above */
  long ia = ((long) floorf((xlo - msm->xm0) / hx)) - nu;
  long ja = ((long) floorf((ylo - msm->ym0) / hy)) - nu;
  long ka = ((long) floorf((zlo - msm->zm0) / hz)) - nu;
  long ib = ((long) floorf((xhi - msm->xm0) / hx)) + 1 + nu;
  long jb = ((long) floorf((yhi - msm->ym0) / hy)) + 1 + nu;
  long kb = ((long) floorf((zhi - msm->zm0) / hz)) + 1 + nu;
  long ni = ib - ia + 1;
  long nj = jb - ja + 1;
  long nk = kb - ka + 1;

  /* coarsening stops once a level has at most lastnelems points */
  long omega3 = omega * omega * omega;
  long nhalf = (long) sqrtf(ni * nj * nk);
  long lastnelems = (nhalf > omega3 ? nhalf : omega3);
  long nelems, n;
  long i, j, k;

  MsmpotLattice *p = NULL;
  float scaling;

  /* upper bound on the number of levels: two more than floor(log2) of the
   * largest finest-level dimension */
  n = ni;
  if (n < nj) n = nj;
  if (n < nk) n = nk;
  for (maxlevels = 1;  n > 0;  n >>= 1)  maxlevels++;
  if (msm->maxlevels < maxlevels) {
    /* grow each pointer array through a temporary so that the existing
     * array is not lost when realloc() fails */
    MsmpotLattice **t;
    t = (MsmpotLattice **) realloc(msm->qh, maxlevels*sizeof(MsmpotLattice *));
    if (NULL == t) return ERROR(MSMPOT_ERROR_ALLOC);
    msm->qh = t;
    t = (MsmpotLattice **) realloc(msm->eh, maxlevels*sizeof(MsmpotLattice *));
    if (NULL == t) return ERROR(MSMPOT_ERROR_ALLOC);
    msm->eh = t;
    t = (MsmpotLattice **) realloc(msm->gc, maxlevels*sizeof(MsmpotLattice *));
    if (NULL == t) return ERROR(MSMPOT_ERROR_ALLOC);
    msm->gc = t;
    /* create lattice objects only for the newly added levels */
    for (level = msm->maxlevels;  level < maxlevels;  level++) {
      msm->qh[level] = Msmpot_lattice_create();
      if (NULL == msm->qh[level]) return ERROR(MSMPOT_ERROR_ALLOC);
      msm->eh[level] = Msmpot_lattice_create();
      if (NULL == msm->eh[level]) return ERROR(MSMPOT_ERROR_ALLOC);
      msm->gc[level] = Msmpot_lattice_create();
      if (NULL == msm->gc[level]) return ERROR(MSMPOT_ERROR_ALLOC);
    }
    msm->maxlevels = maxlevels;
  }

  /* size the qh and eh lattices from finest (level 0) to coarsest */
  level = 0;
  do {
    err = Msmpot_lattice_setup(msm->qh[level], ia, ib, ja, jb, ka, kb);
    if (err) return ERROR(err);
    err = Msmpot_lattice_setup(msm->eh[level], ia, ib, ja, jb, ka, kb);
    if (err) return ERROR(err);
    nelems = ni * nj * nk;
    /* next coarser level: halve the index range, rounding away from zero
     * for positive upper / negative lower bounds, then pad by nu */
    ia = -((-ia+1)/2) - nu;
    ja = -((-ja+1)/2) - nu;
    ka = -((-ka+1)/2) - nu;
    ib = (ib+1)/2 + nu;
    jb = (jb+1)/2 + nu;
    kb = (kb+1)/2 + nu;
    ni = ib - ia + 1;
    nj = jb - ja + 1;
    nk = kb - ka + 1;
    level++;
    /* the level bound keeps qh[]/eh[] accesses inside the allocated arrays
     * even if the size criterion has not been met; the last level set up
     * then simply serves as the coarsest level */
  } while (nelems > lastnelems && level < msm->maxlevels);
  msm->nlevels = level;

  /* ellipsoid axes for lattice cutoff weights */
  ni = (long) ceilf(2*a/hx) - 1;
  nj = (long) ceilf(2*a/hy) - 1;
  nk = (long) ceilf(2*a/hz) - 1;
  scaling = 1;
  for (level = 0;  level < msm->nlevels - 1;  level++) {
    p = msm->gc[level];
    err = Msmpot_lattice_setup(p, -ni, ni, -nj, nj, -nk, nk);
    if (err) return ERROR(err);
    for (k = -nk;  k <= nk;  k++) {
      for (j = -nj;  j <= nj;  j++) {
        for (i = -ni;  i <= ni;  i++) {
          float s, t, gs, gt, g;
          /* s = (r/a)^2 and t = (r/(2a))^2 for the offset (i*hx,j*hy,k*hz) */
          s = ( (i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz) ) / (a*a);
          t = 0.25f * s;
          if (t >= 1) {
            /* at or beyond distance 2a the weight is zero */
            g = 0;
          }
          else if (s >= 1) {
            /* between a and 2a: gs is 1/sqrt(s), gt comes from SPOLY */
            gs = 1/sqrtf(s);
            SPOLY(&gt, t, split);
            g = scaling * (gs - 0.5f * gt) / a;
          }
          else {
            /* inside a: both terms come from SPOLY */
            SPOLY(&gs, s, split);
            SPOLY(&gt, t, split);
            g = scaling * (gs - 0.5f * gt) / a;
          }
          RANGE_CHECK(p, i, j, k);
          *ELEM(p, i, j, k) = g;
        }
      }
    } /* end loops over k-j-i */
    scaling *= 0.5f;  /* weights are halved for each coarser level */
  } /* end loop over levels */

  /* calculate coarsest level weights, ellipsoid axes are length of lattice
   * (level is the last level set up above, i.e. nlevels - 1) */
  ni = (msm->qh[level])->ib - (msm->qh[level])->ia;
  nj = (msm->qh[level])->jb - (msm->qh[level])->ja;
  nk = (msm->qh[level])->kb - (msm->qh[level])->ka;
  p = msm->gc[level];
  err = Msmpot_lattice_setup(p, -ni, ni, -nj, nj, -nk, nk);
  /* do not write weights into a lattice whose setup failed */
  if (err) return ERROR(err);
  for (k = -nk;  k <= nk;  k++) {
    for (j = -nj;  j <= nj;  j++) {
      for (i = -ni;  i <= ni;  i++) {
        float s, gs;
        s = ( (i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz) ) / (a*a);
        if (s >= 1) {
          gs = 1/sqrtf(s);
        }
        else {
          SPOLY(&gs, s, split);
        }
        RANGE_CHECK(p, i, j, k);
        /* no gt term is subtracted at the coarsest level */
        *ELEM(p, i, j, k) = scaling * gs/a;
      }
    }
  } /* end loops over k-j-i for coarsest level weights */

  return OK;
}