void PairDPDOMP::compute(int eflag, int vflag)
{
  ev_init(eflag,vflag);

  const int nall = atom->nlocal + atom->nghost;
  const int inum = list->inum;

  // number of threads has changed. reallocate pool of pRNGs
  if (nthreads != comm->nthreads) {
    if (random_thr) {
      for (int i=1; i < nthreads; ++i)
        delete random_thr[i];

      delete[] random_thr;
    }

    nthreads = comm->nthreads;
    random_thr = new RanMars*[nthreads];
    for (int i=1; i < nthreads; ++i)
      random_thr[i] = NULL;

    // to ensure full compatibility with the serial DPD style
    // we use the serial random number generator instance for thread 0
    random_thr[0] = random;
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // generate a random number generator instance for
    // all threads != 0. make sure we use unique seeds.
    if ((tid > 0) && (random_thr[tid] == NULL))
      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
                                    + comm->nprocs*tid);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
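// A note on the seeding scheme above (illustrative, not part of the
// original sources): with me in [0,nprocs) and tid in [0,nthreads),
// the expression seed + comm->me + comm->nprocs*tid is a mixed-radix
// encoding of the (rank,thread) pair, so every pRNG in the pool gets
// a distinct seed; the common "seed" offset cancels in any comparison.
// A minimal self-contained sketch of that argument, assuming nothing
// beyond standard C++:
//
//   #include <cassert>
//   void check_unique_seeds(int nprocs, int nthreads) {
//     for (int me = 0; me < nprocs; ++me)
//       for (int tid = 0; tid < nthreads; ++tid)
//         for (int me2 = 0; me2 < nprocs; ++me2)
//           for (int tid2 = 0; tid2 < nthreads; ++tid2)
//             if (me != me2 || tid != tid2)
//               assert(me + nprocs*tid != me2 + nprocs*tid2);
//   }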
void AngleDipoleOMP::compute(int eflag, int vflag)
{
  ev_init(eflag,vflag);

  if (!force->newton_bond)
    error->all(FLERR,"'newton' flag for bonded interactions must be 'on'");

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = neighbor->nanglelist;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (inum > 0) {
      if (evflag) eval<1>(ifrom, ito, thr);
      else eval<0>(ifrom, ito, thr);
    }

    thr->timer(Timer::BOND);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
               const int nthreads, FILE *scr, FILE *log)
{
  const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
  double time_min, time_max, time_avg, time_total, time_std;

  time_min = 1.0e100;
  time_max = -1.0e100;
  time_total = time_avg = time_std = 0.0;

  for (int i=0; i < nthreads; ++i) {
    ThrData *thr = fix->get_thr(i);
    double tmp = thr->get_time(which);
    time_min = MIN(time_min,tmp);
    time_max = MAX(time_max,tmp);
    time_avg += tmp;
    time_std += tmp*tmp;
    time_total += thr->get_time(Timer::ALL);
  }

  time_avg /= nthreads;
  time_std /= nthreads;
  time_total /= nthreads;

  if (time_avg > 1.0e-10)
    time_std = sqrt(time_std/time_avg - time_avg)*100.0;
  else
    time_std = 0.0;

  if (scr) fprintf(scr,fmt,label,time_min,time_avg,time_max,
                   time_std,time_avg/time_total*100.0);
  if (log) fprintf(log,fmt,label,time_min,time_avg,time_max,
                   time_std,time_avg/time_total*100.0);
}
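// For reference: after the loop above, time_avg holds the per-thread
// mean E[t] and time_std holds the second moment E[t^2] (each divided
// by nthreads), so the printed spread is
//
//   sqrt(E[t^2]/E[t] - E[t]) * 100 = sqrt(Var(t)/E[t]) * 100
//
// i.e. the thread-to-thread variance normalized by the mean, in
// percent. This is the "%varavg" column of the timing tables.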
void PairEDIPOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = vflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else eval<0,0,0>(ifrom, ito, thr);

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PairLJCutTIP4PLongSoftOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;

  const int nlocal = atom->nlocal;
  const int nall = nlocal + atom->nghost;

  // reallocate hneigh_thr & newsite_thr if necessary
  // initialize hneigh_thr[0] to -1 on steps when reneighboring occurred
  // initialize hneigh_thr[2] to 0 every step

  if (atom->nmax > nmax) {
    nmax = atom->nmax;
    memory->destroy(hneigh_thr);
    memory->create(hneigh_thr,nmax,"pair:hneigh_thr");
    memory->destroy(newsite_thr);
    memory->create(newsite_thr,nmax,"pair:newsite_thr");
  }

  int i;
  // tag entire list as completely invalid after a neighbor
  // list update, since that can change the order of atoms.
  if (neighbor->ago == 0)
    for (i = 0; i < nall; i++) hneigh_thr[i].a = -1;

  // indicate that the coordinates for the M point need to
  // be updated. this needs to be done in every step.
  for (i = 0; i < nall; i++) hneigh_thr[i].t = 0;

  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (vflag) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (vflag) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else eval<0,0,0>(ifrom, ito, thr);

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  if (!hbcount_thr) {
    hbcount_thr = new double[nthreads];
    hbeng_thr = new double[nthreads];
  }

  for (int i=0; i < nthreads; ++i) {
    hbcount_thr[i] = 0.0;
    hbeng_thr[i] = 0.0;
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region

  // reduce per thread hbond data
  if (eflag_global) {
    pvector[0] = 0.0;
    pvector[1] = 0.0;
    for (int i=0; i < nthreads; ++i) {
      pvector[0] += hbcount_thr[i];
      pvector[1] += hbeng_thr[i];
    }
  }
}
void PairADPOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;

  const int nlocal = atom->nlocal;
  const int nall = nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  // grow energy and fp arrays if necessary
  // need to be atom->nmax in length

  if (atom->nmax > nmax) {
    memory->destroy(rho);
    memory->destroy(fp);
    memory->destroy(mu);
    memory->destroy(lambda);
    nmax = atom->nmax;
    memory->create(rho,nthreads*nmax,"pair:rho");
    memory->create(fp,nmax,"pair:fp");
    memory->create(mu,nthreads*nmax,3,"pair:mu");
    memory->create(lambda,nthreads*nmax,6,"pair:lambda");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (force->newton_pair)
      thr->init_adp(nall, rho, mu, lambda);
    else
      thr->init_adp(nlocal, rho, mu, lambda);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PairDPDTstatOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  // allocate the pool of pRNG pointers once and NULL-initialize it,
  // so each per-thread generator is only created a single time below
  if (!random_thr) {
    random_thr = new RanMars*[nthreads];
    for (int i=1; i < nthreads; ++i)
      random_thr[i] = NULL;
  }

  // to ensure full compatibility with the serial DPD style
  // we use the serial random number generator instance for thread 0
  random_thr[0] = random;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // generate a random number generator instance for
    // all threads != 0. make sure we use unique seeds.
    if ((tid > 0) && (random_thr[tid] == NULL))
      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
                                    + comm->nprocs*tid);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
PPPMOMP::~PPPMOMP()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->init_pppm(-order,memory);
  }
}
void PairPeriLPSOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  // grow s0_new and theta arrays if necessary

  if (atom->nmax > nmax) {
    memory->destroy(s0_new);
    memory->destroy(theta);
    nmax = atom->nmax;
    memory->create(s0_new,nmax,"pair:s0_new");
    memory->create(theta,nmax,"pair:theta");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PairMEAMSplineOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = vflag_global =
           eflag_atom = vflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = listfull->inum;

  if (listhalf->inum != inum)
    error->warning(FLERR,"Inconsistent half and full neighbor list");

  // grow per-atom array if necessary

  if (atom->nmax > nmax) {
    memory->destroy(Uprime_values);
    nmax = atom->nmax;
    memory->create(Uprime_values,nmax*nthreads,"pair:Uprime");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
    thr->init_eam(nall,Uprime_values);

    if (evflag) {
      if (eflag) {
        eval<1,1>(ifrom, ito, thr);
      } else {
        eval<1,0>(ifrom, ito, thr);
      }
    } else {
      eval<0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PPPMCGOMP::compute(int eflag, int vflag)
{
  PPPMCG::compute(eflag,vflag);

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PairLJCutCoulMSMOMP::compute(int eflag, int vflag)
{
  if (force->kspace->scalar_pressure_flag)
    error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' "
               "with OMP MSM Pair styles");

  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PairGaussOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;
  double occ = 0.0;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:occ)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) occ = eval<1,1,1>(ifrom, ito, thr);
        else occ = eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) occ = eval<1,0,1>(ifrom, ito, thr);
        else occ = eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) occ = eval<0,0,1>(ifrom, ito, thr);
      else occ = eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region

  if (eflag_global) pvector[0] = occ;
}
void DihedralHarmonicOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = neighbor->ndihedrallist;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (inum > 0) {
      if (evflag) {
        if (eflag) {
          if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
          else eval<1,1,0>(ifrom, ito, thr);
        } else {
          if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
          else eval<1,0,0>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
        else eval<0,0,0>(ifrom, ito, thr);
      }
    }

    thr->timer(Timer::BOND);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void Finish::end(int flag)
{
  int i,m,nneigh,nneighfull;
  int histo[10];
  int minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
  double time,tmp,ave,max,min;
  double time_loop,time_other,cpu_loop;

  int me,nprocs;
  MPI_Comm_rank(world,&me);
  MPI_Comm_size(world,&nprocs);

  const int nthreads = comm->nthreads;

  // recompute natoms in case atoms have been lost

  bigint nblocal = atom->nlocal;
  MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);

  // choose flavors of statistical output
  // flag determines caller
  // flag = 0 = just loop summary
  // flag = 1 = dynamics or minimization
  // flag = 2 = PRD
  // flag = 3 = TAD
  // turn off neighflag for Kspace partition of verlet/split integrator

  minflag = prdflag = tadflag = timeflag = fftflag = histoflag = neighflag = 0;
  time_loop = cpu_loop = time_other = 0.0;

  if (flag == 1) {
    if (update->whichflag == 2) minflag = 1;
    timeflag = histoflag = 1;
    neighflag = 1;
    if (update->whichflag == 1 &&
        strncmp(update->integrate_style,"verlet/split",12) == 0 &&
        universe->iworld == 1) neighflag = 0;
    if (force->kspace && force->kspace_match("pppm",0)
        && force->kspace->fftbench) fftflag = 1;
  }
  if (flag == 2) prdflag = timeflag = histoflag = neighflag = 1;
  if (flag == 3) tadflag = histoflag = neighflag = 1;

  // loop stats

  if (timer->has_loop()) {

    // overall loop time

    time_loop = timer->get_wall(Timer::TOTAL);
    cpu_loop = timer->get_cpu(Timer::TOTAL);
    MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time_loop = tmp/nprocs;
    MPI_Allreduce(&cpu_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    cpu_loop = tmp/nprocs;
    if (time_loop > 0.0) cpu_loop = cpu_loop/time_loop*100.0;

    if (me == 0) {
      int ntasks = nprocs * nthreads;
      const char fmt1[] = "Loop time of %g on %d procs "
        "for %d steps with " BIGINT_FORMAT " atoms\n\n";
      if (screen) fprintf(screen,fmt1,time_loop,ntasks,update->nsteps,
                          atom->natoms);
      if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps,
                           atom->natoms);

      // Gromacs/NAMD-style performance metric for suitable unit settings

      if (timeflag && !minflag && !prdflag && !tadflag &&
          (update->nsteps > 0) && (update->dt != 0.0) &&
          ((strcmp(update->unit_style,"lj") == 0) ||
           (strcmp(update->unit_style,"metal") == 0) ||
           (strcmp(update->unit_style,"micro") == 0) ||
           (strcmp(update->unit_style,"nano") == 0) ||
           (strcmp(update->unit_style,"electron") == 0) ||
           (strcmp(update->unit_style,"real") == 0))) {
        double one_fs = force->femtosecond;
        double t_step = ((double) time_loop) / ((double) update->nsteps);
        double step_t = 1.0/t_step;

        if (strcmp(update->unit_style,"lj") == 0) {
          double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs;
          const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n";
          if (screen) fprintf(screen,perf,tau_day,step_t);
          if (logfile) fprintf(logfile,perf,tau_day,step_t);
        } else {
          double hrs_ns = t_step / update->dt * 1000000.0 * one_fs / 3600.0;
          double ns_day = 24.0*3600.0 / t_step * update->dt / one_fs/1000000.0;
          const char perf[] =
            "Performance: %.3f ns/day, %.3f hours/ns, %.3f timesteps/s\n";
          if (screen) fprintf(screen,perf,ns_day,hrs_ns,step_t);
          if (logfile) fprintf(logfile,perf,ns_day,hrs_ns,step_t);
        }
      }

      // CPU use on MPI tasks and OpenMP threads

#ifdef LMP_USER_OMP
      const char fmt2[] =
        "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
      if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads);
      if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads);
#else
      if (lmp->kokkos) {
        const char fmt2[] =
          "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
        if (screen)
          fprintf(screen,fmt2,cpu_loop,nprocs,lmp->kokkos->num_threads);
        if (logfile)
          fprintf(logfile,fmt2,cpu_loop,nprocs,lmp->kokkos->num_threads);
      } else {
        const char fmt2[] =
          "%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n";
        if (screen) fprintf(screen,fmt2,cpu_loop,nprocs);
        if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs);
      }
#endif
    }
  }

  // avoid division by zero for very short runs

  if (time_loop == 0.0) time_loop = 1.0;
  if (cpu_loop == 0.0) cpu_loop = 100.0;

  // get "Other" wall time for later use

  if (timer->has_normal())
    time_other = timer->get_wall(Timer::TOTAL) - timer->get_wall(Timer::ALL);

  // minimization stats

  if (minflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    if (me == 0) {
      if (screen) {
        fprintf(screen,"Minimization stats:\n");
        fprintf(screen,"  Stopping criterion = %s\n",
                update->minimize->stopstr);
        fprintf(screen,"  Energy initial, next-to-last, final = \n"
                "    %18.12g %18.12g %18.12g\n",
                update->minimize->einitial,update->minimize->eprevious,
                update->minimize->efinal);
        fprintf(screen,"  Force two-norm initial, final = %g %g\n",
                update->minimize->fnorm2_init,update->minimize->fnorm2_final);
        fprintf(screen,"  Force max component initial, final = %g %g\n",
                update->minimize->fnorminf_init,
                update->minimize->fnorminf_final);
        fprintf(screen,"  Final line search alpha, max atom move = %g %g\n",
                update->minimize->alpha_final,
                update->minimize->alpha_final*
                update->minimize->fnorminf_final);
        fprintf(screen,"  Iterations, force evaluations = %d %d\n",
                update->minimize->niter,update->minimize->neval);
      }
      if (logfile) {
        fprintf(logfile,"Minimization stats:\n");
        fprintf(logfile,"  Stopping criterion = %s\n",
                update->minimize->stopstr);
        fprintf(logfile,"  Energy initial, next-to-last, final = \n"
                "    %18.12g %18.12g %18.12g\n",
                update->minimize->einitial,update->minimize->eprevious,
                update->minimize->efinal);
        fprintf(logfile,"  Force two-norm initial, final = %g %g\n",
                update->minimize->fnorm2_init,update->minimize->fnorm2_final);
        fprintf(logfile,"  Force max component initial, final = %g %g\n",
                update->minimize->fnorminf_init,
                update->minimize->fnorminf_final);
        fprintf(logfile,"  Final line search alpha, max atom move = %g %g\n",
                update->minimize->alpha_final,
                update->minimize->alpha_final*
                update->minimize->fnorminf_final);
        fprintf(logfile,"  Iterations, force evaluations = %d %d\n",
                update->minimize->niter,update->minimize->neval);
      }
    }
  }

  // PRD stats using PAIR,BOND,KSPACE for dephase,dynamics,quench

  if (prdflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    if (screen) fprintf(screen,"PRD stats:\n");
    if (logfile) fprintf(logfile,"PRD stats:\n");

    time = timer->get_wall(Timer::DEPHASE);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Dephase time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Dephase time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::DYNAMICS);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Dynamics time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Dynamics time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::QUENCH);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Quench time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Quench time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::REPCOMM);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Comm time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Comm time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::REPOUT);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Output time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Output time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = time_other;
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      // XXXX: replica comm, replica output
      if (screen) fprintf(screen,"  Other time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Other time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }
  }

  // TAD stats using PAIR,BOND,KSPACE for neb,dynamics,quench

  if (tadflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    if (screen) fprintf(screen,"TAD stats:\n");
    if (logfile) fprintf(logfile,"TAD stats:\n");

    time = timer->get_wall(Timer::NEB);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  NEB time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  NEB time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::DYNAMICS);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Dynamics time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Dynamics time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::QUENCH);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Quench time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Quench time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::REPCOMM);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Comm time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Comm time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::REPOUT);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Output time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Output time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }

    time = time_other;
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen) fprintf(screen,"  Other time (%%) = %g (%g)\n",
                          time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,"  Other time (%%) = %g (%g)\n",
                           time,time/time_loop*100.0);
    }
  }

  if (timeflag && timer->has_normal()) {

    if (timer->has_full()) {
      const char hdr[] = "\nMPI task timing breakdown:\n"
        "Section |  min time  |  avg time  |  max time  |%varavg| %CPU | %total\n"
        "-----------------------------------------------------------------------\n";
      if (me == 0) {
        if (screen)  fputs(hdr,screen);
        if (logfile) fputs(hdr,logfile);
      }
    } else {
      const char hdr[] = "\nMPI task timing breakdown:\n"
        "Section |  min time  |  avg time  |  max time  |%varavg| %total\n"
        "---------------------------------------------------------------\n";
      if (me == 0) {
        if (screen)  fputs(hdr,screen);
        if (logfile) fputs(hdr,logfile);
      }
    }

    mpi_timings("Pair",timer,Timer::PAIR,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    if (atom->molecular)
      mpi_timings("Bond",timer,Timer::BOND,world,nprocs,
                  nthreads,me,time_loop,screen,logfile);
    if (force->kspace)
      mpi_timings("Kspace",timer,Timer::KSPACE,world,nprocs,
                  nthreads,me,time_loop,screen,logfile);
    mpi_timings("Neigh",timer,Timer::NEIGH,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    mpi_timings("Comm",timer,Timer::COMM,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    mpi_timings("Output",timer,Timer::OUTPUT,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    mpi_timings("Modify",timer,Timer::MODIFY,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    if (timer->has_sync())
      mpi_timings("Sync",timer,Timer::SYNC,world,nprocs,
                  nthreads,me,time_loop,screen,logfile);

    time = time_other;
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;

    const char *fmt;
    if (timer->has_full())
      fmt = "Other   |            |%- 12.4g|            |       |      |%6.2f\n";
    else
      fmt = "Other   |            |%- 12.4g|            |       |%6.2f\n";
    if (me == 0) {
      if (screen) fprintf(screen,fmt,time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,fmt,time,time/time_loop*100.0);
    }
  }

#ifdef LMP_USER_OMP
  const char thr_hdr_fmt[] =
    "\nThread timing breakdown (MPI rank %d):\nTotal threaded time %.4g / %.1f%%\n";
  const char thr_header[] =
    "Section |  min time  |  avg time  |  max time  |%varavg| %total\n"
    "---------------------------------------------------------------\n";

  int ifix = modify->find_fix("package_omp");

  // print thread breakdown only with full timer detail

  if ((ifix >= 0) && timer->has_full() && me == 0) {
    double thr_total = 0.0;
    ThrData *td;
    FixOMP *fixomp = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
    for (i=0; i < nthreads; ++i) {
      td = fixomp->get_thr(i);
      thr_total += td->get_time(Timer::ALL);
    }
    thr_total /= (double) nthreads;

    if (thr_total > 0.0) {
      if (screen) {
        fprintf(screen,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
        fputs(thr_header,screen);
      }
      if (logfile) {
        fprintf(logfile,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
        fputs(thr_header,logfile);
      }

      omp_times(fixomp,"Pair",Timer::PAIR,nthreads,screen,logfile);

      if (atom->molecular)
        omp_times(fixomp,"Bond",Timer::BOND,nthreads,screen,logfile);

      if (force->kspace)
        omp_times(fixomp,"Kspace",Timer::KSPACE,nthreads,screen,logfile);

      omp_times(fixomp,"Neigh",Timer::NEIGH,nthreads,screen,logfile);
      omp_times(fixomp,"Reduce",Timer::COMM,nthreads,screen,logfile);
    }
  }
#endif

  if (lmp->kokkos && lmp->kokkos->ngpu > 0)
    if (const char* env_clb = getenv("CUDA_LAUNCH_BLOCKING"))
      if (!(strcmp(env_clb,"1") == 0)) {
        error->warning(FLERR,"Timing breakdown may not be accurate since "
                       "GPU/CPU overlap is enabled. Using 'export "
                       "CUDA_LAUNCH_BLOCKING=1' will give an accurate timing "
                       "breakdown but will reduce performance");
      }

  // FFT timing statistics
  // time3d,time1d = total time during run for 3d and 1d FFTs
  // loop on timing() until nsample FFTs require at least 1.0 CPU sec
  // time_kspace may be 0.0 if another partition is doing Kspace

  if (fftflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    int nsteps = update->nsteps;

    double time3d;
    int nsample = 1;
    int nfft = force->kspace->timing_3d(nsample,time3d);
    while (time3d < 1.0) {
      nsample *= 2;
      nfft = force->kspace->timing_3d(nsample,time3d);
    }

    time3d = nsteps * time3d / nsample;
    MPI_Allreduce(&time3d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time3d = tmp/nprocs;

    double time1d;
    nsample = 1;
    nfft = force->kspace->timing_1d(nsample,time1d);
    while (time1d < 1.0) {
      nsample *= 2;
      nfft = force->kspace->timing_1d(nsample,time1d);
    }

    time1d = nsteps * time1d / nsample;
    MPI_Allreduce(&time1d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time1d = tmp/nprocs;

    double time_kspace = timer->get_wall(Timer::KSPACE);
    MPI_Allreduce(&time_kspace,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time_kspace = tmp/nprocs;

    double ntotal = 1.0 * force->kspace->nx_pppm *
      force->kspace->ny_pppm * force->kspace->nz_pppm;
    double nflops = 5.0 * ntotal * log(ntotal);

    double fraction,flop3,flop1;
    if (nsteps) {
      if (time_kspace) fraction = time3d/time_kspace*100.0;
      else fraction = 0.0;
      flop3 = nfft*nflops/1.0e9/(time3d/nsteps);
      flop1 = nfft*nflops/1.0e9/(time1d/nsteps);
    } else fraction = flop3 = flop1 = 0.0;

    if (me == 0) {
      if (screen) {
        fprintf(screen,"FFT time (%% of Kspace) = %g (%g)\n",time3d,fraction);
        fprintf(screen,"FFT Gflops 3d (1d only) = %g %g\n",flop3,flop1);
      }
      if (logfile) {
        fprintf(logfile,"FFT time (%% of Kspace) = %g (%g)\n",time3d,fraction);
        fprintf(logfile,"FFT Gflops 3d (1d only) = %g %g\n",flop3,flop1);
      }
    }
  }

  if (histoflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    tmp = atom->nlocal;
    stats(1,&tmp,&ave,&max,&min,10,histo);
    if (me == 0) {
      if (screen) {
        fprintf(screen,"Nlocal: %g ave %g max %g min\n",ave,max,min);
        fprintf(screen,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
        fprintf(screen,"\n");
      }
      if (logfile) {
        fprintf(logfile,"Nlocal: %g ave %g max %g min\n",ave,max,min);
        fprintf(logfile,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
        fprintf(logfile,"\n");
      }
    }

    tmp = atom->nghost;
    stats(1,&tmp,&ave,&max,&min,10,histo);
    if (me == 0) {
      if (screen) {
        fprintf(screen,"Nghost: %g ave %g max %g min\n",ave,max,min);
        fprintf(screen,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
        fprintf(screen,"\n");
      }
      if (logfile) {
        fprintf(logfile,"Nghost: %g ave %g max %g min\n",ave,max,min);
        fprintf(logfile,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
        fprintf(logfile,"\n");
      }
    }

    // find a non-skip neighbor list containing half pairwise interactions
    // count neighbors in that list for stats purposes
    // allow it to be Kokkos neigh list as well

    for (m = 0; m < neighbor->old_nrequest; m++) {
      if ((neighbor->old_requests[m]->half ||
           neighbor->old_requests[m]->gran ||
           neighbor->old_requests[m]->respaouter ||
           neighbor->old_requests[m]->half_from_full) &&
          neighbor->old_requests[m]->skip == 0 &&
          neighbor->lists[m] && neighbor->lists[m]->numneigh) {
        if (!neighbor->lists[m] && lmp->kokkos &&
            lmp->kokkos->neigh_list_kokkos(m)) break;
        else break;
      }
    }

    nneigh = 0;
    if (m < neighbor->old_nrequest) {
      if (neighbor->lists[m]) {
        int inum = neighbor->lists[m]->inum;
        int *ilist = neighbor->lists[m]->ilist;
        int *numneigh = neighbor->lists[m]->numneigh;
        for (i = 0; i < inum; i++)
          nneigh += numneigh[ilist[i]];
      } else if (lmp->kokkos) nneigh = lmp->kokkos->neigh_count(m);
    }

    tmp = nneigh;
    stats(1,&tmp,&ave,&max,&min,10,histo);
    if (me == 0) {
      if (screen) {
        fprintf(screen,"Neighs: %g ave %g max %g min\n",ave,max,min);
        fprintf(screen,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
        fprintf(screen,"\n");
      }
      if (logfile) {
        fprintf(logfile,"Neighs: %g ave %g max %g min\n",ave,max,min);
        fprintf(logfile,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
        fprintf(logfile,"\n");
      }
    }

    // find a non-skip neighbor list containing full pairwise interactions
    // count neighbors in that list for stats purposes
    // allow it to be Kokkos neigh list as well

    for (m = 0; m < neighbor->old_nrequest; m++) {
      if (neighbor->old_requests[m]->full &&
          neighbor->old_requests[m]->skip == 0) {
        if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break;
        else break;
      }
    }

    nneighfull = 0;
    if (m < neighbor->old_nrequest) {
      if (neighbor->lists[m] && neighbor->lists[m]->numneigh) {
        int inum = neighbor->lists[m]->inum;
        int *ilist = neighbor->lists[m]->ilist;
        int *numneigh = neighbor->lists[m]->numneigh;
        for (i = 0; i < inum; i++)
          nneighfull += numneigh[ilist[i]];
      } else if (!neighbor->lists[m] && lmp->kokkos)
        nneighfull = lmp->kokkos->neigh_count(m);

      tmp = nneighfull;
      stats(1,&tmp,&ave,&max,&min,10,histo);
      if (me == 0) {
        if (screen) {
          fprintf(screen,"FullNghs: %g ave %g max %g min\n",ave,max,min);
          fprintf(screen,"Histogram:");
          for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
          fprintf(screen,"\n");
        }
        if (logfile) {
          fprintf(logfile,"FullNghs: %g ave %g max %g min\n",ave,max,min);
          fprintf(logfile,"Histogram:");
          for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
          fprintf(logfile,"\n");
        }
      }
    }
  }

  if (neighflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    tmp = MAX(nneigh,nneighfull);
    double nall;
    MPI_Allreduce(&tmp,&nall,1,MPI_DOUBLE,MPI_SUM,world);

    int nspec;
    double nspec_all = 0;
    if (atom->molecular == 1) {
      int **nspecial = atom->nspecial;
      int nlocal = atom->nlocal;
      nspec = 0;
      for (i = 0; i < nlocal; i++) nspec += nspecial[i][2];
      tmp = nspec;
      MPI_Allreduce(&tmp,&nspec_all,1,MPI_DOUBLE,MPI_SUM,world);
    } else if (atom->molecular == 2) {
      Molecule **onemols = atom->avec->onemols;
      int *molindex = atom->molindex;
      int *molatom = atom->molatom;
      int nlocal = atom->nlocal;
      int imol,iatom;
      nspec = 0;
      for (i = 0; i < nlocal; i++) {
        if (molindex[i] < 0) continue;
        imol = molindex[i];
        iatom = molatom[i];
        nspec += onemols[imol]->nspecial[iatom][2];
      }
      tmp = nspec;
      MPI_Allreduce(&tmp,&nspec_all,1,MPI_DOUBLE,MPI_SUM,world);
    }

    if (me == 0) {
      if (screen) {
        if (nall < 2.0e9)
          fprintf(screen,
                  "Total # of neighbors = %d\n",static_cast<int> (nall));
        else fprintf(screen,"Total # of neighbors = %g\n",nall);
        if (atom->natoms > 0)
          fprintf(screen,"Ave neighs/atom = %g\n",nall/atom->natoms);
        if (atom->molecular && atom->natoms > 0)
          fprintf(screen,"Ave special neighs/atom = %g\n",
                  nspec_all/atom->natoms);
        fprintf(screen,"Neighbor list builds = " BIGINT_FORMAT "\n",
                neighbor->ncalls);
        if (neighbor->dist_check)
          fprintf(screen,"Dangerous builds = " BIGINT_FORMAT "\n",
                  neighbor->ndanger);
        else fprintf(screen,"Dangerous builds not checked\n");
      }
      if (logfile) {
        if (nall < 2.0e9)
          fprintf(logfile,
                  "Total # of neighbors = %d\n",static_cast<int> (nall));
        else fprintf(logfile,"Total # of neighbors = %g\n",nall);
        if (atom->natoms > 0)
          fprintf(logfile,"Ave neighs/atom = %g\n",nall/atom->natoms);
        if (atom->molecular && atom->natoms > 0)
          fprintf(logfile,"Ave special neighs/atom = %g\n",
                  nspec_all/atom->natoms);
        fprintf(logfile,"Neighbor list builds = " BIGINT_FORMAT "\n",
                neighbor->ncalls);
        if (neighbor->dist_check)
          fprintf(logfile,"Dangerous builds = " BIGINT_FORMAT "\n",
                  neighbor->ndanger);
        else fprintf(logfile,"Dangerous builds not checked\n");
      }
    }
  }

  if (logfile) fflush(logfile);
}
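// For reference, the unit conversions behind the Gromacs/NAMD-style
// performance metric in Finish::end above: t_step is wall-clock seconds
// per timestep and update->dt/one_fs is the timestep length in
// femtoseconds, so
//
//   hours/ns = t_step / (dt/one_fs) * 1.0e6 / 3600.0
//   ns/day   = 24.0*3600.0 / t_step * (dt/one_fs) / 1.0e6
//
// which is exactly what hrs_ns and ns_day compute; the "lj" branch
// applies the same arithmetic with tau as the native time unit.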
void PPPMCGOMP::compute_gf_ad()
{
  const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;

  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];
  const double zprd_slab = zprd*slab_volfactor;
  const double unitkx = (MY_2PI/xprd);
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);
  const int numk = nxhi_fft - nxlo_fft + 1;
  const int numl = nyhi_fft - nylo_fft + 1;
  const int twoorder = 2*order;
  double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;

#if defined(_OPENMP)
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
#endif
  {
    double snx,sny,snz,sqk;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double numerator,denominator;
    int k,l,m,kper,lper,mper,n,nfrom,nto,tid;

    loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);

    for (n = nfrom; n < nto; ++n) {
      m = n / (numl*numk);
      l = (n - m*numl*numk) / numk;
      k = n - m*numl*numk - l*numk;
      m += nzlo_fft;
      l += nylo_fft;
      k += nxlo_fft;

      mper = m - nz_pppm*(2*m/nz_pppm);
      qz = unitkz*mper;
      snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
      sz = exp(-0.25*square(qz/g_ewald));
      argz = 0.5*qz*zprd_slab/nz_pppm;
      wz = powsinxx(argz,twoorder);

      lper = l - ny_pppm*(2*l/ny_pppm);
      qy = unitky*lper;
      sny = square(sin(0.5*qy*yprd/ny_pppm));
      sy = exp(-0.25*square(qy/g_ewald));
      argy = 0.5*qy*yprd/ny_pppm;
      wy = powsinxx(argy,twoorder);

      kper = k - nx_pppm*(2*k/nx_pppm);
      qx = unitkx*kper;
      snx = square(sin(0.5*qx*xprd/nx_pppm));
      sx = exp(-0.25*square(qx/g_ewald));
      argx = 0.5*qx*xprd/nx_pppm;
      wx = powsinxx(argx,twoorder);

      sqk = qx*qx + qy*qy + qz*qz;

      if (sqk != 0.0) {
        numerator = MY_4PI/sqk;
        denominator = gf_denom(snx,sny,snz);
        greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
      } else greensfn[n] = 0.0;

      sf0 += sf_precoeff1[n]*greensfn[n];
      sf1 += sf_precoeff2[n]*greensfn[n];
      sf2 += sf_precoeff3[n]*greensfn[n];
      sf3 += sf_precoeff4[n]*greensfn[n];
      sf4 += sf_precoeff5[n]*greensfn[n];
      sf5 += sf_precoeff6[n]*greensfn[n];
    }
    thr->timer(Timer::KSPACE);
  } // end of parallel region

  // compute the coefficients for the self-force correction

  double prex, prey, prez, tmp[6];
  prex = prey = prez = MY_PI/volume;
  prex *= nx_pppm/xprd;
  prey *= ny_pppm/yprd;
  prez *= nz_pppm/zprd_slab;
  tmp[0] = sf0 * prex;
  tmp[1] = sf1 * prex*2;
  tmp[2] = sf2 * prey;
  tmp[3] = sf3 * prey*2;
  tmp[4] = sf4 * prez;
  tmp[5] = sf5 * prez*2;

  // communicate values with other procs

  MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
}
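// For reference: the flat index n enumerates the local FFT brick in
// row-major order with k fastest, n = (m*numl + l)*numk + k, which is
// why the decomposition used in compute_gf_ad() and compute_gf_ik()
// recovers
//
//   m = n / (numl*numk)
//   l = (n - m*numl*numk) / numk
//   k = n - m*numl*numk - l*numk
//
// before each index is shifted by its lower FFT bound.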
void PPPMCGOMP::compute_gf_ik()
{
  const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;

  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];
  const double zprd_slab = zprd*slab_volfactor;
  const double unitkx = (MY_2PI/xprd);
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int numk = nxhi_fft - nxlo_fft + 1;
  const int numl = nyhi_fft - nylo_fft + 1;
  const int twoorder = 2*order;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    double snx,sny,snz;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double sum1,dot1,dot2;
    double numerator,denominator;
    double sqk;

    int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;

    loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);

    for (n = nfrom; n < nto; ++n) {
      m = n / (numl*numk);
      l = (n - m*numl*numk) / numk;
      k = n - m*numl*numk - l*numk;
      m += nzlo_fft;
      l += nylo_fft;
      k += nxlo_fft;

      mper = m - nz_pppm*(2*m/nz_pppm);
      snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));

      lper = l - ny_pppm*(2*l/ny_pppm);
      sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));

      kper = k - nx_pppm*(2*k/nx_pppm);
      snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));

      sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);

      if (sqk != 0.0) {
        numerator = 12.5663706/sqk;
        denominator = gf_denom(snx,sny,snz);
        sum1 = 0.0;

        for (nx = -nbx; nx <= nbx; nx++) {
          qx = unitkx*(kper+nx_pppm*nx);
          sx = exp(-0.25*square(qx/g_ewald));
          argx = 0.5*qx*xprd/nx_pppm;
          wx = powsinxx(argx,twoorder);

          for (ny = -nby; ny <= nby; ny++) {
            qy = unitky*(lper+ny_pppm*ny);
            sy = exp(-0.25*square(qy/g_ewald));
            argy = 0.5*qy*yprd/ny_pppm;
            wy = powsinxx(argy,twoorder);

            for (nz = -nbz; nz <= nbz; nz++) {
              qz = unitkz*(mper+nz_pppm*nz);
              sz = exp(-0.25*square(qz/g_ewald));
              argz = 0.5*qz*zprd_slab/nz_pppm;
              wz = powsinxx(argz,twoorder);

              dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
              dot2 = qx*qx+qy*qy+qz*qz;
              sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
            }
          }
        }
        greensfn[n] = numerator*sum1/denominator;
      } else greensfn[n] = 0.0;
    }
    thr->timer(Timer::KSPACE);
  } // end of parallel region
}
void PairCDEAMOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  // grow energy and fp arrays if necessary
  // need to be atom->nmax in length

  if (atom->nmax > nmax) {
    memory->destroy(rho);
    memory->destroy(rhoB);
    memory->destroy(D_values);
    memory->destroy(fp);
    nmax = atom->nmax;
    memory->create(rho,nthreads*nmax,"pair:rho");
    memory->create(rhoB,nthreads*nmax,"pair:rhoB");
    memory->create(D_values,nthreads*nmax,"pair:D_values");
    memory->create(fp,nmax,"pair:fp");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (force->newton_pair)
      thr->init_cdeam(nall, rho, rhoB, D_values);
    else
      thr->init_cdeam(atom->nlocal, rho, rhoB, D_values);

    switch (cdeamVersion) {

    case 1:
      if (evflag) {
        if (eflag) {
          if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr);
          else eval<1,1,0,1>(ifrom, ito, thr);
        } else {
          if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr);
          else eval<1,0,0,1>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr);
        else eval<0,0,0,1>(ifrom, ito, thr);
      }
      break;

    case 2:
      if (evflag) {
        if (eflag) {
          if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr);
          else eval<1,1,0,2>(ifrom, ito, thr);
        } else {
          if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr);
          else eval<1,0,0,2>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr);
        else eval<0,0,0,2>(ifrom, ito, thr);
      }
      break;

    default:
      {
#if defined(_OPENMP)
#pragma omp master
#endif
        error->all(FLERR,"unsupported eam/cd pair style variant");
      }
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;
  const int shearupdate = (update->setupflag) ? 0 : 1;

  // update rigid body info for owned & ghost atoms if using FixRigid masses
  // body[i] = which body atom I is in, -1 if none
  // mass_body = mass of each rigid body

  if (fix_rigid && neighbor->ago == 0) {
    int tmp;
    int *body = (int *) fix_rigid->extract("body",tmp);
    double *mass_body = (double *) fix_rigid->extract("masstotal",tmp);
    if (atom->nmax > nmax) {
      memory->destroy(mass_rigid);
      nmax = atom->nmax;
      memory->create(mass_rigid,nmax,"pair:mass_rigid");
    }
    int nlocal = atom->nlocal;
    for (int i = 0; i < nlocal; i++)
      if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]];
      else mass_rigid[i] = 0.0;
    comm->forward_comm_pair(this);
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (shearupdate) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (shearupdate) {
        if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr);
        else eval<0,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
        else eval<0,0,0>(ifrom, ito, thr);
      }
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PairBrownianPolyOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int inum = list->inum;

  // This section of code adjusts R0/RT0/RS0 if necessary due to changes
  // in the volume fraction as a result of fix deform or moving walls

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (int j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
        double wallhi[3], walllo[3];
        for (int j = 0; j < 3; j++) {
          wallhi[j] = domain->prd[j];
          walllo[j] = 0;
        }
        for (int m = 0; m < wallfix->nwall; m++) {
          int dim = wallfix->wallwhich[m] / 2;
          int side = wallfix->wallwhich[m] % 2;
          if (wallfix->xstyle[m] == VARIABLE) {
            wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
          } else wallcoord = wallfix->coord0[m];
          if (side == 0) walllo[dim] = wallcoord;
          else wallhi[dim] = wallcoord;
        }
        for (int j = 0; j < 3; j++)
          dims[j] = wallhi[j] - walllo[j];
      }
      double vol_T = dims[0]*dims[1]*dims[2];
      double vol_f = vol_P/vol_T;
      if (flaglog == 0) {
        R0 = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
        RT0 = 8*MY_PI*mu*cube(rad);
        //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
      } else {
        R0 = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
        RT0 = 8*MY_PI*mu*cube(rad)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
        //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
      }
    }

  // number of threads has changed. reallocate pool of pRNGs
  if (nthreads != comm->nthreads) {
    if (random_thr) {
      for (int i=1; i < nthreads; ++i)
        delete random_thr[i];

      delete[] random_thr;
    }

    nthreads = comm->nthreads;
    random_thr = new RanMars*[nthreads];
    for (int i=1; i < nthreads; ++i)
      random_thr[i] = NULL;

    // to ensure full compatibility with the serial BrownianPoly style
    // we use the serial random number generator instance for thread 0
    random_thr[0] = random;
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // generate a random number generator instance for
    // all threads != 0. make sure we use unique seeds.
    if ((tid > 0) && (random_thr[tid] == NULL))
      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
                                    + comm->nprocs*tid);

    if (flaglog) {
      if (evflag) eval<1,1>(ifrom, ito, thr);
      else eval<1,0>(ifrom, ito, thr);
    } else {
      if (evflag) eval<0,1>(ifrom, ito, thr);
      else eval<0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void MSMCGOMP::compute(int eflag, int vflag)
{
  if (scalar_pressure_flag)
    error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' "
               "with kspace_style msm/cg/omp");

  const double * const q = atom->q;
  const int nlocal = atom->nlocal;
  int i,j,n;

  // set energy/virial flags

  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = evflag_atom = eflag_global = vflag_global =
         eflag_atom = vflag_atom = eflag_either = vflag_either = 0;

  // invoke allocate_peratom() if needed for first time

  if (vflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    cg_peratom_all->ghost_notify();
    cg_peratom_all->setup();
    for (int n=0; n<levels; n++) {
      if (!active_flag[n]) continue;
      cg_peratom[n]->ghost_notify();
      cg_peratom[n]->setup();
    }
    peratom_allocate_flag = 1;
  }

  // extend size of per-atom arrays if necessary

  if (atom->nmax > nmax) {
    memory->destroy(part2grid);
    memory->destroy(is_charged);
    nmax = atom->nmax;
    memory->create(part2grid,nmax,3,"msm:part2grid");
    memory->create(is_charged,nmax,"msm/cg:is_charged");
  }

  // one time setup message

  if (num_charged < 0) {
    bigint charged_all, charged_num;
    double charged_frac, charged_fmax, charged_fmin;

    num_charged=0;
    for (i=0; i < nlocal; ++i)
      if (fabs(q[i]) > smallq)
        ++num_charged;

    // get fraction of charged particles per domain

    if (nlocal > 0)
      charged_frac = static_cast<double>(num_charged) * 100.0
                     / static_cast<double>(nlocal);
    else
      charged_frac = 0.0;

    MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
    MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);

    // get fraction of charged particles overall

    charged_num = num_charged;
    MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
    charged_frac = static_cast<double>(charged_all) * 100.0
                   / static_cast<double>(atom->natoms);

    if (me == 0) {
      if (screen)
        fprintf(screen,
                "  MSM/cg optimization cutoff: %g\n"
                "  Total charged atoms: %.1f%%\n"
                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
                smallq,charged_frac,charged_fmin,charged_fmax);
      if (logfile)
        fprintf(logfile,
                "  MSM/cg optimization cutoff: %g\n"
                "  Total charged atoms: %.1f%%\n"
                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
                smallq,charged_frac,charged_fmin,charged_fmax);
    }
  }

  // only need to rebuild this list after a neighbor list update

  if (neighbor->ago == 0) {
    num_charged = 0;
    for (i = 0; i < nlocal; ++i) {
      if (fabs(q[i]) > smallq) {
        is_charged[num_charged] = i;
        ++num_charged;
      }
    }
  }

  // find grid points for all my particles
  // map my particle charge onto my local 3d density grid (an interpolation)

  particle_map();
  make_rho();

  // all procs reverse communicate charge density values from their
  // ghost grid points to fully sum contribution in their 3d grid

  current_level = 0;
  cg_all->reverse_comm(this,REVERSE_RHO);

  // forward communicate charge density values to fill ghost grid points
  // compute direct sum interaction and then restrict to coarser grid

  for (int n=0; n<=levels-2; n++) {
    if (!active_flag[n]) continue;
    current_level = n;
    cg[n]->forward_comm(this,FORWARD_RHO);
    direct(n);
    restriction(n);
  }

  // compute direct interaction for top grid level for nonperiodic
  // and for second from top grid level for periodic

  if (active_flag[levels-1]) {
    if (domain->nonperiodic) {
      current_level = levels-1;
      cg[levels-1]->forward_comm(this,FORWARD_RHO);
      direct_top(levels-1);
      cg[levels-1]->reverse_comm(this,REVERSE_AD);
      if (vflag_atom)
        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
    } else {
      // Here using MPI_Allreduce is cheaper than using commgrid
      grid_swap_forward(levels-1,qgrid[levels-1]);
      direct(levels-1);
      grid_swap_reverse(levels-1,egrid[levels-1]);
      current_level = levels-1;
      if (vflag_atom)
        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
    }
  }

  // prolongate energy/virial from coarser grid to finer grid
  // reverse communicate from ghost grid points to get full sum

  for (int n=levels-2; n>=0; n--) {
    if (!active_flag[n]) continue;
    prolongation(n);

    current_level = n;
    cg[n]->reverse_comm(this,REVERSE_AD);

    // extra per-atom virial communication

    if (vflag_atom)
      cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
  }

  // all procs communicate E-field values
  // to fill ghost cells surrounding their 3d bricks

  current_level = 0;
  cg_all->forward_comm(this,FORWARD_AD);

  // extra per-atom energy/virial communication

  if (vflag_atom)
    cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);

  // calculate the force on my particles (interpolation)

  fieldforce();

  // calculate the per-atom energy/virial for my particles

  if (evflag_atom) fieldforce_peratom();

  // update qsum and qsqsum, if atom count has changed and energy needed

  if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
    qsum_qsq();
    natoms_original = atom->natoms;
  }

  // sum global energy across procs and add in self-energy term

  const double qscale = force->qqrd2e * scale;

  if (eflag_global) {
    double energy_all;
    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
    energy = energy_all;

    double e_self = qsqsum*gamma(0.0)/cutoff;
    energy -= e_self;
    energy *= 0.5*qscale;
  }

  // total long-range virial

  if (vflag_global) {
    double virial_all[6];
    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
  }

  // per-atom energy/virial
  // energy includes self-energy correction

  if (evflag_atom) {
    const double qs = 0.5*qscale;

    if (eflag_atom) {
      const double sf = gamma(0.0)/cutoff;
      for (j = 0; j < num_charged; j++) {
        i = is_charged[j];
        eatom[i] -= q[i]*q[i]*sf;
        eatom[i] *= qs;
      }
    }

    if (vflag_atom) {
      for (n = 0; n < num_charged; n++) {
        i = is_charged[n];
        for (j = 0; j < 6; j++) vatom[i][j] *= qs;
      }
    }
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
void PPPMTIP4POMP::make_rho()
{
  const double * const q = atom->q;
  const double * const * const x = atom->x;
  const int * const type = atom->type;
  const int nthreads = comm->nthreads;
  const int nlocal = atom->nlocal;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int tid = 0;
    const int ifrom = 0;
    const int ito = nlocal;
#endif

    int l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    double xM[3];

    // set up clear 3d density array
    const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
    FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
    memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));

    ThrData *thr = fix->get_thr(tid);
    FFT_SCALAR * const * const r1d =
      static_cast<FFT_SCALAR **>(thr->get_rho1d());

    // loop over my charges, add their contribution to nearby grid points
    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // (dx,dy,dz) = distance to "lower left" grid pt
    // (mx,my,mz) = global coords of moving stencil pt

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) {
      for (int i = ifrom; i < ito; i++) {

        if (type[i] == typeO) {
          find_M(i,iH1,iH2,xM);
        } else {
          xM[0] = x[i][0];
          xM[1] = x[i][1];
          xM[2] = x[i][2];
        }

        nx = part2grid[i][0];
        ny = part2grid[i][1];
        nz = part2grid[i][2];
        dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
        dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
        dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;

        compute_rho1d_thr(r1d,dx,dy,dz);

        z0 = delvolinv * q[i];
        for (n = nlower; n <= nupper; n++) {
          mz = n+nz;
          y0 = z0*r1d[2][n];
          for (m = nlower; m <= nupper; m++) {
            my = m+ny;
            x0 = y0*r1d[1][m];
            for (l = nlower; l <= nupper; l++) {
              mx = l+nx;
              db[mz][my][mx] += x0*r1d[0][l];
            }
          }
        }
      }
    }

#if defined(_OPENMP)
    // reduce 3d density array
    if (nthreads > 1) {
      data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),
                      ngrid,nthreads,1,tid);
    }
#endif
  }
}
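// For reference: the fixed-chunk decomposition in make_rho() and
// fieldforce() gives thread tid the half-open range
// [tid*idelta, min((tid+1)*idelta, nlocal)) with idelta = 1 + nlocal/nthreads.
// Worked example: nlocal = 10, nthreads = 4 gives idelta = 3 and ranges
// [0,3), [3,6), [6,9), [9,10); the "ifrom < nlocal" guard in the loops
// above covers the case of more threads than local atoms, where trailing
// threads receive empty ranges.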
void PPPMTIP4POMP::fieldforce()
{
  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  const double * const q = atom->q;
  const double * const * const x = atom->x;
  const int * const type = atom->type;
  const int nthreads = comm->nthreads;
  const int nlocal = atom->nlocal;
  const double qqrd2e = force->qqrd2e;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int ifrom = 0;
    const int ito = nlocal;
    const int tid = 0;
#endif

    ThrData *thr = fix->get_thr(tid);
    double * const * const f = thr->get_f();
    FFT_SCALAR * const * const r1d =
      static_cast<FFT_SCALAR **>(thr->get_rho1d());

    int l,m,n,nx,ny,nz,mx,my,mz;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    FFT_SCALAR ekx,eky,ekz;
    int iH1,iH2;
    double xM[3], fx,fy,fz;
    double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z;

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) {
      for (int i = ifrom; i < ito; i++) {

        if (type[i] == typeO) {
          find_M(i,iH1,iH2,xM);
        } else {
          xM[0] = x[i][0];
          xM[1] = x[i][1];
          xM[2] = x[i][2];
        }

        nx = part2grid[i][0];
        ny = part2grid[i][1];
        nz = part2grid[i][2];
        dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
        dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
        dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;

        compute_rho1d_thr(r1d,dx,dy,dz);

        ekx = eky = ekz = ZEROF;
        for (n = nlower; n <= nupper; n++) {
          mz = n+nz;
          z0 = r1d[2][n];
          for (m = nlower; m <= nupper; m++) {
            my = m+ny;
            y0 = z0*r1d[1][m];
            for (l = nlower; l <= nupper; l++) {
              mx = l+nx;
              x0 = y0*r1d[0][l];
              ekx -= x0*vdx_brick[mz][my][mx];
              eky -= x0*vdy_brick[mz][my][mx];
              ekz -= x0*vdz_brick[mz][my][mx];
            }
          }
        }

        // convert E-field to force

        const double qfactor = qqrd2e*scale*q[i];
        if (type[i] != typeO) {
          f[i][0] += qfactor*ekx;
          f[i][1] += qfactor*eky;
          f[i][2] += qfactor*ekz;
        } else {
          fx = qfactor * ekx;
          fy = qfactor * eky;
          fz = qfactor * ekz;
          find_M(i,iH1,iH2,xM);

          rOMx = xM[0] - x[i][0];
          rOMy = xM[1] - x[i][1];
          rOMz = xM[2] - x[i][2];

          ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist);

          f1x = ddotf * rOMx;
          f1y = ddotf * rOMy;
          f1z = ddotf * rOMz;

          f[i][0] += fx - alpha * (fx - f1x);
          f[i][1] += fy - alpha * (fy - f1y);
          f[i][2] += fz - alpha * (fz - f1z);

          f[iH1][0] += 0.5*alpha*(fx - f1x);
          f[iH1][1] += 0.5*alpha*(fy - f1y);
          f[iH1][2] += 0.5*alpha*(fz - f1z);

          f[iH2][0] += 0.5*alpha*(fx - f1x);
          f[iH2][1] += 0.5*alpha*(fy - f1y);
          f[iH2][2] += 0.5*alpha*(fz - f1z);
        }
      }
    }
  }
}
void PairTriLJOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  const int * const tri = atom->tri;
  const int * const type = atom->type;
  AtomVecTri::Bonus * const bonus = avec->bonus;

  // grow discrete list if necessary and initialize

  if (nall > nmax) {
    nmax = nall;
    memory->destroy(dnum);
    memory->destroy(dfirst);
    memory->create(dnum,nall,"pair:dnum");
    memory->create(dfirst,nall,"pair:dfirst");
  }
  memset(dnum,0,nall*sizeof(int));
  ndiscrete = 0;

  // need to discretize the system ahead of time
  // until we find a good way to multi-thread it.

  for (int i = 0; i < nall; ++i) {
    double dc1[3],dc2[3],dc3[3],p[3][3];

    if (tri[i] >= 0) {
      if (dnum[i] == 0) {
        MathExtra::quat_to_mat(bonus[tri[i]].quat,p);
        MathExtra::matvec(p,bonus[tri[i]].c1,dc1);
        MathExtra::matvec(p,bonus[tri[i]].c2,dc2);
        MathExtra::matvec(p,bonus[tri[i]].c3,dc3);
        dfirst[i] = ndiscrete;
        discretize(i,sigma[type[i]][type[i]],dc1,dc2,dc3);
        dnum[i] = ndiscrete - dfirst[i];
      }
    }
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}