real Umbrella_Communicate(real Q_local, real *k_Q, real *Q_0)
{
    int i;
    int i_omp = gmx_omp_get_thread_num();
    // int n_omp=gmx_omp_get_num_procs(); // Wrong number
    int n_omp = udata.n_omp;
    // int i_mpi;
    static real Q_semilocal[UMB_MAX_OMP];
    real        Q_global;

    if (n_omp > UMB_MAX_OMP)
    {
        fprintf(stderr,
                "Seg fault is probably about to happen because Q_semilocal is not big enough "
                "to accommodate %d omp threads. See %d in %s.\n",
                n_omp, __LINE__, __FILE__);
    }
    Q_semilocal[i_omp] = Q_local;
#pragma omp barrier
    // #pragma omp master
    // {
    if (i_omp == 0)
    {
        Q_local = 0;
        // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
        for (i = 0; i < n_omp; i++)
        {
            Q_local += Q_semilocal[i];
        }
        // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
        // #ifdef GMX_DOUBLE
        // MPI_Allreduce(&Q_local,&Q_global,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
        // #else
        // MPI_Allreduce(&Q_local,&Q_global,1,MPI_FLOAT, MPI_SUM,MPI_COMM_WORLD);
        // #endif
        // for (i=0; i<n_omp; i++) {
        //     Q_semilocal[i]=Q_global;
        // }
        // gmx_sum declared in src/gromacs/legacyheaders/network.h (in main.h)
#ifdef GMX_MPI
        gmx_sum(1, &Q_local, udata.cr);
#endif
        for (i = 0; i < n_omp; i++)
        {
            Q_semilocal[i] = Q_local;
        }
        // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
    }
#pragma omp barrier
    Q_global = Q_semilocal[i_omp];
    *k_Q     = udata.k_Q;
    *Q_0     = udata.Q_0 + (udata.Q_init - udata.Q_0)*exp(-udata.step/udata.Q_steps);
    // #pragma omp master
    // {
    if (i_omp == 0)
    {
        // MPI_Comm_rank(MPI_COMM_WORLD,&i_mpi);
        // if (i_mpi==0) { // udata.fp=NULL on other mpi processes.
        if ((udata.step % udata.freq) == 0 && udata.fp != NULL)
        {
            fprintf(udata.fp, "%d %g %g\n", udata.step, Q_global,
                    0.5*(*k_Q)*(Q_global - (*Q_0))*(Q_global - (*Q_0)));
        }
        // }
    }
    return Q_global;
}
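/* Hedged usage sketch (not part of the original source): how the values
 * returned by Umbrella_Communicate might be consumed by every calling
 * OpenMP thread.  The names Q_partial, V_bias and dV_dQ and the calling
 * context are illustrative assumptions; the bias expression mirrors the
 * one written to udata.fp above. */
real Q_partial = 0;                   /* this thread's share of the collective variable */
/* ... accumulate Q_partial over the atoms owned by this thread ... */
real k_Q, Q_0;
real Q      = Umbrella_Communicate(Q_partial, &k_Q, &Q_0);  /* globally reduced Q, same on all threads */
real V_bias = 0.5*k_Q*(Q - Q_0)*(Q - Q_0);                  /* harmonic umbrella energy */
real dV_dQ  = k_Q*(Q - Q_0);                                /* factor applied when distributing bias forces */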
/* Set CPU affinity. Can be important for performance.
   On some systems (e.g. Cray) CPU Affinity is set by default.
   But default assigning doesn't work (well) with only some ranks
   having threads. This causes very low performance.
   External tools have cumbersome syntax for setting affinity
   in the case that only some ranks have threads.
   Thus it is important that GROMACS sets the affinity internally
   if only PME is using threads.
 */
void
gmx_set_thread_affinity(FILE                *fplog,
                        const t_commrec     *cr,
                        gmx_hw_opt_t        *hw_opt,
                        int                  nthreads_pme,
                        const gmx_hw_info_t *hwinfo,
                        const t_inputrec    *inputrec)
{
    int        nth_affinity_set, thread_id_node, thread_id,
               nthread_local, nthread_node, nthread_hw_max, nphyscore;
    int        offset;
    const int *locality_order;
    int        rc;

    if (hw_opt->thread_affinity == threadaffOFF)
    {
        /* Nothing to do */
        return;
    }

    /* If the tMPI thread affinity setting is not supported encourage the user
     * to report it as it's either a bug or an exotic platform which we might
     * want to support. */
    if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
    {
        /* we know Mac OS doesn't support setting thread affinity, so there's
           no point in warning the user in that case. In any other case
           the user might be able to do something about it. */
#ifndef __APPLE__
        md_print_warn(NULL, fplog,
                      "Can not set thread affinities on the current platform. On NUMA systems this\n"
                      "can cause performance degradation. If you think your platform should support\n"
                      "setting affinities, contact the GROMACS developers.");
#endif  /* __APPLE__ */
        return;
    }

    /* threads on this MPI process or TMPI thread */
    if (cr->duty & DUTY_PP)
    {
        nthread_local = gmx_omp_nthreads_get(emntNonbonded);
    }
    else
    {
        nthread_local = gmx_omp_nthreads_get(emntPME);
    }

    /* map the current process to cores */
    thread_id_node = 0;
    nthread_node   = nthread_local;
#ifdef GMX_MPI
    if (PAR(cr) || MULTISIM(cr))
    {
        /* We need to determine a scan of the thread counts in this
         * compute node.
         */
        MPI_Comm comm_intra;

        MPI_Comm_split(MPI_COMM_WORLD, gmx_hostname_num(), cr->rank_intranode,
                       &comm_intra);
        MPI_Scan(&nthread_local, &thread_id_node, 1, MPI_INT, MPI_SUM, comm_intra);
        /* MPI_Scan is inclusive, but here we need exclusive */
        thread_id_node -= nthread_local;
        /* Get the total number of threads on this physical node */
        MPI_Allreduce(&nthread_local, &nthread_node, 1, MPI_INT, MPI_SUM, comm_intra);
        MPI_Comm_free(&comm_intra);
    }
#endif

    if (hw_opt->thread_affinity == threadaffAUTO &&
        nthread_node != hwinfo->nthreads_hw_avail)
    {
        if (nthread_node > 1 && nthread_node < hwinfo->nthreads_hw_avail)
        {
            md_print_warn(cr, fplog,
                          "NOTE: The number of threads is not equal to the number of (logical) cores\n"
                          "      and the -pin option is set to auto: will not pin threads to cores.\n"
                          "      This can lead to significant performance degradation.\n"
                          "      Consider using -pin on (and -pinoffset in case you run multiple jobs).\n");
        }

        return;
    }

    offset = 0;
    if (hw_opt->core_pinning_offset != 0)
    {
        offset = hw_opt->core_pinning_offset;
        md_print_info(cr, fplog, "Applying core pinning offset %d\n", offset);
    }

    rc = get_thread_affinity_layout(fplog, cr, hwinfo, nthread_node, offset,
                                    &hw_opt->core_pinning_stride,
                                    &locality_order);
    if (rc != 0)
    {
        /* Incompatible layout, don't pin, warning was already issued */
        return;
    }

    /* Set the per-thread affinity. In order to be able to check the success
     * of affinity settings, we will set nth_affinity_set to 1 on threads
     * where the affinity setting succeeded and to 0 where it failed.
     * Reducing these 0/1 values over the threads will give the total number
     * of threads on which we succeeded.
     */
    nth_affinity_set = 0;
#pragma omp parallel firstprivate(thread_id_node) num_threads(nthread_local) \
    reduction(+:nth_affinity_set)
    {
        int      index, core;
        gmx_bool setaffinity_ret;

        thread_id       = gmx_omp_get_thread_num();
        thread_id_node += thread_id;
        index           = offset + thread_id_node*hw_opt->core_pinning_stride;
        if (locality_order != NULL)
        {
            core = locality_order[index];
        }
        else
        {
            core = index;
        }

        setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);

        /* store the per-thread success-values of the setaffinity */
        nth_affinity_set = (setaffinity_ret == 0);

        if (debug)
        {
            fprintf(debug, "On rank %2d, thread %2d, core %2d the affinity setting returned %d\n",
                    cr->nodeid, gmx_omp_get_thread_num(), core, setaffinity_ret);
        }
    }

    if (nth_affinity_set > nthread_local)
    {
        char msg[STRLEN];

        sprintf(msg, "Looks like we have set affinity for more threads than "
                "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
        gmx_incons(msg);
    }
    else
    {
        /* check & warn if some threads failed to set their affinities */
        if (nth_affinity_set != nthread_local)
        {
            char sbuf1[STRLEN], sbuf2[STRLEN];

            /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
            sbuf1[0] = sbuf2[0] = '\0';
            /* Only add rank info if we have more than one rank. */
            if (cr->nnodes > 1)
            {
#ifdef GMX_MPI
#ifdef GMX_THREAD_MPI
                sprintf(sbuf1, "In tMPI thread #%d: ", cr->nodeid);
#else           /* GMX_LIB_MPI */
                sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
#endif
#endif          /* GMX_MPI */
            }

            if (nthread_local > 1)
            {
                sprintf(sbuf2, "for %d/%d thread%s ",
                        nthread_local - nth_affinity_set, nthread_local,
                        nthread_local > 1 ? "s" : "");
            }

            md_print_warn(NULL, fplog,
                          "WARNING: %sAffinity setting %sfailed.\n"
                          "         This can cause performance degradation! If you think your settings are\n"
                          "         correct, contact the GROMACS developers.",
                          sbuf1, sbuf2);
        }
    }

    return;
}
gmx_radial_distribution_histogram_t *calc_radial_distribution_histogram (
        gmx_sans_t  *gsans,
        rvec        *x,
        matrix       box,
        atom_id     *index,
        int          isize,
        double       binwidth,
        gmx_bool     bMC,
        gmx_bool     bNORM,
        real         mcover,
        unsigned int seed)
{
    gmx_radial_distribution_histogram_t *pr = NULL;
    rvec              dist;
    double            rmax;
    int               i, j;
#ifdef GMX_OPENMP
    double          **tgr;
    int               tid;
    int               nthreads;
    gmx_rng_t        *trng = NULL;
#endif
    gmx_large_int_t   mc  = 0, max;
    gmx_rng_t         rng = NULL;

    /* allocate memory for pr */
    snew(pr, 1);
    /* set some fields */
    pr->binwidth = binwidth;

    /*
     * create max dist rvec
     * dist = box[xx] + box[yy] + box[zz]
     */
    rvec_add(box[XX], box[YY], dist);
    rvec_add(box[ZZ], dist, dist);

    rmax = norm(dist);

    pr->grn = (int)floor(rmax/pr->binwidth)+1;
    rmax    = pr->grn*pr->binwidth;

    snew(pr->gr, pr->grn);

    if (bMC)
    {
        /* Special case for setting automatically the number of mc iterations to 1% of total number of direct iterations */
        if (mcover == -1)
        {
            max = (gmx_large_int_t)floor(0.5*0.01*isize*(isize-1));
        }
        else
        {
            max = (gmx_large_int_t)floor(0.5*mcover*isize*(isize-1));
        }
        rng = gmx_rng_init(seed);
#ifdef GMX_OPENMP
        nthreads = gmx_omp_get_max_threads();
        snew(tgr, nthreads);
        snew(trng, nthreads);
        for (i = 0; i < nthreads; i++)
        {
            snew(tgr[i], pr->grn);
            trng[i] = gmx_rng_init(gmx_rng_uniform_uint32(rng));
        }
#pragma omp parallel shared(tgr,trng,mc) private(tid,i,j)
        {
            tid = gmx_omp_get_thread_num();
            /* now starting parallel threads */
#pragma omp for
            for (mc = 0; mc < max; mc++)
            {
                i = (int)floor(gmx_rng_uniform_real(trng[tid])*isize);
                j = (int)floor(gmx_rng_uniform_real(trng[tid])*isize);
                if (i != j)
                {
                    tgr[tid][(int)floor(sqrt(distance2(x[index[i]], x[index[j]]))/binwidth)] +=
                        gsans->slength[index[i]]*gsans->slength[index[j]];
                }
            }
        }
        /* collecting data from threads */
        for (i = 0; i < pr->grn; i++)
        {
            for (j = 0; j < nthreads; j++)
            {
                pr->gr[i] += tgr[j][i];
            }
        }
        /* freeing memory for tgr and destroying trng */
        for (i = 0; i < nthreads; i++)
        {
            sfree(tgr[i]);
            gmx_rng_destroy(trng[i]);
        }
        sfree(tgr);
        sfree(trng);
#else
        for (mc = 0; mc < max; mc++)
        {
            i = (int)floor(gmx_rng_uniform_real(rng)*isize);
            j = (int)floor(gmx_rng_uniform_real(rng)*isize);
            if (i != j)
            {
                pr->gr[(int)floor(sqrt(distance2(x[index[i]], x[index[j]]))/binwidth)] +=
                    gsans->slength[index[i]]*gsans->slength[index[j]];
            }
        }
#endif
        gmx_rng_destroy(rng);
    }
    else
    {
#ifdef GMX_OPENMP
        nthreads = gmx_omp_get_max_threads();
        /* Allocating memory for tgr arrays */
        snew(tgr, nthreads);
        for (i = 0; i < nthreads; i++)
        {
            snew(tgr[i], pr->grn);
        }
#pragma omp parallel shared(tgr) private(tid,i,j)
        {
            tid = gmx_omp_get_thread_num();
            /* starting parallel threads */
#pragma omp for
            for (i = 0; i < isize; i++)
            {
                for (j = 0; j < i; j++)
                {
                    tgr[tid][(int)floor(sqrt(distance2(x[index[i]], x[index[j]]))/binwidth)] +=
                        gsans->slength[index[i]]*gsans->slength[index[j]];
                }
            }
        }
        /* collecting data for pr->gr */
        for (i = 0; i < pr->grn; i++)
        {
            for (j = 0; j < nthreads; j++)
            {
                pr->gr[i] += tgr[j][i];
            }
        }
        /* freeing memory for tgr */
        for (i = 0; i < nthreads; i++)
        {
            sfree(tgr[i]);
        }
        sfree(tgr);
#else
        for (i = 0; i < isize; i++)
        {
            for (j = 0; j < i; j++)
            {
                pr->gr[(int)floor(sqrt(distance2(x[index[i]], x[index[j]]))/binwidth)] +=
                    gsans->slength[index[i]]*gsans->slength[index[j]];
            }
        }
#endif
    }

    /* normalize if needed */
    if (bNORM)
    {
        normalize_probability(pr->grn, pr->gr);
    }

    snew(pr->r, pr->grn);
    for (i = 0; i < pr->grn; i++)
    {
        pr->r[i] = (pr->binwidth*i+pr->binwidth*0.5);
    }

    return (gmx_radial_distribution_histogram_t *) pr;
}
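/* Hedged usage sketch (not part of the original source): reading out the
 * histogram returned by calc_radial_distribution_histogram.  The inputs
 * gsans, x, box, index and isize are assumed to be prepared by the caller;
 * the binwidth and seed values below are arbitrary examples. */
gmx_radial_distribution_histogram_t *pr =
    calc_radial_distribution_histogram(gsans, x, box, index, isize,
                                       0.01,   /* binwidth (nm)                       */
                                       FALSE,  /* bMC: use the direct double loop     */
                                       TRUE,   /* bNORM: normalize the histogram      */
                                       -1, 1993);
for (int k = 0; k < pr->grn; k++)
{
    fprintf(stdout, "%g %g\n", pr->r[k], pr->gr[k]);   /* bin center and g(r) value */
}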
/* Set CPU affinity. Can be important for performance.
   On some systems (e.g. Cray) CPU Affinity is set by default.
   But default assigning doesn't work (well) with only some ranks
   having threads. This causes very low performance.
   External tools have cumbersome syntax for setting affinity
   in the case that only some ranks have threads.
   Thus it is important that GROMACS sets the affinity internally
   if only PME is using threads.
 */
void
gmx_set_thread_affinity(FILE                *fplog,
                        const t_commrec     *cr,
                        const gmx_hw_opt_t  *hw_opt,
                        const gmx_hw_info_t *hwinfo)
{
    int        nth_affinity_set, thread0_id_node,
               nthread_local, nthread_node;
    int        offset;
    int *      localityOrder = nullptr;
    int        rc;

    if (hw_opt->thread_affinity == threadaffOFF)
    {
        /* Nothing to do */
        return;
    }

    /* If the tMPI thread affinity setting is not supported encourage the user
     * to report it as it's either a bug or an exotic platform which we might
     * want to support. */
    if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
    {
        /* we know Mac OS & BlueGene do not support setting thread affinity, so
           there's no point in warning the user in that case. In any other case
           the user might be able to do something about it. */
#if !defined(__APPLE__) && !defined(__bg__)
        md_print_warn(cr, fplog,
                      "NOTE: Cannot set thread affinities on the current platform.\n");
#endif  /* __APPLE__ */
        return;
    }

    /* threads on this MPI process or TMPI thread */
    if (cr->duty & DUTY_PP)
    {
        nthread_local = gmx_omp_nthreads_get(emntNonbonded);
    }
    else
    {
        nthread_local = gmx_omp_nthreads_get(emntPME);
    }

    /* map the current process to cores */
    thread0_id_node = 0;
    nthread_node    = nthread_local;
#if GMX_MPI
    if (PAR(cr) || MULTISIM(cr))
    {
        /* We need to determine a scan of the thread counts in this
         * compute node.
         */
        MPI_Comm comm_intra;

        MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(), cr->rank_intranode,
                       &comm_intra);
        MPI_Scan(&nthread_local, &thread0_id_node, 1, MPI_INT, MPI_SUM, comm_intra);
        /* MPI_Scan is inclusive, but here we need exclusive */
        thread0_id_node -= nthread_local;
        /* Get the total number of threads on this physical node */
        MPI_Allreduce(&nthread_local, &nthread_node, 1, MPI_INT, MPI_SUM, comm_intra);
        MPI_Comm_free(&comm_intra);
    }
#endif

    if (hw_opt->thread_affinity == threadaffAUTO &&
        nthread_node != hwinfo->nthreads_hw_avail)
    {
        if (nthread_node > 1 && nthread_node < hwinfo->nthreads_hw_avail)
        {
            md_print_warn(cr, fplog,
                          "NOTE: The number of threads is not equal to the number of (logical) cores\n"
                          "      and the -pin option is set to auto: will not pin threads to cores.\n"
                          "      This can lead to significant performance degradation.\n"
                          "      Consider using -pin on (and -pinoffset in case you run multiple jobs).\n");
        }

        return;
    }

    offset = 0;
    if (hw_opt->core_pinning_offset != 0)
    {
        offset = hw_opt->core_pinning_offset;
        md_print_info(cr, fplog, "Applying core pinning offset %d\n", offset);
    }

    int core_pinning_stride = hw_opt->core_pinning_stride;
    rc = get_thread_affinity_layout(fplog, cr, hwinfo, nthread_node, offset,
                                    &core_pinning_stride,
                                    &localityOrder);
    gmx::scoped_guard_sfree localityOrderGuard(localityOrder);

    if (rc != 0)
    {
        /* Incompatible layout, don't pin, warning was already issued */
        return;
    }

    /* Set the per-thread affinity. In order to be able to check the success
     * of affinity settings, we will set nth_affinity_set to 1 on threads
     * where the affinity setting succeeded and to 0 where it failed.
     * Reducing these 0/1 values over the threads will give the total number
     * of threads on which we succeeded.
     */

    // To avoid warnings from the static analyzer we initialize nth_affinity_set
    // to zero outside the OpenMP block, and then add to it inside the block.
    // The value will still always be 0 or 1 from each thread.
    nth_affinity_set = 0;
#pragma omp parallel num_threads(nthread_local) reduction(+:nth_affinity_set)
    {
        try
        {
            int      thread_id, thread_id_node;
            int      index, core;
            gmx_bool setaffinity_ret;

            thread_id      = gmx_omp_get_thread_num();
            thread_id_node = thread0_id_node + thread_id;
            index          = offset + thread_id_node*core_pinning_stride;
            if (localityOrder != nullptr)
            {
                core = localityOrder[index];
            }
            else
            {
                core = index;
            }

            setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);

            /* store the per-thread success-values of the setaffinity */
            nth_affinity_set += (setaffinity_ret == 0);

            if (debug)
            {
                fprintf(debug, "On rank %2d, thread %2d, index %2d, core %2d the affinity setting returned %d\n",
                        cr->nodeid, gmx_omp_get_thread_num(), index, core, setaffinity_ret);
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }

    if (nth_affinity_set > nthread_local)
    {
        char msg[STRLEN];

        sprintf(msg, "Looks like we have set affinity for more threads than "
                "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
        gmx_incons(msg);
    }
    else
    {
        /* check & warn if some threads failed to set their affinities */
        const bool allAffinitiesSet = (nth_affinity_set == nthread_local);
        if (!allAffinitiesSet)
        {
            char sbuf1[STRLEN], sbuf2[STRLEN];

            /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
            sbuf1[0] = sbuf2[0] = '\0';
            /* Only add rank info if we have more than one rank. */
            if (cr->nnodes > 1)
            {
#if GMX_MPI
#if GMX_THREAD_MPI
                sprintf(sbuf1, "In tMPI thread #%d: ", cr->nodeid);
#else           /* GMX_LIB_MPI */
                sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
#endif
#endif          /* GMX_MPI */
            }

            if (nthread_local > 1)
            {
                sprintf(sbuf2, "for %d/%d thread%s ",
                        nthread_local - nth_affinity_set, nthread_local,
                        nthread_local > 1 ? "s" : "");
            }

            fprintf(stderr, "NOTE: %sAffinity setting %sfailed.\n", sbuf1, sbuf2);
        }
        if (invalidWithinSimulation(cr, !allAffinitiesSet))
        {
            md_print_warn(cr, fplog,
                          "NOTE: Thread affinity setting failed. This can cause performance degradation.\n"
                          "      If you think your settings are correct, ask on the gmx-users list.\n");
        }
    }
}
int many_auto_correl(int nfunc, int ndata, int nfft, real **c)
{
#pragma omp parallel
    {
        try
        {
            typedef real complex[2];
            int          i, j;
            gmx_fft_t    fft1;
            complex     *in, *out;
            int          i0, i1;
            int          nthreads, thread_id;

            nthreads  = gmx_omp_get_max_threads();
            thread_id = gmx_omp_get_thread_num();
            if ((0 == thread_id))
            {
                // fprintf(stderr, "There are %d threads for correlation functions\n", nthreads);
            }
            i0 = thread_id*nfunc/nthreads;
            i1 = std::min(nfunc, (thread_id+1)*nfunc/nthreads);

            gmx_fft_init_1d(&fft1, nfft, GMX_FFT_FLAG_CONSERVATIVE);
            /* Allocate temporary arrays */
            snew(in, nfft);
            snew(out, nfft);
            for (i = i0; (i < i1); i++)
            {
                for (j = 0; j < ndata; j++)
                {
                    in[j][0] = c[i][j];
                    in[j][1] = 0;
                }
                for (; (j < nfft); j++)
                {
                    in[j][0] = in[j][1] = 0;
                }

                gmx_fft_1d(fft1, GMX_FFT_BACKWARD, (void *)in, (void *)out);
                for (j = 0; j < nfft; j++)
                {
                    in[j][0] = (out[j][0]*out[j][0] + out[j][1]*out[j][1])/nfft;
                    in[j][1] = 0;
                }
                for (; (j < nfft); j++)
                {
                    in[j][0] = in[j][1] = 0;
                }

                gmx_fft_1d(fft1, GMX_FFT_FORWARD, (void *)in, (void *)out);
                for (j = 0; (j < nfft); j++)
                {
                    c[i][j] = out[j][0]/ndata;
                }
            }
            /* Free the memory */
            gmx_fft_destroy(fft1);
            sfree(in);
            sfree(out);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }
    // gmx_fft_cleanup();
    return 0;
}
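/* Hedged usage sketch (not part of the original source): calling
 * many_auto_correl on nfunc time series of length ndata.  Note that the
 * final loop above writes c[i][j] for j < nfft, so every row of c must be
 * allocated with at least nfft elements; the sizes below are examples. */
int    nfunc = 4, ndata = 1000, nfft = 2048;   /* nfft >= ndata; >= 2*ndata avoids wrap-around */
real **c;
snew(c, nfunc);
for (int k = 0; k < nfunc; k++)
{
    snew(c[k], nfft);                          /* snew zero-initializes the row */
    /* ... fill c[k][0..ndata-1] with the time series ... */
}
many_auto_correl(nfunc, ndata, nfft, c);       /* rows now hold the autocorrelation */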
gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
                         t_inputrec *ir,
                         gmx_large_int_t step,
                         struct gmx_lincsdata *lincsd,t_mdatoms *md,
                         t_commrec *cr,
                         rvec *x,rvec *xprime,rvec *min_proj,
                         matrix box,t_pbc *pbc,
                         real lambda,real *dvdlambda,
                         real invdt,rvec *v,
                         gmx_bool bCalcVir,tensor vir_r_m_dr,
                         int econq,
                         t_nrnb *nrnb,
                         int maxwarn,int *warncount)
{
    char     buf[STRLEN],buf2[22],buf3[STRLEN];
    int      i,warn,p_imax,error;
    real     ncons_loc,p_ssd,p_max=0;
    rvec     dx;
    gmx_bool bOK;

    bOK = TRUE;

    if (lincsd->nc == 0 && cr->dd == NULL)
    {
        if (bLog || bEner)
        {
            lincsd->rmsd_data[0] = 0;
            if (ir->eI == eiSD2 && v == NULL)
            {
                i = 2;
            }
            else
            {
                i = 1;
            }
            lincsd->rmsd_data[i] = 0;
        }

        return bOK;
    }

    if (econq == econqCoord)
    {
        if (ir->efep != efepNO)
        {
            if (md->nMassPerturbed && lincsd->matlam != md->lambda)
            {
                set_lincs_matrix(lincsd,md->invmass,md->lambda);
            }

            for(i=0; i<lincsd->nc; i++)
            {
                lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
            }
        }

        if (lincsd->ncg_flex)
        {
            /* Set the flexible constraint lengths to the old lengths */
            if (pbc != NULL)
            {
                for(i=0; i<lincsd->nc; i++)
                {
                    if (lincsd->bllen[i] == 0)
                    {
                        pbc_dx_aiuc(pbc,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
                        lincsd->bllen[i] = norm(dx);
                    }
                }
            }
            else
            {
                for(i=0; i<lincsd->nc; i++)
                {
                    if (lincsd->bllen[i] == 0)
                    {
                        lincsd->bllen[i] = sqrt(distance2(x[lincsd->bla[2*i]],
                                                          x[lincsd->bla[2*i+1]]));
                    }
                }
            }
        }

        if (bLog && fplog)
        {
            cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
                    &ncons_loc,&p_ssd,&p_max,&p_imax);
        }

        /* This warn var can be updated by multiple threads
         * at the same time. But as we only need to detect
         * if a warning occurred or not, this is not an issue.
         */
        warn = -1;

        /* The OpenMP parallel region of constrain_lincs for coords */
#pragma omp parallel num_threads(lincsd->nth)
        {
            int th=gmx_omp_get_thread_num();

            clear_mat(lincsd->th[th].vir_r_m_dr);

            do_lincs(x,xprime,box,pbc,lincsd,th,
                     md->invmass,cr,
                     bCalcVir || (ir->efep != efepNO),
                     ir->LincsWarnAngle,&warn,
                     invdt,v,bCalcVir,
                     th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
        }

        if (ir->efep != efepNO)
        {
            real dt_2,dvdl=0;

            dt_2 = 1.0/(ir->delta_t*ir->delta_t);
            for(i=0; (i<lincsd->nc); i++)
            {
                dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
            }
            *dvdlambda += dvdl;
        }

        if (bLog && fplog && lincsd->nc > 0)
        {
            fprintf(fplog,"   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
            fprintf(fplog,"       Before LINCS          %.6f    %.6f %6d %6d\n",
                    sqrt(p_ssd/ncons_loc),p_max,
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
        }
        if (bLog || bEner)
        {
            cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
                    &ncons_loc,&p_ssd,&p_max,&p_imax);
            /* Check if we are doing the second part of SD */
            if (ir->eI == eiSD2 && v == NULL)
            {
                i = 2;
            }
            else
            {
                i = 1;
            }
            lincsd->rmsd_data[0] = ncons_loc;
            lincsd->rmsd_data[i] = p_ssd;
        }
        else
        {
            lincsd->rmsd_data[0] = 0;
            lincsd->rmsd_data[1] = 0;
            lincsd->rmsd_data[2] = 0;
        }
        if (bLog && fplog && lincsd->nc > 0)
        {
            fprintf(fplog,"        After LINCS          %.6f    %.6f %6d %6d\n\n",
                    sqrt(p_ssd/ncons_loc),p_max,
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
        }

        if (warn >= 0)
        {
            if (maxwarn >= 0)
            {
                cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
                        &ncons_loc,&p_ssd,&p_max,&p_imax);
                if (MULTISIM(cr))
                {
                    sprintf(buf3," in simulation %d", cr->ms->sim);
                }
                else
                {
                    buf3[0] = 0;
                }
                sprintf(buf,"\nStep %s, time %g (ps)  LINCS WARNING%s\n"
                        "relative constraint deviation after LINCS:\n"
                        "rms %.6f, max %.6f (between atoms %d and %d)\n",
                        gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
                        buf3,
                        sqrt(p_ssd/ncons_loc),p_max,
                        ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                        ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
                if (fplog)
                {
                    fprintf(fplog,"%s",buf);
                }
                fprintf(stderr,"%s",buf);
                lincs_warning(fplog,cr->dd,x,xprime,pbc,
                              lincsd->nc,lincsd->bla,lincsd->bllen,
                              ir->LincsWarnAngle,maxwarn,warncount);
            }
            bOK = (p_max < 0.5);
        }

        if (lincsd->ncg_flex)
        {
            for(i=0; (i<lincsd->nc); i++)
            {
                if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
                {
                    lincsd->bllen[i] = 0;
                }
            }
        }
    }
    else
    {
        /* The OpenMP parallel region of constrain_lincs for derivatives */
#pragma omp parallel num_threads(lincsd->nth)
        {
            int th=gmx_omp_get_thread_num();

            do_lincsp(x,xprime,min_proj,pbc,lincsd,th,
                      md->invmass,econq,ir->efep != efepNO ? dvdlambda : NULL,
                      bCalcVir,th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
        }
    }

    if (bCalcVir && lincsd->nth > 1)
    {
        for(i=1; i<lincsd->nth; i++)
        {
            m_add(vir_r_m_dr,lincsd->th[i].vir_r_m_dr,vir_r_m_dr);
        }
    }

    /* count assuming nit=1 */
    inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
    inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
    if (lincsd->ntriangle > 0)
    {
        inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
    }
    if (v)
    {
        inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
    }
    if (bCalcVir)
    {
        inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);
    }

    return bOK;
}