Ejemplo n.º 1
0
real Umbrella_Communicate(real Q_local,real *k_Q,real *Q_0)
{
  int i;
  int i_omp=gmx_omp_get_thread_num();
  // int n_omp=gmx_omp_get_num_procs(); // Wrong number
  int n_omp=udata.n_omp;
  // int i_mpi;
  static real Q_semilocal[UMB_MAX_OMP];
  real Q_global;

  if (n_omp>UMB_MAX_OMP) {
    fprintf(stderr,"Seg fault is probably about to happen because Q_semilocal is not big enough to accommodate %d omp threads. See %d in %s.\n",n_omp,__LINE__,__FILE__);
  }

  Q_semilocal[i_omp]=Q_local;
  #pragma omp barrier
  // #pragma omp master
  // {
  if (i_omp==0) {
    Q_local=0;
    // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
    for (i=0; i<n_omp; i++) {
      Q_local+=Q_semilocal[i];
    }
    // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
//    #ifdef GMX_DOUBLE
//    MPI_Allreduce(&Q_local,&Q_global,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
//    #else
//    MPI_Allreduce(&Q_local,&Q_global,1,MPI_FLOAT, MPI_SUM,MPI_COMM_WORLD);
//    #endif
//    for (i=0; i<n_omp; i++) {
//      Q_semilocal[i]=Q_global;
//    }
    // gmx_sum declared in src/gromacs/legacyheaders/network.h (in main.h)
    #ifdef GMX_MPI
    gmx_sum(1,&Q_local,udata.cr);
    #endif
    for (i=0; i<n_omp; i++) {
      Q_semilocal[i]=Q_local;
    }
    // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
  }
  #pragma omp barrier
  Q_global=Q_semilocal[i_omp];
  *k_Q=udata.k_Q;
  *Q_0=udata.Q_0+(udata.Q_init-udata.Q_0)*exp(-udata.step/udata.Q_steps);

  // #pragma omp master
  // {
  if (i_omp==0) {
    // MPI_Comm_rank(MPI_COMM_WORLD,&i_mpi);
    // if (i_mpi==0) { // udata.fp=NULL on other mpi processes.
      if ((udata.step % udata.freq)==0 && udata.fp!=NULL) {
        fprintf(udata.fp,"%d %g %g\n",udata.step,Q_global,0.5*(*k_Q)*(Q_global-(*Q_0))*(Q_global-(*Q_0)));
      }
    // }
  }

  return Q_global;
}
Ejemplo n.º 2
0
/* Set CPU affinity. Can be important for performance.
   On some systems (e.g. Cray) CPU Affinity is set by default.
   But default assigning doesn't work (well) with only some ranks
   having threads. This causes very low performance.
   External tools have cumbersome syntax for setting affinity
   in the case that only some ranks have threads.
   Thus it is important that GROMACS sets the affinity internally
   if only PME is using threads.
 */
void
gmx_set_thread_affinity(FILE                *fplog,
                        const t_commrec     *cr,
                        gmx_hw_opt_t        *hw_opt,
                        int                  nthreads_pme,
                        const gmx_hw_info_t *hwinfo,
                        const t_inputrec    *inputrec)
{
    int        nth_affinity_set, thread_id_node, thread_id,
               nthread_local, nthread_node, nthread_hw_max, nphyscore;
    int        offset;
    const int *locality_order;
    int        rc;

    if (hw_opt->thread_affinity == threadaffOFF)
    {
        /* Nothing to do */
        return;
    }

    /* If the tMPI thread affinity setting is not supported encourage the user
     * to report it as it's either a bug or an exotic platform which we might
     * want to support. */
    if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
    {
        /* we know Mac OS doesn't support setting thread affinity, so there's
           no point in warning the user in that case. In any other case
           the user might be able to do something about it. */
#ifndef __APPLE__
        md_print_warn(NULL, fplog,
                      "Can not set thread affinities on the current platform. On NUMA systems this\n"
                      "can cause performance degradation. If you think your platform should support\n"
                      "setting affinities, contact the GROMACS developers.");
#endif  /* __APPLE__ */
        return;
    }

    /* threads on this MPI process or TMPI thread */
    if (cr->duty & DUTY_PP)
    {
        nthread_local = gmx_omp_nthreads_get(emntNonbonded);
    }
    else
    {
        nthread_local = gmx_omp_nthreads_get(emntPME);
    }

    /* map the current process to cores */
    thread_id_node = 0;
    nthread_node   = nthread_local;
#ifdef GMX_MPI
    if (PAR(cr) || MULTISIM(cr))
    {
        /* We need to determine a scan of the thread counts in this
         * compute node.
         */
        MPI_Comm comm_intra;

        MPI_Comm_split(MPI_COMM_WORLD, gmx_hostname_num(), cr->rank_intranode,
                       &comm_intra);
        MPI_Scan(&nthread_local, &thread_id_node, 1, MPI_INT, MPI_SUM, comm_intra);
        /* MPI_Scan is inclusive, but here we need exclusive */
        thread_id_node -= nthread_local;
        /* Get the total number of threads on this physical node */
        MPI_Allreduce(&nthread_local, &nthread_node, 1, MPI_INT, MPI_SUM, comm_intra);
        MPI_Comm_free(&comm_intra);
    }
#endif

    if (hw_opt->thread_affinity == threadaffAUTO &&
        nthread_node != hwinfo->nthreads_hw_avail)
    {
        if (nthread_node > 1 && nthread_node < hwinfo->nthreads_hw_avail)
        {
            md_print_warn(cr, fplog,
                          "NOTE: The number of threads is not equal to the number of (logical) cores\n"
                          "      and the -pin option is set to auto: will not pin thread to cores.\n"
                          "      This can lead to significant performance degradation.\n"
                          "      Consider using -pin on (and -pinoffset in case you run multiple jobs).\n");
        }

        return;
    }

    offset = 0;
    if (hw_opt->core_pinning_offset != 0)
    {
        offset = hw_opt->core_pinning_offset;
        md_print_info(cr, fplog, "Applying core pinning offset %d\n", offset);
    }

    rc = get_thread_affinity_layout(fplog, cr, hwinfo,
                                    nthread_node,
                                    offset, &hw_opt->core_pinning_stride,
                                    &locality_order);

    if (rc != 0)
    {
        /* Incompatible layout, don't pin, warning was already issued */
        return;
    }

    /* Set the per-thread affinity. In order to be able to check the success
     * of affinity settings, we will set nth_affinity_set to 1 on threads
     * where the affinity setting succeded and to 0 where it failed.
     * Reducing these 0/1 values over the threads will give the total number
     * of threads on which we succeeded.
     */
    nth_affinity_set = 0;
#pragma omp parallel firstprivate(thread_id_node) num_threads(nthread_local) \
    reduction(+:nth_affinity_set)
    {
        int      index, core;
        gmx_bool setaffinity_ret;

        thread_id       = gmx_omp_get_thread_num();
        thread_id_node += thread_id;
        index           = offset + thread_id_node*hw_opt->core_pinning_stride;
        if (locality_order != NULL)
        {
            core = locality_order[index];
        }
        else
        {
            core = index;
        }

        setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);

        /* store the per-thread success-values of the setaffinity */
        nth_affinity_set = (setaffinity_ret == 0);

        if (debug)
        {
            fprintf(debug, "On rank %2d, thread %2d, core %2d the affinity setting returned %d\n",
                    cr->nodeid, gmx_omp_get_thread_num(), core, setaffinity_ret);
        }
    }

    if (nth_affinity_set > nthread_local)
    {
        char msg[STRLEN];

        sprintf(msg, "Looks like we have set affinity for more threads than "
                "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
        gmx_incons(msg);
    }
    else
    {
        /* check & warn if some threads failed to set their affinities */
        if (nth_affinity_set != nthread_local)
        {
            char sbuf1[STRLEN], sbuf2[STRLEN];

            /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
            sbuf1[0] = sbuf2[0] = '\0';
            /* Only add rank info if we have more than one rank. */
            if (cr->nnodes > 1)
            {
#ifdef GMX_MPI
#ifdef GMX_THREAD_MPI
                sprintf(sbuf1, "In tMPI thread #%d: ", cr->nodeid);
#else           /* GMX_LIB_MPI */
                sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
#endif
#endif          /* GMX_MPI */
            }

            if (nthread_local > 1)
            {
                sprintf(sbuf2, "for %d/%d thread%s ",
                        nthread_local - nth_affinity_set, nthread_local,
                        nthread_local > 1 ? "s" : "");
            }

            md_print_warn(NULL, fplog,
                          "WARNING: %sAffinity setting %sfailed.\n"
                          "         This can cause performance degradation! If you think your setting are\n"
                          "         correct, contact the GROMACS developers.",
                          sbuf1, sbuf2);
        }
    }
    return;
}
Ejemplo n.º 3
0
gmx_radial_distribution_histogram_t *calc_radial_distribution_histogram (
        gmx_sans_t  *gsans,
        rvec        *x,
        matrix       box,
        atom_id     *index,
        int          isize,
        double       binwidth,
        gmx_bool     bMC,
        gmx_bool     bNORM,
        real         mcover,
        unsigned int seed)
{
    gmx_radial_distribution_histogram_t    *pr = NULL;
    rvec              dist;
    double            rmax;
    int               i, j;
#ifdef GMX_OPENMP
    double          **tgr;
    int               tid;
    int               nthreads;
    gmx_rng_t        *trng = NULL;
#endif
    gmx_large_int_t   mc  = 0, max;
    gmx_rng_t         rng = NULL;

    /* allocate memory for pr */
    snew(pr, 1);
    /* set some fields */
    pr->binwidth = binwidth;

    /*
     * create max dist rvec
     * dist = box[xx] + box[yy] + box[zz]
     */
    rvec_add(box[XX], box[YY], dist);
    rvec_add(box[ZZ], dist, dist);

    rmax = norm(dist);

    pr->grn = (int)floor(rmax/pr->binwidth)+1;
    rmax    = pr->grn*pr->binwidth;

    snew(pr->gr, pr->grn);

    if (bMC)
    {
        /* Special case for setting automaticaly number of mc iterations to 1% of total number of direct iterations */
        if (mcover == -1)
        {
            max = (gmx_large_int_t)floor(0.5*0.01*isize*(isize-1));
        }
        else
        {
            max = (gmx_large_int_t)floor(0.5*mcover*isize*(isize-1));
        }
        rng = gmx_rng_init(seed);
#ifdef GMX_OPENMP
        nthreads = gmx_omp_get_max_threads();
        snew(tgr, nthreads);
        snew(trng, nthreads);
        for (i = 0; i < nthreads; i++)
        {
            snew(tgr[i], pr->grn);
            trng[i] = gmx_rng_init(gmx_rng_uniform_uint32(rng));
        }
        #pragma omp parallel shared(tgr,trng,mc) private(tid,i,j)
        {
            tid = gmx_omp_get_thread_num();
            /* now starting parallel threads */
            #pragma omp for
            for (mc = 0; mc < max; mc++)
            {
                i = (int)floor(gmx_rng_uniform_real(trng[tid])*isize);
                j = (int)floor(gmx_rng_uniform_real(trng[tid])*isize);
                if (i != j)
                {
                    tgr[tid][(int)floor(sqrt(distance2(x[index[i]], x[index[j]]))/binwidth)] += gsans->slength[index[i]]*gsans->slength[index[j]];
                }
            }
        }
        /* collecting data from threads */
        for (i = 0; i < pr->grn; i++)
        {
            for (j = 0; j < nthreads; j++)
            {
                pr->gr[i] += tgr[j][i];
            }
        }
        /* freeing memory for tgr and destroying trng */
        for (i = 0; i < nthreads; i++)
        {
            sfree(tgr[i]);
            gmx_rng_destroy(trng[i]);
        }
        sfree(tgr);
        sfree(trng);
#else
        for (mc = 0; mc < max; mc++)
        {
            i = (int)floor(gmx_rng_uniform_real(rng)*isize);
            j = (int)floor(gmx_rng_uniform_real(rng)*isize);
            if (i != j)
            {
                pr->gr[(int)floor(sqrt(distance2(x[index[i]], x[index[j]]))/binwidth)] += gsans->slength[index[i]]*gsans->slength[index[j]];
            }
        }
#endif
        gmx_rng_destroy(rng);
    }
    else
    {
#ifdef GMX_OPENMP
        nthreads = gmx_omp_get_max_threads();
        /* Allocating memory for tgr arrays */
        snew(tgr, nthreads);
        for (i = 0; i < nthreads; i++)
        {
            snew(tgr[i], pr->grn);
        }
        #pragma omp parallel shared(tgr) private(tid,i,j)
        {
            tid = gmx_omp_get_thread_num();
            /* starting parallel threads */
            #pragma omp for
            for (i = 0; i < isize; i++)
            {
                for (j = 0; j < i; j++)
                {
                    tgr[tid][(int)floor(sqrt(distance2(x[index[i]], x[index[j]]))/binwidth)] += gsans->slength[index[i]]*gsans->slength[index[j]];
                }
            }
        }
        /* collecating data for pr->gr */
        for (i = 0; i < pr->grn; i++)
        {
            for (j = 0; j < nthreads; j++)
            {
                pr->gr[i] += tgr[j][i];
            }
        }
        /* freeing memory for tgr */
        for (i = 0; i < nthreads; i++)
        {
            sfree(tgr[i]);
        }
        sfree(tgr);
#else
        for (i = 0; i < isize; i++)
        {
            for (j = 0; j < i; j++)
            {
                pr->gr[(int)floor(sqrt(distance2(x[index[i]], x[index[j]]))/binwidth)] += gsans->slength[index[i]]*gsans->slength[index[j]];
            }
        }
#endif
    }

    /* normalize if needed */
    if (bNORM)
    {
        normalize_probability(pr->grn, pr->gr);
    }

    snew(pr->r, pr->grn);
    for (i = 0; i < pr->grn; i++)
    {
        pr->r[i] = (pr->binwidth*i+pr->binwidth*0.5);
    }

    return (gmx_radial_distribution_histogram_t *) pr;
}
Ejemplo n.º 4
0
/* Set CPU affinity. Can be important for performance.
   On some systems (e.g. Cray) CPU Affinity is set by default.
   But default assigning doesn't work (well) with only some ranks
   having threads. This causes very low performance.
   External tools have cumbersome syntax for setting affinity
   in the case that only some ranks have threads.
   Thus it is important that GROMACS sets the affinity internally
   if only PME is using threads.
 */
void
gmx_set_thread_affinity(FILE                *fplog,
                        const t_commrec     *cr,
                        const gmx_hw_opt_t  *hw_opt,
                        const gmx_hw_info_t *hwinfo)
{
    int        nth_affinity_set, thread0_id_node,
               nthread_local, nthread_node;
    int        offset;
    int *      localityOrder = nullptr;
    int        rc;

    if (hw_opt->thread_affinity == threadaffOFF)
    {
        /* Nothing to do */
        return;
    }

    /* If the tMPI thread affinity setting is not supported encourage the user
     * to report it as it's either a bug or an exotic platform which we might
     * want to support. */
    if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
    {
        /* we know Mac OS & BlueGene do not support setting thread affinity, so there's
           no point in warning the user in that case. In any other case
           the user might be able to do something about it. */
#if !defined(__APPLE__) && !defined(__bg__)
        md_print_warn(cr, fplog,
                      "NOTE: Cannot set thread affinities on the current platform.\n");
#endif  /* __APPLE__ */
        return;
    }

    /* threads on this MPI process or TMPI thread */
    if (cr->duty & DUTY_PP)
    {
        nthread_local = gmx_omp_nthreads_get(emntNonbonded);
    }
    else
    {
        nthread_local = gmx_omp_nthreads_get(emntPME);
    }

    /* map the current process to cores */
    thread0_id_node = 0;
    nthread_node    = nthread_local;
#if GMX_MPI
    if (PAR(cr) || MULTISIM(cr))
    {
        /* We need to determine a scan of the thread counts in this
         * compute node.
         */
        MPI_Comm comm_intra;

        MPI_Comm_split(MPI_COMM_WORLD,
                       gmx_physicalnode_id_hash(), cr->rank_intranode,
                       &comm_intra);
        MPI_Scan(&nthread_local, &thread0_id_node, 1, MPI_INT, MPI_SUM, comm_intra);
        /* MPI_Scan is inclusive, but here we need exclusive */
        thread0_id_node -= nthread_local;
        /* Get the total number of threads on this physical node */
        MPI_Allreduce(&nthread_local, &nthread_node, 1, MPI_INT, MPI_SUM, comm_intra);
        MPI_Comm_free(&comm_intra);
    }
#endif

    if (hw_opt->thread_affinity == threadaffAUTO &&
        nthread_node != hwinfo->nthreads_hw_avail)
    {
        if (nthread_node > 1 && nthread_node < hwinfo->nthreads_hw_avail)
        {
            md_print_warn(cr, fplog,
                          "NOTE: The number of threads is not equal to the number of (logical) cores\n"
                          "      and the -pin option is set to auto: will not pin thread to cores.\n"
                          "      This can lead to significant performance degradation.\n"
                          "      Consider using -pin on (and -pinoffset in case you run multiple jobs).\n");
        }

        return;
    }

    offset = 0;
    if (hw_opt->core_pinning_offset != 0)
    {
        offset = hw_opt->core_pinning_offset;
        md_print_info(cr, fplog, "Applying core pinning offset %d\n", offset);
    }

    int core_pinning_stride = hw_opt->core_pinning_stride;
    rc = get_thread_affinity_layout(fplog, cr, hwinfo,
                                    nthread_node,
                                    offset, &core_pinning_stride,
                                    &localityOrder);
    gmx::scoped_guard_sfree localityOrderGuard(localityOrder);

    if (rc != 0)
    {
        /* Incompatible layout, don't pin, warning was already issued */
        return;
    }

    /* Set the per-thread affinity. In order to be able to check the success
     * of affinity settings, we will set nth_affinity_set to 1 on threads
     * where the affinity setting succeded and to 0 where it failed.
     * Reducing these 0/1 values over the threads will give the total number
     * of threads on which we succeeded.
     */

    // To avoid warnings from the static analyzer we initialize nth_affinity_set
    // to zero outside the OpenMP block, and then add to it inside the block.
    // The value will still always be 0 or 1 from each thread.
    nth_affinity_set = 0;
#pragma omp parallel num_threads(nthread_local) reduction(+:nth_affinity_set)
    {
        try
        {
            int      thread_id, thread_id_node;
            int      index, core;
            gmx_bool setaffinity_ret;

            thread_id      = gmx_omp_get_thread_num();
            thread_id_node = thread0_id_node + thread_id;
            index          = offset + thread_id_node*core_pinning_stride;
            if (localityOrder != nullptr)
            {
                core = localityOrder[index];
            }
            else
            {
                core = index;
            }

            setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);

            /* store the per-thread success-values of the setaffinity */
            nth_affinity_set += (setaffinity_ret == 0);

            if (debug)
            {
                fprintf(debug, "On rank %2d, thread %2d, index %2d, core %2d the affinity setting returned %d\n",
                        cr->nodeid, gmx_omp_get_thread_num(), index, core, setaffinity_ret);
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }

    if (nth_affinity_set > nthread_local)
    {
        char msg[STRLEN];

        sprintf(msg, "Looks like we have set affinity for more threads than "
                "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
        gmx_incons(msg);
    }
    else
    {
        /* check & warn if some threads failed to set their affinities */
        const bool allAffinitiesSet = (nth_affinity_set == nthread_local);
        if (!allAffinitiesSet)
        {
            char sbuf1[STRLEN], sbuf2[STRLEN];

            /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
            sbuf1[0] = sbuf2[0] = '\0';
            /* Only add rank info if we have more than one rank. */
            if (cr->nnodes > 1)
            {
#if GMX_MPI
#if GMX_THREAD_MPI
                sprintf(sbuf1, "In tMPI thread #%d: ", cr->nodeid);
#else           /* GMX_LIB_MPI */
                sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
#endif
#endif          /* GMX_MPI */
            }

            if (nthread_local > 1)
            {
                sprintf(sbuf2, "for %d/%d thread%s ",
                        nthread_local - nth_affinity_set, nthread_local,
                        nthread_local > 1 ? "s" : "");
            }

            fprintf(stderr, "NOTE: %sAffinity setting %sfailed.\n", sbuf1, sbuf2);
        }
        if (invalidWithinSimulation(cr, !allAffinitiesSet))
        {
            md_print_warn(cr, fplog,
                          "NOTE: Thread affinity setting failed. This can cause performance degradation.\n"
                          "      If you think your settings are correct, ask on the gmx-users list.\n");
        }
    }
}
Ejemplo n.º 5
0
int many_auto_correl(int nfunc, int ndata, int nfft, real **c)
{
    #pragma omp parallel
    {
        try
        {
            typedef real complex[2];
            int          i, j;
            gmx_fft_t    fft1;
            complex     *in, *out;
            int          i0, i1;
            int          nthreads, thread_id;

            nthreads  = gmx_omp_get_max_threads();
            thread_id = gmx_omp_get_thread_num();
            if ((0 == thread_id))
            {
                // fprintf(stderr, "There are %d threads for correlation functions\n", nthreads);
            }
            i0 = thread_id*nfunc/nthreads;
            i1 = std::min(nfunc, (thread_id+1)*nfunc/nthreads);

            gmx_fft_init_1d(&fft1, nfft, GMX_FFT_FLAG_CONSERVATIVE);
            /* Allocate temporary arrays */
            snew(in, nfft);
            snew(out, nfft);
            for (i = i0; (i < i1); i++)
            {
                for (j = 0; j < ndata; j++)
                {
                    in[j][0] = c[i][j];
                    in[j][1] = 0;
                }
                for (; (j < nfft); j++)
                {
                    in[j][0] = in[j][1] = 0;
                }

                gmx_fft_1d(fft1, GMX_FFT_BACKWARD, (void *)in, (void *)out);
                for (j = 0; j < nfft; j++)
                {
                    in[j][0] = (out[j][0]*out[j][0] + out[j][1]*out[j][1])/nfft;
                    in[j][1] = 0;
                }
                for (; (j < nfft); j++)
                {
                    in[j][0] = in[j][1] = 0;
                }

                gmx_fft_1d(fft1, GMX_FFT_FORWARD, (void *)in, (void *)out);
                for (j = 0; (j < nfft); j++)
                {
                    c[i][j] = out[j][0]/ndata;
                }
            }
            /* Free the memory */
            gmx_fft_destroy(fft1);
            sfree(in);
            sfree(out);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }
    // gmx_fft_cleanup();
    return 0;
}
Ejemplo n.º 6
0
gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
                         t_inputrec *ir,
                         gmx_large_int_t step,
                         struct gmx_lincsdata *lincsd,t_mdatoms *md,
                         t_commrec *cr, 
                         rvec *x,rvec *xprime,rvec *min_proj,
                         matrix box,t_pbc *pbc,
                         real lambda,real *dvdlambda,
                         real invdt,rvec *v,
                         gmx_bool bCalcVir,tensor vir_r_m_dr,
                         int econq,
                         t_nrnb *nrnb,
                         int maxwarn,int *warncount)
{
    char  buf[STRLEN],buf2[22],buf3[STRLEN];
    int   i,warn,p_imax,error;
    real  ncons_loc,p_ssd,p_max=0;
    rvec  dx;
    gmx_bool  bOK;
    
    bOK = TRUE;
    
    if (lincsd->nc == 0 && cr->dd == NULL)
    {
        if (bLog || bEner)
        {
            lincsd->rmsd_data[0] = 0;
            if (ir->eI == eiSD2 && v == NULL)
            {
                i = 2;
            }
            else
            {
                i = 1;
            }
            lincsd->rmsd_data[i] = 0;
        }
        
        return bOK;
    }
    
    if (econq == econqCoord)
    {
        if (ir->efep != efepNO)
        {
            if (md->nMassPerturbed && lincsd->matlam != md->lambda)
            {
                set_lincs_matrix(lincsd,md->invmass,md->lambda);
            }
            
            for(i=0; i<lincsd->nc; i++)
            {
                lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
            }
        }
        
        if (lincsd->ncg_flex)
        {
            /* Set the flexible constraint lengths to the old lengths */
            if (pbc != NULL)
            {
                for(i=0; i<lincsd->nc; i++)
                {
                    if (lincsd->bllen[i] == 0) {
                        pbc_dx_aiuc(pbc,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
                        lincsd->bllen[i] = norm(dx);
                    }
                }
            }
            else
            {
                for(i=0; i<lincsd->nc; i++)
                {
                    if (lincsd->bllen[i] == 0)
                    {
                        lincsd->bllen[i] =
                            sqrt(distance2(x[lincsd->bla[2*i]],
                                           x[lincsd->bla[2*i+1]]));
                    }
                }
            }
        }
        
        if (bLog && fplog)
        {
            cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
                    &ncons_loc,&p_ssd,&p_max,&p_imax);
        }

        /* This warn var can be updated by multiple threads
         * at the same time. But as we only need to detect
         * if a warning occured or not, this is not an issue.
         */
        warn = -1;

        /* The OpenMP parallel region of constrain_lincs for coords */
#pragma omp parallel num_threads(lincsd->nth)
        {
            int th=gmx_omp_get_thread_num();

            clear_mat(lincsd->th[th].vir_r_m_dr);

            do_lincs(x,xprime,box,pbc,lincsd,th,
                     md->invmass,cr,
                     bCalcVir || (ir->efep != efepNO),
                     ir->LincsWarnAngle,&warn,
                     invdt,v,bCalcVir,
                     th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
        }

        if (ir->efep != efepNO)
        {
            real dt_2,dvdl=0;
            
            dt_2 = 1.0/(ir->delta_t*ir->delta_t);
            for(i=0; (i<lincsd->nc); i++)
            {
                dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
            }
            *dvdlambda += dvdl;
		}
        
        if (bLog && fplog && lincsd->nc > 0)
        {
            fprintf(fplog,"   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
            fprintf(fplog,"       Before LINCS          %.6f    %.6f %6d %6d\n",
                    sqrt(p_ssd/ncons_loc),p_max,
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
        }
        if (bLog || bEner)
        {
            cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
                    &ncons_loc,&p_ssd,&p_max,&p_imax);
            /* Check if we are doing the second part of SD */
            if (ir->eI == eiSD2 && v == NULL)
            {
                i = 2;
            }
            else
            {
                i = 1;
            }
            lincsd->rmsd_data[0] = ncons_loc;
            lincsd->rmsd_data[i] = p_ssd;
        }
        else
        {
            lincsd->rmsd_data[0] = 0;
            lincsd->rmsd_data[1] = 0;
            lincsd->rmsd_data[2] = 0;
        }
        if (bLog && fplog && lincsd->nc > 0)
        {
            fprintf(fplog,
                    "        After LINCS          %.6f    %.6f %6d %6d\n\n",
                    sqrt(p_ssd/ncons_loc),p_max,
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
        }
        
        if (warn >= 0)
        {
            if (maxwarn >= 0)
            {
                cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
                        &ncons_loc,&p_ssd,&p_max,&p_imax);
                if (MULTISIM(cr))
                {
                    sprintf(buf3," in simulation %d", cr->ms->sim);
                }
                else
                {
                    buf3[0] = 0;
                }
                sprintf(buf,"\nStep %s, time %g (ps)  LINCS WARNING%s\n"
                        "relative constraint deviation after LINCS:\n"
                        "rms %.6f, max %.6f (between atoms %d and %d)\n",
                        gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
                        buf3,
                        sqrt(p_ssd/ncons_loc),p_max,
                        ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                        ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
                if (fplog)
                {
                    fprintf(fplog,"%s",buf);
                }
                fprintf(stderr,"%s",buf);
                lincs_warning(fplog,cr->dd,x,xprime,pbc,
                              lincsd->nc,lincsd->bla,lincsd->bllen,
                              ir->LincsWarnAngle,maxwarn,warncount);
            }
            bOK = (p_max < 0.5);
        }
        
        if (lincsd->ncg_flex) {
            for(i=0; (i<lincsd->nc); i++)
                if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
                    lincsd->bllen[i] = 0;
        }
    } 
    else
    {
        /* The OpenMP parallel region of constrain_lincs for derivatives */
#pragma omp parallel num_threads(lincsd->nth)
        {
            int th=gmx_omp_get_thread_num();

            do_lincsp(x,xprime,min_proj,pbc,lincsd,th,
                      md->invmass,econq,ir->efep != efepNO ? dvdlambda : NULL,
                      bCalcVir,th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
        }
    }

    if (bCalcVir && lincsd->nth > 1)
    {
        for(i=1; i<lincsd->nth; i++)
        {
            m_add(vir_r_m_dr,lincsd->th[i].vir_r_m_dr,vir_r_m_dr);
        }
    }
 
    /* count assuming nit=1 */
    inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
    inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
    if (lincsd->ntriangle > 0)
    {
        inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
    }
    if (v)
    {
        inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
    }
    if (bCalcVir)
    {
        inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);
    }

    return bOK;
}