/* Set up the communication of atoms that this rank requests from
 * neighboring domain decomposition cells (the fatal error below describes
 * them as atoms "connected via" specat_type).
 *
 * The function works in two phases:
 *  1. Requests: the global indices in ireq->ind[0..ireq->n) are sent along
 *     every decomposition dimension; requests received from neighbors are
 *     appended to ireq->ind and forwarded in the following dimensions.
 *  2. Replies: every received request is looked up among the home atoms
 *     (dd->ga2la) and the atoms communicated so far (ga2la_specat). The
 *     global indices that were found are sent back in the opposite direction
 *     and the replies received are stored in dd->gatindex, starting at
 *     local index at_start.
 *
 * dd            domain decomposition data; dd->gatindex may be reallocated
 * ireq          request list; ireq->ind may be reallocated and is
 *               overwritten beyond ireq->n, ireq->n itself is not changed
 * spac          communication setup that is filled in here (nreq, spas,
 *               bSendAtom, ibuf, vbuf, vbuf2, at_start, at_end)
 * ga2la_specat  global to local index hash for the communicated atoms;
 *               it is both searched and updated
 * at_start      local index at which the first received atom is stored
 * vbuf_fac      multiplier used to size spac->vbuf; spac->vbuf2 is only
 *               sized when vbuf_fac == 2
 * specat_type   name used in the debug output and the fatal error message
 * add_err       extra text appended to the fatal error message
 *
 * Returns at_start plus the total number of received atoms, which is also
 * stored in spac->at_end. Calls gmx_fatal() when the number of atoms
 * received for this rank's own requests differs from ireq->n.
 */
int setup_specat_communication(gmx_domdec_t               *dd,
                               ind_req_t                  *ireq,
                               gmx_domdec_specat_comm_t   *spac,
                               gmx_hash_t                 *ga2la_specat,
                               int                         at_start,
                               int                         vbuf_fac,
                               const char                 *specat_type,
                               const char                 *add_err)
{
    int               nsend[2], nlast, nsend_zero[2] = {0, 0}, *nsend_ptr;
    int               d, dim, ndir, dir, nr, ns, i, nrecv_local, n0, start, indr, ind, buf[2];
    int               nat_tot_specat, nat_tot_prev, nalloc_old;
    gmx_bool          bPBC;
    gmx_specatsend_t *spas;

    if (debug)
    {
        fprintf(debug, "Begin setup_specat_communication for %s\n", specat_type);
    }

    /* nsend[0]: the number of atoms requested by this node only,
     *           we communicate this for more efficient checks
     * nsend[1]: the total number of requested atoms
     */
    nsend[0] = ireq->n;
    nsend[1] = nsend[0];
    /* nlast is the number of entries of ireq->ind that are in use */
    nlast    = nsend[1];
    /* Phase 1: pass the requests through the decomposition dimensions in
     * reverse order. Requests received in a dimension are appended to
     * ireq->ind and, through nsend[1] = nlast at the end of the dimension,
     * are forwarded together with our own requests in the next dimension.
     */
    for (d = dd->ndim-1; d >= 0; d--)
    {
        /* Pulse the grid forward and backward */
        dim  = dd->dim[d];
        bPBC = (dim < dd->npbcdim);
        if (dd->nc[dim] == 2)
        {
            /* Only 2 cells, so we only need to communicate once */
            ndir = 1;
        }
        else
        {
            ndir = 2;
        }
        for (dir = 0; dir < ndir; dir++)
        {
            if (!bPBC &&
                dd->nc[dim] > 2 &&
                ((dir == 0 && dd->ci[dim] == dd->nc[dim] - 1) ||
                 (dir == 1 && dd->ci[dim] == 0)))
            {
                /* No pbc: the first/last cell should not request atoms */
                nsend_ptr = nsend_zero;
            }
            else
            {
                nsend_ptr = nsend;
            }
            /* Communicate the number of indices */
            dd_sendrecv_int(dd, d, dir == 0 ? dddirForward : dddirBackward,
                            nsend_ptr, 2, spac->nreq[d][dir], 2);
            /* nreq[d][dir][1] is the total request count of the neighbor,
             * nreq[d][dir][0] (used in phase 2) the count of its own requests.
             */
            nr = spac->nreq[d][dir][1];
            /* Make room for the nr incoming indices behind the nlast in use */
            if (nlast+nr > ireq->nalloc)
            {
                ireq->nalloc = over_alloc_dd(nlast+nr);
                srenew(ireq->ind, ireq->nalloc);
            }
            /* Communicate the indices */
            dd_sendrecv_int(dd, d, dir == 0 ? dddirForward : dddirBackward,
                            ireq->ind, nsend_ptr[1], ireq->ind+nlast, nr);
            nlast += nr;
        }
        /* From the next dimension on, also forward what we just received */
        nsend[1] = nlast;
    }
    if (debug)
    {
        fprintf(debug, "Communicated the counts\n");
    }

    /* Search for the requested atoms and communicate the indices we have */
    nat_tot_specat = at_start;
    nrecv_local    = 0;
    /* Phase 2: the dimensions are visited in the opposite order of phase 1
     * and the directions from high to low, so that the block of requests
     * for (d, dir) is taken from the end of ireq->ind (start = nlast - nr).
     */
    for (d = 0; d < dd->ndim; d++)
    {
        /* Pulse the grid forward and backward */
        /* NOTE(review): this rule for ndir differs from the one in phase 1.
         * For a dimension with 2 cells and no pbc, phase 1 uses one
         * direction while this loop uses two, so spac->nreq[d][1] is read
         * without having been set in this call. This appears to rely on
         * those counts being zero - confirm.
         */
        if (dd->dim[d] >= dd->npbcdim || dd->nc[dd->dim[d]] > 2)
        {
            ndir = 2;
        }
        else
        {
            ndir = 1;
        }
        nat_tot_prev = nat_tot_specat;
        for (dir = ndir-1; dir >= 0; dir--)
        {
            /* bSendAtom needs an entry for every local index in use so far;
             * newly allocated entries start out as FALSE.
             */
            if (nat_tot_specat > spac->bSendAtom_nalloc)
            {
                nalloc_old             = spac->bSendAtom_nalloc;
                spac->bSendAtom_nalloc = over_alloc_dd(nat_tot_specat);
                srenew(spac->bSendAtom, spac->bSendAtom_nalloc);
                for (i = nalloc_old; i < spac->bSendAtom_nalloc; i++)
                {
                    spac->bSendAtom[i] = FALSE;
                }
            }
            spas = &spac->spas[d][dir];
            n0   = spac->nreq[d][dir][0];
            nr   = spac->nreq[d][dir][1];
            if (debug)
            {
                fprintf(debug, "dim=%d, dir=%d, searching for %d atoms\n",
                        d, dir, nr);
            }
            start       = nlast - nr;
            spas->nsend = 0;
            /* nsend[0] is reused to count the atoms found among the first
             * n0 requests, i.e. those requested by the neighbor itself.
             */
            nsend[0]    = 0;
            for (i = 0; i < nr; i++)
            {
                indr = ireq->ind[start+i];
                ind  = -1;
                /* Check if this is a home atom and if so ind will be set */
                if (!ga2la_get_home(dd->ga2la, indr, &ind))
                {
                    /* Search in the communicated atoms */
                    ind = gmx_hash_get_minone(ga2la_specat, indr);
                }
                if (ind >= 0)
                {
                    /* The neighbor's own requests (i < n0) are always sent;
                     * forwarded requests only when the atom is not yet in
                     * this send list.
                     */
                    if (i < n0 || !spac->bSendAtom[ind])
                    {
                        if (spas->nsend+1 > spas->a_nalloc)
                        {
                            spas->a_nalloc = over_alloc_large(spas->nsend+1);
                            srenew(spas->a, spas->a_nalloc);
                        }
                        /* Store the local index so we know which coordinates
                         * to send out later.
                         */
                        spas->a[spas->nsend] = ind;
                        spac->bSendAtom[ind] = TRUE;
                        if (spas->nsend+1 > spac->ibuf_nalloc)
                        {
                            spac->ibuf_nalloc = over_alloc_large(spas->nsend+1);
                            srenew(spac->ibuf, spac->ibuf_nalloc);
                        }
                        /* Store the global index so we can send it now */
                        spac->ibuf[spas->nsend] = indr;
                        if (i < n0)
                        {
                            nsend[0]++;
                        }
                        spas->nsend++;
                    }
                }
            }
            /* This block of requests has been handled, drop it */
            nlast = start;
            /* Clear the local flags */
            for (i = 0; i < spas->nsend; i++)
            {
                spac->bSendAtom[spas->a[i]] = FALSE;
            }
            /* Send and receive the number of indices to communicate */
            /* The replies travel in the direction opposite to the requests */
            nsend[1] = spas->nsend;
            dd_sendrecv_int(dd, d, dir == 0 ? dddirBackward : dddirForward,
                            nsend, 2, buf, 2);
            if (debug)
            {
                fprintf(debug, "Send to rank %d, %d (%d) indices, "
                        "receive from rank %d, %d (%d) indices\n",
                        dd->neighbor[d][1-dir], nsend[1], nsend[0],
                        dd->neighbor[d][dir], buf[1], buf[0]);
                if (gmx_debug_at)
                {
                    for (i = 0; i < spas->nsend; i++)
                    {
                        fprintf(debug, " %d", spac->ibuf[i]+1);
                    }
                    fprintf(debug, "\n");
                }
            }
            /* buf[0]: atoms the neighbor found for our own requests,
             * buf[1]: all atoms the neighbor sends us in this pulse.
             */
            nrecv_local += buf[0];
            spas->nrecv  = buf[1];
            if (nat_tot_specat + spas->nrecv > dd->gatindex_nalloc)
            {
                dd->gatindex_nalloc =
                    over_alloc_dd(nat_tot_specat + spas->nrecv);
                srenew(dd->gatindex, dd->gatindex_nalloc);
            }
            /* Send and receive the indices */
            dd_sendrecv_int(dd, d, dir == 0 ? dddirBackward : dddirForward,
                            spac->ibuf, spas->nsend,
                            dd->gatindex+nat_tot_specat, spas->nrecv);
            nat_tot_specat += spas->nrecv;
        }

        /* Allocate the x/f communication buffers */
        ns = spac->spas[d][0].nsend;
        nr = spac->spas[d][0].nrecv;
        if (ndir == 2)
        {
            ns += spac->spas[d][1].nsend;
            nr += spac->spas[d][1].nrecv;
        }
        if (vbuf_fac*ns > spac->vbuf_nalloc)
        {
            spac->vbuf_nalloc = over_alloc_dd(vbuf_fac*ns);
            srenew(spac->vbuf, spac->vbuf_nalloc);
        }
        if (vbuf_fac == 2 && vbuf_fac*nr > spac->vbuf2_nalloc)
        {
            spac->vbuf2_nalloc = over_alloc_dd(vbuf_fac*nr);
            srenew(spac->vbuf2, spac->vbuf2_nalloc);
        }

        /* Make a global to local index for the communication atoms */
        /* Done per dimension, so that the atoms received here can be found
         * when serving the requests of the next dimension.
         */
        for (i = nat_tot_prev; i < nat_tot_specat; i++)
        {
            gmx_hash_change_or_set(ga2la_specat, dd->gatindex[i], i);
        }
    }

    /* Check that in the end we got the number of atoms we asked for */
    if (nrecv_local != ireq->n)
    {
        if (debug)
        {
            fprintf(debug, "Requested %d, received %d (tot recv %d)\n",
                    ireq->n, nrecv_local, nat_tot_specat-at_start);
            if (gmx_debug_at)
            {
                /* List all our requests, marking the missing ones with '!' */
                for (i = 0; i < ireq->n; i++)
                {
                    ind = gmx_hash_get_minone(ga2la_specat, ireq->ind[i]);
                    fprintf(debug, " %s%d",
                            (ind >= 0) ? "" : "!",
                            ireq->ind[i]+1);
                }
                fprintf(debug, "\n");
            }
        }
        /* Report the (1-based) global indices of the atoms we did not get */
        fprintf(stderr, "\nDD cell %d %d %d: Neighboring cells do not have atoms:",
                dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
        for (i = 0; i < ireq->n; i++)
        {
            if (gmx_hash_get_minone(ga2la_specat, ireq->ind[i]) < 0)
            {
                fprintf(stderr, " %d", ireq->ind[i]+1);
            }
        }
        fprintf(stderr, "\n");
        gmx_fatal(FARGS, "DD cell %d %d %d could only obtain %d of the %d atoms that are connected via %ss from the neighboring cells. This probably means your %s lengths are too long compared to the domain decomposition cell size. Decrease the number of domain decomposition grid cells%s%s.",
                  dd->ci[XX], dd->ci[YY], dd->ci[ZZ],
                  nrecv_local, ireq->n, specat_type,
                  specat_type, add_err,
                  dd_dlb_is_on(dd) ? " or use the -rcon option of mdrun" : "");
    }

    /* The received atoms occupy the local index range [at_start, at_end) */
    spac->at_start = at_start;
    spac->at_end   = nat_tot_specat;

    if (debug)
    {
        fprintf(debug, "Done setup_specat_communication\n");
    }

    return nat_tot_specat;
}
Example #2
0
/* Set, for each of the DIM dimensions, the offset, the number of cells and
 * the cell size of the search grid.
 *
 * box           box matrix; only off-diagonal elements box[j][i] with j > i
 *               and the diagonal are read
 * izones_x0     lower i-zone bound per dimension
 * izones_x1     upper i-zone bound per dimension
 * rlist         list cut-off; when > 0 it caps the ideal cell size
 * dd            domain decomposition data, may be nullptr
 * ddbox         only dereferenced for dimensions where dd has more than
 *               one cell
 * grid          grid to set up: cell_offset, n, cell_size and ncpddc are
 *               written; ncg_ideal and npbcdim are read
 * grid_density  density used to derive the ideal cell size
 */
static void set_grid_sizes(matrix box, rvec izones_x0, rvec izones_x1, real rlist,
                           const gmx_domdec_t *dd, const gmx_ddbox_t *ddbox,
                           t_grid *grid,
                           real grid_density)
{
    rvec zoneExtent;

    for (int d = 0; d < DIM; d++)
    {
        zoneExtent[d] = izones_x1[d] - izones_x0[d];
        if (debug)
        {
            fprintf(debug,
                    "set_grid_sizes, i-zone bounds for dim %d: %6.3f %6.3f\n",
                    d, izones_x0[d], izones_x1[d]);
        }
    }

    /* The ideal cell size follows from the ideal number of cg's per cell,
     * but a cell is never made larger than rlist.
     */
    real invIdealCellSize = std::cbrt(grid_density/grid->ncg_ideal);
    if (rlist > 0 && invIdealCellSize*rlist < 1)
    {
        invIdealCellSize = 1/rlist;
    }
    if (debug)
    {
        fprintf(debug, "CG density %f ideal ns cell size %f\n",
                grid_density, 1/invIdealCellSize);
    }

    clear_rvec(grid->cell_offset);
    for (int d = 0; d < DIM; d++)
    {
        /* Defaults for a dimension without domain decomposition */
        real     extent        = zoneExtent[d];
        real     extraRange    = 0;
        gmx_bool rectangularDD = FALSE;

        grid->cell_offset[d] = izones_x0[d];

        const gmx_bool haveDD = (dd != nullptr) && (dd->nc[d] > 1);
        if (haveDD)
        {
            /* With DD grid cell jumps only the first decomposition
             * direction has uniform DD cell boundaries.
             */
            rectangularDD = (ddbox->tric_dir[d] == 0) &&
                !(dd_dlb_is_on(dd) && d != dd->dim[0]);

            /* Beyond the i-zone we need rlist, but without pbc never more
             * than what is left up to the upper box edge.
             */
            extraRange = rlist;
            if (d >= ddbox->npbcdim &&
                (rlist == 0 ||
                 izones_x1[d] + extraRange > ddbox->box0[d] + ddbox->box_size[d]))
            {
                extraRange = ddbox->box0[d] + ddbox->box_size[d] - izones_x1[d];
                if (extraRange < 0)
                {
                    extraRange = 0;
                }
            }

            /* With DD the grid only has to span one DD cell plus the
             * extra range, corrected for the skew when not rectangular.
             */
            extent += rectangularDD ? extraRange : extraRange/ddbox->skew_fac[d];

            /* Account for cell boundaries that are not perpendicular to
             * the Cartesian axis. The explicit bound k < DIM keeps the
             * indexing provably in range whatever value grid->npbcdim has.
             */
            for (int k = d + 1; k < grid->npbcdim && k < DIM; k++)
            {
                if (box[k][d] == 0)
                {
                    continue;
                }

                /* Shift the offset to the location of the home cell */
                grid->cell_offset[d] += izones_x0[k]*box[k][d]/box[k][k];

                /* Shift between opposing corners of the triclinic DD cell.
                 * Without rounding we would need to add:
                 * box[k][d]*rlist/(dd->skew_fac[d]*box[k][k])
                 */
                real tricShift = zoneExtent[k]*box[k][d]/box[k][k];
                if (dd->ndim == 1 && k == ZZ)
                {
                    /* With 1D domain decomposition the cg's are not in
                     * the triclinic box, but triclinic x-y and rectangular
                     * y-z, so the shift from the triclinic corner to the
                     * corner at y=0 has to be added.
                     */
                    tricShift += -box[YY][XX]*box[ZZ][YY]/box[YY][YY];
                }

                if (box[k][d] < 0)
                {
                    grid->cell_offset[d] += tricShift;
                    extent               -= tricShift;
                }
                else
                {
                    extent += tricShift;
                }
            }
        }

        if (rectangularDD)
        {
            /* Choose ncpddc such that all particles in one ns cell belong
             * to a single DD cell only; certain ns grid cells can then be
             * excluded beforehand for non-home i-particles.
             */
            int cellsPerDDCell = gmx::roundToInt(zoneExtent[d]*invIdealCellSize);
            if (cellsPerDDCell < 2)
            {
                cellsPerDDCell = 2;
            }
            grid->ncpddc[d]    = cellsPerDDCell;
            grid->cell_size[d] = zoneExtent[d]/cellsPerDDCell;
            grid->n[d]         = cellsPerDDCell + static_cast<int>(extraRange/grid->cell_size[d]) + 1;
        }
        else
        {
            /* No DD, or the boundaries are not rectangular in this
             * direction: use the normal grid search that checks all cells
             * within cut-off distance of the i-particle.
             */
            int numCells = gmx::roundToInt(extent*invIdealCellSize);
            if (numCells < 2)
            {
                numCells = 2;
            }
            grid->n[d]         = numCells;
            grid->cell_size[d] = extent/numCells;
            grid->ncpddc[d]    = 0;
        }

        if (debug)
        {
            fprintf(debug, "grid dim %d size %d x %f: %f - %f\n",
                    d, grid->n[d], grid->cell_size[d],
                    grid->cell_offset[d],
                    grid->cell_offset[d]+grid->n[d]*grid->cell_size[d]);
        }
    }

    if (debug)
    {
        fprintf(debug, "CG ncg ideal %d, actual density %.1f\n",
                grid->ncg_ideal, grid_density*grid->cell_size[XX]*grid->cell_size[YY]*grid->cell_size[ZZ]);
    }
}
/* Perform one round of PME load-balancing bookkeeping; expected to be
 * called once every ir->nstlist steps (any other interval, measured in
 * step cycle counts, ends in gmx_incons()).
 *
 * pme_lb     load-balancing state, must not be NULL; updated in place
 * cr         communication record; cr->dd is used for the DLB queries
 * fp_err     passed on to pme_load_balance()
 * fp_log     log file for the DLB notes and pme_load_balance()
 * ir         input record; nstlist and eDispCorr are read here
 * fr         force record; its cut-off related fields are re-synchronized
 *            with fr->ic after a balancing call
 * state      passed on to pme_load_balance()
 * wcycle     source of the step cycle counters
 * step       passed on to pme_load_balance()
 * step_rel   step count compared against 3*nstlist and step_rel_stop
 * bPrinting  set to pme_lb->bBalance at the end of the function; it is not
 *            written on the two early-return paths
 */
void pme_loadbal_do(pme_load_balancing_t *pme_lb,
                    t_commrec            *cr,
                    FILE                 *fp_err,
                    FILE                 *fp_log,
                    t_inputrec           *ir,
                    t_forcerec           *fr,
                    t_state              *state,
                    gmx_wallcycle_t       wcycle,
                    gmx_int64_t           step,
                    gmx_int64_t           step_rel,
                    gmx_bool             *bPrinting)
{
    assert(pme_lb != NULL);

    if (!pme_lb->bActive)
    {
        /* The tuning has been deactivated, nothing left to do */
        return;
    }

    /* Keep the old counters, so the work since the previous call is known */
    const int    stepCountBefore = pme_lb->cycles_n;
    const double cycleSumBefore  = pme_lb->cycles_c;

    wallcycle_get(wcycle, ewcSTEP, &pme_lb->cycles_n, &pme_lb->cycles_c);
    if (pme_lb->cycles_n == 0)
    {
        /* No steps have been counted yet: we are before the first step */
        return;
    }
    if (pme_lb->cycles_n - stepCountBefore != ir->nstlist)
    {
        /* Returning would be possible, but an error is the safer choice */
        gmx_incons("pme_loadbal_do called at an interval != nstlist");
    }

    /* Decide whether to start balancing; only done with separate PME ranks
     * while no balancing is in progress.
     */
    if (pme_lb->bSepPMERanks && !pme_lb->bBalance)
    {
        if (pme_lb->bTriggerOnDLB)
        {
            /* A second round was requested: start as soon as DLB is on */
            pme_lb->bBalance = dd_dlb_is_on(cr->dd);
        }
        /* The first timing is ignored to avoid timing allocation overhead.
         * As this function runs just before DD repartitioning, the ratio
         * from dd_pme_f_ratio covers not the last nstlist steps but the
         * nstlist steps before those, so the first useful ratio is
         * available at step_rel = 3*nstlist.
         */
        else if (step_rel >= 3*ir->nstlist)
        {
            if (DDMASTER(cr->dd))
            {
                /* Start tuning when the PME rank load is too high */
                pme_lb->bBalance =
                    (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor);
            }
            /* The decision is taken on the master, distribute it */
            dd_bcast(cr->dd, sizeof(gmx_bool), &pme_lb->bBalance);
        }

        pme_lb->bActive = (pme_lb->bBalance ||
                           step_rel <= pme_lb->step_rel_stop);
    }

    /* Termination of a balancing round is checked here, before the call to
     * pme_load_balance() that updates pme_lb->stage, rather than after it.
     * When terminating directly after selecting the optimal setup, DLB
     * would turn on right away if it was locked, possibly due to the PME
     * reinitialization. Checking here gives another nstlist steps with DLB
     * locked for the performance to stabilize.
     */
    if (pme_lb->bBalance && pme_lb->stage == pme_lb->nstage)
    {
        pme_lb->bBalance = FALSE;

        if (!(DOMAINDECOMP(cr)) || !dd_dlb_is_locked(cr->dd))
        {
            /* PME tuning is completely finished */
            pme_lb->bActive = FALSE;
        }
        else
        {
            /* Release the DLB=auto lock, DLB may now activate */
            dd_dlb_unlock(cr->dd);
            md_print_warn(cr, fp_log, "NOTE: DLB can now turn on, when beneficial\n");

            /* Stay active for now: when DLB turns on within
             * PMETunePeriod*nstlist steps, we balance once more.
             */
            continue_pme_loadbal(pme_lb, TRUE);
            pme_lb->bTriggerOnDLB = TRUE;
            pme_lb->step_rel_stop = step_rel + PMETunePeriod*ir->nstlist;
        }

        if (DOMAINDECOMP(cr))
        {
            /* Limit DLB by the finally selected cut-off only, which avoids
             * artificial DLB limits and keeps the currently optimal setting
             * available during a second balancing round.
             */
            set_dd_dlb_max_cutoff(cr, fr->ic->rlistlong);
        }
    }

    if (pme_lb->bBalance)
    {
        /* Fewer than nstlist steps might have been collected, as init_step
         * need not be a multiple of nstlist, but the first data collected
         * is skipped anyhow.
         */
        const double cyclesThisPeriod = pme_lb->cycles_c - cycleSumBefore;

        pme_load_balance(pme_lb, cr,
                         fp_err, fp_log,
                         ir, state, cyclesThisPeriod,
                         fr->ic, fr->nbv, &fr->pmedata,
                         step);

        /* Keep the copies in forcerec in sync with fr->ic */
        fr->ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
        fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
        fr->rlist         = fr->ic->rlist;
        fr->rlistlong     = fr->ic->rlistlong;
        fr->rcoulomb      = fr->ic->rcoulomb;
        fr->rvdw          = fr->ic->rvdw;

        if (ir->eDispCorr != edispcNO)
        {
            calc_enervirdiff(NULL, ir->eDispCorr, fr);
        }
    }

    if (!pme_lb->bBalance)
    {
        /* Not balancing (anymore): deactivate the tuning, unless we are
         * still within the period in which PP/PME imbalance is measured.
         */
        if (!pme_lb->bSepPMERanks || step_rel > pme_lb->step_rel_stop)
        {
            pme_lb->bActive = FALSE;
        }
    }

    if (!pme_lb->bActive && DOMAINDECOMP(cr) && dd_dlb_is_locked(cr->dd))
    {
        /* DLB must not stay locked once the PME tuning is deactivated */
        dd_dlb_unlock(cr->dd);
        md_print_warn(cr, fp_log, "NOTE: DLB can now turn on, when beneficial\n");
    }

    *bPrinting = pme_lb->bBalance;
}
/* Set up the communication of atoms that this rank requests from
 * neighboring domain decomposition cells (the fatal error below describes
 * them as atoms "connected via" specat_type).
 *
 * The function works in two phases:
 *  1. Requests: the global indices in *ireq are sent along every
 *     decomposition dimension; requests received from neighbors are
 *     appended to *ireq and forwarded in the following dimensions.
 *  2. Replies: every received request is looked up among the home atoms
 *     (dd->ga2la) and the atoms communicated so far (ga2la_specat). The
 *     global indices that were found are sent back in the opposite direction
 *     and the replies received are stored in dd->globalAtomIndices,
 *     starting at local index at_start.
 *
 * dd            domain decomposition data; dd->globalAtomIndices is resized
 * ireq          on entry the global indices requested by this rank; on
 *               return it also holds the requests received from neighbors
 *               (it is resized here and not shrunk back)
 * spac          communication setup that is filled in here (nreq, spas,
 *               sendAtom, ibuf, vbuf, vbuf2, at_start, at_end)
 * ga2la_specat  global to local index map for the communicated atoms;
 *               it is both searched and updated
 * at_start      local index at which the first received atom is stored
 * vbuf_fac      multiplier used to size spac->vbuf; spac->vbuf2 is only
 *               resized when vbuf_fac == 2
 * specat_type   name used in the debug output and the fatal error message
 * add_err       extra text appended to the fatal error message
 *
 * Returns at_start plus the total number of received atoms, which is also
 * stored in spac->at_end. Calls gmx_fatal() when the number of atoms
 * received for this rank's own requests differs from the number requested.
 */
int setup_specat_communication(gmx_domdec_t               *dd,
                               std::vector<int>           *ireq,
                               gmx_domdec_specat_comm_t   *spac,
                               gmx::HashedMap<int>        *ga2la_specat,
                               int                         at_start,
                               int                         vbuf_fac,
                               const char                 *specat_type,
                               const char                 *add_err)
{
    int               nsend[2], nlast, nsend_zero[2] = {0, 0}, *nsend_ptr;
    int               dim, ndir, nr, ns, nrecv_local, n0, start, buf[2];
    int               nat_tot_specat, nat_tot_prev;
    gmx_bool          bPBC;
    gmx_specatsend_t *spas;

    if (debug)
    {
        fprintf(debug, "Begin setup_specat_communication for %s\n", specat_type);
    }

    /* nsend[0]: the number of atoms requested by this node only,
     *           we communicate this for more efficient checks
     * nsend[1]: the total number of requested atoms
     */
    /* Stored up front, since *ireq grows while requests are received */
    const int numRequested = ireq->size();
    nsend[0]               = ireq->size();
    nsend[1]               = nsend[0];
    /* nlast is the number of entries of *ireq that are in use */
    nlast                  = nsend[1];
    /* Phase 1: pass the requests through the decomposition dimensions in
     * reverse order. Requests received in a dimension are appended to *ireq
     * and, through nsend[1] = nlast at the end of the dimension, are
     * forwarded together with our own requests in the next dimension.
     */
    for (int d = dd->ndim-1; d >= 0; d--)
    {
        /* Pulse the grid forward and backward */
        dim  = dd->dim[d];
        bPBC = (dim < dd->npbcdim);
        if (dd->nc[dim] == 2)
        {
            /* Only 2 cells, so we only need to communicate once */
            ndir = 1;
        }
        else
        {
            ndir = 2;
        }
        for (int dir = 0; dir < ndir; dir++)
        {
            if (!bPBC &&
                dd->nc[dim] > 2 &&
                ((dir == 0 && dd->ci[dim] == dd->nc[dim] - 1) ||
                 (dir == 1 && dd->ci[dim] == 0)))
            {
                /* No pbc: the first/last cell should not request atoms */
                nsend_ptr = nsend_zero;
            }
            else
            {
                nsend_ptr = nsend;
            }
            /* Communicate the number of indices */
            ddSendrecv(dd, d, dir == 0 ? dddirForward : dddirBackward,
                       nsend_ptr, 2, spac->nreq[d][dir], 2);
            /* nreq[d][dir][1] is the total request count of the neighbor,
             * nreq[d][dir][0] (used in phase 2) the count of its own requests.
             */
            nr = spac->nreq[d][dir][1];
            /* Make room for the nr incoming indices; data() is taken after
             * the resize, which may reallocate the storage.
             */
            ireq->resize(nlast + nr);
            /* Communicate the indices */
            ddSendrecv(dd, d, dir == 0 ? dddirForward : dddirBackward,
                       ireq->data(), nsend_ptr[1], ireq->data() + nlast, nr);
            nlast += nr;
        }
        /* From the next dimension on, also forward what we just received */
        nsend[1] = nlast;
    }
    if (debug)
    {
        fprintf(debug, "Communicated the counts\n");
    }

    /* Search for the requested atoms and communicate the indices we have */
    nat_tot_specat = at_start;
    nrecv_local    = 0;
    /* Phase 2: the dimensions are visited in the opposite order of phase 1
     * and the directions from high to low, so that the block of requests
     * for (d, dir) is taken from the end of *ireq (start = nlast - nr).
     */
    for (int d = 0; d < dd->ndim; d++)
    {
        /* Pulse the grid forward and backward */
        /* NOTE(review): this rule for ndir differs from the one in phase 1.
         * For a dimension with 2 cells and no pbc, phase 1 uses one
         * direction while this loop uses two, so spac->nreq[d][1] is read
         * without having been set in this call. This appears to rely on
         * those counts being zero - confirm.
         */
        if (dd->dim[d] >= dd->npbcdim || dd->nc[dd->dim[d]] > 2)
        {
            ndir = 2;
        }
        else
        {
            ndir = 1;
        }
        nat_tot_prev = nat_tot_specat;
        for (int dir = ndir - 1; dir >= 0; dir--)
        {
            /* To avoid cost of clearing by resize(), we only increase size */
            if (static_cast<size_t>(nat_tot_specat) > spac->sendAtom.size())
            {
                /* Note: resize initializes new elements to false, which is actually needed here */
                spac->sendAtom.resize(nat_tot_specat);
            }
            spas = &spac->spas[d][dir];
            n0   = spac->nreq[d][dir][0];
            nr   = spac->nreq[d][dir][1];
            if (debug)
            {
                fprintf(debug, "dim=%d, dir=%d, searching for %d atoms\n",
                        d, dir, nr);
            }
            start       = nlast - nr;
            /* spas->a collects local indices, spac->ibuf the matching
             * global indices; both are filled in lock-step below.
             */
            spas->a.clear();
            spac->ibuf.clear();
            /* nsend[0] is reused to count the atoms found among the first
             * n0 requests, i.e. those requested by the neighbor itself.
             */
            nsend[0]    = 0;
            for (int i = 0; i < nr; i++)
            {
                const int indr = (*ireq)[start + i];
                int       ind;
                /* Check if this is a home atom and if so ind will be set */
                if (const int *homeIndex = dd->ga2la->findHome(indr))
                {
                    ind = *homeIndex;
                }
                else
                {
                    /* Search in the communicated atoms */
                    if (const int *a = ga2la_specat->find(indr))
                    {
                        ind = *a;
                    }
                    else
                    {
                        /* Not present on this rank */
                        ind = -1;
                    }
                }
                if (ind >= 0)
                {
                    /* The neighbor's own requests (i < n0) are always sent;
                     * forwarded requests only when the atom is not yet in
                     * this send list.
                     */
                    if (i < n0 || !spac->sendAtom[ind])
                    {
                        /* Store the local index so we know which coordinates
                         * to send out later.
                         */
                        spas->a.push_back(ind);
                        spac->sendAtom[ind] = true;
                        /* Store the global index so we can send it now */
                        spac->ibuf.push_back(indr);
                        if (i < n0)
                        {
                            nsend[0]++;
                        }
                    }
                }
            }
            /* This block of requests has been handled, drop it */
            nlast = start;
            /* Clear the local flags */
            for (int a : spas->a)
            {
                spac->sendAtom[a] = false;
            }
            /* Send and receive the number of indices to communicate */
            /* The replies travel in the direction opposite to the requests */
            nsend[1] = spas->a.size();
            ddSendrecv(dd, d, dir == 0 ? dddirBackward : dddirForward,
                       nsend, 2, buf, 2);
            if (debug)
            {
                fprintf(debug, "Send to rank %d, %d (%d) indices, "
                        "receive from rank %d, %d (%d) indices\n",
                        dd->neighbor[d][1-dir], nsend[1], nsend[0],
                        dd->neighbor[d][dir], buf[1], buf[0]);
                if (gmx_debug_at)
                {
                    for (int i : spac->ibuf)
                    {
                        fprintf(debug, " %d", i + 1);
                    }
                    fprintf(debug, "\n");
                }
            }
            /* buf[0]: atoms the neighbor found for our own requests,
             * buf[1]: all atoms the neighbor sends us in this pulse.
             */
            nrecv_local += buf[0];
            spas->nrecv  = buf[1];
            dd->globalAtomIndices.resize(nat_tot_specat + spas->nrecv);
            /* Send and receive the indices */
            ddSendrecv(dd, d, dir == 0 ? dddirBackward : dddirForward,
                       spac->ibuf.data(), spac->ibuf.size(),
                       dd->globalAtomIndices.data() + nat_tot_specat, spas->nrecv);
            nat_tot_specat += spas->nrecv;
        }

        /* Increase the x/f communication buffer sizes, when necessary */
        ns = spac->spas[d][0].a.size();
        nr = spac->spas[d][0].nrecv;
        if (ndir == 2)
        {
            ns += spac->spas[d][1].a.size();
            nr += spac->spas[d][1].nrecv;
        }
        if (vbuf_fac*ns > gmx::index(spac->vbuf.size()))
        {
            spac->vbuf.resize(vbuf_fac*ns);
        }
        if (vbuf_fac == 2 && vbuf_fac*nr > gmx::index(spac->vbuf2.size()))
        {
            spac->vbuf2.resize(vbuf_fac*nr);
        }

        /* Make a global to local index for the communication atoms */
        /* Done per dimension, so that the atoms received here can be found
         * when serving the requests of the next dimension.
         */
        for (int i = nat_tot_prev; i < nat_tot_specat; i++)
        {
            ga2la_specat->insert_or_assign(dd->globalAtomIndices[i], i);
        }
    }

    /* Check that in the end we got the number of atoms we asked for */
    if (nrecv_local != numRequested)
    {
        if (debug)
        {
            fprintf(debug, "Requested %d, received %d (tot recv %d)\n",
                    numRequested, nrecv_local, nat_tot_specat - at_start);
            if (gmx_debug_at)
            {
                /* List all our requests, marking the missing ones with '!' */
                for (int i = 0; i < numRequested; i++)
                {
                    const int *ind = ga2la_specat->find((*ireq)[i]);
                    fprintf(debug, " %s%d",
                            ind ? "" : "!",
                            (*ireq)[i] + 1);
                }
                fprintf(debug, "\n");
            }
        }
        /* Report the (1-based) global indices of the atoms we did not get */
        fprintf(stderr, "\nDD cell %d %d %d: Neighboring cells do not have atoms:",
                dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
        for (int i = 0; i < numRequested; i++)
        {
            if (!ga2la_specat->find((*ireq)[i]))
            {
                fprintf(stderr, " %d", (*ireq)[i] + 1);
            }
        }
        fprintf(stderr, "\n");
        gmx_fatal(FARGS, "DD cell %d %d %d could only obtain %d of the %d atoms that are connected via %ss from the neighboring cells. This probably means your %s lengths are too long compared to the domain decomposition cell size. Decrease the number of domain decomposition grid cells%s%s.",
                  dd->ci[XX], dd->ci[YY], dd->ci[ZZ],
                  nrecv_local, numRequested, specat_type,
                  specat_type, add_err,
                  dd_dlb_is_on(dd) ? " or use the -rcon option of mdrun" : "");
    }

    /* The received atoms occupy the local index range [at_start, at_end) */
    spac->at_start = at_start;
    spac->at_end   = nat_tot_specat;

    if (debug)
    {
        fprintf(debug, "Done setup_specat_communication\n");
    }

    return nat_tot_specat;
}