Example #1
0
gmx_wallcycle_t wallcycle_init(FILE *fplog, int resetstep, t_commrec gmx_unused *cr,
                               int nthreads_pp, int nthreads_pme)
{
    gmx_wallcycle_t wc;


    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc, 1);

    wc->haveInvalidCount    = FALSE;
    wc->wc_barrier          = FALSE;
    wc->wcc_all             = NULL;
    wc->wc_depth            = 0;
    wc->ewc_prev            = -1;
    wc->reset_counters      = resetstep;
    wc->nthreads_pp         = nthreads_pp;
    wc->nthreads_pme        = nthreads_pme;
    wc->cycles_sum          = NULL;

#ifdef GMX_MPI
    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog, "\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier       = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }
#endif

    snew(wc->wcc, ewcNR);
    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog, "\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all, ewcNR*ewcNR);
    }

#ifdef GMX_CYCLE_SUBCOUNTERS
    snew(wc->wcsc, ewcsNR);
#endif

#ifdef DEBUG_WCYCLE
    wc->count_depth = 0;
#endif

    return wc;
}
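The counter object built above is consumed by matched start/stop calls around each timed section. A minimal standalone sketch of that accounting pattern, with a plain struct and clock() standing in for the GROMACS wallcc_t slots and the hardware cycle counter (all names below are illustrative stand-ins, not the GROMACS API):

#include <stdio.h>
#include <time.h>

#define EWC_NR 2  /* stand-in for ewcNR, the number of timed sections */

/* Stand-in for one counter slot: start tick, accumulated ticks, call count. */
typedef struct {
    clock_t start;
    clock_t c;
    int     n;
} wallcc_t;

static void wc_start(wallcc_t *wcc, int ewc)
{
    wcc[ewc].start = clock();
}

static void wc_stop(wallcc_t *wcc, int ewc)
{
    wcc[ewc].c += clock() - wcc[ewc].start;  /* accumulate elapsed ticks */
    wcc[ewc].n++;                            /* count start/stop pairs */
}

int main(void)
{
    wallcc_t wcc[EWC_NR] = {{0, 0, 0}};
    wc_start(wcc, 0);
    for (volatile int i = 0; i < 1000000; i++) { }  /* the timed region */
    wc_stop(wcc, 0);
    printf("section 0: %d call(s), %ld ticks\n", wcc[0].n, (long)wcc[0].c);
    return 0;
}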
Example #2
0
gmx_wallcycle_t wallcycle_init(FILE *fplog,t_commrec *cr)
{
    gmx_wallcycle_t wc;
    char *env_ptr;
    
    
    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc,1);

    wc->wc_barrier = FALSE;
    wc->wcc_all    = NULL;
    wc->wc_depth   = 0;
    wc->ewc_prev   = -1;

#ifdef GMX_MPI
    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog) 
        {
            fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }
#endif

    snew(wc->wcc,ewcNR);
    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
        if (fplog) 
        {
            fprintf(fplog,"\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all,ewcNR*ewcNR);
    }
    
    /* Read variable GMX_RESET_COUNTERS from environment */
    wc->reset_counters = -1;
    if ((env_ptr=getenv("GMX_RESET_COUNTERS")) != NULL)
    {
        sscanf(env_ptr,gmx_step_pfmt,&wc->reset_counters);
    }
    
    return wc;
}
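Unlike Example #1, this variant takes no resetstep argument and instead reads the reset step from the GMX_RESET_COUNTERS environment variable via sscanf. A self-contained sketch of that parsing pattern, with %lld and long long standing in for the gmx_step_pfmt format macro and the step-sized integer type of this era (these stand-ins are assumptions):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    long long reset_counters = -1;  /* -1 means "never reset", the default above */
    const char *env_ptr = getenv("GMX_RESET_COUNTERS");
    if (env_ptr != NULL)
    {
        /* Mirrors sscanf(env_ptr, gmx_step_pfmt, &wc->reset_counters) above. */
        sscanf(env_ptr, "%lld", &reset_counters);
    }
    printf("reset_counters = %lld\n", reset_counters);
    return 0;
}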
Example #3
0
gmx_wallcycle_t wallcycle_init(FILE *fplog,int resetstep,t_commrec *cr)
{
    gmx_wallcycle_t wc;
    
    
    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc,1);

    wc->wc_barrier = FALSE;
    wc->wcc_all    = NULL;
    wc->wc_depth   = 0;
    wc->ewc_prev   = -1;
    wc->reset_counters = resetstep;

#ifdef GMX_MPI
    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog) 
        {
            fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }
#endif

    snew(wc->wcc,ewcNR);
    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
#ifndef GMX_THREADS
        if (fplog)
        {
            fprintf(fplog,"\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all,ewcNR*ewcNR);
#else
        gmx_fatal(FARGS, "GMX_CYCLE_ALL is incompatible with threaded code");
#endif
    }
    
    return wc;
}
Example #4
0
void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
                      t_commrec                 *cr,
                      FILE                      *fp_log,
                      const t_inputrec          *ir,
                      matrix                     box,
                      const interaction_const_t *ic,
                      struct gmx_pme_t          *pmedata,
                      gmx_bool                   bUseGPU,
                      gmx_bool                  *bPrinting)
{
    pme_load_balancing_t *pme_lb;
    real                  spm, sp;
    int                   d;

    snew(pme_lb, 1);

    pme_lb->bSepPMERanks  = !(cr->duty & DUTY_PME);

    /* Initially we turn balancing on directly, based on PP/PME imbalance */
    pme_lb->bTriggerOnDLB = FALSE;

    /* Any number of stages >= 2 is supported */
    pme_lb->nstage        = 2;

    pme_lb->cutoff_scheme = ir->cutoff_scheme;

    if (pme_lb->cutoff_scheme == ecutsVERLET)
    {
        pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
        pme_lb->rbuf_vdw     = pme_lb->rbuf_coulomb;
    }
    else
    {
        if (ic->rcoulomb > ic->rlist)
        {
            pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
        }
        else
        {
            pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
        }
        if (ic->rvdw > ic->rlist)
        {
            pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
        }
        else
        {
            pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
        }
    }

    copy_mat(box, pme_lb->box_start);
    if (ir->ePBC == epbcXY && ir->nwall == 2)
    {
        svmul(ir->wall_ewald_zfac, pme_lb->box_start[ZZ], pme_lb->box_start[ZZ]);
    }

    pme_lb->n = 1;
    snew(pme_lb->setup, pme_lb->n);

    pme_lb->rcut_vdw                 = ic->rvdw;
    pme_lb->rcut_coulomb_start       = ir->rcoulomb;
    pme_lb->nstcalclr_start          = ir->nstcalclr;

    pme_lb->cur                      = 0;
    pme_lb->setup[0].rcut_coulomb    = ic->rcoulomb;
    pme_lb->setup[0].rlist           = ic->rlist;
    pme_lb->setup[0].rlistlong       = ic->rlistlong;
    pme_lb->setup[0].nstcalclr       = ir->nstcalclr;
    pme_lb->setup[0].grid[XX]        = ir->nkx;
    pme_lb->setup[0].grid[YY]        = ir->nky;
    pme_lb->setup[0].grid[ZZ]        = ir->nkz;
    pme_lb->setup[0].ewaldcoeff_q    = ic->ewaldcoeff_q;
    pme_lb->setup[0].ewaldcoeff_lj   = ic->ewaldcoeff_lj;

    pme_lb->setup[0].pmedata         = pmedata;

    spm = 0;
    for (d = 0; d < DIM; d++)
    {
        sp = norm(pme_lb->box_start[d])/pme_lb->setup[0].grid[d];
        if (sp > spm)
        {
            spm = sp;
        }
    }
    pme_lb->setup[0].spacing = spm;

    if (ir->fourier_spacing > 0)
    {
        pme_lb->cut_spacing = ir->rcoulomb/ir->fourier_spacing;
    }
    else
    {
        pme_lb->cut_spacing = ir->rcoulomb/pme_lb->setup[0].spacing;
    }

    pme_lb->stage = 0;

    pme_lb->fastest     = 0;
    pme_lb->lower_limit = 0;
    pme_lb->start       = 0;
    pme_lb->end         = 0;
    pme_lb->elimited    = epmelblimNO;

    pme_lb->cycles_n = 0;
    pme_lb->cycles_c = 0;

    /* Tune with GPUs and/or separate PME ranks.
     * When running only on a CPU without PME ranks, PME tuning will only help
     * with small numbers of atoms in the cut-off sphere.
     */
    pme_lb->bActive  = (wallcycle_have_counter() && (bUseGPU ||
                                                     pme_lb->bSepPMERanks));

    /* With GPUs and no separate PME ranks we can't measure the PP/PME
     * imbalance, so we start balancing right away.
     * Otherwise we only start balancing after we observe imbalance.
     */
    pme_lb->bBalance = (pme_lb->bActive && (bUseGPU && !pme_lb->bSepPMERanks));

    pme_lb->step_rel_stop = PMETunePeriod*ir->nstlist;

    /* Delay DD load balancing when GPUs are used */
    if (pme_lb->bActive && DOMAINDECOMP(cr) && cr->dd->nnodes > 1 && bUseGPU)
    {
        /* Lock DLB=auto to off (does nothing when DLB=yes/no).
         * With GPUs and separate PME ranks, we want to first
         * do PME tuning without DLB, since DLB might limit
         * the cut-off, which never improves performance.
         * We allow for DLB + PME tuning after a first round of tuning.
         */
        dd_dlb_lock(cr->dd);
        if (dd_dlb_is_locked(cr->dd))
        {
            md_print_warn(cr, fp_log, "NOTE: DLB will not turn on during the first phase of PME tuning\n");
        }
    }

    *pme_lb_p = pme_lb;

    *bPrinting = pme_lb->bBalance;
}
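The bActive/bBalance assignments near the end of the function form a small truth table: tuning is active only when a cycle counter exists and GPUs and/or separate PME ranks are in use, and balancing starts immediately only when the PP/PME imbalance cannot be measured (GPUs without separate PME ranks). A standalone restatement of that logic (the function and variable names here are illustrative, not GROMACS API):

#include <stdbool.h>
#include <stdio.h>

/* Restates the pme_lb->bActive / pme_lb->bBalance decisions above. */
static void decide(bool haveCounter, bool useGpu, bool sepPmeRanks)
{
    bool active  = haveCounter && (useGpu || sepPmeRanks);
    bool balance = active && useGpu && !sepPmeRanks;
    printf("gpu=%d sep=%d -> active=%d balance=%d\n",
           (int)useGpu, (int)sepPmeRanks, (int)active, (int)balance);
}

int main(void)
{
    for (int g = 0; g < 2; g++)
    {
        for (int s = 0; s < 2; s++)
        {
            decide(true, g != 0, s != 0);
        }
    }
    return 0;
}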
Example #5
0
void pme_loadbal_init(pme_load_balancing_t **pme_lb_p,
                      const t_inputrec *ir, matrix box,
                      const interaction_const_t *ic,
                      struct gmx_pme_t *pmedata,
                      gmx_bool bUseGPU, gmx_bool bSepPMERanks,
                      gmx_bool *bPrinting)
{
    pme_load_balancing_t *pme_lb;
    real                  spm, sp;
    int                   d;

    snew(pme_lb, 1);

    pme_lb->bSepPMERanks  = bSepPMERanks;

    /* Any number of stages >= 2 is supported */
    pme_lb->nstage        = 2;

    pme_lb->cutoff_scheme = ir->cutoff_scheme;

    if (pme_lb->cutoff_scheme == ecutsVERLET)
    {
        pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
        pme_lb->rbuf_vdw     = pme_lb->rbuf_coulomb;
    }
    else
    {
        if (ic->rcoulomb > ic->rlist)
        {
            pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
        }
        else
        {
            pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
        }
        if (ic->rvdw > ic->rlist)
        {
            pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
        }
        else
        {
            pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
        }
    }

    copy_mat(box, pme_lb->box_start);
    if (ir->ePBC == epbcXY && ir->nwall == 2)
    {
        svmul(ir->wall_ewald_zfac, pme_lb->box_start[ZZ], pme_lb->box_start[ZZ]);
    }

    pme_lb->n = 1;
    snew(pme_lb->setup, pme_lb->n);

    pme_lb->rcut_vdw                 = ic->rvdw;
    pme_lb->rcut_coulomb_start       = ir->rcoulomb;
    pme_lb->nstcalclr_start          = ir->nstcalclr;

    pme_lb->cur                      = 0;
    pme_lb->setup[0].rcut_coulomb    = ic->rcoulomb;
    pme_lb->setup[0].rlist           = ic->rlist;
    pme_lb->setup[0].rlistlong       = ic->rlistlong;
    pme_lb->setup[0].nstcalclr       = ir->nstcalclr;
    pme_lb->setup[0].grid[XX]        = ir->nkx;
    pme_lb->setup[0].grid[YY]        = ir->nky;
    pme_lb->setup[0].grid[ZZ]        = ir->nkz;
    pme_lb->setup[0].ewaldcoeff_q    = ic->ewaldcoeff_q;
    pme_lb->setup[0].ewaldcoeff_lj   = ic->ewaldcoeff_lj;

    pme_lb->setup[0].pmedata         = pmedata;

    spm = 0;
    for (d = 0; d < DIM; d++)
    {
        sp = norm(pme_lb->box_start[d])/pme_lb->setup[0].grid[d];
        if (sp > spm)
        {
            spm = sp;
        }
    }
    pme_lb->setup[0].spacing = spm;

    if (ir->fourier_spacing > 0)
    {
        pme_lb->cut_spacing = ir->rcoulomb/ir->fourier_spacing;
    }
    else
    {
        pme_lb->cut_spacing = ir->rcoulomb/pme_lb->setup[0].spacing;
    }

    pme_lb->stage = 0;

    pme_lb->fastest  = 0;
    pme_lb->start    = 0;
    pme_lb->end      = 0;
    pme_lb->elimited = epmelblimNO;

    pme_lb->cycles_n = 0;
    pme_lb->cycles_c = 0;

    /* Tune with GPUs and/or separate PME ranks.
     * When running only on a CPU without PME ranks, PME tuning will only help
     * with small numbers of atoms in the cut-off sphere.
     */
    pme_lb->bActive  = (wallcycle_have_counter() && (bUseGPU || bSepPMERanks));

    /* With GPUs and no separate PME ranks we can't measure the PP/PME
     * imbalance, so we start balancing right away.
     * Otherwise we only start balancing after we observe imbalance.
     */
    pme_lb->bBalance = (pme_lb->bActive && (bUseGPU && !bSepPMERanks));

    *pme_lb_p  = pme_lb;

    *bPrinting = pme_lb->bBalance;
}
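Both pme_loadbal_init variants seed setup[0].spacing with the coarsest grid spacing over the three box vectors, i.e. the maximum of norm(box[d])/grid[d]. A standalone restatement for a rectangular box, where the vector norm reduces to the edge length (the box and grid values below are made-up illustration data):

#include <stdio.h>

int main(void)
{
    /* Rectangular box edge lengths (nm) and an initial PME grid,
     * playing the roles of pme_lb->box_start and ir->nkx/nky/nkz. */
    double box[3]  = {3.0, 3.0, 4.5};
    int    grid[3] = {32, 32, 40};

    double spm = 0;  /* running maximum spacing, as in the loop over DIM */
    for (int d = 0; d < 3; d++)
    {
        double sp = box[d] / grid[d];
        if (sp > spm)
        {
            spm = sp;
        }
    }
    printf("setup[0].spacing = %g nm\n", spm);  /* prints 0.1125 */
    return 0;
}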