예제 #1
0
/**
 * Called after engine initialization
 */
void OnPostInitialize(void)
{
	CShIdentifier levelIdentifier("memory");

	bool loaded = ShLevel::Load(levelIdentifier);
	SH_ASSERT(loaded);

	//
	// Create Camera
	g_pCamera = ShCamera::Create(GID(global), GID(camera_free), false);
	SH_ASSERT(NULL != g_pCamera);
	ShCamera::SetPosition(g_pCamera, CShVector3(0, 0.0f, 1000.0f));
	ShCamera::SetTarget(g_pCamera, CShVector3(0.0f, 0.0f, 0.0f));
	ShCamera::SetFarPlaneDistance(g_pCamera, 3000.0f);
	ShCamera::SetViewport(g_pCamera, 256*WIDTH, 256*HEIGHT);
	ShCamera::SetProjectionOrtho(g_pCamera);

	ShCamera::SetCurrent2D(g_pCamera);

	ratio = CShVector2((256*WIDTH)/(float)ShDisplay::GetWidth(), (256*HEIGHT)/(float)ShDisplay::GetHeight());

	g_pWinEntity = ShEntity2::Find(levelIdentifier, CShIdentifier("sprite_memory_win_001"));
	SH_ASSERT(shNULL != g_pWinEntity);
	ShEntity2::SetShow(g_pWinEntity, false);

	//
	// Create all sprites
	for (int i = 0; i < HEIGHT; ++i)
	{
		for (int j = 0; j < WIDTH; ++j)
		{
			CShVector3 pos;
			pos.m_x = (256.0f * j) - (128.0f + (((WIDTH/2)-1) * 256.0f));
			pos.m_y = (256.0f * i) - (128.0f + (((HEIGHT/2)-1) * 256.0f));

			int c = i*WIDTH+j;

			aCards[c].pEntityRecto = ShEntity2::Create(levelIdentifier, CShIdentifier(), GID(layer_default), CShIdentifier("memory"), aIdentifier[c/2], pos, CShEulerAngles(0.0f, 0.0f, 0.0f), CShVector3(1.0f, 1.0f, 1.0f));
			SH_ASSERT(shNULL != aCards[c].pEntityRecto);

			aCards[c].pEntityVerso = ShEntity2::Create(levelIdentifier, CShIdentifier(), GID(layer_default), CShIdentifier("memory"), CShIdentifier("verso"), pos, CShEulerAngles(0.0f, 0.0f, 0.0f), CShVector3(1.0f, 1.0f, 1.0f));
			SH_ASSERT(shNULL != aCards[c].pEntityVerso);

			aCards[c].type = c/2;
		}
	}

	for (int i = 0; i < PIECES; ++i)
	{
		ShEntity2::SetShow(aCards[i].pEntityRecto, false);
	}

	shuffle();
}
/**
 * Called after engine initialization
 */
void OnPostInitialize(void)
{
	CShIdentifier levelIdentifier("character_controller"); // this is the level name

	//
	// Load level
	bool loaded = ShLevel::Load(levelIdentifier);
	SH_ASSERT(loaded);

	//
	// Create camera
	g_pCamera = ShCamera::Create(GID(global), GID(camera_free), false);
	SH_ASSERT(NULL != g_pCamera);
	ShCamera::SetPosition(g_pCamera, CShVector3(-300.0f,-1500.0f, 1000.0f));
	ShCamera::SetTarget(g_pCamera, CShVector3(0.0f, 0.0f, 100.0f));
	ShCamera::SetFarPlaneDistance(g_pCamera, 3000.0f);

	ShCamera::SetCurrent2D(g_pCamera);
	ShCamera::SetCurrent3D(g_pCamera);

	//
	// Find the character entity
	g_pCharacter = ShEntity3::Find(levelIdentifier, CShIdentifier("entitypc_warrior"));
	SH_ASSERT(shNULL != g_pCharacter);

	//
	// Initialize the character controller with the level, the identifier, the position, the radius, the direction, the speed.
	g_pCharacterController = ShCharacterController::Create(levelIdentifier, CShIdentifier("character_controller_character_001"), ShObject::GetPosition2(g_pCharacter), 50.0, g_direction, g_speed);
	SH_ASSERT(shNULL != g_pCharacterController);

	//
	// Create the moving input (arrow up).
	// Using JustPressed function in order to change each time the button is pressed and not continuously.
	g_pInputUp = ShInput::CreateInputPressed(ShInput::e_input_device_keyboard, ShInput::e_input_device_control_pc_key_up, 0.1f);
	SH_ASSERT(shNULL != g_pInputUp);

	//
	// Create the rotation inputs (right and left).
	// Using InputPressed function in order to change until the button is released.
	g_pInputLeft = ShInput::CreateInputPressed(ShInput::e_input_device_keyboard, ShInput::e_input_device_control_pc_key_left, 0.1f);
	SH_ASSERT(NULL != g_pInputLeft);
	g_pInputRight = ShInput::CreateInputPressed(ShInput::e_input_device_keyboard, ShInput::e_input_device_control_pc_key_right, 0.1f);
	SH_ASSERT(NULL != g_pInputRight);

	// Find tyhe two animations : warrior idle and warrior run.
	pAnimationWarriorStop = ShAnimation::Find(CShIdentifier("pc_warrior.pc_warrior.idle.01"));
	SH_ASSERT(NULL != pAnimationWarriorStop);

	pAnimationWarriorRun = ShAnimation::Find(CShIdentifier("pc_warrior.pc_warrior.run.01"));
	SH_ASSERT(NULL != pAnimationWarriorRun);

	// By default, we play the idle animation, allowing it to loop.
	ShEntity3::AnimationPlay(g_pCharacter, pAnimationWarriorStop, true);
}
예제 #3
0
/**
 * Constructor
 */
CShPluginGame::CShPluginGame(void)
: CShPlugin(CShIdentifier("TPS"))
, m_levelIdentifier(GID(NULL))
, m_pBackground(shNULL)
, m_pPlayer(shNULL)
, m_fScale(0.0f)
{

}
예제 #4
0
/**
 * Register the game modes supported by this plugin.
 */
int G_RegisterGames(int hookType, int param, void* data)
{
#define CONFIGDIR       "doom64"
#define STARTUPPK3      PLUGIN_NAMETEXT2 ".pk3"

    GameDef const doom64Def = {
        "doom64", CONFIGDIR, "Doom 64", "Midway Software"
    };

    DENG_UNUSED(hookType); DENG_UNUSED(param); DENG_UNUSED(data);

    gameIds[doom64] = DD_DefineGame(&doom64Def);
    DD_AddGameResource(GID(doom64), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0);
    DD_AddGameResource(GID(doom64), RC_PACKAGE, FF_STARTUP, "doom64.wad", "MAP01;MAP020;MAP38;F_SUCK");
    DD_AddGameResource(GID(doom64), RC_DEFINITION, 0, PLUGIN_NAMETEXT ".ded", 0);
    return true;

#undef STARTUPPK3
#undef CONFIGDIR
}
예제 #5
0
파일: Game.cpp 프로젝트: rgrimmer/GGJ2016
/**
 * Called after engine initialization
 */
void Game::OnPostInitialize(void)
{
	instance();
	instance_->m_registeredAction.action = e_action_none;

	// Create the Camera
	ShCamera * pCamera = ShCamera::Create(GID(global), GID(camera), false);
	SH_ASSERT(shNULL != pCamera);

	ShCamera::SetPosition(pCamera, CShVector3(0.0f, 0.0f, 100.0f));
	ShCamera::SetTarget(pCamera, CShVector3(0.0f, 0.0f, 0.0f));

	ShCamera::SetUp(pCamera, CShVector3(0.0f, 1.0f, 0.0f));

	ShCamera::SetProjectionOrtho(pCamera);
	ShCamera::SetNearPlaneDistance(pCamera, 0.0f);
	ShCamera::SetFarPlaneDistance(pCamera, 200.0f);

	instance_->m_fRescaleRatio = ShDisplay::GetHeight() / (float)ShDisplay::GetWidth();
	ShCamera::SetViewport(pCamera, DISPLAY_WIDTH, DISPLAY_WIDTH * instance_->m_fRescaleRatio);

	ShCamera::SetCurrent2D(pCamera);

	// Initialize Sound
	instance_->m_sound.Initialize();

	// Initialize Transition
	instance_->m_transition.Initialize();

	// Initialize states
	instance_->m_stateMainMenu.Initialize();
	instance_->m_stateCredits.Initialize();
	instance_->m_stateGame.Initialize();

	instance_->Push(MENU);
}
예제 #6
0
파일: padding.c 프로젝트: jlagneau/micro-ls
static t_size	max_group_len(t_list *files)
{
	t_list		*tmp;
	t_size		padding;
	t_grp		*pgroup;

	tmp = files;
	padding = 0;
	while (tmp)
	{
		pgroup = getgrgid(GID(tmp));
		if (padding < ft_strlen(pgroup->gr_name))
			padding = ft_strlen(pgroup->gr_name);
		tmp = tmp->next;
	}
	return (padding);
}
예제 #7
0
real 
do_listed_vdw_q(int ftype,int nbonds,
                const t_iatom iatoms[],const t_iparams iparams[],
                const rvec x[],rvec f[],rvec fshift[],
                const t_pbc *pbc,const t_graph *g,
                real lambda,real *dvdlambda,
                const t_mdatoms *md,
                const t_forcerec *fr,gmx_grppairener_t *grppener,
                int *global_atom_index)
{
    static    gmx_bool bWarn=FALSE;
    real      eps,r2,*tab,rtab2=0;
    rvec      dx,x14[2],f14[2];
    int       i,ai,aj,itype;
    int       typeA[2]={0,0},typeB[2]={0,1};
    real      chargeA[2]={0,0},chargeB[2];
    int       gid,shift_vir,shift_f;
    int       j_index[] = { 0, 1 };
    int       i0=0,i1=1,i2=2;
    ivec      dt;
    int       outeriter,inneriter;
    int       nthreads = 1;
    int       count;
    real      krf,crf,tabscale;
    int       ntype=0;
    real      *nbfp=NULL;
    real      *egnb=NULL,*egcoul=NULL;
    t_nblist  tmplist;
    int       icoul,ivdw;
    gmx_bool      bMolPBC,bFreeEnergy;
    t_pf_global *pf_global;
    
    
#if GMX_THREAD_SHM_FDECOMP
    pthread_mutex_t mtx;
#else
    void *    mtx = NULL;
#endif

    
#if GMX_THREAD_SHM_FDECOMP
    pthread_mutex_initialize(&mtx);
#endif

    bMolPBC = fr->bMolPBC;

    pf_global = fr->pf_global;
    
    switch (ftype) {
    case F_LJ14:
    case F_LJC14_Q:
        eps = fr->epsfac*fr->fudgeQQ;
        ntype  = 1;
        egnb   = grppener->ener[egLJ14];
        egcoul = grppener->ener[egCOUL14];
        break;
    case F_LJC_PAIRS_NB:
        eps = fr->epsfac;
        ntype  = 1;
        egnb   = grppener->ener[egLJSR];
        egcoul = grppener->ener[egCOULSR];
        break;
    default:
        gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",
                  ftype);
    }
    tab = fr->tab14.tab;
    rtab2 = sqr(fr->tab14.r);
    tabscale = fr->tab14.scale;

    krf = fr->k_rf;
    crf = fr->c_rf;

    /* Determine the values for icoul/ivdw. */
    if (fr->bEwald) {
        icoul = 1;
    } 
    else if(fr->bcoultab)
    {
        icoul = 3;
    }
    else if(fr->eeltype == eelRF_NEC)
    {
        icoul = 2;
    }
    else 
    {
        icoul = 1;
    }
    
    if(fr->bvdwtab)
    {
        ivdw = 3;
    }
    else if(fr->bBHAM)
    {
        ivdw = 2;
    }
    else 
    {
        ivdw = 1;
    }
    
    
    /* We don't do SSE or altivec here, due to large overhead for 4-fold 
     * unrolling on short lists 
     */
    
    bFreeEnergy = FALSE;
    for(i=0; (i<nbonds); ) 
    {
        itype = iatoms[i++];
        ai    = iatoms[i++];
        aj    = iatoms[i++];
        gid   = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
        
        switch (ftype) {
        case F_LJ14:
            bFreeEnergy =
                (fr->efep != efepNO &&
                 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
                  iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
                  iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
            chargeA[0] = md->chargeA[ai];
            chargeA[1] = md->chargeA[aj];
            nbfp = (real *)&(iparams[itype].lj14.c6A);
            break;
        case F_LJC14_Q:
            eps = fr->epsfac*iparams[itype].ljc14.fqq;
            chargeA[0] = iparams[itype].ljc14.qi;
            chargeA[1] = iparams[itype].ljc14.qj;
            nbfp = (real *)&(iparams[itype].ljc14.c6);
            break;
        case F_LJC_PAIRS_NB:
            chargeA[0] = iparams[itype].ljcnb.qi;
            chargeA[1] = iparams[itype].ljcnb.qj;
            nbfp = (real *)&(iparams[itype].ljcnb.c6);
            break;
        }
        
        if (!bMolPBC) 
        {
            /* This is a bonded interaction, atoms are in the same box */
            shift_f = CENTRAL;
            r2 = distance2(x[ai],x[aj]);
        }
        else 
        {
            /* Apply full periodic boundary conditions */
            shift_f = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
            r2 = norm2(dx);
        }

        if (r2 >= rtab2) 
        {
            if (!bWarn) 
            {
                fprintf(stderr,"Warning: 1-4 interaction between %d and %d "
                        "at distance %.3f which is larger than the 1-4 table size %.3f nm\n", 
			glatnr(global_atom_index,ai),
			glatnr(global_atom_index,aj),
			sqrt(r2), sqrt(rtab2));
                fprintf(stderr,"These are ignored for the rest of the simulation\n");
                fprintf(stderr,"This usually means your system is exploding,\n"
                        "if not, you should increase table-extension in your mdp file\n"
                        "or with user tables increase the table size\n");
                bWarn = TRUE;
            }
            if (debug) 
	      fprintf(debug,"%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
		      x[ai][XX],x[ai][YY],x[ai][ZZ],
		      x[aj][XX],x[aj][YY],x[aj][ZZ],
		      glatnr(global_atom_index,ai),
		      glatnr(global_atom_index,aj),
		      sqrt(r2));
        }
        else 
        {
            copy_rvec(x[ai],x14[0]);
            copy_rvec(x[aj],x14[1]);
            clear_rvec(f14[0]);
            clear_rvec(f14[1]);
#ifdef DEBUG
            fprintf(debug,"LJ14: grp-i=%2d, grp-j=%2d, ngrp=%2d, GID=%d\n",
                    md->cENER[ai],md->cENER[aj],md->nenergrp,gid);
#endif
            
	    outeriter = inneriter = count = 0;
	    if (bFreeEnergy)
        {
            chargeB[0] = md->chargeB[ai];
            chargeB[1] = md->chargeB[aj];
            /* We pass &(iparams[itype].lj14.c6A) as LJ parameter matrix
             * to the innerloops.
             * Here we use that the LJ-14 parameters are stored in iparams
             * as c6A,c12A,c6B,c12B, which are referenced correctly
             * in the innerloops if we assign type combinations 0-0 and 0-1
             * to atom pair ai-aj in topologies A and B respectively.
             */
            if(ivdw==2)
            {
                gmx_fatal(FARGS,"Cannot do free energy Buckingham interactions.");
            }
            count = 0;
            gmx_nb_free_energy_kernel(icoul,
                                      ivdw,
                                      i1,
                                      &i0,
                                      j_index,
                                      &i1,
                                      &shift_f,
                                      fr->shift_vec[0],
                                      fshift[0],
                                      &gid,
                                      x14[0],
                                      f14[0],
                                      chargeA,
                                      chargeB,
                                      eps,
                                      krf,
                                      crf,
                                      fr->ewaldcoeff,
                                      egcoul,
                                      typeA,
                                      typeB,
                                      ntype,
                                      nbfp,
                                      egnb,
                                      tabscale,
                                      tab,
                                      lambda,
                                      dvdlambda,
                                      fr->sc_alpha,
                                      fr->sc_power,
                                      fr->sc_sigma6_def,
                                      fr->sc_sigma6_min,
                                      TRUE,
                                      &outeriter,
                                      &inneriter);
        }
        else 
        { 
            /* Not perturbed - call kernel 330 */
            nb_kernel330
                ( &i1,
                  &i0,
                  j_index,
                  &i1,
                  &shift_f,
                  fr->shift_vec[0],
                  fshift[0],
                  &gid,
                  x14[0],
                  f14[0],
                  chargeA,
                  &eps,
                  &krf,
                  &crf,
                  egcoul,
                  typeA,
                  &ntype,
                  nbfp,
                  egnb,
                  &tabscale,
                  tab,
                  NULL,
                  NULL,
                  NULL,
                  NULL,
                  &nthreads,
                  &count,
                  (void *)&mtx,
                  &outeriter,
                  &inneriter,
                  NULL);                
        }
        
        /* Add the forces */
        rvec_inc(f[ai],f14[0]);
        rvec_dec(f[aj],f14[0]);

	if (pf_global->bInitialized)
	    pf_atom_add_bonded(pf_global, ai, aj, PF_INTER_NB14, f14[0]);

        if (g) 
        {
            /* Correct the shift forces using the graph */
            ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);    
            shift_vir = IVEC2IS(dt);
            rvec_inc(fshift[shift_vir],f14[0]);
            rvec_dec(fshift[CENTRAL],f14[0]);
        }
        
	    /* flops: eNR_KERNEL_OUTER + eNR_KERNEL330 + 12 */
        }
    }
    return 0.0;
}
예제 #8
0
/**
 * Register the game modes supported by this plugin.
 */
int G_RegisterGames(int hookType, int param, void* data)
{
#define CONFIGDIR       "hexen"
#define STARTUPPK3      PLUGIN_NAMETEXT2 ".pk3"

    GameDef const deathkingsDef = {
        "hexen-dk", CONFIGDIR,
        "Hexen: Deathkings of the Dark Citadel", "Raven Software"
    };
    GameDef const hexenDef = {
        "hexen", CONFIGDIR, "Hexen", "Raven Software"
    };
    GameDef const hexenDemoDef = {
        "hexen-demo", CONFIGDIR, "Hexen 4-map Demo", "Raven Software"
    };
    GameDef const hexenBetaDemoDef = {
        "hexen-betademo", CONFIGDIR, "Hexen 4-map Beta Demo", "Raven Software"
    };
    GameDef const hexenV10Def = {
        "hexen-v10", CONFIGDIR, "Hexen v1.0", "Raven Software"
    };

    DENG_UNUSED(hookType); DENG_UNUSED(param); DENG_UNUSED(data);

    /* Hexen (Death Kings) */
    gameIds[hexen_deathkings] = DD_DefineGame(&deathkingsDef);
    DD_AddGameResource(GID(hexen_deathkings), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0);
    DD_AddGameResource(GID(hexen_deathkings), RC_PACKAGE, FF_STARTUP, "hexdd.wad", "MAP59;MAP60");
    DD_AddGameResource(GID(hexen_deathkings), RC_PACKAGE, FF_STARTUP, "hexen.wad", "MAP08;MAP22;TINTTAB;FOGMAP;TRANTBLA;DARTA1;ARTIPORK;SKYFOG;TALLYTOP;GROVER");
    DD_AddGameResource(GID(hexen_deathkings), RC_DEFINITION, 0, "hexen-dk.ded", 0);

    /* Hexen */
    gameIds[hexen] = DD_DefineGame(&hexenDef);
    DD_AddGameResource(GID(hexen), RC_PACKAGE, FF_STARTUP, "hexen.wad", "MAP08;MAP22;TINTTAB;FOGMAP;TRANTBLA;DARTA1;ARTIPORK;SKYFOG;TALLYTOP;GROVER");
    DD_AddGameResource(GID(hexen), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0);
    DD_AddGameResource(GID(hexen), RC_DEFINITION, 0, "hexen.ded", 0);

    /* Hexen (v1.0) */
    gameIds[hexen_v10] = DD_DefineGame(&hexenV10Def);
    DD_AddGameResource(GID(hexen_v10), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0);
    DD_AddGameResource(GID(hexen_v10), RC_PACKAGE, FF_STARTUP, "hexen.wad", "MAP08;MAP22;MAP41;TINTTAB;FOGMAP;DARTA1;ARTIPORK;SKYFOG;GROVER");
    DD_AddGameResource(GID(hexen_v10), RC_DEFINITION, 0, "hexen-v10.ded", 0);

    /* Hexen (Demo) */
    gameIds[hexen_demo] = DD_DefineGame(&hexenDemoDef);
    DD_AddGameResource(GID(hexen_demo), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0);
    DD_AddGameResource(GID(hexen_demo), RC_PACKAGE, FF_STARTUP, "hexendemo.wad;machexendemo.wad;hexen.wad", "MAP01;MAP04;TINTTAB;FOGMAP;DARTA1;ARTIPORK;DEMO3==18150");
    DD_AddGameResource(GID(hexen_demo), RC_DEFINITION, 0, "hexen-demo.ded", 0);

    /* Hexen (Beta Demo) */
    gameIds[hexen_betademo] = DD_DefineGame(&hexenBetaDemoDef);
    DD_AddGameResource(GID(hexen_betademo), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0);
    DD_AddGameResource(GID(hexen_betademo), RC_PACKAGE, FF_STARTUP, "hexendemo.wad;machexendemo.wad;hexenbeta.wad;hexen.wad", "MAP01;MAP04;TINTTAB;FOGMAP;DARTA1;ARTIPORK;AFLYA0;DEMO3==13866");
    DD_AddGameResource(GID(hexen_betademo), RC_DEFINITION, 0, "hexen-demo.ded", 0);

    return true;

#undef STARTUPPK3
#undef CONFIGDIR
}
예제 #9
0
파일: tpi.c 프로젝트: daniellandau/gromacs
double do_tpi(FILE *fplog, t_commrec *cr,
              int nfile, const t_filenm fnm[],
              const output_env_t oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
              int gmx_unused nstglobalcomm,
              gmx_vsite_t gmx_unused *vsite, gmx_constr_t gmx_unused constr,
              int gmx_unused stepout,
              t_inputrec *inputrec,
              gmx_mtop_t *top_global, t_fcdata *fcd,
              t_state *state,
              t_mdatoms *mdatoms,
              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
              gmx_edsam_t gmx_unused ed,
              t_forcerec *fr,
              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
              gmx_membed_t gmx_unused membed,
              real gmx_unused cpt_period, real gmx_unused max_hours,
              const char gmx_unused *deviceOptions,
              int gmx_unused imdport,
              unsigned long gmx_unused Flags,
              gmx_walltime_accounting_t walltime_accounting)
{
    const char     *TPI = "Test Particle Insertion";
    gmx_localtop_t *top;
    gmx_groups_t   *groups;
    gmx_enerdata_t *enerd;
    rvec           *f;
    real            lambda, t, temp, beta, drmax, epot;
    double          embU, sum_embU, *sum_UgembU, V, V_all, VembU_all;
    t_trxstatus    *status;
    t_trxframe      rerun_fr;
    gmx_bool        bDispCorr, bCharge, bRFExcl, bNotLastFrame, bStateChanged, bNS;
    tensor          force_vir, shake_vir, vir, pres;
    int             cg_tp, a_tp0, a_tp1, ngid, gid_tp, nener, e;
    rvec           *x_mol;
    rvec            mu_tot, x_init, dx, x_tp;
    int             nnodes, frame;
    gmx_int64_t     frame_step_prev, frame_step;
    gmx_int64_t     nsteps, stepblocksize = 0, step;
    gmx_int64_t     rnd_count_stride, rnd_count;
    gmx_int64_t     seed;
    double          rnd[4];
    int             i, start, end;
    FILE           *fp_tpi = NULL;
    char           *ptr, *dump_pdb, **leg, str[STRLEN], str2[STRLEN];
    double          dbl, dump_ener;
    gmx_bool        bCavity;
    int             nat_cavity  = 0, d;
    real           *mass_cavity = NULL, mass_tot;
    int             nbin;
    double          invbinw, *bin, refvolshift, logV, bUlogV;
    real            dvdl, prescorr, enercorr, dvdlcorr;
    gmx_bool        bEnergyOutOfBounds;
    const char     *tpid_leg[2] = {"direct", "reweighted"};

    /* Since there is no upper limit to the insertion energies,
     * we need to set an upper limit for the distribution output.
     */
    real bU_bin_limit      = 50;
    real bU_logV_bin_limit = bU_bin_limit + 10;

    nnodes = cr->nnodes;

    top = gmx_mtop_generate_local_top(top_global, inputrec);

    groups = &top_global->groups;

    bCavity = (inputrec->eI == eiTPIC);
    if (bCavity)
    {
        ptr = getenv("GMX_TPIC_MASSES");
        if (ptr == NULL)
        {
            nat_cavity = 1;
        }
        else
        {
            /* Read (multiple) masses from env var GMX_TPIC_MASSES,
             * The center of mass of the last atoms is then used for TPIC.
             */
            nat_cavity = 0;
            while (sscanf(ptr, "%lf%n", &dbl, &i) > 0)
            {
                srenew(mass_cavity, nat_cavity+1);
                mass_cavity[nat_cavity] = dbl;
                fprintf(fplog, "mass[%d] = %f\n",
                        nat_cavity+1, mass_cavity[nat_cavity]);
                nat_cavity++;
                ptr += i;
            }
            if (nat_cavity == 0)
            {
                gmx_fatal(FARGS, "Found %d masses in GMX_TPIC_MASSES", nat_cavity);
            }
        }
    }

    /*
       init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,
       state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/
    /* We never need full pbc for TPI */
    fr->ePBC = epbcXYZ;
    /* Determine the temperature for the Boltzmann weighting */
    temp = inputrec->opts.ref_t[0];
    if (fplog)
    {
        for (i = 1; (i < inputrec->opts.ngtc); i++)
        {
            if (inputrec->opts.ref_t[i] != temp)
            {
                fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
                fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
            }
        }
        fprintf(fplog,
                "\n  The temperature for test particle insertion is %.3f K\n\n",
                temp);
    }
    beta = 1.0/(BOLTZ*temp);

    /* Number of insertions per frame */
    nsteps = inputrec->nsteps;

    /* Use the same neighborlist with more insertions points
     * in a sphere of radius drmax around the initial point
     */
    /* This should be a proper mdp parameter */
    drmax = inputrec->rtpi;

    /* An environment variable can be set to dump all configurations
     * to pdb with an insertion energy <= this value.
     */
    dump_pdb  = getenv("GMX_TPI_DUMP");
    dump_ener = 0;
    if (dump_pdb)
    {
        sscanf(dump_pdb, "%lf", &dump_ener);
    }

    atoms2md(top_global, inputrec, 0, NULL, top_global->natoms, mdatoms);
    update_mdatoms(mdatoms, inputrec->fepvals->init_lambda);

    snew(enerd, 1);
    init_enerdata(groups->grps[egcENER].nr, inputrec->fepvals->n_lambda, enerd);
    snew(f, top_global->natoms);

    /* Print to log file  */
    walltime_accounting_start(walltime_accounting);
    wallcycle_start(wcycle, ewcRUN);
    print_start(fplog, cr, walltime_accounting, "Test Particle Insertion");

    /* The last charge group is the group to be inserted */
    cg_tp = top->cgs.nr - 1;
    a_tp0 = top->cgs.index[cg_tp];
    a_tp1 = top->cgs.index[cg_tp+1];
    if (debug)
    {
        fprintf(debug, "TPI cg %d, atoms %d-%d\n", cg_tp, a_tp0, a_tp1);
    }
    if (a_tp1 - a_tp0 > 1 &&
        (inputrec->rlist < inputrec->rcoulomb ||
         inputrec->rlist < inputrec->rvdw))
    {
        gmx_fatal(FARGS, "Can not do TPI for multi-atom molecule with a twin-range cut-off");
    }
    snew(x_mol, a_tp1-a_tp0);

    bDispCorr = (inputrec->eDispCorr != edispcNO);
    bCharge   = FALSE;
    for (i = a_tp0; i < a_tp1; i++)
    {
        /* Copy the coordinates of the molecule to be insterted */
        copy_rvec(state->x[i], x_mol[i-a_tp0]);
        /* Check if we need to print electrostatic energies */
        bCharge |= (mdatoms->chargeA[i] != 0 ||
                    (mdatoms->chargeB && mdatoms->chargeB[i] != 0));
    }
    bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype != eelRF_NEC);

    calc_cgcm(fplog, cg_tp, cg_tp+1, &(top->cgs), state->x, fr->cg_cm);
    if (bCavity)
    {
        if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog)
        {
            fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
            fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
        }
    }
    else
    {
        /* Center the molecule to be inserted at zero */
        for (i = 0; i < a_tp1-a_tp0; i++)
        {
            rvec_dec(x_mol[i], fr->cg_cm[cg_tp]);
        }
    }

    if (fplog)
    {
        fprintf(fplog, "\nWill insert %d atoms %s partial charges\n",
                a_tp1-a_tp0, bCharge ? "with" : "without");

        fprintf(fplog, "\nWill insert %d times in each frame of %s\n",
                (int)nsteps, opt2fn("-rerun", nfile, fnm));
    }

    if (!bCavity)
    {
        if (inputrec->nstlist > 1)
        {
            if (drmax == 0 && a_tp1-a_tp0 == 1)
            {
                gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax);
            }
            if (fplog)
            {
                fprintf(fplog, "Will use the same neighborlist for %d insertions in a sphere of radius %f\n", inputrec->nstlist, drmax);
            }
        }
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog, "Will insert randomly in a sphere of radius %f around the center of the cavity\n", drmax);
        }
    }

    ngid   = groups->grps[egcENER].nr;
    gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
    nener  = 1 + ngid;
    if (bDispCorr)
    {
        nener += 1;
    }
    if (bCharge)
    {
        nener += ngid;
        if (bRFExcl)
        {
            nener += 1;
        }
        if (EEL_FULL(fr->eeltype))
        {
            nener += 1;
        }
    }
    snew(sum_UgembU, nener);

    /* Copy the random seed set by the user */
    seed = inputrec->ld_seed;
    /* We use the frame step number as one random counter.
     * The second counter use the insertion (step) count. But we
     * need multiple random numbers per insertion. This number is
     * not fixed, since we generate random locations in a sphere
     * by putting locations in a cube and some of these fail.
     * A count of 20 is already extremely unlikely, so 10000 is
     * a safe margin for random numbers per insertion.
     */
    rnd_count_stride = 10000;

    if (MASTER(cr))
    {
        fp_tpi = xvgropen(opt2fn("-tpi", nfile, fnm),
                          "TPI energies", "Time (ps)",
                          "(kJ mol\\S-1\\N) / (nm\\S3\\N)", oenv);
        xvgr_subtitle(fp_tpi, "f. are averages over one frame", oenv);
        snew(leg, 4+nener);
        e = 0;
        sprintf(str, "-kT log(<Ve\\S-\\betaU\\N>/<V>)");
        leg[e++] = strdup(str);
        sprintf(str, "f. -kT log<e\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        sprintf(str, "f. <e\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        sprintf(str, "f. V");
        leg[e++] = strdup(str);
        sprintf(str, "f. <Ue\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        for (i = 0; i < ngid; i++)
        {
            sprintf(str, "f. <U\\sVdW %s\\Ne\\S-\\betaU\\N>",
                    *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
            leg[e++] = strdup(str);
        }
        if (bDispCorr)
        {
            sprintf(str, "f. <U\\sdisp c\\Ne\\S-\\betaU\\N>");
            leg[e++] = strdup(str);
        }
        if (bCharge)
        {
            for (i = 0; i < ngid; i++)
            {
                sprintf(str, "f. <U\\sCoul %s\\Ne\\S-\\betaU\\N>",
                        *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
                leg[e++] = strdup(str);
            }
            if (bRFExcl)
            {
                sprintf(str, "f. <U\\sRF excl\\Ne\\S-\\betaU\\N>");
                leg[e++] = strdup(str);
            }
            if (EEL_FULL(fr->eeltype))
            {
                sprintf(str, "f. <U\\sCoul recip\\Ne\\S-\\betaU\\N>");
                leg[e++] = strdup(str);
            }
        }
        xvgr_legend(fp_tpi, 4+nener, (const char**)leg, oenv);
        for (i = 0; i < 4+nener; i++)
        {
            sfree(leg[i]);
        }
        sfree(leg);
    }
    clear_rvec(x_init);
    V_all     = 0;
    VembU_all = 0;

    invbinw = 10;
    nbin    = 10;
    snew(bin, nbin);

    /* Avoid frame step numbers <= -1 */
    frame_step_prev = -1;

    bNotLastFrame = read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm),
                                     &rerun_fr, TRX_NEED_X);
    frame = 0;

    if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) !=
        mdatoms->nr - (a_tp1 - a_tp0))
    {
        gmx_fatal(FARGS, "Number of atoms in trajectory (%d)%s "
                  "is not equal the number in the run input file (%d) "
                  "minus the number of atoms to insert (%d)\n",
                  rerun_fr.natoms, bCavity ? " minus one" : "",
                  mdatoms->nr, a_tp1-a_tp0);
    }

    refvolshift = log(det(rerun_fr.box));

    switch (inputrec->eI)
    {
        case eiTPI:
            stepblocksize = inputrec->nstlist;
            break;
        case eiTPIC:
            stepblocksize = 1;
            break;
        default:
            gmx_fatal(FARGS, "Unknown integrator %s", ei_names[inputrec->eI]);
    }

#ifdef GMX_SIMD
    /* Make sure we don't detect SIMD overflow generated before this point */
    gmx_simd_check_and_reset_overflow();
#endif

    while (bNotLastFrame)
    {
        frame_step      = rerun_fr.step;
        if (frame_step <= frame_step_prev)
        {
            /* We don't have step number in the trajectory file,
             * or we have constant or decreasing step numbers.
             * Ensure we have increasing step numbers, since we use
             * the step numbers as a counter for random numbers.
             */
            frame_step  = frame_step_prev + 1;
        }
        frame_step_prev = frame_step;

        lambda = rerun_fr.lambda;
        t      = rerun_fr.time;

        sum_embU = 0;
        for (e = 0; e < nener; e++)
        {
            sum_UgembU[e] = 0;
        }

        /* Copy the coordinates from the input trajectory */
        for (i = 0; i < rerun_fr.natoms; i++)
        {
            copy_rvec(rerun_fr.x[i], state->x[i]);
        }
        copy_mat(rerun_fr.box, state->box);

        V    = det(state->box);
        logV = log(V);

        bStateChanged = TRUE;
        bNS           = TRUE;

        step = cr->nodeid*stepblocksize;
        while (step < nsteps)
        {
            /* Initialize the second counter for random numbers using
             * the insertion step index. This ensures that we get
             * the same random numbers independently of how many
             * MPI ranks we use. Also for the same seed, we get
             * the same initial random sequence for different nsteps.
             */
            rnd_count = step*rnd_count_stride;

            if (!bCavity)
            {
                /* Random insertion in the whole volume */
                bNS = (step % inputrec->nstlist == 0);
                if (bNS)
                {
                    /* Generate a random position in the box */
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                    for (d = 0; d < DIM; d++)
                    {
                        x_init[d] = rnd[d]*state->box[d][d];
                    }
                }
                if (inputrec->nstlist == 1)
                {
                    copy_rvec(x_init, x_tp);
                }
                else
                {
                    /* Generate coordinates within |dx|=drmax of x_init */
                    do
                    {
                        gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                        gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                        for (d = 0; d < DIM; d++)
                        {
                            dx[d] = (2*rnd[d] - 1)*drmax;
                        }
                    }
                    while (norm2(dx) > drmax*drmax);
                    rvec_add(x_init, dx, x_tp);
                }
            }
            else
            {
                /* Random insertion around a cavity location
                 * given by the last coordinate of the trajectory.
                 */
                if (step == 0)
                {
                    if (nat_cavity == 1)
                    {
                        /* Copy the location of the cavity */
                        copy_rvec(rerun_fr.x[rerun_fr.natoms-1], x_init);
                    }
                    else
                    {
                        /* Determine the center of mass of the last molecule */
                        clear_rvec(x_init);
                        mass_tot = 0;
                        for (i = 0; i < nat_cavity; i++)
                        {
                            for (d = 0; d < DIM; d++)
                            {
                                x_init[d] +=
                                    mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d];
                            }
                            mass_tot += mass_cavity[i];
                        }
                        for (d = 0; d < DIM; d++)
                        {
                            x_init[d] /= mass_tot;
                        }
                    }
                }
                /* Generate coordinates within |dx|=drmax of x_init */
                do
                {
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                    for (d = 0; d < DIM; d++)
                    {
                        dx[d] = (2*rnd[d] - 1)*drmax;
                    }
                }
                while (norm2(dx) > drmax*drmax);
                rvec_add(x_init, dx, x_tp);
            }

            if (a_tp1 - a_tp0 == 1)
            {
                /* Insert a single atom, just copy the insertion location */
                copy_rvec(x_tp, state->x[a_tp0]);
            }
            else
            {
                /* Copy the coordinates from the top file */
                for (i = a_tp0; i < a_tp1; i++)
                {
                    copy_rvec(x_mol[i-a_tp0], state->x[i]);
                }
                /* Rotate the molecule randomly */
                gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                rotate_conf(a_tp1-a_tp0, state->x+a_tp0, NULL,
                            2*M_PI*rnd[0],
                            2*M_PI*rnd[1],
                            2*M_PI*rnd[2]);
                /* Shift to the insertion location */
                for (i = a_tp0; i < a_tp1; i++)
                {
                    rvec_inc(state->x[i], x_tp);
                }
            }

            /* Clear some matrix variables  */
            clear_mat(force_vir);
            clear_mat(shake_vir);
            clear_mat(vir);
            clear_mat(pres);

            /* Set the charge group center of mass of the test particle */
            copy_rvec(x_init, fr->cg_cm[top->cgs.nr-1]);

            /* Calc energy (no forces) on new positions.
             * Since we only need the intermolecular energy
             * and the RF exclusion terms of the inserted molecule occur
             * within a single charge group we can pass NULL for the graph.
             * This also avoids shifts that would move charge groups
             * out of the box.
             *
             * Some checks above ensure than we can not have
             * twin-range interactions together with nstlist > 1,
             * therefore we do not need to remember the LR energies.
             */
            /* Make do_force do a single node force calculation */
            cr->nnodes = 1;
            do_force(fplog, cr, inputrec,
                     step, nrnb, wcycle, top, &top_global->groups,
                     state->box, state->x, &state->hist,
                     f, force_vir, mdatoms, enerd, fcd,
                     state->lambda,
                     NULL, fr, NULL, mu_tot, t, NULL, NULL, FALSE,
                     GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
                     (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) |
                     (bStateChanged ? GMX_FORCE_STATECHANGED : 0));
            cr->nnodes    = nnodes;
            bStateChanged = FALSE;
            bNS           = FALSE;

            /* Calculate long range corrections to pressure and energy */
            calc_dispcorr(fplog, inputrec, fr, step, top_global->natoms, state->box,
                          lambda, pres, vir, &prescorr, &enercorr, &dvdlcorr);
            /* figure out how to rearrange the next 4 lines MRS 8/4/2009 */
            enerd->term[F_DISPCORR]  = enercorr;
            enerd->term[F_EPOT]     += enercorr;
            enerd->term[F_PRES]     += prescorr;
            enerd->term[F_DVDL_VDW] += dvdlcorr;

            epot               = enerd->term[F_EPOT];
            bEnergyOutOfBounds = FALSE;
#ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
            /* With SSE the energy can overflow, check for this */
            if (gmx_mm_check_and_reset_overflow())
            {
                if (debug)
                {
                    fprintf(debug, "Found an SSE overflow, assuming the energy is out of bounds\n");
                }
                bEnergyOutOfBounds = TRUE;
            }
#endif
            /* If the compiler doesn't optimize this check away
             * we catch the NAN energies.
             * The epot>GMX_REAL_MAX check catches inf values,
             * which should nicely result in embU=0 through the exp below,
             * but it does not hurt to check anyhow.
             */
            /* Non-bonded Interaction usually diverge at r=0.
             * With tabulated interaction functions the first few entries
             * should be capped in a consistent fashion between
             * repulsion, dispersion and Coulomb to avoid accidental
             * negative values in the total energy.
             * The table generation code in tables.c does this.
             * With user tbales the user should take care of this.
             */
            if (epot != epot || epot > GMX_REAL_MAX)
            {
                bEnergyOutOfBounds = TRUE;
            }
            if (bEnergyOutOfBounds)
            {
                if (debug)
                {
                    fprintf(debug, "\n  time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n", t, (int)step, epot);
                }
                embU = 0;
            }
            else
            {
                embU      = exp(-beta*epot);
                sum_embU += embU;
                /* Determine the weighted energy contributions of each energy group */
                e                = 0;
                sum_UgembU[e++] += epot*embU;
                if (fr->bBHAM)
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egBHAMSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egBHAMLR][GID(i, gid_tp, ngid)])*embU;
                    }
                }
                else
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egLJSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egLJLR][GID(i, gid_tp, ngid)])*embU;
                    }
                }
                if (bDispCorr)
                {
                    sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU;
                }
                if (bCharge)
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egCOULSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egCOULLR][GID(i, gid_tp, ngid)])*embU;
                    }
                    if (bRFExcl)
                    {
                        sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
                    }
                    if (EEL_FULL(fr->eeltype))
                    {
                        sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU;
                    }
                }
            }

            if (embU == 0 || beta*epot > bU_bin_limit)
            {
                bin[0]++;
            }
            else
            {
                i = (int)((bU_logV_bin_limit
                           - (beta*epot - logV + refvolshift))*invbinw
                          + 0.5);
                if (i < 0)
                {
                    i = 0;
                }
                if (i >= nbin)
                {
                    realloc_bins(&bin, &nbin, i+10);
                }
                bin[i]++;
            }

            if (debug)
            {
                fprintf(debug, "TPI %7d %12.5e %12.5f %12.5f %12.5f\n",
                        (int)step, epot, x_tp[XX], x_tp[YY], x_tp[ZZ]);
            }

            if (dump_pdb && epot <= dump_ener)
            {
                sprintf(str, "t%g_step%d.pdb", t, (int)step);
                sprintf(str2, "t: %f step %d ener: %f", t, (int)step, epot);
                write_sto_conf_mtop(str, str2, top_global, state->x, state->v,
                                    inputrec->ePBC, state->box);
            }

            step++;
            if ((step/stepblocksize) % cr->nnodes != cr->nodeid)
            {
                /* Skip all steps assigned to the other MPI ranks */
                step += (cr->nnodes - 1)*stepblocksize;
            }
        }

        if (PAR(cr))
        {
            /* When running in parallel sum the energies over the processes */
            gmx_sumd(1,    &sum_embU, cr);
            gmx_sumd(nener, sum_UgembU, cr);
        }

        frame++;
        V_all     += V;
        VembU_all += V*sum_embU/nsteps;

        if (fp_tpi)
        {
            if (bVerbose || frame%10 == 0 || frame < 10)
            {
                fprintf(stderr, "mu %10.3e <mu> %10.3e\n",
                        -log(sum_embU/nsteps)/beta, -log(VembU_all/V_all)/beta);
            }

            fprintf(fp_tpi, "%10.3f %12.5e %12.5e %12.5e %12.5e",
                    t,
                    VembU_all == 0 ? 20/beta : -log(VembU_all/V_all)/beta,
                    sum_embU == 0  ? 20/beta : -log(sum_embU/nsteps)/beta,
                    sum_embU/nsteps, V);
            for (e = 0; e < nener; e++)
            {
                fprintf(fp_tpi, " %12.5e", sum_UgembU[e]/nsteps);
            }
            fprintf(fp_tpi, "\n");
            fflush(fp_tpi);
        }

        bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
    } /* End of the loop  */
    walltime_accounting_end(walltime_accounting);

    close_trj(status);

    if (fp_tpi != NULL)
    {
        gmx_fio_fclose(fp_tpi);
    }

    if (fplog != NULL)
    {
        fprintf(fplog, "\n");
        fprintf(fplog, "  <V>  = %12.5e nm^3\n", V_all/frame);
        fprintf(fplog, "  <mu> = %12.5e kJ/mol\n", -log(VembU_all/V_all)/beta);
    }

    /* Write the Boltzmann factor histogram */
    if (PAR(cr))
    {
        /* When running in parallel sum the bins over the processes */
        i = nbin;
        global_max(cr, &i);
        realloc_bins(&bin, &nbin, i);
        gmx_sumd(nbin, bin, cr);
    }
    if (MASTER(cr))
    {
        fp_tpi = xvgropen(opt2fn("-tpid", nfile, fnm),
                          "TPI energy distribution",
                          "\\betaU - log(V/<V>)", "count", oenv);
        sprintf(str, "number \\betaU > %g: %9.3e", bU_bin_limit, bin[0]);
        xvgr_subtitle(fp_tpi, str, oenv);
        xvgr_legend(fp_tpi, 2, (const char **)tpid_leg, oenv);
        for (i = nbin-1; i > 0; i--)
        {
            bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame);
            fprintf(fp_tpi, "%6.2f %10d %12.5e\n",
                    bUlogV,
                    (int)(bin[i]+0.5),
                    bin[i]*exp(-bUlogV)*V_all/VembU_all);
        }
        gmx_fio_fclose(fp_tpi);
    }
    sfree(bin);

    sfree(sum_UgembU);

    walltime_accounting_set_nsteps_done(walltime_accounting, frame*inputrec->nsteps);

    return 0;
}
예제 #10
0
/**
 * Release
 */
bool CShPluginGame::Release(void)
{
	m_levelIdentifier = GID(NULL);

	return(true);
}
예제 #11
0
파일: mdebin.c 프로젝트: Chadi-akel/cere
void upd_mdebin(t_mdebin *md,FILE *fp_dgdl,
		real tmass,int step,real time,
		real ener[],
		matrix box,
		tensor svir,
		tensor fvir,
		tensor vir,
		tensor pres,
		t_groups *grps,
		rvec mu_tot, bool bNoseHoover)
{
  static real *ttt=NULL;
  static rvec *uuu=NULL;
  int    i,j,k,kk,m,n,gid;
  real   bs[NBOXS];
  real   tricl_bs[NTRICLBOXS];
  real   eee[egNR];
  real   ecopy[F_NRE];
  real   tmp;
  
  copy_energy(ener,ecopy);
  add_ebin(md->ebin,md->ie,f_nre,ecopy,step);
  if (bPC || fabs(grps->cosacc.cos_accel)>GMX_REAL_MIN) {
    if(bTricl) {
      tricl_bs[0]=box[XX][XX];
      tricl_bs[1]=box[YY][XX];
      tricl_bs[2]=box[YY][YY];
      tricl_bs[3]=box[ZZ][XX];
      tricl_bs[4]=box[ZZ][YY];
      tricl_bs[5]=box[ZZ][ZZ];
      /* This is the volume */
      tricl_bs[6]=tricl_bs[0]*tricl_bs[2]*tricl_bs[5];
      /* This is the density */
      tricl_bs[7] = (tmass*AMU)/(tricl_bs[6]*NANO*NANO*NANO);
    } else {
      for(m=0; (m<DIM); m++) 
	bs[m]=box[m][m];
      /* This is the volume */
      bs[3] = bs[XX]*bs[YY]*bs[ZZ];      
      /* This is the density */
      bs[4] = (tmass*AMU)/(bs[3]*NANO*NANO*NANO);
    }
  }  
  if (bPC) {
    /* This is pV (in kJ/mol) */  
    if(bTricl) {
      tricl_bs[8] = tricl_bs[6]*ener[F_PRES]/PRESFAC;
      add_ebin(md->ebin,md->ib,NTRICLBOXS,tricl_bs,step);
    } else {
      bs[5] = bs[3]*ener[F_PRES]/PRESFAC;
      add_ebin(md->ebin,md->ib,NBOXS,bs,step);
    }
  }  
  if (bShake) {
    add_ebin(md->ebin,md->isvir,9,svir[0],step);
    add_ebin(md->ebin,md->ifvir,9,fvir[0],step);
  }
  add_ebin(md->ebin,md->ivir,9,vir[0],step);
  add_ebin(md->ebin,md->ipres,9,pres[0],step);
  tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
  add_ebin(md->ebin,md->isurft,1,&tmp,step);
  add_ebin(md->ebin,md->imu,3,mu_tot,step);

  if (fabs(grps->cosacc.cos_accel)>GMX_REAL_MIN) {
    add_ebin(md->ebin,md->ivcos,1,&(grps->cosacc.vcos),step);
    /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
    if(bTricl) 
      tmp = 1/(grps->cosacc.cos_accel/(grps->cosacc.vcos*PICO)
	       *tricl_bs[7]*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
    else 
      tmp = 1/(grps->cosacc.cos_accel/(grps->cosacc.vcos*PICO)
	       *bs[4]*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
    add_ebin(md->ebin,md->ivisc,1,&tmp,step);    
  }
  if (md->nE > 1) {
    n=0;
    for(i=0; (i<md->nEg); i++) {
      for(j=i; (j<md->nEg); j++) {
	gid=GID(i,j,md->nEg);
	for(k=kk=0; (k<egNR); k++) 
	  if (bEInd[k])
	    eee[kk++]=grps->estat.ee[k][gid];
	add_ebin(md->ebin,md->igrp[n],md->nEc,eee,step);
	n++;
      }
    }
  }
  
  if(ttt == NULL) 
    snew(ttt,2*md->nTC);
  for(i=0; (i<md->nTC); i++) {
    ttt[2*i]   = grps->tcstat[i].T;
    if(bNoseHoover)
      ttt[2*i+1] = grps->tcstat[i].xi;
    else
      ttt[2*i+1] = grps->tcstat[i].lambda;
  }
  add_ebin(md->ebin,md->itc,2*md->nTC,ttt,step);  
  
  if (md->nU > 1) {
    if (uuu == NULL)
      snew(uuu,md->nU);
    for(i=0; (i<md->nU); i++)
      copy_rvec(grps->grpstat[i].u,uuu[i]);
    add_ebin(md->ebin,md->iu,3*md->nU,uuu[0],step);
  }
  if (fp_dgdl)
    fprintf(fp_dgdl,"%g %g\n",time,ener[F_DVDL]+ener[F_DVDLKIN]);
}
예제 #12
0
void upd_mdebin(t_mdebin *md, gmx_bool write_dhdl,
                gmx_bool bSum,
                double time,
                real tmass,
                gmx_enerdata_t *enerd,
                t_state *state,
                matrix  box,
                tensor svir,
                tensor fvir,
                tensor vir,
                tensor pres,
                gmx_ekindata_t *ekind,
                rvec mu_tot,
                gmx_constr_t constr)
{
    int    i,j,k,kk,m,n,gid;
    real   crmsd[2],tmp6[6];
    real   bs[NTRICLBOXS],vol,dens,pv,enthalpy;
    real   eee[egNR];
    real   ecopy[F_NRE];
    real   tmp;
    gmx_bool   bNoseHoover;

    /* Do NOT use the box in the state variable, but the separate box provided
     * as an argument. This is because we sometimes need to write the box from
     * the last timestep to match the trajectory frames.
     */
    copy_energy(md, enerd->term,ecopy);
    add_ebin(md->ebin,md->ie,md->f_nre,ecopy,bSum);
    if (md->nCrmsd)
    {
        crmsd[0] = constr_rmsd(constr,FALSE);
        if (md->nCrmsd > 1)
        {
            crmsd[1] = constr_rmsd(constr,TRUE);
        }
        add_ebin(md->ebin,md->iconrmsd,md->nCrmsd,crmsd,FALSE);
    }
    if (md->bDynBox)
    {
        int nboxs;
        if(md->bTricl)
        {
            bs[0] = box[XX][XX];
            bs[1] = box[YY][YY];
            bs[2] = box[ZZ][ZZ];
            bs[3] = box[YY][XX];
            bs[4] = box[ZZ][XX];
            bs[5] = box[ZZ][YY];
            nboxs=NTRICLBOXS;
        }
        else
        {
            bs[0] = box[XX][XX];
            bs[1] = box[YY][YY];
            bs[2] = box[ZZ][ZZ];
            nboxs=NBOXS;
        }
        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);

        /* This is pV (in kJ/mol).  The pressure is the reference pressure,
           not the instantaneous pressure */  
        pv = 0;
        for (i=0;i<DIM;i++) 
        {
            for (j=0;j<DIM;j++) 
            {
                if (i>j) 
                {
                    pv += box[i][j]*md->ref_p[i][j]/PRESFAC;
                } 
                else 
                {
                    pv += box[j][i]*md->ref_p[j][i]/PRESFAC;
                }
            }
        }

        add_ebin(md->ebin,md->ib   ,nboxs,bs   ,bSum);
        add_ebin(md->ebin,md->ivol ,1    ,&vol ,bSum);
        add_ebin(md->ebin,md->idens,1    ,&dens,bSum);
        add_ebin(md->ebin,md->ipv  ,1    ,&pv  ,bSum);
        enthalpy = pv + enerd->term[F_ETOT];
        add_ebin(md->ebin,md->ienthalpy  ,1    ,&enthalpy  ,bSum);
    }
    if (md->bConstrVir)
    {
        add_ebin(md->ebin,md->isvir,9,svir[0],bSum);
        add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum);
    }
    add_ebin(md->ebin,md->ivir,9,vir[0],bSum);
    add_ebin(md->ebin,md->ipres,9,pres[0],bSum);
    tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
    add_ebin(md->ebin,md->isurft,1,&tmp,bSum);
    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
    {
        tmp6[0] = state->boxv[XX][XX];
        tmp6[1] = state->boxv[YY][YY];
        tmp6[2] = state->boxv[ZZ][ZZ];
        tmp6[3] = state->boxv[YY][XX];
        tmp6[4] = state->boxv[ZZ][XX];
        tmp6[5] = state->boxv[ZZ][YY];
        add_ebin(md->ebin,md->ipc,md->bTricl ? 6 : 3,tmp6,bSum);
    }
    add_ebin(md->ebin,md->imu,3,mu_tot,bSum);
    if (ekind && ekind->cosacc.cos_accel != 0)
    {
        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
        add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum);
        /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
        tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
                 *vol*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
        add_ebin(md->ebin,md->ivisc,1,&tmp,bSum);    
    }
    if (md->nE > 1)
    {
        n=0;
        for(i=0; (i<md->nEg); i++)
        {
            for(j=i; (j<md->nEg); j++)
            {
                gid=GID(i,j,md->nEg);
                for(k=kk=0; (k<egNR); k++)
                {
                    if (md->bEInd[k])
                    {
                        eee[kk++] = enerd->grpp.ener[k][gid];
                    }
                }
                add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum);
                n++;
            }
        }
    }

    if (ekind)
    {
        for(i=0; (i<md->nTC); i++)
        {
            md->tmp_r[i] = ekind->tcstat[i].T;
        }
        add_ebin(md->ebin,md->itemp,md->nTC,md->tmp_r,bSum);

        /* whether to print Nose-Hoover chains: */
        bNoseHoover = (getenv("GMX_NOSEHOOVER_CHAINS") != NULL); 

        if (md->etc == etcNOSEHOOVER)
        {
            if (bNoseHoover) 
            {
                if (md->bNHC_trotter)
                {
                    for(i=0; (i<md->nTC); i++) 
                    {
                        for (j=0;j<md->nNHC;j++) 
                        {
                            k = i*md->nNHC+j;
                            md->tmp_r[2*k] = state->nosehoover_xi[k];
                            md->tmp_r[2*k+1] = state->nosehoover_vxi[k];
                        }
                    }
                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);      

                    if (md->bMTTK) {
                        for(i=0; (i<md->nTCP); i++) 
                        {
                            for (j=0;j<md->nNHC;j++) 
                            {
                                k = i*md->nNHC+j;
                                md->tmp_r[2*k] = state->nhpres_xi[k];
                                md->tmp_r[2*k+1] = state->nhpres_vxi[k];
                            }
                        }
                        add_ebin(md->ebin,md->itcb,md->mdeb_n,md->tmp_r,bSum);      
                    }

                } 
                else 
                {
                    for(i=0; (i<md->nTC); i++)
                    {
                        md->tmp_r[2*i] = state->nosehoover_xi[i];
                        md->tmp_r[2*i+1] = state->nosehoover_vxi[i];
                    }
                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
                }
            }
        }
        else if (md->etc == etcBERENDSEN || md->etc == etcYES || 
                 md->etc == etcVRESCALE)
        {
            for(i=0; (i<md->nTC); i++)
            {
                md->tmp_r[i] = ekind->tcstat[i].lambda;
            }
            add_ebin(md->ebin,md->itc,md->nTC,md->tmp_r,bSum);
        }
    }

    if (ekind && md->nU > 1)
    {
        for(i=0; (i<md->nU); i++)
        {
            copy_rvec(ekind->grpstat[i].u,md->tmp_v[i]);
        }
        add_ebin(md->ebin,md->iu,3*md->nU,md->tmp_v[0],bSum);
    }

    ebin_increase_count(md->ebin,bSum);

    /* BAR + thermodynamic integration values */
    if (write_dhdl)
    {
        if (md->fp_dhdl)
        {
            fprintf(md->fp_dhdl,"%.4f", time);

            if (md->dhdl_derivatives)
            {
                fprintf(md->fp_dhdl," %g", enerd->term[F_DVDL]+ 
                                           enerd->term[F_DKDL]+
                                           enerd->term[F_DHDL_CON]);
            }
            for(i=1; i<enerd->n_lambda; i++)
            {
                fprintf(md->fp_dhdl," %g",
                        enerd->enerpart_lambda[i]-enerd->enerpart_lambda[0]);
            }
            fprintf(md->fp_dhdl,"\n");
        }
        /* and the binary BAR output */
        if (md->dhc)
        {
            mde_delta_h_coll_add_dh(md->dhc, 
                                    enerd->term[F_DVDL]+ enerd->term[F_DKDL]+
                                    enerd->term[F_DHDL_CON],
                                    enerd->enerpart_lambda, time, 
                                    state->lambda);
        }
    }
}
예제 #13
0
real
do_nonbonded_listed(int ftype, int nbonds,
                    const t_iatom iatoms[], const t_iparams iparams[],
                    const rvec x[], rvec f[], rvec fshift[],
                    const t_pbc *pbc, const t_graph *g,
                    real *lambda, real *dvdl,
                    const t_mdatoms *md,
                    const t_forcerec *fr, gmx_grppairener_t *grppener,
                    int *global_atom_index)
{
    int              ielec, ivdw;
    real             qq, c6, c12;
    rvec             dx;
    ivec             dt;
    int              i, j, itype, ai, aj, gid;
    int              fshift_index;
    real             r2, rinv;
    real             fscal, velec, vvdw;
    real *           energygrp_elec;
    real *           energygrp_vdw;
    static gmx_bool  warned_rlimit = FALSE;
    /* Free energy stuff */
    gmx_bool         bFreeEnergy;
    real             LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
    real             qqB, c6B, c12B, sigma2_def, sigma2_min;


    switch (ftype)
    {
        case F_LJ14:
        case F_LJC14_Q:
            energygrp_elec = grppener->ener[egCOUL14];
            energygrp_vdw  = grppener->ener[egLJ14];
            break;
        case F_LJC_PAIRS_NB:
            energygrp_elec = grppener->ener[egCOULSR];
            energygrp_vdw  = grppener->ener[egLJSR];
            break;
        default:
            energygrp_elec = NULL; /* Keep compiler happy */
            energygrp_vdw  = NULL; /* Keep compiler happy */
            gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
            break;
    }

    if (fr->efep != efepNO)
    {
        /* Lambda factor for state A=1-lambda and B=lambda */
        LFC[0] = 1.0 - lambda[efptCOUL];
        LFV[0] = 1.0 - lambda[efptVDW];
        LFC[1] = lambda[efptCOUL];
        LFV[1] = lambda[efptVDW];

        /*derivative of the lambda factor for state A and B */
        DLF[0] = -1;
        DLF[1] = 1;

        /* precalculate */
        sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
        sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);

        for (i = 0; i < 2; i++)
        {
            lfac_coul[i]  = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
            dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
            lfac_vdw[i]   = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
            dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
        }
    }
    else
    {
        sigma2_min = sigma2_def = 0;
    }

    bFreeEnergy = FALSE;
    for (i = 0; (i < nbonds); )
    {
        itype = iatoms[i++];
        ai    = iatoms[i++];
        aj    = iatoms[i++];
        gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);

        /* Get parameters */
        switch (ftype)
        {
            case F_LJ14:
                bFreeEnergy =
                    (fr->efep != efepNO &&
                     ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
                      iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
                      iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
                qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
                c6               = iparams[itype].lj14.c6A;
                c12              = iparams[itype].lj14.c12A;
                break;
            case F_LJC14_Q:
                qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
                c6               = iparams[itype].ljc14.c6;
                c12              = iparams[itype].ljc14.c12;
                break;
            case F_LJC_PAIRS_NB:
                qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
                c6               = iparams[itype].ljcnb.c6;
                c12              = iparams[itype].ljcnb.c12;
                break;
            default:
                /* Cannot happen since we called gmx_fatal() above in this case */
                qq = c6 = c12 = 0; /* Keep compiler happy */
                break;
        }

        /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
         * included in the general nfbp array now. This means the tables are scaled down by the
         * same factor, so when we use the original c6/c12 parameters from iparams[] they must
         * be scaled up.
         */
        c6  *= 6.0;
        c12 *= 12.0;

        /* Do we need to apply full periodic boundary conditions? */
        if (fr->bMolPBC == TRUE)
        {
            fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
        }
        else
        {
            fshift_index = CENTRAL;
            rvec_sub(x[ai], x[aj], dx);
        }
        r2           = norm2(dx);

        if (r2 >= fr->tab14.r*fr->tab14.r)
        {
            if (warned_rlimit == FALSE)
            {
                nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
                warned_rlimit = TRUE;
            }
            continue;
        }

        if (bFreeEnergy)
        {
            /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
            qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
            c6B              = iparams[itype].lj14.c6B*6.0;
            c12B             = iparams[itype].lj14.c12B*12.0;

            fscal            = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
                                                              fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
                                                              LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
                                                              fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
        }
        else
        {
            /* Evaluate tabulated interaction without free energy */
            fscal            = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
        }

        energygrp_elec[gid]  += velec;
        energygrp_vdw[gid]   += vvdw;
        svmul(fscal, dx, dx);

        /* Add the forces */
        rvec_inc(f[ai], dx);
        rvec_dec(f[aj], dx);

        if (g)
        {
            /* Correct the shift forces using the graph */
            ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
            fshift_index = IVEC2IS(dt);
        }
        if (fshift_index != CENTRAL)
        {
            rvec_inc(fshift[fshift_index], dx);
            rvec_dec(fshift[CENTRAL], dx);
        }
    }
    return 0.0;
}
/**
 * Constructor
 */
CShPluginTranslate::CShPluginTranslate(void)
: CShPlugin(CShIdentifier("translate"))
, m_levelIdentifier(GID(NULL))
{

}
예제 #15
0
void CShTPSPlayer::Initialize(const CShIdentifier & levelIdentifier, CShTPSGun * defaultGun)
{
	if (!m_bInitialized)
	{
		m_bInitialized  = true;

		// Load a sprite in 2D in the Sprite attribute
		m_pSprite = shNULL;
		m_pSprite = ShEntity2::Find(levelIdentifier, CShIdentifier(PLAYER_SPRITE_NAME));
		float radius = CHARACTER_CONTROLLER_RADIUS_2D; // radius for the character controller
	
		if (shNULL == m_pSprite) // if no player sprite is on the map, one is created for 3D, to manage collision between invisible 2D stuff
		{
			m_pSprite = ShEntity2::Create(levelIdentifier, CShIdentifier("player_sprite_forced_2D"), GID(layer_default), CShIdentifier("tps"), CShIdentifier("player"), CShVector3(0.0f,0.0f,1.0f), CShEulerAngles(0.0f, 0.0f, 0.0f), CShVector3(1.0f, 1.0f, 1.0f));

		}

		SH_ASSERT(shNULL != m_pSprite);
		m_pModel = shNULL;
		m_pModel = ShEntity3::Find(levelIdentifier, CShIdentifier(PLAYER_SPRITE_NAME));
		if(shNULL != m_pModel)
		{
			m_3d = true;
			m_pAnimIdle = ShAnimation::Find(CShIdentifier(PLAYER_ANIM_IDLE));
			SH_ASSERT(shNULL != m_pAnimIdle);
			m_pAnimRun = ShAnimation::Find(CShIdentifier(PLAYER_ANIM_RUN));
			SH_ASSERT(shNULL != m_pAnimRun);
			/*m_pAnimAttack = ShAnimation::Find(CShIdentifier(PLAYER_ANIM_ATTACK));
			SH_ASSERT(shNULL != m_pAnimAttack);*/
			ShEntity3::AnimationPlay(m_pModel, m_pAnimIdle,true);
			radius= CHARACTER_CONTROLLER_RADIUS_3D;	
		}
		else
		{
			m_3d = false;
		}

		if(m_3d)
		{
			ShObject::SetShow(m_pSprite, false);
			ShObject::SetShow(m_pModel, true);
		}
		else
		{
			ShObject::SetShow(m_pSprite, true);
		}
		CShTPSCharacter::Initialize(levelIdentifier, CShIdentifier(PLAYER_SPRITE_NAME), defaultGun);

		// Initialize the character controller with the level, the identifier, the position, the radius, the direction, the speed.
		ShCharacterController *	pCharacterController = shNULL;
		pCharacterController = ShCharacterController::Create(levelIdentifier, CShIdentifier("character_controller_character_001"), m_Position, radius, m_Direction, m_Speed);
		m_pCharacterController = pCharacterController;
		SH_ASSERT(shNULL != m_pCharacterController);
	}
	else
	{
		Spawn();
	}
}
예제 #16
0
extern "C" magma_int_t 
magma_dgetrf_mgpu_work_amc_v3(magma_int_t num_gpus,
magma_int_t m, magma_int_t n,  
double **dlA, magma_int_t dlA_LD, 
magma_int_t *ipiv, magma_int_t *info,
/*workspace on the cpu side*/
double *AWORK, magma_int_t AWORK_LD, magma_int_t AWORK_n
) 
{ 
/*  -- MAGMA (version 1.5.0-beta3) -- 
       Univ. of Tennessee, Knoxville 
       Univ. of California, Berkeley 
       Univ. of Colorado, Denver 
       November 2011 
 
    Purpose 
    ======= 
 
    DGETRF_REC_ASYNC computes an LU factorization of a general M-by-N matrix A 
    using partial pivoting with row interchanges. The technique used for the panel factorization
    is the parallel recursif LU (see lawn 259).
 
    The factorization has the form 
       A = P * L * U 
    where P is a permutation matrix, L is lower triangular with unit 
    diagonal elements (lower trapezoidal if m > n), and U is upper 
    triangular (upper trapezoidal if m < n). 
 
    This is the right-looking Level 3 BLAS version of the algorithm. 
 
    Arguments 
    ========= 
    NUM_GPUS
            (input) INTEGER
            The number of GPUS to be used for the factorization.
 
    M       (input) INTEGER 
            The number of rows of the matrix A.  M >= 0. 
 
    N       (input) INTEGER 
            The number of columns of the matrix A.  N >= 0. 
 
    A       (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDA,N). 
            On entry, the M-by-N matrix to be factored. 
            On exit, the factors L and U from the factorization 
            A = P*L*U; the unit diagonal elements of L are not stored. 
 
    LDDA     (input) INTEGER 
            The leading dimension of the array A.  LDDA >= max(1,M). 
 
    IPIV    (output) INTEGER array, dimension (min(M,N)) 
            The pivot indices; for 1 <= i <= min(M,N), row i of the 
            matrix was interchanged with row IPIV(i). 
 
    INFO    (output) INTEGER 
            = 0:  successful exit 
            < 0:  if INFO = -i, the i-th argument had an illegal value 
                  or another error occured, such as memory allocation failed. 
            > 0:  if INFO = i, U(i,i) is exactly zero. The factorization 
                  has been completed, but the factor U is exactly 
                  singular, and division by zero will occur if it is used 
                  to solve a system of equations.

    =====================================================================    */ 
 
 
 
    double c_one     = MAGMA_D_ONE; 
    double c_neg_one = MAGMA_D_NEG_ONE; 
 
    int ONE = 1; 
 
    magma_int_t iinfo, nb; 
    magma_int_t mindim; 
    magma_int_t nrows, ncols; 
    //double *work; 
 
 
     magma_int_t dm_max, dn_max; 
     magma_int_t I, J, K, M, N, U_K, L; 
     
     //magma_int_t A_m, A_n, A_N; 
     //magma_int_t Am_max, An_max; 
     //magma_int_t A_nb; 
     
 
     //magma_int_t A_K; 
     double **dlAT; 
     magma_int_t dlAT_LD; 
      
      
     double *dlAP_get[MagmaMaxGPUs]; //*dlAP_set[MagmaMaxGPUs]
      
     
     double *dlAP_set[MagmaMaxGPUs];
     magma_int_t dlAP_LD;

     double *dlpanel[MagmaMaxGPUs];
     magma_int_t dlpanel_LD;

     int *n_local, *nr_local;

     //magma_int_t nrows, ncols; 
     magma_int_t gpu_nrows, gpu_ncols; 
  
     int nbcores; /*Number of cores available for the whole factorization*/ 
     int panel_num_threads; /*Number of threads for the panel*/ 
     double dcpu; /*percentage of the matrix to allocate on the CPUs*/ 
  
    int B_rows;

    double t1;
    
    /*Workspace*/
    // magma_int_t AWORK_NMAX;
    // magma_int_t AWORK_m, AWORK_n, AWORK_N;

     /* Recommanded dimension in the workspace*/ 
     int A_m, A_n, A_N, A_NMAX, A_LD;
     int A_NP1;
     double *A;

     amc_args_t *args;
    /*magma_event_t *A_event;*/ /*Control bucket*/
     magma_queue_t mstream[MagmaMaxGPUs][3]; /*0: H2D, 1: compute, 2:D2H*/
     int dd;

//     double *tmpdA;

    /* Check arguments */ 
    *info = 0; 
    if (m < 0) 
        *info = -1; 
    else if (n < 0) 
        *info = -2; 
    else if (dlA_LD < max(1,m)) 
        *info = -4; 
    else if (AWORK_LD < max(1,m)) 
        *info = -5;

    if (*info != 0) { 
        magma_xerbla( __func__, -(*info) ); 
        return *info; 
    } 
 
    /* Quick return if possible */ 
    if (m == 0 || n == 0) 
        return *info; 
 
 


      
     /*Get parameters*/ 
    args = magma_amc_args_get_default();
     nb= args->nb;

     nbcores = args->P;  
     panel_num_threads = args->Pr; 
     dcpu = args->dcpu;

     /* Check and fix parameters */
     if(nb==0)
        nb     = magma_get_dgetrf_nb(m) ;/*magma dgetrf block size*/ 
    else
        nb = args->nb;

     if(nb>n) nb = n; 
     if(panel_num_threads>nbcores) panel_num_threads = nbcores;

     /*check the buffer size*/

     if(AWORK_n<nb){
         printf("Not enough buffer. Should be greater than the block size: %d\n", nb);
         exit(1);
     }

     /* Compute the number of blocks columns to factorize*/
     N  = (int) ceil( (double) min(m, n) / nb);

     /* Compute the maximum number of panels we can store in the workspace*/
     A_NMAX = (int) (AWORK_n/ nb);

     /*Compute the recommanded number of panels for the cpu part*/
     A_N = NSplit(N, dcpu);

     /* Compute the recommanded number of columns for the cpu part*/
     A_n = A_N*nb;//(int) ceil(n*dcpu);

     //if(A_n<nb) 
     //     A_n = nb;//make sure workspace has at least one block column

     /*Make sure we work with multiple of 32*/
     /*
     if(A_n%32!=0) {
         A_n = ((A_n + 31)/32)*32;
     }
     */

     /* Compute the recommanded number of panels for the cpu part*/
    // A_N = (int) (A_n/ nb);
     
     /* Check if there are enough workspace. In case the user gave a workspace lower than the optimal*/
     /* NOTE: using small workspace may reduce performance*/

     if(A_N>A_NMAX){    

#if (dbglevel >=1)
        printf("[DBG_WARNING] Resizing buffer to feet user preferences. Recommanded:%d, Max given:%d\n",A_N, A_NMAX); 
#endif
        A_N = A_NMAX;

        /*Make A_n a multiple of nb*/
        A_n = A_N*nb;
    }
      
     A = AWORK;
     A_m = m;
     A_LD = AWORK_LD;


#if (dbglevel >=1)
    /* Initialize the tracing*/
    ca_dbg_trace_init(nbcores,num_gpus); //nbcores + 1 GPU
#endif

#if (dbglevel >=1)
    t1 = magma_wtime();
#endif

    /* create the streams */
    //mstream = (magma_queue_t *)    malloc(num_gpus*sizeof(magma_queue_t));

    for(dd=0;dd<num_gpus;dd++){
       magma_setdevice(dd); //required
       magma_queue_create(&mstream[dd][0]);
       magma_queue_create(&mstream[dd][1]);
       magma_queue_create(&mstream[dd][2]);

       /*Set the stream for internal computations*/
       //magmablasSetKernelStream(0); /*Use 0 instead of mstream[dd][1], MagmasetkernelStream is not thread safe*/ /*TODO: mae it safe*/

       //task_dev_set_compute_stream(dd, mstream[dd][1]);
       magma_task_dev_set_compute_stream(dd, 0); //set to mstream 1 later
    }

    


     /* Matrix dimension */
     dm_max = m;
     dn_max = n;

    /*Make sure m and n are multiple of 32*/
     
     if(dm_max%32!=0) dm_max = ((dm_max + 31)/32)*32;
     if(dn_max%32!=0) dn_max = ((dn_max + 31)/32)*32;
     
     

     /* local dimensions of the matrix for each GPU*/
     n_local = (int *)    malloc(num_gpus*sizeof(int)); /*This do no change during the execution*/
     nr_local = (int *)    malloc(num_gpus*sizeof(int)); /*Change after each update of the trailing submatrix*/

     for(dd=0;dd<num_gpus;dd++){
        n_local[dd] = numcols2p(dd, n, nb, num_gpus); //loc2p(dd, N, num_gpus)*nb;    
        nr_local[dd] = n_local[dd];
     }


     /*Allocate a workspace for the panels transposition*/ 

     dlAP_LD = dm_max; 
     //if(dAP_LD%32!=0) dAP_LD = ((dAP_LD + 31)/32)*32;/*Make dAP_LD multiple of 32*/
    /// dlAP_set = (double **)    malloc(num_gpus*sizeof(double*));
     //dlAP_get = (double **)    malloc(num_gpus*sizeof(double*));

     for(dd=0;dd<num_gpus;dd++){

         magma_setdevice(dd);
        
         if (MAGMA_SUCCESS != magma_dmalloc( &dlAP_set[dd], dlAP_LD*nb)) { 
                *info = MAGMA_ERR_DEVICE_ALLOC; 
                return *info; 
        } 
        
        /*
        if (MAGMA_SUCCESS != magma_dmalloc(&tmpdA, dlAP_LD*nb)) { 
                *info = MAGMA_ERR_DEVICE_ALLOC; 
                return *info; 
        }
        */
        if ( magma_is_devptr(dlAP_set[dd] ) == 0 ) {
            fprintf( stderr, "ERROR: dlAP_set[dd] is host pointer.\n" );
            //exit(1);
        }

        
        //cudaMemcpy(dlAP_set[dd],&tmpdA,sizeof(double*), cudaMemcpyDeviceToHost);

        #if (dbglevel==10) 
        printf("0.4\n");
            
            //ca_dbg_printMat_gpu(2, 2, dlAP_set[dd], dlAP_LD, "dlAP_set[dd] for testing");
            //cudaMemcpy(&tmpdA, &dlAP_set[dd], sizeof(double*), cudaMemcpyHostToDevice);
            //ca_dbg_printMat_gpu(2, 2, tmpdA, dlAP_LD, "dlAP_set[dd] for testing");
            //printf("0.5: int to continue"); scanf("%d", &I);
        #endif

         if (MAGMA_SUCCESS != magma_dmalloc(&dlAP_get[dd], dlAP_LD*nb)) { 
                //magma_free(dlAP_set); //TODO: free all previous buffers
                *info = MAGMA_ERR_DEVICE_ALLOC; 
                return *info; 
        }
     }

    /* Workspace for the panels */

    // dlpanel = (double **)    malloc(num_gpus*sizeof(double*));
     
     for(dd=0;dd<num_gpus;dd++){
         magma_setdevice(dd);

         if (MAGMA_SUCCESS != magma_dmalloc(&dlpanel[dd], nb*dm_max)) { 

                
                *info = MAGMA_ERR_DEVICE_ALLOC; 
                return *info; 
        }
      }

      dlpanel_LD = nb;
      

    /*local matrix storage*/
    dlAT = (double **)    malloc(num_gpus*sizeof(double*));

    
    dlAT_LD = n_local[0];

    if(dlAT_LD%32!=0) dlAT_LD = ((dlAT_LD + 31)/32)*32;

    for(dd=0;dd<num_gpus;dd++){
         magma_setdevice(dd);

        if (MAGMA_SUCCESS != magma_dmalloc(&dlAT[dd], dlAT_LD*dm_max )) { 
                for(J=0;J<dd;J++){
                    magma_setdevice(J);
                    magma_free( dlAP_set[J]); 
                    magma_free( dlAP_get[J]);
                    magma_free(dlpanel[J]);
                    magma_free(dlAT[J]);
                }
                //free(dlAP_set); 
                //free(dlAP_get);
                //free(dlpanel);
                free(dlAT);
            *info = MAGMA_ERR_DEVICE_ALLOC; 
            return *info; 
        }



    }


#if (dbglevel >=1)
    printf("[DBG] Time workspace memory alloc (dAP): %f\n",magma_wtime()-t1);
    t1 = magma_wtime();
#endif


    /*1. Transfer the first column blocks of the matrix from the GPU to the CPUs.*/ 
    
    //magma_dgetmatrix(A_m, A_n, dA, dA_LD, A, A_LD); 
    magma_dgetmatrix_1D_col_bcyclic(A_m, A_n, dlA, dlA_LD, A, A_LD, num_gpus, nb);

#if (dbglevel >=1)
    printf("[DBG] Time First getmatrix: %f\n",magma_wtime()-t1);
    t1 = magma_wtime();
#endif

#if (dbglevel==10) 
    printf("1.0\n");
    ca_dbg_printMat(A_m, A_n, A, A_LD,"A after first getMatrix"); 

    /*
    for(dd=0;dd<num_gpus;dd++){
        //Fill the matrix with zero for easy visualization of the matrix in debug mode
        for(I=0;I<dlAT_LD*dm_max;I++)  dlAT[dd][I] = 0.0;
    }
    */
//    ca_dbg_printMat_mgpu(num_gpus,  m, n_local, dlAT, dlAT_LD,"matrix dAlT^T empty");
//    ca_dbg_printMat_transpose_mgpu(num_gpus,  n_local, m, dlAT, dlAT_LD,"matrix dAT empty");
printf("2.0\n");
#endif

    /*Update the remaining number of columns on the GPUs.*/
    for(dd=0;dd<num_gpus;dd++){
        nr_local[dd] = nr_local[dd] - numcols2p(dd, A_n, nb, num_gpus); //;n_local[dd] - loc2p(dd, A_N, num_gpus)*nb;        
    }

#if (dbglevel==10) 
    ca_dbg_printMat_mgpu(num_gpus, m, n_local, dlA, dlA_LD,"matrix dA to factorize");

    printf("3.0\n");    
#endif


for(dd=0;dd<num_gpus;dd++){
    magma_setdevice(dd);
    //magmablasSetKernelStream(mstream[dd][1]);    
    magmablas_dtranspose2(dlAT[dd], dlAT_LD, dlA[dd], dlA_LD, m, n_local[dd]);
}

///


for(dd=0;dd<num_gpus;dd++){
    magma_setdevice(dd);
    magma_task_dev_set_compute_stream(dd, mstream[dd][1]);
}



#if (dbglevel >=1)
    printf("[DBG] Time First transposition: %f\n",magma_wtime()-t1);
    t1 = magma_wtime();
#endif

#if (dbglevel==10) 
    //ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"matrix dAT to factorize");
/*
    dd = GID(A_N);
    magma_setdevice(dd);

    ca_dbg_printMat_transpose_gpu(nb, m, dlAT(0, A_N), dlAT_LD,"matrix dAT(0, A_N)");
    
    magma_setdevice(0);
    ca_dbg_printMat_transpose_gpu(m, nb, dlA(0, A_N), dlA_LD,"matrix dA(0, A_N)");
    */

    printf("4.0\n");
    printf("int to continue"); scanf("%d", &I);
#endif

/*
#if (dbglevel==10) 
    ca_dbg_printMat_transpose_mgpu(num_gpus, m, n_local, dlAT, dlAT_LD,"matrix dAT to factorize");
#endif
*/


     /* Compute the maximun number of steps*/
     mindim = min(m, n); 
     M      = (int) ceil( (double) m / nb); 
     N      = (int) ceil( (double) mindim / nb); /*N = n/nb*/


     /* 3. Let the asynchronous algorithm begin*/ 
     
#if (dbglevel >=1)
     printf("Starting recursif code ... m:%d, n:%d, nb:%d, nbcores:%d, N:%d, A_N:%d\n", m, n, nb, nbcores, N, A_N); //Summary
#endif



     /*Initialize the scheduler*/ 
     magma_schedule_init(nbcores, num_gpus); 



     K = 0; 
     /*initialize parallel recursif panel environment*/
     CORE_zgetrf_reclap_init();

     magma_schedule_set_task_priority(INT_MAX-1);
     
     /*Schedule the first panel factorization*/ 
     magma_insert_core_dgetrf_rec(A_m, nb, A(0,K), A_LD, ipiv(0), &iinfo, panel_num_threads, colptr(K));  
     //magma_insert_core_dgetrf(A_m, nb, A(0,K), A_LD, ipiv(0), &iinfo, colptr(K)); 
 
     /*Transfer the factorized panel in the buffer of GPU (dlpanel)*/

     for(dd=0;dd<num_gpus;dd++){
         
        
        ///magma_insert_dev_dsetmatrix_transpose(dd, A_m, nb, A(0,K), A_LD, dlpanel(dd,K), dlpanel_LD, dlAP_set[dd], dlAP_LD, colptr(K), dlpanel[dd]);
        magma_insert_dev_dsetmatrix_async_transpose(dd, A_m, nb, A(0,K), A_LD, dlpanel(dd,K), dlpanel_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K), dlpanel(dd,K)); //dlpanel[dd]

     }
#if (dbglevel==10) 
    magma_schedule_barrier();

    for(dd=0;dd<num_gpus;dd++){
        magma_setdevice(dd);
        ca_dbg_printMat_transpose_gpu(nb, m, dlpanel(dd,K), dlpanel_LD,"dlpanel[dd] after setmatrix_async"); //dlpanel[dd]
    }
    printf("4.5: int to continue"); scanf("%d", &I);
#endif
     /*Transfer also the factorized panel on its right position in the final matrix (transposition included)*/ 
     /*TODO: this may use cudaMemcpyDeviceToDevice and initiate the transfer from dlpanel*/
     dd = GID(K);
     //magma_insert_dev_dsetmatrix_transpose(dd, A_m, nb, A(0,K), A_LD, dlAT(0,K), dlAT_LD, dlAP_set[dd], dlAP_LD, colptr(K), dlAT(0,K)); 
     magma_insert_dev_dsetmatrix_async_transpose(dd, A_m, nb, A(0,K), A_LD, dlAT(0,K), dlAT_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K), dlAT(0,K)); 
 
#if (dbglevel==10) 
    magma_schedule_barrier(); 
    ca_dbg_printMat(m, nb, A(0,0), A_LD,"A(0,0)");
    
    for(dd=0;dd<num_gpus;dd++){
        magma_setdevice(dd);
        ca_dbg_printMat_transpose_gpu(nb, m, dlpanel[dd], dlpanel_LD,"dlpanel[dd] after setmatrix to dlAT");
    }

    ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"dlA"); 
 printf("5.0: int to continue"); scanf("%d", &I);
#endif 

     for(K=0;K<=N-1;K++){ 
     
         /*compute the new value of the cpu number of blocks*/
         A_N = NSplit(N-K, dcpu);

          /*insert the coarse update of the trailing submatrix corresponding to panel K to the GPU, that is submatrix A[K+1:M, K+1+d-1:N]*/ 
          
         //if(K==0) /*TODO: move outside loop*/
          //{

         /*NOTE: Here we work on the matrix transpose*/

         /*Set the priority max for the GPU computations*/
            magma_schedule_set_task_priority(INT_MAX);
            //// magma_schedule_set_task_priority(INT_MAX - N*K);

         gpu_nrows = m - (K+1)*nb;///

         for(J=K+A_N;J<min(K+A_N+num_gpus,N);J++){

            /*Determine the device which own the first column of the group of columns to update*/
             dd = GID(J);

            /*Determine the number of columns to apply the update. */
             nr_local[dd] = numcols2p(dd, n - (K+1+A_N-1)*nb, nb, num_gpus);

              gpu_ncols = nr_local[dd]; //n - (K+1+A_N-1)*nb; 
            
              if(gpu_ncols >0) 
              { 
 
                  /*schedule a swap of the trailing submatrix in the gpus using ipiv[K]*/ 
                  /*dependency dAT((K+1)-1, (K+A_N)-1) = dAT(K, K+A_N-1) with previous dgemm*/              
              
                  magma_insert_dev_dlaswp(dd, gpu_ncols, dlAT(K, J), dlAT_LD, ONE, nb, ipiv(K), ONE, dlAT(K, J-1)); /*non blocking*/                  
                  //printf("debug barrier\n");
                  //magma_schedule_barrier();
                  //&(dlpanel[dd][dlpanel_LD*nb*K])
                  magma_insert_dev_dtrsm(dd, MagmaRight,  MagmaUpper, MagmaNoTrans, MagmaUnit, gpu_ncols, nb, c_one, dlpanel(dd,K), dlpanel_LD, dlAT(K,J), dlAT_LD);/*non blocking*/ 
 
                  /* aij^T = aij^T - (lik.ukj)^T = aij^T - ukj^T.lik^T*/ //&(dlpanel[dd][dlpanel_LD*nb*(K+1)])
                  magma_insert_dev_dgemm(dd, MagmaNoTrans,MagmaNoTrans, gpu_ncols, gpu_nrows, nb, c_neg_one, dlAT(K,J), dlAT_LD, dlpanel(dd,K+1), dlpanel_LD, c_one, dlAT(K+1,J), dlAT_LD);/*non blocking*/    
              

                  /*Transfer asynchronously one column (column K+A_N) from the GPU to the CPU to balance work*/                
                 //// if(K+A_N<N) 
                 //// { 
                    ////ncols = min(nb, gpu_ncols); 
 
                    //////magma_schedule_set_task_priority(INT_MAX);

                    ////magma_insert_dgetmatrix_transpose(gpu_nrows, ncols, dAT(K+1,K+A_N), dAT_LD, A(K+1,K+A_N), A_LD, dAP, dAP_LD, colptr(K+A_N)); //blocking
                 //// }
              
              } 
         }

          //}
          /*iterate over the rest of the columns to update the trailing submatrix on the cpu*/ 
          for(J=K+1;J<=min(K+A_N-1, N-1);J++){ 
 
               ncols = min(nb, n - J*nb); 
 
               /*Set the priority max for column having the next panel (look ahead of deep 1),
               and process the rest of the update in a right looking way*/
               if(J==K+1)
                   magma_schedule_set_task_priority(INT_MAX -2 );
                  //// magma_schedule_set_task_priority(INT_MAX - N*K -1);
               else
                   magma_schedule_set_task_priority(INT_MAX -3 - J );//- N*K
                  //// magma_schedule_set_task_priority(INT_MAX - N*K -3 -J);
               //magma_schedule_set_task_priority(INT_MAX - J);

               /*dependency colptr(J): make sure column J is sent from GPU, and all previous update was done*/
               magma_insert_core_dlaswp(ncols, A(K,J), A_LD, ONE, nb, ipiv(K), ONE, colptr(J)); 
 
               magma_insert_core_dtrsm('L', 'L', 'N', 'U', nb, ncols, c_one, A(K,K), A_LD, A(K,J), A_LD, colptr(J)); 
 
             /*Compute the number of blocs rows to group together before the update. To avoid scheduling overhead.*/
              B_rows = (int) ceil((double) (M-K-1)/panel_num_threads);
              B_rows = max(B_rows,4); /*maximun of 4*/ 
              //B_rows = max(B_rows,1);
              //printf("B_rows:%d\n",B_rows);
               for(I=K+1; I<=M-1; I+=B_rows){ 
     
                    nrows = min(B_rows*nb, m-I*nb); 
                    
                    /*dep colptr(K):make sure the panel is not overwritten or swapped since dgemm use A[I,K]*/
                    /*dep colptr(J): Gather all dgemm on one column and create dependencies with previous dgemm and the next panel*/
                    magma_insert_core_dgemm('N','N', nrows, ncols, nb, c_neg_one, A(I,K), A_LD, A(K,J), A_LD, c_one, A(I,J), A_LD, colptr(K), colptr(J)); 
               } 
 
                

               if(J==K+1) 
               { 
                    /*Look ahead and insert the next panel*/ 
                    nrows = m - (K+1)*nb; 
                    ncols = min(nb, n - (K+1)*nb); 
 
                    /*Schedule the next panel factorization with maximum priority*/ 
                    magma_schedule_set_task_priority(INT_MAX -1);
                    ///magma_schedule_set_task_priority(0); //TEST: testing prio_0
                   //// magma_schedule_set_task_priority(INT_MAX - N*K - 2);

                   magma_insert_core_dgetrf_rec(nrows, ncols, A(K+1,K+1), A_LD, ipiv(K+1), &iinfo, panel_num_threads, colptr(K+1)); 
                   // magma_insert_core_dgetrf(nrows, ncols, A(K+1,K+1), A_LD, ipiv(K+1), &iinfo, colptr(K+1)); 
 
                    /*Transfer the factorized panel in the buffer of GPU (dlpanel)*/

                     for(dd=0;dd<num_gpus;dd++){
                         //&(dlpanel[dd][dlpanel_LD*nb*(K+1)])
                        ///magma_insert_dev_dsetmatrix_transpose(dd, nrows, ncols, A(K+1, K+1), A_LD, dlpanel(dd, K+1), dlpanel_LD, dlAP_set[dd], dlAP_LD, colptr(K+1), dlpanel[dd]);
                        magma_insert_dev_dsetmatrix_async_transpose(dd, nrows, ncols, A(K+1, K+1), A_LD, dlpanel(dd, K+1), dlpanel_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K+1), dlpanel(dd,K+1));//, dlpanel[dd]
                     }

                    /*Determine the upper part of the matrix done by the CPU on that column and send it to the GPU with the panel*/ 
                    U_K = max(0, K+1 - A_N +1); 
                    nrows = m - U_K*nb; 
 
                    ///magma_schedule_set_task_priority(INT_MAX);
                    /*Transfer the upper part of the matrix for that column and the factorized panel to the GPU*/ 
                    ///magma_insert_dsetmatrix_transpose(nrows, ncols, A(U_K, K+1), A_LD, dAT(U_K, K+1), dAT_LD, dAP, dAP_LD, A(K+1,K+1), dAT(K+1,K+1)); 
                    //magma_insert_dev_dsetmatrix_transpose(nrows, ncols, A(U_K, K+1), A_LD, dAT(U_K, K+1), dAT_LD, dAP_set, dAP_LD, colptr(K+1), dAT(K+1,K+1));
 
                    
                     /*Transfer also the factorized panel on its right position in the final matrix (transposition included)*/ 
                     /*TODO: this may use cudaMemcpyDeviceToDevice and initiate the transfer from dlpanel*/
                     dd = GID(K+1);
                     ///magma_insert_dev_dsetmatrix_transpose(dd, nrows, ncols, A(U_K, K+1), A_LD, dlAT(U_K,K+1), dlAT_LD, dlAP_set[dd], dlAP_LD, colptr(K+1), dlAT(K+1,K+1));
                     magma_insert_dev_dsetmatrix_async_transpose(dd, nrows, ncols, A(U_K, K+1), A_LD, dlAT(U_K,K+1), dlAT_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K+1), dlAT(0,K+1));///

               } 
 
          } 
 
            /*compute the next number of blocks colums */
          A_NP1 = NSplit(N-(K+1), dcpu) - NSplit(N-K, dcpu) + 1;

           /*Transfer asynchronously (A_NP1 - A_N) block column (column K+A_N) from the GPU to the CPU to balance work*/  
            /*Make sure this is inserted after all dgemm because it schedules to replace a current panel for the case A_N< N*/
          for(L=K+A_N;L<K+A_N+A_NP1;L++)
          {
               if(L<N) { 

                 /*Determine the device which own column K+A_N*/
                 dd = GID(L);

                 gpu_ncols = nr_local[dd];

                 ncols = min(nb, gpu_ncols); 
 
                 magma_schedule_set_task_priority(INT_MAX);

                 ///magma_insert_dev_dgetmatrix_transpose(dd, gpu_nrows, ncols, dlAT(K+1,K+A_N), dlAT_LD, A(K+1,K+A_N), A_LD, dlAP_get[dd], dlAP_LD, colptr(K+A_N)); //blocking
             
                 /*make sure the computations are done on stream 1 and send a block column on stream 2*/
                 magma_insert_dev_queue_sync(dd, mstream[dd][1], dlAT(K+1,L)); 
                 magma_insert_dev_dgetmatrix_async_transpose(dd, gpu_nrows, ncols, dlAT(K+1,L), dlAT_LD, A(K+1,L), A_LD, mstream[dd][2], dlAP_get[dd], dlAP_LD, colptr(L));
                 /*Update the remaining number of columns*/
                //// nr_local[dd]-=nb;

                  /*if A_N==1, there is no look-ahead, so insert the panel here*/
                   if((A_N==1) && (L==K+A_N)){
                      /*Look ahead and insert the next panel*/ 
                      nrows = m - (K+1)*nb; 
                      ncols = min(nb, n - (K+1)*nb); 
                      /*Schedule the next panel factorization with maximum priority*/ 
                      magma_schedule_set_task_priority(INT_MAX -1);
                            ///magma_schedule_set_task_priority(0); //TEST: testing prio_0
                           //// magma_schedule_set_task_priority(INT_MAX - N*K - 2);

                      magma_insert_core_dgetrf_rec(nrows, ncols, A(K+1,K+1), A_LD, ipiv(K+1), &iinfo, panel_num_threads, colptr(K+1)); 
                      //magma_insert_core_dgetrf(nrows, ncols, A(K+1,K+1), A_LD, ipiv(K+1), &iinfo, colptr(K+1)); 
 
                      /*Transfer the factorized panel in the buffer of GPU (dlpanel)*/

                      for(dd=0;dd<num_gpus;dd++){
                          //&(dlpanel[dd][dlpanel_LD*nb*(K+1)])
                        ///magma_insert_dev_dsetmatrix_transpose(dd, nrows, ncols, A(K+1, K+1), A_LD, dlpanel(dd, K+1), dlpanel_LD, dlAP_set[dd], dlAP_LD, colptr(K+1), dlpanel[dd]);
                        magma_insert_dev_dsetmatrix_async_transpose(dd, nrows, ncols, A(K+1, K+1), A_LD, dlpanel(dd, K+1), dlpanel_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K+1), dlpanel(dd,K+1));//dlpanel[dd]
                      }

                            /*Determine the upper part of the matrix done by the CPU on that column and send it to the GPU with the panel*/ 
                      U_K = max(0, K+1 - A_N +1); 
                      nrows = m - U_K*nb; 
 
                            ///magma_schedule_set_task_priority(INT_MAX);
                            /*Transfer the upper part of the matrix for that column and the factorized panel to the GPU*/ 
                            ///magma_insert_dsetmatrix_transpose(nrows, ncols, A(U_K, K+1), A_LD, dAT(U_K, K+1), dAT_LD, dAP, dAP_LD, A(K+1,K+1), dAT(K+1,K+1)); 
                            //magma_insert_dev_dsetmatrix_transpose(nrows, ncols, A(U_K, K+1), A_LD, dAT(U_K, K+1), dAT_LD, dAP_set, dAP_LD, colptr(K+1), dAT(K+1,K+1));

                      /*Transfer also the factorized panel on its right position in the final matrix (transposition included)*/ 
                      /*TODO: this may use cudaMemcpyDeviceToDevice and initiate the transfer from dlpanel*/
                      dd = GID(K+1);
                      ///magma_insert_dev_dsetmatrix_transpose(dd, nrows, ncols, A(U_K, K+1), A_LD, dlAT(U_K,K+1), dlAT_LD, dlAP_set[dd], dlAP_LD, colptr(K+1), dlAT(K+1,K+1));
                      magma_insert_dev_dsetmatrix_async_transpose(dd, nrows, ncols, A(U_K, K+1), A_LD, dlAT(U_K,K+1), dlAT_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K+1), dlAT(0,K+1));///dlAT(K+1,K+1)
                   }
               }

         }
#if (dbglevel==10) 
  
  magma_schedule_barrier(); 
  ca_dbg_printMat(m, A_n, A, A_LD,"A"); 
  ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"dAT (Step K)"); 
  
  nrows = m - K*nb; 
  ncols = min(nb, n - K*nb);

  dd = GID(K);
  magma_setdevice(dd);
  ca_dbg_printMat_transpose_gpu(ncols, nrows, dlAT(K,K), dlAT_LD,"dAT(K,K)");

  if(K<=5){
  printf("Step K:%d done. Int to continue: ",K); scanf("%d", &I);
  }

#endif 
           

     } //Step K done
 /*Wait for all thread termination*/
 magma_schedule_barrier(); 


 /*make sure everything arrived*/ ///needed?
 for(dd=0;dd<num_gpus;dd++){
        magma_setdevice(dd);
       magma_queue_sync(mstream[dd][0]);
       magma_queue_sync(mstream[dd][1]);
       magma_queue_sync(mstream[dd][2]);
}

     /*TODO: don't need quark here*/
     /*Perform a sequence of left swap on the matrix corresponding to the different panel*/ 
     for(K=1;K<=N-1;K++){ 
 
#if (dbglevel >=1)
    ca_trace_start();
#endif

    nrows = min(nb,m - K*nb);

    ncols = min(K*nb,n);

    for(dd=0;dd<=min(num_gpus-1, K-1);dd++){
        
        gpu_ncols = numcols2p(dd, ncols, nb, num_gpus); 

        J = dd;
        if(gpu_ncols>0){
            magma_setdevice(dd);
            //pthread_mutex_lock(&mutex_compute_stream);
            magmablasSetKernelStream(mstream[dd][1]);
            magmablas_dlaswp(gpu_ncols, dlAT(K, J), dlAT_LD, ONE, nrows, ipiv(K), ONE);
            //pthread_mutex_lock(&mutex_compute_stream);
        }

    }       
        

        

#if (dbglevel >=1)
ca_trace_end_1gpu('W');
#endif
     } 
     
#if (dbglevel==10) 
    ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"dAT after lswap"); 
#endif

/*Shutdown the scheduler*/
     magma_schedule_delete();

/*update permutation vector indexes*/ 
     for(K=1;K<=N-1;K++){ 
 
        nrows = min(nb, n-K*nb); 
        for(J=0;J<=nrows-1;J++){ 
            ipiv[K*nb+J] += K*nb; 
        } 
     } 

#if dbglevel>=1
    printf("[DBG] Time Factorization:%f\n",magma_wtime()-t1); 
    t1 = magma_wtime();
#endif

    /* 4. Transpose back the matrix in/out of place*/
    for(dd=0;dd<num_gpus;dd++){

        //n_local[dd] = numcols2p(dd, n, nb, num_gpus); //loc2p(dd, N, num_gpus)*nb;

        magma_setdevice(dd);
        magmablasSetKernelStream(mstream[dd][1]);
        magmablas_dtranspose2(dlA[dd], dlA_LD, dlAT[dd], dlAT_LD, n_local[dd], m);
    }


    for(dd=0;dd<num_gpus;dd++){ //needed
        magma_setdevice(dd);
        magmablasSetKernelStream(NULL);
    }

#if dbglevel>=1
    printf("[DBG] Time Final in/out of place transpose:%f\n",magma_wtime()-t1); 
    t1 = magma_wtime();

#endif



#if (dbglevel==10)     
    ca_dbg_printMat_mgpu(num_gpus, m, n_local, dlA, dlA_LD,"dA = LU"); 
#endif 

    for(dd=0;dd<num_gpus;dd++){
        magma_setdevice(dd);
       magma_queue_destroy(mstream[dd][0]);
       magma_queue_destroy(mstream[dd][1]);
       magma_queue_destroy(mstream[dd][2]);
    }

    //free(mstream);

    // printf("Step 4: time:%f\n",magma_wtime()-t1); 
// t1 = magma_wtime();
    free(n_local);
    free(nr_local);
//    free(k_local);
    for(dd=0;dd<num_gpus;dd++){
        magma_setdevice(dd);
        magma_free( dlAP_set[dd]); 
        magma_free( dlAP_get[dd]);
        magma_free(dlpanel[dd]);
        magma_free(dlAT[dd]);
    }

    //free(dlAP_set); 
    //free(dlAP_get);
    //free(dlpanel);
    free(dlAT);

#if dbglevel>=1
    printf("[DBG] Time memory free (dAP):%f\n",magma_wtime()-t1); 
    t1 = magma_wtime();
#endif

#if dbglevel>=1
    /*Finalize the tracing*/
    ca_dbg_trace_finalize();
    printf("[DBG] Time llog:%f\n",magma_wtime()-t1); 
#endif

    return *info; 
}   /* End of MAGMA_DGETRF_REC_ASYNC_WORK_GPU */
예제 #17
0
void upd_mdebin(t_mdebin       *md,
                gmx_bool        bDoDHDL,
                gmx_bool        bSum,
                double          time,
                real            tmass,
                gmx_enerdata_t *enerd,
                t_state        *state,
                t_lambda       *fep,
                t_expanded     *expand,
                matrix          box,
                tensor          svir,
                tensor          fvir,
                tensor          vir,
                tensor          pres,
                gmx_ekindata_t *ekind,
                rvec            mu_tot,
                gmx_constr_t    constr)
{
    int    i, j, k, kk, n, gid;
    real   crmsd[2], tmp6[6];
    real   bs[NTRICLBOXS], vol, dens, pv, enthalpy;
    real   eee[egNR];
    real   ecopy[F_NRE];
    double store_dhdl[efptNR];
    real   store_energy = 0;
    real   tmp;

    /* Do NOT use the box in the state variable, but the separate box provided
     * as an argument. This is because we sometimes need to write the box from
     * the last timestep to match the trajectory frames.
     */
    copy_energy(md, enerd->term, ecopy);
    add_ebin(md->ebin, md->ie, md->f_nre, ecopy, bSum);
    if (md->nCrmsd)
    {
        crmsd[0] = constr_rmsd(constr);
        add_ebin(md->ebin, md->iconrmsd, md->nCrmsd, crmsd, FALSE);
    }
    if (md->bDynBox)
    {
        int nboxs;
        if (md->bTricl)
        {
            bs[0] = box[XX][XX];
            bs[1] = box[YY][YY];
            bs[2] = box[ZZ][ZZ];
            bs[3] = box[YY][XX];
            bs[4] = box[ZZ][XX];
            bs[5] = box[ZZ][YY];
            nboxs = NTRICLBOXS;
        }
        else
        {
            bs[0] = box[XX][XX];
            bs[1] = box[YY][YY];
            bs[2] = box[ZZ][ZZ];
            nboxs = NBOXS;
        }
        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
        add_ebin(md->ebin, md->ib, nboxs, bs, bSum);
        add_ebin(md->ebin, md->ivol, 1, &vol, bSum);
        add_ebin(md->ebin, md->idens, 1, &dens, bSum);

        if (md->bDiagPres)
        {
            /* This is pV (in kJ/mol).  The pressure is the reference pressure,
               not the instantaneous pressure */
            pv = vol*md->ref_p/PRESFAC;

            add_ebin(md->ebin, md->ipv, 1, &pv, bSum);
            enthalpy = pv + enerd->term[F_ETOT];
            add_ebin(md->ebin, md->ienthalpy, 1, &enthalpy, bSum);
        }
    }
    if (md->bConstrVir)
    {
        add_ebin(md->ebin, md->isvir, 9, svir[0], bSum);
        add_ebin(md->ebin, md->ifvir, 9, fvir[0], bSum);
    }
    add_ebin(md->ebin, md->ivir, 9, vir[0], bSum);
    add_ebin(md->ebin, md->ipres, 9, pres[0], bSum);
    tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
    add_ebin(md->ebin, md->isurft, 1, &tmp, bSum);
    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
    {
        tmp6[0] = state->boxv[XX][XX];
        tmp6[1] = state->boxv[YY][YY];
        tmp6[2] = state->boxv[ZZ][ZZ];
        tmp6[3] = state->boxv[YY][XX];
        tmp6[4] = state->boxv[ZZ][XX];
        tmp6[5] = state->boxv[ZZ][YY];
        add_ebin(md->ebin, md->ipc, md->bTricl ? 6 : 3, tmp6, bSum);
    }
    if (md->bMu)
    {
        add_ebin(md->ebin, md->imu, 3, mu_tot, bSum);
    }
    if (ekind && ekind->cosacc.cos_accel != 0)
    {
        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
        add_ebin(md->ebin, md->ivcos, 1, &(ekind->cosacc.vcos), bSum);
        /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
        tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
                 *dens*gmx::square(box[ZZ][ZZ]*NANO/(2*M_PI)));
        add_ebin(md->ebin, md->ivisc, 1, &tmp, bSum);
    }
    if (md->nE > 1)
    {
        n = 0;
        for (i = 0; (i < md->nEg); i++)
        {
            for (j = i; (j < md->nEg); j++)
            {
                gid = GID(i, j, md->nEg);
                for (k = kk = 0; (k < egNR); k++)
                {
                    if (md->bEInd[k])
                    {
                        eee[kk++] = enerd->grpp.ener[k][gid];
                    }
                }
                add_ebin(md->ebin, md->igrp[n], md->nEc, eee, bSum);
                n++;
            }
        }
    }

    if (ekind)
    {
        for (i = 0; (i < md->nTC); i++)
        {
            md->tmp_r[i] = ekind->tcstat[i].T;
        }
        add_ebin(md->ebin, md->itemp, md->nTC, md->tmp_r, bSum);

        if (md->etc == etcNOSEHOOVER)
        {
            /* whether to print Nose-Hoover chains: */
            if (md->bPrintNHChains)
            {
                if (md->bNHC_trotter)
                {
                    for (i = 0; (i < md->nTC); i++)
                    {
                        for (j = 0; j < md->nNHC; j++)
                        {
                            k                = i*md->nNHC+j;
                            md->tmp_r[2*k]   = state->nosehoover_xi[k];
                            md->tmp_r[2*k+1] = state->nosehoover_vxi[k];
                        }
                    }
                    add_ebin(md->ebin, md->itc, md->mde_n, md->tmp_r, bSum);

                    if (md->bMTTK)
                    {
                        for (i = 0; (i < md->nTCP); i++)
                        {
                            for (j = 0; j < md->nNHC; j++)
                            {
                                k                = i*md->nNHC+j;
                                md->tmp_r[2*k]   = state->nhpres_xi[k];
                                md->tmp_r[2*k+1] = state->nhpres_vxi[k];
                            }
                        }
                        add_ebin(md->ebin, md->itcb, md->mdeb_n, md->tmp_r, bSum);
                    }
                }
                else
                {
                    for (i = 0; (i < md->nTC); i++)
                    {
                        md->tmp_r[2*i]   = state->nosehoover_xi[i];
                        md->tmp_r[2*i+1] = state->nosehoover_vxi[i];
                    }
                    add_ebin(md->ebin, md->itc, md->mde_n, md->tmp_r, bSum);
                }
            }
        }
        else if (md->etc == etcBERENDSEN || md->etc == etcYES ||
                 md->etc == etcVRESCALE)
        {
            for (i = 0; (i < md->nTC); i++)
            {
                md->tmp_r[i] = ekind->tcstat[i].lambda;
            }
            add_ebin(md->ebin, md->itc, md->nTC, md->tmp_r, bSum);
        }
    }

    if (ekind && md->nU > 1)
    {
        for (i = 0; (i < md->nU); i++)
        {
            copy_rvec(ekind->grpstat[i].u, md->tmp_v[i]);
        }
        add_ebin(md->ebin, md->iu, 3*md->nU, md->tmp_v[0], bSum);
    }

    ebin_increase_count(md->ebin, bSum);

    /* BAR + thermodynamic integration values */
    if ((md->fp_dhdl || md->dhc) && bDoDHDL)
    {
        for (i = 0; i < enerd->n_lambda-1; i++)
        {
            /* zero for simulated tempering */
            md->dE[i] = enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0];
            if (md->temperatures != NULL)
            {
                /* MRS: is this right, given the way we have defined the exchange probabilities? */
                /* is this even useful to have at all? */
                md->dE[i] += (md->temperatures[i]/
                              md->temperatures[state->fep_state]-1.0)*
                    enerd->term[F_EKIN];
            }
        }

        if (md->fp_dhdl)
        {
            fprintf(md->fp_dhdl, "%.4f", time);
            /* the current free energy state */

            /* print the current state if we are doing expanded ensemble */
            if (expand->elmcmove > elmcmoveNO)
            {
                fprintf(md->fp_dhdl, " %4d", state->fep_state);
            }
            /* total energy (for if the temperature changes */

            if (fep->edHdLPrintEnergy != edHdLPrintEnergyNO)
            {
                switch (fep->edHdLPrintEnergy)
                {
                    case edHdLPrintEnergyPOTENTIAL:
                        store_energy = enerd->term[F_EPOT];
                        break;
                    case edHdLPrintEnergyTOTAL:
                    case edHdLPrintEnergyYES:
                    default:
                        store_energy = enerd->term[F_ETOT];
                }
                fprintf(md->fp_dhdl, " %#.8g", store_energy);
            }

            if (fep->dhdl_derivatives == edhdlderivativesYES)
            {
                for (i = 0; i < efptNR; i++)
                {
                    if (fep->separate_dvdl[i])
                    {
                        /* assumes F_DVDL is first */
                        fprintf(md->fp_dhdl, " %#.8g", enerd->term[F_DVDL+i]);
                    }
                }
            }
            for (i = fep->lambda_start_n; i < fep->lambda_stop_n; i++)
            {
                fprintf(md->fp_dhdl, " %#.8g", md->dE[i]);
            }
            if (md->bDynBox &&
                md->bDiagPres &&
                (md->epc != epcNO) &&
                (enerd->n_lambda > 0) &&
                (fep->init_lambda < 0))
            {
                fprintf(md->fp_dhdl, " %#.8g", pv);  /* PV term only needed when
                                                        there are alternate state
                                                        lambda and we're not in
                                                        compatibility mode */
            }
            fprintf(md->fp_dhdl, "\n");
            /* and the binary free energy output */
        }
        if (md->dhc && bDoDHDL)
        {
            int idhdl = 0;
            for (i = 0; i < efptNR; i++)
            {
                if (fep->separate_dvdl[i])
                {
                    /* assumes F_DVDL is first */
                    store_dhdl[idhdl] = enerd->term[F_DVDL+i];
                    idhdl            += 1;
                }
            }
            store_energy = enerd->term[F_ETOT];
            /* store_dh is dE */
            mde_delta_h_coll_add_dh(md->dhc,
                                    (double)state->fep_state,
                                    store_energy,
                                    pv,
                                    store_dhdl,
                                    md->dE + fep->lambda_start_n,
                                    time);
        }
    }
}
예제 #18
0
void upd_mdebin(t_mdebin *md,FILE *fp_dgdl,
		bool bSum,
		real tmass,int step,real time,
		gmx_enerdata_t *enerd,
		t_state *state,
		matrix  box,
		tensor svir,
		tensor fvir,
		tensor vir,
		tensor pres,
		gmx_ekindata_t *ekind,
		rvec mu_tot,
		gmx_constr_t constr)
{
  static real *ttt=NULL;
  static rvec *uuu=NULL;
  int    i,j,k,kk,m,n,gid;
  real   crmsd[2],bs[NBOXS],tmp6[6];
  real   tricl_bs[NTRICLBOXS];
  real   eee[egNR];
  real   ecopy[F_NRE];
  real   tmp;
  
  /* Do NOT use the box in the state variable, but the separate box provided
   * as an argument. This is because we sometimes need to write the box from
   * the last timestep to match the trajectory frames.
   */
  copy_energy(enerd->term,ecopy);
  add_ebin(md->ebin,md->ie,f_nre,ecopy,bSum,step);
  if (nCrmsd) {
    crmsd[0] = constr_rmsd(constr,FALSE);
    if (nCrmsd > 1)
      crmsd[1] = constr_rmsd(constr,TRUE);
    add_ebin(md->ebin,md->iconrmsd,nCrmsd,crmsd,FALSE,0);
  }
  if (bDynBox || ((ekind != NULL) && (ekind->cosacc.cos_accel != 0))) {
    if(bTricl) {
      tricl_bs[0]=box[XX][XX];
      tricl_bs[1]=box[YY][XX];
      tricl_bs[2]=box[YY][YY];
      tricl_bs[3]=box[ZZ][XX];
      tricl_bs[4]=box[ZZ][YY];
      tricl_bs[5]=box[ZZ][ZZ];
      /* This is the volume */
      tricl_bs[6]=tricl_bs[0]*tricl_bs[2]*tricl_bs[5];
      /* This is the density */
      tricl_bs[7] = (tmass*AMU)/(tricl_bs[6]*NANO*NANO*NANO);
    } else {
      for(m=0; (m<DIM); m++) 
	bs[m]=box[m][m];
      /* This is the volume */
      bs[3] = bs[XX]*bs[YY]*bs[ZZ];      
      /* This is the density */
      bs[4] = (tmass*AMU)/(bs[3]*NANO*NANO*NANO);
    }
  }  
  if (bDynBox) {
    /* This is pV (in kJ/mol) */  
    if(bTricl) {
      tricl_bs[8] = tricl_bs[6]*enerd->term[F_PRES]/PRESFAC;
      add_ebin(md->ebin,md->ib,NTRICLBOXS,tricl_bs,bSum,step);
    } else {
      bs[5] = bs[3]*enerd->term[F_PRES]/PRESFAC;
      add_ebin(md->ebin,md->ib,NBOXS,bs,bSum,step);
    }
  }  
  if (bConstrVir) {
    add_ebin(md->ebin,md->isvir,9,svir[0],bSum,step);
    add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum,step);
  }
  add_ebin(md->ebin,md->ivir,9,vir[0],bSum,step);
  add_ebin(md->ebin,md->ipres,9,pres[0],bSum,step);
  tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
  add_ebin(md->ebin,md->isurft,1,&tmp,bSum,step);
  if (epc == epcPARRINELLORAHMAN) {
    tmp6[0] = state->boxv[XX][XX];
    tmp6[1] = state->boxv[YY][YY];
    tmp6[2] = state->boxv[ZZ][ZZ];
    tmp6[3] = state->boxv[YY][XX];
    tmp6[4] = state->boxv[ZZ][XX];
    tmp6[5] = state->boxv[ZZ][YY];
    add_ebin(md->ebin,md->ipc,bTricl ? 6 : 3,tmp6,bSum,step);
  }
  add_ebin(md->ebin,md->imu,3,mu_tot,bSum,step);
if (ekind && ekind->cosacc.cos_accel != 0) {
    add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum,step);
    /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
    if(bTricl) 
      tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
	       *tricl_bs[7]*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
    else 
      tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
	       *bs[4]*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
    add_ebin(md->ebin,md->ivisc,1,&tmp,bSum,step);    
  }
  if (md->nE > 1) {
    n=0;
    for(i=0; (i<md->nEg); i++) {
      for(j=i; (j<md->nEg); j++) {
	gid=GID(i,j,md->nEg);
	for(k=kk=0; (k<egNR); k++) {
	  if (bEInd[k]) {
	    eee[kk++] = enerd->grpp.ener[k][gid];
	  }
	}
	add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum,step);
	n++;
      }
    }
  }
  
  if (ekind) {
    if(ttt == NULL) 
      snew(ttt,md->nTC);
    for(i=0; (i<md->nTC); i++) {
      ttt[i] = ekind->tcstat[i].T;
    }
    add_ebin(md->ebin,md->itemp,md->nTC,ttt,bSum,step);
    if (etc == etcNOSEHOOVER) {
      for(i=0; (i<md->nTC); i++)
	ttt[i] = state->nosehoover_xi[i];
      add_ebin(md->ebin,md->itc,md->nTC,ttt,bSum,step);
    } else if (etc == etcBERENDSEN || etc == etcYES || etc == etcVRESCALE) {
      for(i=0; (i<md->nTC); i++)
          ttt[i] = ekind->tcstat[i].lambda;
      add_ebin(md->ebin,md->itc,md->nTC,ttt,bSum,step);
    }
  }
  
  if (ekind && md->nU > 1) {
    if (uuu == NULL)
      snew(uuu,md->nU);
    for(i=0; (i<md->nU); i++)
      copy_rvec(ekind->grpstat[i].u,uuu[i]);
    add_ebin(md->ebin,md->iu,3*md->nU,uuu[0],bSum,step);
  }
  if (fp_dgdl)
    fprintf(fp_dgdl,"%.4f %g\n",
	    time,
	    enerd->term[F_DVDL]+enerd->term[F_DKDL]+enerd->term[F_DGDL_CON]);
}