/** * Called after engine initialization */ void OnPostInitialize(void) { CShIdentifier levelIdentifier("memory"); bool loaded = ShLevel::Load(levelIdentifier); SH_ASSERT(loaded); // // Create Camera g_pCamera = ShCamera::Create(GID(global), GID(camera_free), false); SH_ASSERT(NULL != g_pCamera); ShCamera::SetPosition(g_pCamera, CShVector3(0, 0.0f, 1000.0f)); ShCamera::SetTarget(g_pCamera, CShVector3(0.0f, 0.0f, 0.0f)); ShCamera::SetFarPlaneDistance(g_pCamera, 3000.0f); ShCamera::SetViewport(g_pCamera, 256*WIDTH, 256*HEIGHT); ShCamera::SetProjectionOrtho(g_pCamera); ShCamera::SetCurrent2D(g_pCamera); ratio = CShVector2((256*WIDTH)/(float)ShDisplay::GetWidth(), (256*HEIGHT)/(float)ShDisplay::GetHeight()); g_pWinEntity = ShEntity2::Find(levelIdentifier, CShIdentifier("sprite_memory_win_001")); SH_ASSERT(shNULL != g_pWinEntity); ShEntity2::SetShow(g_pWinEntity, false); // // Create all sprites for (int i = 0; i < HEIGHT; ++i) { for (int j = 0; j < WIDTH; ++j) { CShVector3 pos; pos.m_x = (256.0f * j) - (128.0f + (((WIDTH/2)-1) * 256.0f)); pos.m_y = (256.0f * i) - (128.0f + (((HEIGHT/2)-1) * 256.0f)); int c = i*WIDTH+j; aCards[c].pEntityRecto = ShEntity2::Create(levelIdentifier, CShIdentifier(), GID(layer_default), CShIdentifier("memory"), aIdentifier[c/2], pos, CShEulerAngles(0.0f, 0.0f, 0.0f), CShVector3(1.0f, 1.0f, 1.0f)); SH_ASSERT(shNULL != aCards[c].pEntityRecto); aCards[c].pEntityVerso = ShEntity2::Create(levelIdentifier, CShIdentifier(), GID(layer_default), CShIdentifier("memory"), CShIdentifier("verso"), pos, CShEulerAngles(0.0f, 0.0f, 0.0f), CShVector3(1.0f, 1.0f, 1.0f)); SH_ASSERT(shNULL != aCards[c].pEntityVerso); aCards[c].type = c/2; } } for (int i = 0; i < PIECES; ++i) { ShEntity2::SetShow(aCards[i].pEntityRecto, false); } shuffle(); }
/** * Called after engine initialization */ void OnPostInitialize(void) { CShIdentifier levelIdentifier("character_controller"); // this is the level name // // Load level bool loaded = ShLevel::Load(levelIdentifier); SH_ASSERT(loaded); // // Create camera g_pCamera = ShCamera::Create(GID(global), GID(camera_free), false); SH_ASSERT(NULL != g_pCamera); ShCamera::SetPosition(g_pCamera, CShVector3(-300.0f,-1500.0f, 1000.0f)); ShCamera::SetTarget(g_pCamera, CShVector3(0.0f, 0.0f, 100.0f)); ShCamera::SetFarPlaneDistance(g_pCamera, 3000.0f); ShCamera::SetCurrent2D(g_pCamera); ShCamera::SetCurrent3D(g_pCamera); // // Find the character entity g_pCharacter = ShEntity3::Find(levelIdentifier, CShIdentifier("entitypc_warrior")); SH_ASSERT(shNULL != g_pCharacter); // // Initialize the character controller with the level, the identifier, the position, the radius, the direction, the speed. g_pCharacterController = ShCharacterController::Create(levelIdentifier, CShIdentifier("character_controller_character_001"), ShObject::GetPosition2(g_pCharacter), 50.0, g_direction, g_speed); SH_ASSERT(shNULL != g_pCharacterController); // // Create the moving input (arrow up). // Using JustPressed function in order to change each time the button is pressed and not continuously. g_pInputUp = ShInput::CreateInputPressed(ShInput::e_input_device_keyboard, ShInput::e_input_device_control_pc_key_up, 0.1f); SH_ASSERT(shNULL != g_pInputUp); // // Create the rotation inputs (right and left). // Using InputPressed function in order to change until the button is released. g_pInputLeft = ShInput::CreateInputPressed(ShInput::e_input_device_keyboard, ShInput::e_input_device_control_pc_key_left, 0.1f); SH_ASSERT(NULL != g_pInputLeft); g_pInputRight = ShInput::CreateInputPressed(ShInput::e_input_device_keyboard, ShInput::e_input_device_control_pc_key_right, 0.1f); SH_ASSERT(NULL != g_pInputRight); // Find tyhe two animations : warrior idle and warrior run. 
pAnimationWarriorStop = ShAnimation::Find(CShIdentifier("pc_warrior.pc_warrior.idle.01")); SH_ASSERT(NULL != pAnimationWarriorStop); pAnimationWarriorRun = ShAnimation::Find(CShIdentifier("pc_warrior.pc_warrior.run.01")); SH_ASSERT(NULL != pAnimationWarriorRun); // By default, we play the idle animation, allowing it to loop. ShEntity3::AnimationPlay(g_pCharacter, pAnimationWarriorStop, true); }
/**
 * Constructor.
 *
 * Registers the plugin under the identifier "TPS" and puts every member in
 * its empty state: null level identifier, no background, no player, and a
 * scale of zero.
 */
CShPluginGame::CShPluginGame()
	: CShPlugin(CShIdentifier("TPS")),
	  m_levelIdentifier(GID(NULL)),
	  m_pBackground(shNULL),
	  m_pPlayer(shNULL),
	  m_fScale(0.0f)
{
	// Nothing else to do: all state is set by the initializer list.
}
/** * Register the game modes supported by this plugin. */ int G_RegisterGames(int hookType, int param, void* data) { #define CONFIGDIR "doom64" #define STARTUPPK3 PLUGIN_NAMETEXT2 ".pk3" GameDef const doom64Def = { "doom64", CONFIGDIR, "Doom 64", "Midway Software" }; DENG_UNUSED(hookType); DENG_UNUSED(param); DENG_UNUSED(data); gameIds[doom64] = DD_DefineGame(&doom64Def); DD_AddGameResource(GID(doom64), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0); DD_AddGameResource(GID(doom64), RC_PACKAGE, FF_STARTUP, "doom64.wad", "MAP01;MAP020;MAP38;F_SUCK"); DD_AddGameResource(GID(doom64), RC_DEFINITION, 0, PLUGIN_NAMETEXT ".ded", 0); return true; #undef STARTUPPK3 #undef CONFIGDIR }
/** * Called after engine initialization */ void Game::OnPostInitialize(void) { instance(); instance_->m_registeredAction.action = e_action_none; // Create the Camera ShCamera * pCamera = ShCamera::Create(GID(global), GID(camera), false); SH_ASSERT(shNULL != pCamera); ShCamera::SetPosition(pCamera, CShVector3(0.0f, 0.0f, 100.0f)); ShCamera::SetTarget(pCamera, CShVector3(0.0f, 0.0f, 0.0f)); ShCamera::SetUp(pCamera, CShVector3(0.0f, 1.0f, 0.0f)); ShCamera::SetProjectionOrtho(pCamera); ShCamera::SetNearPlaneDistance(pCamera, 0.0f); ShCamera::SetFarPlaneDistance(pCamera, 200.0f); instance_->m_fRescaleRatio = ShDisplay::GetHeight() / (float)ShDisplay::GetWidth(); ShCamera::SetViewport(pCamera, DISPLAY_WIDTH, DISPLAY_WIDTH * instance_->m_fRescaleRatio); ShCamera::SetCurrent2D(pCamera); // Initialize Sound instance_->m_sound.Initialize(); // Initialize Transition instance_->m_transition.Initialize(); // Initialize states instance_->m_stateMainMenu.Initialize(); instance_->m_stateCredits.Initialize(); instance_->m_stateGame.Initialize(); instance_->Push(MENU); }
/*
** Returns the length of the longest group name among the entries of `files`,
** to be used as the padding width of the group column.
**
** files: head of the list to scan (may be NULL, in which case 0 is returned).
**
** getgrgid() returns NULL when the gid has no entry in the group database
** (e.g. a file owned by a deleted group). Such entries are skipped instead of
** being dereferenced; they do not contribute to the padding.
** NOTE(review): if the caller prints the numeric gid for those entries, its
** width is not accounted for here - confirm against the printing code.
*/
static t_size	max_group_len(t_list *files)
{
	t_list	*tmp;
	t_size	padding;
	t_size	len;
	t_grp	*pgroup;

	tmp = files;
	padding = 0;
	while (tmp)
	{
		pgroup = getgrgid(GID(tmp));
		if (pgroup && pgroup->gr_name)
		{
			len = ft_strlen(pgroup->gr_name);
			if (padding < len)
				padding = len;
		}
		tmp = tmp->next;
	}
	return (padding);
}
/* Evaluate listed pair interactions (Lennard-Jones + Coulomb) one pair at a
 * time by calling a non-bonded kernel on a two-atom list.
 *
 * ftype             interaction type; F_LJ14, F_LJC14_Q and F_LJC_PAIRS_NB are
 *                   handled, anything else ends in gmx_fatal
 * nbonds            number of entries in iatoms (three entries are consumed
 *                   per pair: parameter type, atom i, atom j)
 * iatoms, iparams   interaction list and the parameters it indexes
 * x                 coordinates (read only)
 * f, fshift         force and shift-force arrays that are incremented
 * pbc               used by pbc_dx_aiuc when fr->bMolPBC is set
 * g                 optional graph; when non-NULL the shift forces are
 *                   corrected with it
 * lambda, dvdlambda passed to the free-energy kernel
 * md, fr            atom data and force record (read only)
 * grppener          energy-group matrices handed to the kernels
 * global_atom_index used only to print global atom numbers in the warning
 *
 * Pairs whose squared distance is not smaller than the square of the 1-4
 * table range are skipped; a warning is printed to stderr the first time this
 * happens (tracked by a function-static flag).
 *
 * Always returns 0.0.
 */
real
do_listed_vdw_q(int ftype,int nbonds,
                const t_iatom iatoms[],const t_iparams iparams[],
                const rvec x[],rvec f[],rvec fshift[],
                const t_pbc *pbc,const t_graph *g,
                real lambda,real *dvdlambda,
                const t_mdatoms *md,
                const t_forcerec *fr,gmx_grppairener_t *grppener,
                int *global_atom_index)
{
    /* Set once the out-of-range warning has been printed; static, so it
     * persists across calls.
     */
    static    gmx_bool bWarn=FALSE;
    real      eps,r2,*tab,rtab2=0;
    rvec      dx,x14[2],f14[2];
    int       i,ai,aj,itype;
    /* Type indices for states A and B, see the comment in the free-energy
     * branch below.
     */
    int       typeA[2]={0,0},typeB[2]={0,1};
    real      chargeA[2]={0,0},chargeB[2];
    int       gid,shift_vir,shift_f;
    /* The local two-atom "neighbor list": atom 0 interacts with atom 1 */
    int       j_index[] = { 0, 1 };
    int       i0=0,i1=1,i2=2; /* i2 is not referenced in this function */
    ivec      dt;
    int       outeriter,inneriter;
    int       nthreads = 1;
    int       count;
    real      krf,crf,tabscale;
    int       ntype=0;
    real      *nbfp=NULL;
    real      *egnb=NULL,*egcoul=NULL;
    t_nblist  tmplist; /* not referenced in this function */
    int       icoul,ivdw;
    gmx_bool  bMolPBC,bFreeEnergy;
    t_pf_global *pf_global;

    /* The kernel takes a mutex argument; without GMX_THREAD_SHM_FDECOMP a
     * NULL placeholder is used instead.
     */
#if GMX_THREAD_SHM_FDECOMP
    pthread_mutex_t mtx;
#else
    void *    mtx = NULL;
#endif

#if GMX_THREAD_SHM_FDECOMP
    pthread_mutex_initialize(&mtx);
#endif

    bMolPBC = fr->bMolPBC;
    pf_global = fr->pf_global;

    /* Select the Coulomb prefactor and the energy-group matrices that match
     * the interaction type.
     */
    switch (ftype) {
    case F_LJ14:
    case F_LJC14_Q:
        eps = fr->epsfac*fr->fudgeQQ;
        ntype  = 1;
        egnb   = grppener->ener[egLJ14];
        egcoul = grppener->ener[egCOUL14];
        break;
    case F_LJC_PAIRS_NB:
        eps = fr->epsfac;
        ntype  = 1;
        egnb   = grppener->ener[egLJSR];
        egcoul = grppener->ener[egCOULSR];
        break;
    default:
        gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",
                  ftype);
    }
    /* The 1-4 table, its squared range and its scale */
    tab = fr->tab14.tab;
    rtab2 = sqr(fr->tab14.r);
    tabscale = fr->tab14.scale;

    krf = fr->k_rf;
    crf = fr->c_rf;

    /* Determine the values for icoul/ivdw. */
    if (fr->bEwald) {
        icoul = 1;
    }
    else if(fr->bcoultab)
    {
        icoul = 3;
    }
    else if(fr->eeltype == eelRF_NEC)
    {
        icoul = 2;
    }
    else
    {
        icoul = 1;
    }

    if(fr->bvdwtab)
    {
        ivdw = 3;
    }
    else if(fr->bBHAM)
    {
        ivdw = 2;
    }
    else
    {
        ivdw = 1;
    }

    /* We don't do SSE or altivec here, due to large overhead for 4-fold
     * unrolling on short lists
     */

    bFreeEnergy = FALSE;
    /* i is advanced inside the body: three entries of iatoms per pair */
    for(i=0; (i<nbonds); )
    {
        itype = iatoms[i++];
        ai    = iatoms[i++];
        aj    = iatoms[i++];
        gid   = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);

        /* Per-pair charges and LJ parameters */
        switch (ftype) {
        case F_LJ14:
            /* The free-energy kernel is needed when free energy is active
             * and either atom is perturbed or the A and B parameters differ.
             */
            bFreeEnergy =
                (fr->efep != efepNO &&
                 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
                  iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
                  iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
            chargeA[0] = md->chargeA[ai];
            chargeA[1] = md->chargeA[aj];
            nbfp = (real *)&(iparams[itype].lj14.c6A);
            break;
        case F_LJC14_Q:
            /* Here the Coulomb prefactor and the charges come from the
             * interaction parameters, not from the atom data.
             */
            eps = fr->epsfac*iparams[itype].ljc14.fqq;
            chargeA[0] = iparams[itype].ljc14.qi;
            chargeA[1] = iparams[itype].ljc14.qj;
            nbfp = (real *)&(iparams[itype].ljc14.c6);
            break;
        case F_LJC_PAIRS_NB:
            chargeA[0] = iparams[itype].ljcnb.qi;
            chargeA[1] = iparams[itype].ljcnb.qj;
            nbfp = (real *)&(iparams[itype].ljcnb.c6);
            break;
        }

        if (!bMolPBC)
        {
            /* This is a bonded interaction, atoms are in the same box */
            shift_f = CENTRAL;
            r2 = distance2(x[ai],x[aj]);
        }
        else
        {
            /* Apply full periodic boundary conditions */
            shift_f = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
            r2 = norm2(dx);
        }

        if (r2 >= rtab2)
        {
            /* Beyond the table range: warn once, then skip this pair */
            if (!bWarn)
            {
                fprintf(stderr,"Warning: 1-4 interaction between %d and %d "
                        "at distance %.3f which is larger than the 1-4 table size %.3f nm\n",
                        glatnr(global_atom_index,ai),
                        glatnr(global_atom_index,aj),
                        sqrt(r2), sqrt(rtab2));
                fprintf(stderr,"These are ignored for the rest of the simulation\n");
                fprintf(stderr,"This usually means your system is exploding,\n"
                        "if not, you should increase table-extension in your mdp file\n"
                        "or with user tables increase the table size\n");
                bWarn = TRUE;
            }
            if (debug)
                fprintf(debug,"%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
                        x[ai][XX],x[ai][YY],x[ai][ZZ],
                        x[aj][XX],x[aj][YY],x[aj][ZZ],
                        glatnr(global_atom_index,ai),
                        glatnr(global_atom_index,aj),
                        sqrt(r2));
        }
        else
        {
            /* Work on local copies: the two positions go in, the force on
             * the first atom comes out in f14[0].
             */
            copy_rvec(x[ai],x14[0]);
            copy_rvec(x[aj],x14[1]);
            clear_rvec(f14[0]);
            clear_rvec(f14[1]);
#ifdef DEBUG
            fprintf(debug,"LJ14: grp-i=%2d, grp-j=%2d, ngrp=%2d, GID=%d\n",
                    md->cENER[ai],md->cENER[aj],md->nenergrp,gid);
#endif

            outeriter = inneriter = count = 0;
            if (bFreeEnergy)
            {
                chargeB[0] = md->chargeB[ai];
                chargeB[1] = md->chargeB[aj];
                /* We pass &(iparams[itype].lj14.c6A) as LJ parameter matrix
                 * to the innerloops.
                 * Here we use that the LJ-14 parameters are stored in iparams
                 * as c6A,c12A,c6B,c12B, which are referenced correctly
                 * in the innerloops if we assign type combinations 0-0 and 0-1
                 * to atom pair ai-aj in topologies A and B respectively.
                 */
                if(ivdw==2)
                {
                    gmx_fatal(FARGS,"Cannot do free energy Buckingham interactions.");
                }
                count = 0;
                gmx_nb_free_energy_kernel(icoul,
                                          ivdw,
                                          i1,
                                          &i0,
                                          j_index,
                                          &i1,
                                          &shift_f,
                                          fr->shift_vec[0],
                                          fshift[0],
                                          &gid,
                                          x14[0],
                                          f14[0],
                                          chargeA,
                                          chargeB,
                                          eps,
                                          krf,
                                          crf,
                                          fr->ewaldcoeff,
                                          egcoul,
                                          typeA,
                                          typeB,
                                          ntype,
                                          nbfp,
                                          egnb,
                                          tabscale,
                                          tab,
                                          lambda,
                                          dvdlambda,
                                          fr->sc_alpha,
                                          fr->sc_power,
                                          fr->sc_sigma6_def,
                                          fr->sc_sigma6_min,
                                          TRUE,
                                          &outeriter,
                                          &inneriter);
            }
            else
            {
                /* Not perturbed - call kernel 330 */
                nb_kernel330
                    ( &i1,
                      &i0,
                      j_index,
                      &i1,
                      &shift_f,
                      fr->shift_vec[0],
                      fshift[0],
                      &gid,
                      x14[0],
                      f14[0],
                      chargeA,
                      &eps,
                      &krf,
                      &crf,
                      egcoul,
                      typeA,
                      &ntype,
                      nbfp,
                      egnb,
                      &tabscale,
                      tab,
                      NULL,
                      NULL,
                      NULL,
                      NULL,
                      &nthreads,
                      &count,
                      (void *)&mtx,
                      &outeriter,
                      &inneriter,
                      NULL);
            }

            /* Add the forces: f14[0] acts on ai, its opposite on aj */
            rvec_inc(f[ai],f14[0]);
            rvec_dec(f[aj],f14[0]);

            /* Hand the pair force to the pf_* bookkeeping when it is active */
            if (pf_global->bInitialized)
                pf_atom_add_bonded(pf_global, ai, aj, PF_INTER_NB14, f14[0]);

            if (g)
            {
                /* Correct the shift forces using the graph */
                ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
                shift_vir = IVEC2IS(dt);
                rvec_inc(fshift[shift_vir],f14[0]);
                rvec_dec(fshift[CENTRAL],f14[0]);
            }

            /* flops: eNR_KERNEL_OUTER + eNR_KERNEL330 + 12 */
        }
    }
    /* No value is accumulated locally, so the return value is always zero */
    return 0.0;
}
/** * Register the game modes supported by this plugin. */ int G_RegisterGames(int hookType, int param, void* data) { #define CONFIGDIR "hexen" #define STARTUPPK3 PLUGIN_NAMETEXT2 ".pk3" GameDef const deathkingsDef = { "hexen-dk", CONFIGDIR, "Hexen: Deathkings of the Dark Citadel", "Raven Software" }; GameDef const hexenDef = { "hexen", CONFIGDIR, "Hexen", "Raven Software" }; GameDef const hexenDemoDef = { "hexen-demo", CONFIGDIR, "Hexen 4-map Demo", "Raven Software" }; GameDef const hexenBetaDemoDef = { "hexen-betademo", CONFIGDIR, "Hexen 4-map Beta Demo", "Raven Software" }; GameDef const hexenV10Def = { "hexen-v10", CONFIGDIR, "Hexen v1.0", "Raven Software" }; DENG_UNUSED(hookType); DENG_UNUSED(param); DENG_UNUSED(data); /* Hexen (Death Kings) */ gameIds[hexen_deathkings] = DD_DefineGame(&deathkingsDef); DD_AddGameResource(GID(hexen_deathkings), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0); DD_AddGameResource(GID(hexen_deathkings), RC_PACKAGE, FF_STARTUP, "hexdd.wad", "MAP59;MAP60"); DD_AddGameResource(GID(hexen_deathkings), RC_PACKAGE, FF_STARTUP, "hexen.wad", "MAP08;MAP22;TINTTAB;FOGMAP;TRANTBLA;DARTA1;ARTIPORK;SKYFOG;TALLYTOP;GROVER"); DD_AddGameResource(GID(hexen_deathkings), RC_DEFINITION, 0, "hexen-dk.ded", 0); /* Hexen */ gameIds[hexen] = DD_DefineGame(&hexenDef); DD_AddGameResource(GID(hexen), RC_PACKAGE, FF_STARTUP, "hexen.wad", "MAP08;MAP22;TINTTAB;FOGMAP;TRANTBLA;DARTA1;ARTIPORK;SKYFOG;TALLYTOP;GROVER"); DD_AddGameResource(GID(hexen), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0); DD_AddGameResource(GID(hexen), RC_DEFINITION, 0, "hexen.ded", 0); /* Hexen (v1.0) */ gameIds[hexen_v10] = DD_DefineGame(&hexenV10Def); DD_AddGameResource(GID(hexen_v10), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0); DD_AddGameResource(GID(hexen_v10), RC_PACKAGE, FF_STARTUP, "hexen.wad", "MAP08;MAP22;MAP41;TINTTAB;FOGMAP;DARTA1;ARTIPORK;SKYFOG;GROVER"); DD_AddGameResource(GID(hexen_v10), RC_DEFINITION, 0, "hexen-v10.ded", 0); /* Hexen (Demo) */ gameIds[hexen_demo] = 
DD_DefineGame(&hexenDemoDef); DD_AddGameResource(GID(hexen_demo), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0); DD_AddGameResource(GID(hexen_demo), RC_PACKAGE, FF_STARTUP, "hexendemo.wad;machexendemo.wad;hexen.wad", "MAP01;MAP04;TINTTAB;FOGMAP;DARTA1;ARTIPORK;DEMO3==18150"); DD_AddGameResource(GID(hexen_demo), RC_DEFINITION, 0, "hexen-demo.ded", 0); /* Hexen (Beta Demo) */ gameIds[hexen_betademo] = DD_DefineGame(&hexenBetaDemoDef); DD_AddGameResource(GID(hexen_betademo), RC_PACKAGE, FF_STARTUP, STARTUPPK3, 0); DD_AddGameResource(GID(hexen_betademo), RC_PACKAGE, FF_STARTUP, "hexendemo.wad;machexendemo.wad;hexenbeta.wad;hexen.wad", "MAP01;MAP04;TINTTAB;FOGMAP;DARTA1;ARTIPORK;AFLYA0;DEMO3==13866"); DD_AddGameResource(GID(hexen_betademo), RC_DEFINITION, 0, "hexen-demo.ded", 0); return true; #undef STARTUPPK3 #undef CONFIGDIR }
double do_tpi(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, int gmx_unused nstglobalcomm, gmx_vsite_t gmx_unused *vsite, gmx_constr_t gmx_unused constr, int gmx_unused stepout, t_inputrec *inputrec, gmx_mtop_t *top_global, t_fcdata *fcd, t_state *state, t_mdatoms *mdatoms, t_nrnb *nrnb, gmx_wallcycle_t wcycle, gmx_edsam_t gmx_unused ed, t_forcerec *fr, int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, gmx_membed_t gmx_unused membed, real gmx_unused cpt_period, real gmx_unused max_hours, const char gmx_unused *deviceOptions, int gmx_unused imdport, unsigned long gmx_unused Flags, gmx_walltime_accounting_t walltime_accounting) { const char *TPI = "Test Particle Insertion"; gmx_localtop_t *top; gmx_groups_t *groups; gmx_enerdata_t *enerd; rvec *f; real lambda, t, temp, beta, drmax, epot; double embU, sum_embU, *sum_UgembU, V, V_all, VembU_all; t_trxstatus *status; t_trxframe rerun_fr; gmx_bool bDispCorr, bCharge, bRFExcl, bNotLastFrame, bStateChanged, bNS; tensor force_vir, shake_vir, vir, pres; int cg_tp, a_tp0, a_tp1, ngid, gid_tp, nener, e; rvec *x_mol; rvec mu_tot, x_init, dx, x_tp; int nnodes, frame; gmx_int64_t frame_step_prev, frame_step; gmx_int64_t nsteps, stepblocksize = 0, step; gmx_int64_t rnd_count_stride, rnd_count; gmx_int64_t seed; double rnd[4]; int i, start, end; FILE *fp_tpi = NULL; char *ptr, *dump_pdb, **leg, str[STRLEN], str2[STRLEN]; double dbl, dump_ener; gmx_bool bCavity; int nat_cavity = 0, d; real *mass_cavity = NULL, mass_tot; int nbin; double invbinw, *bin, refvolshift, logV, bUlogV; real dvdl, prescorr, enercorr, dvdlcorr; gmx_bool bEnergyOutOfBounds; const char *tpid_leg[2] = {"direct", "reweighted"}; /* Since there is no upper limit to the insertion energies, * we need to set an upper limit for the distribution output. 
*/ real bU_bin_limit = 50; real bU_logV_bin_limit = bU_bin_limit + 10; nnodes = cr->nnodes; top = gmx_mtop_generate_local_top(top_global, inputrec); groups = &top_global->groups; bCavity = (inputrec->eI == eiTPIC); if (bCavity) { ptr = getenv("GMX_TPIC_MASSES"); if (ptr == NULL) { nat_cavity = 1; } else { /* Read (multiple) masses from env var GMX_TPIC_MASSES, * The center of mass of the last atoms is then used for TPIC. */ nat_cavity = 0; while (sscanf(ptr, "%lf%n", &dbl, &i) > 0) { srenew(mass_cavity, nat_cavity+1); mass_cavity[nat_cavity] = dbl; fprintf(fplog, "mass[%d] = %f\n", nat_cavity+1, mass_cavity[nat_cavity]); nat_cavity++; ptr += i; } if (nat_cavity == 0) { gmx_fatal(FARGS, "Found %d masses in GMX_TPIC_MASSES", nat_cavity); } } } /* init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot, state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/ /* We never need full pbc for TPI */ fr->ePBC = epbcXYZ; /* Determine the temperature for the Boltzmann weighting */ temp = inputrec->opts.ref_t[0]; if (fplog) { for (i = 1; (i < inputrec->opts.ngtc); i++) { if (inputrec->opts.ref_t[i] != temp) { fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); } } fprintf(fplog, "\n The temperature for test particle insertion is %.3f K\n\n", temp); } beta = 1.0/(BOLTZ*temp); /* Number of insertions per frame */ nsteps = inputrec->nsteps; /* Use the same neighborlist with more insertions points * in a sphere of radius drmax around the initial point */ /* This should be a proper mdp parameter */ drmax = inputrec->rtpi; /* An environment variable can be set to dump all configurations * to pdb with an insertion energy <= this value. 
*/ dump_pdb = getenv("GMX_TPI_DUMP"); dump_ener = 0; if (dump_pdb) { sscanf(dump_pdb, "%lf", &dump_ener); } atoms2md(top_global, inputrec, 0, NULL, top_global->natoms, mdatoms); update_mdatoms(mdatoms, inputrec->fepvals->init_lambda); snew(enerd, 1); init_enerdata(groups->grps[egcENER].nr, inputrec->fepvals->n_lambda, enerd); snew(f, top_global->natoms); /* Print to log file */ walltime_accounting_start(walltime_accounting); wallcycle_start(wcycle, ewcRUN); print_start(fplog, cr, walltime_accounting, "Test Particle Insertion"); /* The last charge group is the group to be inserted */ cg_tp = top->cgs.nr - 1; a_tp0 = top->cgs.index[cg_tp]; a_tp1 = top->cgs.index[cg_tp+1]; if (debug) { fprintf(debug, "TPI cg %d, atoms %d-%d\n", cg_tp, a_tp0, a_tp1); } if (a_tp1 - a_tp0 > 1 && (inputrec->rlist < inputrec->rcoulomb || inputrec->rlist < inputrec->rvdw)) { gmx_fatal(FARGS, "Can not do TPI for multi-atom molecule with a twin-range cut-off"); } snew(x_mol, a_tp1-a_tp0); bDispCorr = (inputrec->eDispCorr != edispcNO); bCharge = FALSE; for (i = a_tp0; i < a_tp1; i++) { /* Copy the coordinates of the molecule to be insterted */ copy_rvec(state->x[i], x_mol[i-a_tp0]); /* Check if we need to print electrostatic energies */ bCharge |= (mdatoms->chargeA[i] != 0 || (mdatoms->chargeB && mdatoms->chargeB[i] != 0)); } bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype != eelRF_NEC); calc_cgcm(fplog, cg_tp, cg_tp+1, &(top->cgs), state->x, fr->cg_cm); if (bCavity) { if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) { fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); } } else { /* Center the molecule to be inserted at zero */ for (i = 0; i < a_tp1-a_tp0; i++) { rvec_dec(x_mol[i], fr->cg_cm[cg_tp]); } } if (fplog) { fprintf(fplog, "\nWill insert %d atoms %s partial charges\n", a_tp1-a_tp0, bCharge ? 
"with" : "without"); fprintf(fplog, "\nWill insert %d times in each frame of %s\n", (int)nsteps, opt2fn("-rerun", nfile, fnm)); } if (!bCavity) { if (inputrec->nstlist > 1) { if (drmax == 0 && a_tp1-a_tp0 == 1) { gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax); } if (fplog) { fprintf(fplog, "Will use the same neighborlist for %d insertions in a sphere of radius %f\n", inputrec->nstlist, drmax); } } } else { if (fplog) { fprintf(fplog, "Will insert randomly in a sphere of radius %f around the center of the cavity\n", drmax); } } ngid = groups->grps[egcENER].nr; gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]); nener = 1 + ngid; if (bDispCorr) { nener += 1; } if (bCharge) { nener += ngid; if (bRFExcl) { nener += 1; } if (EEL_FULL(fr->eeltype)) { nener += 1; } } snew(sum_UgembU, nener); /* Copy the random seed set by the user */ seed = inputrec->ld_seed; /* We use the frame step number as one random counter. * The second counter use the insertion (step) count. But we * need multiple random numbers per insertion. This number is * not fixed, since we generate random locations in a sphere * by putting locations in a cube and some of these fail. * A count of 20 is already extremely unlikely, so 10000 is * a safe margin for random numbers per insertion. */ rnd_count_stride = 10000; if (MASTER(cr)) { fp_tpi = xvgropen(opt2fn("-tpi", nfile, fnm), "TPI energies", "Time (ps)", "(kJ mol\\S-1\\N) / (nm\\S3\\N)", oenv); xvgr_subtitle(fp_tpi, "f. are averages over one frame", oenv); snew(leg, 4+nener); e = 0; sprintf(str, "-kT log(<Ve\\S-\\betaU\\N>/<V>)"); leg[e++] = strdup(str); sprintf(str, "f. -kT log<e\\S-\\betaU\\N>"); leg[e++] = strdup(str); sprintf(str, "f. <e\\S-\\betaU\\N>"); leg[e++] = strdup(str); sprintf(str, "f. V"); leg[e++] = strdup(str); sprintf(str, "f. <Ue\\S-\\betaU\\N>"); leg[e++] = strdup(str); for (i = 0; i < ngid; i++) { sprintf(str, "f. 
<U\\sVdW %s\\Ne\\S-\\betaU\\N>", *(groups->grpname[groups->grps[egcENER].nm_ind[i]])); leg[e++] = strdup(str); } if (bDispCorr) { sprintf(str, "f. <U\\sdisp c\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } if (bCharge) { for (i = 0; i < ngid; i++) { sprintf(str, "f. <U\\sCoul %s\\Ne\\S-\\betaU\\N>", *(groups->grpname[groups->grps[egcENER].nm_ind[i]])); leg[e++] = strdup(str); } if (bRFExcl) { sprintf(str, "f. <U\\sRF excl\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } if (EEL_FULL(fr->eeltype)) { sprintf(str, "f. <U\\sCoul recip\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } } xvgr_legend(fp_tpi, 4+nener, (const char**)leg, oenv); for (i = 0; i < 4+nener; i++) { sfree(leg[i]); } sfree(leg); } clear_rvec(x_init); V_all = 0; VembU_all = 0; invbinw = 10; nbin = 10; snew(bin, nbin); /* Avoid frame step numbers <= -1 */ frame_step_prev = -1; bNotLastFrame = read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm), &rerun_fr, TRX_NEED_X); frame = 0; if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) != mdatoms->nr - (a_tp1 - a_tp0)) { gmx_fatal(FARGS, "Number of atoms in trajectory (%d)%s " "is not equal the number in the run input file (%d) " "minus the number of atoms to insert (%d)\n", rerun_fr.natoms, bCavity ? " minus one" : "", mdatoms->nr, a_tp1-a_tp0); } refvolshift = log(det(rerun_fr.box)); switch (inputrec->eI) { case eiTPI: stepblocksize = inputrec->nstlist; break; case eiTPIC: stepblocksize = 1; break; default: gmx_fatal(FARGS, "Unknown integrator %s", ei_names[inputrec->eI]); } #ifdef GMX_SIMD /* Make sure we don't detect SIMD overflow generated before this point */ gmx_simd_check_and_reset_overflow(); #endif while (bNotLastFrame) { frame_step = rerun_fr.step; if (frame_step <= frame_step_prev) { /* We don't have step number in the trajectory file, * or we have constant or decreasing step numbers. * Ensure we have increasing step numbers, since we use * the step numbers as a counter for random numbers. 
*/ frame_step = frame_step_prev + 1; } frame_step_prev = frame_step; lambda = rerun_fr.lambda; t = rerun_fr.time; sum_embU = 0; for (e = 0; e < nener; e++) { sum_UgembU[e] = 0; } /* Copy the coordinates from the input trajectory */ for (i = 0; i < rerun_fr.natoms; i++) { copy_rvec(rerun_fr.x[i], state->x[i]); } copy_mat(rerun_fr.box, state->box); V = det(state->box); logV = log(V); bStateChanged = TRUE; bNS = TRUE; step = cr->nodeid*stepblocksize; while (step < nsteps) { /* Initialize the second counter for random numbers using * the insertion step index. This ensures that we get * the same random numbers independently of how many * MPI ranks we use. Also for the same seed, we get * the same initial random sequence for different nsteps. */ rnd_count = step*rnd_count_stride; if (!bCavity) { /* Random insertion in the whole volume */ bNS = (step % inputrec->nstlist == 0); if (bNS) { /* Generate a random position in the box */ gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { x_init[d] = rnd[d]*state->box[d][d]; } } if (inputrec->nstlist == 1) { copy_rvec(x_init, x_tp); } else { /* Generate coordinates within |dx|=drmax of x_init */ do { gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { dx[d] = (2*rnd[d] - 1)*drmax; } } while (norm2(dx) > drmax*drmax); rvec_add(x_init, dx, x_tp); } } else { /* Random insertion around a cavity location * given by the last coordinate of the trajectory. 
*/ if (step == 0) { if (nat_cavity == 1) { /* Copy the location of the cavity */ copy_rvec(rerun_fr.x[rerun_fr.natoms-1], x_init); } else { /* Determine the center of mass of the last molecule */ clear_rvec(x_init); mass_tot = 0; for (i = 0; i < nat_cavity; i++) { for (d = 0; d < DIM; d++) { x_init[d] += mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d]; } mass_tot += mass_cavity[i]; } for (d = 0; d < DIM; d++) { x_init[d] /= mass_tot; } } } /* Generate coordinates within |dx|=drmax of x_init */ do { gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { dx[d] = (2*rnd[d] - 1)*drmax; } } while (norm2(dx) > drmax*drmax); rvec_add(x_init, dx, x_tp); } if (a_tp1 - a_tp0 == 1) { /* Insert a single atom, just copy the insertion location */ copy_rvec(x_tp, state->x[a_tp0]); } else { /* Copy the coordinates from the top file */ for (i = a_tp0; i < a_tp1; i++) { copy_rvec(x_mol[i-a_tp0], state->x[i]); } /* Rotate the molecule randomly */ gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); rotate_conf(a_tp1-a_tp0, state->x+a_tp0, NULL, 2*M_PI*rnd[0], 2*M_PI*rnd[1], 2*M_PI*rnd[2]); /* Shift to the insertion location */ for (i = a_tp0; i < a_tp1; i++) { rvec_inc(state->x[i], x_tp); } } /* Clear some matrix variables */ clear_mat(force_vir); clear_mat(shake_vir); clear_mat(vir); clear_mat(pres); /* Set the charge group center of mass of the test particle */ copy_rvec(x_init, fr->cg_cm[top->cgs.nr-1]); /* Calc energy (no forces) on new positions. * Since we only need the intermolecular energy * and the RF exclusion terms of the inserted molecule occur * within a single charge group we can pass NULL for the graph. * This also avoids shifts that would move charge groups * out of the box. 
* * Some checks above ensure than we can not have * twin-range interactions together with nstlist > 1, * therefore we do not need to remember the LR energies. */ /* Make do_force do a single node force calculation */ cr->nnodes = 1; do_force(fplog, cr, inputrec, step, nrnb, wcycle, top, &top_global->groups, state->box, state->x, &state->hist, f, force_vir, mdatoms, enerd, fcd, state->lambda, NULL, fr, NULL, mu_tot, t, NULL, NULL, FALSE, GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY | (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) | (bStateChanged ? GMX_FORCE_STATECHANGED : 0)); cr->nnodes = nnodes; bStateChanged = FALSE; bNS = FALSE; /* Calculate long range corrections to pressure and energy */ calc_dispcorr(fplog, inputrec, fr, step, top_global->natoms, state->box, lambda, pres, vir, &prescorr, &enercorr, &dvdlcorr); /* figure out how to rearrange the next 4 lines MRS 8/4/2009 */ enerd->term[F_DISPCORR] = enercorr; enerd->term[F_EPOT] += enercorr; enerd->term[F_PRES] += prescorr; enerd->term[F_DVDL_VDW] += dvdlcorr; epot = enerd->term[F_EPOT]; bEnergyOutOfBounds = FALSE; #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER /* With SSE the energy can overflow, check for this */ if (gmx_mm_check_and_reset_overflow()) { if (debug) { fprintf(debug, "Found an SSE overflow, assuming the energy is out of bounds\n"); } bEnergyOutOfBounds = TRUE; } #endif /* If the compiler doesn't optimize this check away * we catch the NAN energies. * The epot>GMX_REAL_MAX check catches inf values, * which should nicely result in embU=0 through the exp below, * but it does not hurt to check anyhow. */ /* Non-bonded Interaction usually diverge at r=0. * With tabulated interaction functions the first few entries * should be capped in a consistent fashion between * repulsion, dispersion and Coulomb to avoid accidental * negative values in the total energy. * The table generation code in tables.c does this. * With user tbales the user should take care of this. 
*/ if (epot != epot || epot > GMX_REAL_MAX) { bEnergyOutOfBounds = TRUE; } if (bEnergyOutOfBounds) { if (debug) { fprintf(debug, "\n time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n", t, (int)step, epot); } embU = 0; } else { embU = exp(-beta*epot); sum_embU += embU; /* Determine the weighted energy contributions of each energy group */ e = 0; sum_UgembU[e++] += epot*embU; if (fr->bBHAM) { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egBHAMSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egBHAMLR][GID(i, gid_tp, ngid)])*embU; } } else { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egLJSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egLJLR][GID(i, gid_tp, ngid)])*embU; } } if (bDispCorr) { sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU; } if (bCharge) { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egCOULSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egCOULLR][GID(i, gid_tp, ngid)])*embU; } if (bRFExcl) { sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU; } if (EEL_FULL(fr->eeltype)) { sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU; } } } if (embU == 0 || beta*epot > bU_bin_limit) { bin[0]++; } else { i = (int)((bU_logV_bin_limit - (beta*epot - logV + refvolshift))*invbinw + 0.5); if (i < 0) { i = 0; } if (i >= nbin) { realloc_bins(&bin, &nbin, i+10); } bin[i]++; } if (debug) { fprintf(debug, "TPI %7d %12.5e %12.5f %12.5f %12.5f\n", (int)step, epot, x_tp[XX], x_tp[YY], x_tp[ZZ]); } if (dump_pdb && epot <= dump_ener) { sprintf(str, "t%g_step%d.pdb", t, (int)step); sprintf(str2, "t: %f step %d ener: %f", t, (int)step, epot); write_sto_conf_mtop(str, str2, top_global, state->x, state->v, inputrec->ePBC, state->box); } step++; if ((step/stepblocksize) % cr->nnodes != cr->nodeid) { /* Skip all steps assigned to the other MPI ranks */ step += (cr->nnodes - 1)*stepblocksize; } } if (PAR(cr)) { /* When running in parallel sum the energies over the processes */ gmx_sumd(1, &sum_embU, cr); gmx_sumd(nener, 
sum_UgembU, cr); } frame++; V_all += V; VembU_all += V*sum_embU/nsteps; if (fp_tpi) { if (bVerbose || frame%10 == 0 || frame < 10) { fprintf(stderr, "mu %10.3e <mu> %10.3e\n", -log(sum_embU/nsteps)/beta, -log(VembU_all/V_all)/beta); } fprintf(fp_tpi, "%10.3f %12.5e %12.5e %12.5e %12.5e", t, VembU_all == 0 ? 20/beta : -log(VembU_all/V_all)/beta, sum_embU == 0 ? 20/beta : -log(sum_embU/nsteps)/beta, sum_embU/nsteps, V); for (e = 0; e < nener; e++) { fprintf(fp_tpi, " %12.5e", sum_UgembU[e]/nsteps); } fprintf(fp_tpi, "\n"); fflush(fp_tpi); } bNotLastFrame = read_next_frame(oenv, status, &rerun_fr); } /* End of the loop */ walltime_accounting_end(walltime_accounting); close_trj(status); if (fp_tpi != NULL) { gmx_fio_fclose(fp_tpi); } if (fplog != NULL) { fprintf(fplog, "\n"); fprintf(fplog, " <V> = %12.5e nm^3\n", V_all/frame); fprintf(fplog, " <mu> = %12.5e kJ/mol\n", -log(VembU_all/V_all)/beta); } /* Write the Boltzmann factor histogram */ if (PAR(cr)) { /* When running in parallel sum the bins over the processes */ i = nbin; global_max(cr, &i); realloc_bins(&bin, &nbin, i); gmx_sumd(nbin, bin, cr); } if (MASTER(cr)) { fp_tpi = xvgropen(opt2fn("-tpid", nfile, fnm), "TPI energy distribution", "\\betaU - log(V/<V>)", "count", oenv); sprintf(str, "number \\betaU > %g: %9.3e", bU_bin_limit, bin[0]); xvgr_subtitle(fp_tpi, str, oenv); xvgr_legend(fp_tpi, 2, (const char **)tpid_leg, oenv); for (i = nbin-1; i > 0; i--) { bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame); fprintf(fp_tpi, "%6.2f %10d %12.5e\n", bUlogV, (int)(bin[i]+0.5), bin[i]*exp(-bUlogV)*V_all/VembU_all); } gmx_fio_fclose(fp_tpi); } sfree(bin); sfree(sum_UgembU); walltime_accounting_set_nsteps_done(walltime_accounting, frame*inputrec->nsteps); return 0; }
/**
 * Release.
 *
 * Resets the stored level identifier to the null identifier.
 *
 * @return Always true.
 */
bool CShPluginGame::Release()
{
	// Forget the level this plugin was bound to.
	m_levelIdentifier = GID(NULL);

	return true;
}
void upd_mdebin(t_mdebin *md,FILE *fp_dgdl, real tmass,int step,real time, real ener[], matrix box, tensor svir, tensor fvir, tensor vir, tensor pres, t_groups *grps, rvec mu_tot, bool bNoseHoover) { static real *ttt=NULL; static rvec *uuu=NULL; int i,j,k,kk,m,n,gid; real bs[NBOXS]; real tricl_bs[NTRICLBOXS]; real eee[egNR]; real ecopy[F_NRE]; real tmp; copy_energy(ener,ecopy); add_ebin(md->ebin,md->ie,f_nre,ecopy,step); if (bPC || fabs(grps->cosacc.cos_accel)>GMX_REAL_MIN) { if(bTricl) { tricl_bs[0]=box[XX][XX]; tricl_bs[1]=box[YY][XX]; tricl_bs[2]=box[YY][YY]; tricl_bs[3]=box[ZZ][XX]; tricl_bs[4]=box[ZZ][YY]; tricl_bs[5]=box[ZZ][ZZ]; /* This is the volume */ tricl_bs[6]=tricl_bs[0]*tricl_bs[2]*tricl_bs[5]; /* This is the density */ tricl_bs[7] = (tmass*AMU)/(tricl_bs[6]*NANO*NANO*NANO); } else { for(m=0; (m<DIM); m++) bs[m]=box[m][m]; /* This is the volume */ bs[3] = bs[XX]*bs[YY]*bs[ZZ]; /* This is the density */ bs[4] = (tmass*AMU)/(bs[3]*NANO*NANO*NANO); } } if (bPC) { /* This is pV (in kJ/mol) */ if(bTricl) { tricl_bs[8] = tricl_bs[6]*ener[F_PRES]/PRESFAC; add_ebin(md->ebin,md->ib,NTRICLBOXS,tricl_bs,step); } else { bs[5] = bs[3]*ener[F_PRES]/PRESFAC; add_ebin(md->ebin,md->ib,NBOXS,bs,step); } } if (bShake) { add_ebin(md->ebin,md->isvir,9,svir[0],step); add_ebin(md->ebin,md->ifvir,9,fvir[0],step); } add_ebin(md->ebin,md->ivir,9,vir[0],step); add_ebin(md->ebin,md->ipres,9,pres[0],step); tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ]; add_ebin(md->ebin,md->isurft,1,&tmp,step); add_ebin(md->ebin,md->imu,3,mu_tot,step); if (fabs(grps->cosacc.cos_accel)>GMX_REAL_MIN) { add_ebin(md->ebin,md->ivcos,1,&(grps->cosacc.vcos),step); /* 1/viscosity, unit 1/(kg m^-1 s^-1) */ if(bTricl) tmp = 1/(grps->cosacc.cos_accel/(grps->cosacc.vcos*PICO) *tricl_bs[7]*sqr(box[ZZ][ZZ]*NANO/(2*M_PI))); else tmp = 1/(grps->cosacc.cos_accel/(grps->cosacc.vcos*PICO) *bs[4]*sqr(box[ZZ][ZZ]*NANO/(2*M_PI))); add_ebin(md->ebin,md->ivisc,1,&tmp,step); } if (md->nE > 1) { n=0; 
for(i=0; (i<md->nEg); i++) { for(j=i; (j<md->nEg); j++) { gid=GID(i,j,md->nEg); for(k=kk=0; (k<egNR); k++) if (bEInd[k]) eee[kk++]=grps->estat.ee[k][gid]; add_ebin(md->ebin,md->igrp[n],md->nEc,eee,step); n++; } } } if(ttt == NULL) snew(ttt,2*md->nTC); for(i=0; (i<md->nTC); i++) { ttt[2*i] = grps->tcstat[i].T; if(bNoseHoover) ttt[2*i+1] = grps->tcstat[i].xi; else ttt[2*i+1] = grps->tcstat[i].lambda; } add_ebin(md->ebin,md->itc,2*md->nTC,ttt,step); if (md->nU > 1) { if (uuu == NULL) snew(uuu,md->nU); for(i=0; (i<md->nU); i++) copy_rvec(grps->grpstat[i].u,uuu[i]); add_ebin(md->ebin,md->iu,3*md->nU,uuu[0],step); } if (fp_dgdl) fprintf(fp_dgdl,"%g %g\n",time,ener[F_DVDL]+ener[F_DVDLKIN]); }
void upd_mdebin(t_mdebin *md, gmx_bool write_dhdl, gmx_bool bSum, double time, real tmass, gmx_enerdata_t *enerd, t_state *state, matrix box, tensor svir, tensor fvir, tensor vir, tensor pres, gmx_ekindata_t *ekind, rvec mu_tot, gmx_constr_t constr) { int i,j,k,kk,m,n,gid; real crmsd[2],tmp6[6]; real bs[NTRICLBOXS],vol,dens,pv,enthalpy; real eee[egNR]; real ecopy[F_NRE]; real tmp; gmx_bool bNoseHoover; /* Do NOT use the box in the state variable, but the separate box provided * as an argument. This is because we sometimes need to write the box from * the last timestep to match the trajectory frames. */ copy_energy(md, enerd->term,ecopy); add_ebin(md->ebin,md->ie,md->f_nre,ecopy,bSum); if (md->nCrmsd) { crmsd[0] = constr_rmsd(constr,FALSE); if (md->nCrmsd > 1) { crmsd[1] = constr_rmsd(constr,TRUE); } add_ebin(md->ebin,md->iconrmsd,md->nCrmsd,crmsd,FALSE); } if (md->bDynBox) { int nboxs; if(md->bTricl) { bs[0] = box[XX][XX]; bs[1] = box[YY][YY]; bs[2] = box[ZZ][ZZ]; bs[3] = box[YY][XX]; bs[4] = box[ZZ][XX]; bs[5] = box[ZZ][YY]; nboxs=NTRICLBOXS; } else { bs[0] = box[XX][XX]; bs[1] = box[YY][YY]; bs[2] = box[ZZ][ZZ]; nboxs=NBOXS; } vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]; dens = (tmass*AMU)/(vol*NANO*NANO*NANO); /* This is pV (in kJ/mol). 
The pressure is the reference pressure, not the instantaneous pressure */ pv = 0; for (i=0;i<DIM;i++) { for (j=0;j<DIM;j++) { if (i>j) { pv += box[i][j]*md->ref_p[i][j]/PRESFAC; } else { pv += box[j][i]*md->ref_p[j][i]/PRESFAC; } } } add_ebin(md->ebin,md->ib ,nboxs,bs ,bSum); add_ebin(md->ebin,md->ivol ,1 ,&vol ,bSum); add_ebin(md->ebin,md->idens,1 ,&dens,bSum); add_ebin(md->ebin,md->ipv ,1 ,&pv ,bSum); enthalpy = pv + enerd->term[F_ETOT]; add_ebin(md->ebin,md->ienthalpy ,1 ,&enthalpy ,bSum); } if (md->bConstrVir) { add_ebin(md->ebin,md->isvir,9,svir[0],bSum); add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum); } add_ebin(md->ebin,md->ivir,9,vir[0],bSum); add_ebin(md->ebin,md->ipres,9,pres[0],bSum); tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ]; add_ebin(md->ebin,md->isurft,1,&tmp,bSum); if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK) { tmp6[0] = state->boxv[XX][XX]; tmp6[1] = state->boxv[YY][YY]; tmp6[2] = state->boxv[ZZ][ZZ]; tmp6[3] = state->boxv[YY][XX]; tmp6[4] = state->boxv[ZZ][XX]; tmp6[5] = state->boxv[ZZ][YY]; add_ebin(md->ebin,md->ipc,md->bTricl ? 
6 : 3,tmp6,bSum); } add_ebin(md->ebin,md->imu,3,mu_tot,bSum); if (ekind && ekind->cosacc.cos_accel != 0) { vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]; dens = (tmass*AMU)/(vol*NANO*NANO*NANO); add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum); /* 1/viscosity, unit 1/(kg m^-1 s^-1) */ tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO) *vol*sqr(box[ZZ][ZZ]*NANO/(2*M_PI))); add_ebin(md->ebin,md->ivisc,1,&tmp,bSum); } if (md->nE > 1) { n=0; for(i=0; (i<md->nEg); i++) { for(j=i; (j<md->nEg); j++) { gid=GID(i,j,md->nEg); for(k=kk=0; (k<egNR); k++) { if (md->bEInd[k]) { eee[kk++] = enerd->grpp.ener[k][gid]; } } add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum); n++; } } } if (ekind) { for(i=0; (i<md->nTC); i++) { md->tmp_r[i] = ekind->tcstat[i].T; } add_ebin(md->ebin,md->itemp,md->nTC,md->tmp_r,bSum); /* whether to print Nose-Hoover chains: */ bNoseHoover = (getenv("GMX_NOSEHOOVER_CHAINS") != NULL); if (md->etc == etcNOSEHOOVER) { if (bNoseHoover) { if (md->bNHC_trotter) { for(i=0; (i<md->nTC); i++) { for (j=0;j<md->nNHC;j++) { k = i*md->nNHC+j; md->tmp_r[2*k] = state->nosehoover_xi[k]; md->tmp_r[2*k+1] = state->nosehoover_vxi[k]; } } add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum); if (md->bMTTK) { for(i=0; (i<md->nTCP); i++) { for (j=0;j<md->nNHC;j++) { k = i*md->nNHC+j; md->tmp_r[2*k] = state->nhpres_xi[k]; md->tmp_r[2*k+1] = state->nhpres_vxi[k]; } } add_ebin(md->ebin,md->itcb,md->mdeb_n,md->tmp_r,bSum); } } else { for(i=0; (i<md->nTC); i++) { md->tmp_r[2*i] = state->nosehoover_xi[i]; md->tmp_r[2*i+1] = state->nosehoover_vxi[i]; } add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum); } } } else if (md->etc == etcBERENDSEN || md->etc == etcYES || md->etc == etcVRESCALE) { for(i=0; (i<md->nTC); i++) { md->tmp_r[i] = ekind->tcstat[i].lambda; } add_ebin(md->ebin,md->itc,md->nTC,md->tmp_r,bSum); } } if (ekind && md->nU > 1) { for(i=0; (i<md->nU); i++) { copy_rvec(ekind->grpstat[i].u,md->tmp_v[i]); } add_ebin(md->ebin,md->iu,3*md->nU,md->tmp_v[0],bSum); } 
ebin_increase_count(md->ebin,bSum); /* BAR + thermodynamic integration values */ if (write_dhdl) { if (md->fp_dhdl) { fprintf(md->fp_dhdl,"%.4f", time); if (md->dhdl_derivatives) { fprintf(md->fp_dhdl," %g", enerd->term[F_DVDL]+ enerd->term[F_DKDL]+ enerd->term[F_DHDL_CON]); } for(i=1; i<enerd->n_lambda; i++) { fprintf(md->fp_dhdl," %g", enerd->enerpart_lambda[i]-enerd->enerpart_lambda[0]); } fprintf(md->fp_dhdl,"\n"); } /* and the binary BAR output */ if (md->dhc) { mde_delta_h_coll_add_dh(md->dhc, enerd->term[F_DVDL]+ enerd->term[F_DKDL]+ enerd->term[F_DHDL_CON], enerd->enerpart_lambda, time, state->lambda); } } }
real do_nonbonded_listed(int ftype, int nbonds, const t_iatom iatoms[], const t_iparams iparams[], const rvec x[], rvec f[], rvec fshift[], const t_pbc *pbc, const t_graph *g, real *lambda, real *dvdl, const t_mdatoms *md, const t_forcerec *fr, gmx_grppairener_t *grppener, int *global_atom_index) { int ielec, ivdw; real qq, c6, c12; rvec dx; ivec dt; int i, j, itype, ai, aj, gid; int fshift_index; real r2, rinv; real fscal, velec, vvdw; real * energygrp_elec; real * energygrp_vdw; static gmx_bool warned_rlimit = FALSE; /* Free energy stuff */ gmx_bool bFreeEnergy; real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2]; real qqB, c6B, c12B, sigma2_def, sigma2_min; switch (ftype) { case F_LJ14: case F_LJC14_Q: energygrp_elec = grppener->ener[egCOUL14]; energygrp_vdw = grppener->ener[egLJ14]; break; case F_LJC_PAIRS_NB: energygrp_elec = grppener->ener[egCOULSR]; energygrp_vdw = grppener->ener[egLJSR]; break; default: energygrp_elec = NULL; /* Keep compiler happy */ energygrp_vdw = NULL; /* Keep compiler happy */ gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype); break; } if (fr->efep != efepNO) { /* Lambda factor for state A=1-lambda and B=lambda */ LFC[0] = 1.0 - lambda[efptCOUL]; LFV[0] = 1.0 - lambda[efptVDW]; LFC[1] = lambda[efptCOUL]; LFV[1] = lambda[efptVDW]; /*derivative of the lambda factor for state A and B */ DLF[0] = -1; DLF[1] = 1; /* precalculate */ sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0); sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0); for (i = 0; i < 2; i++) { lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i])); dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1); lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i])); dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? 
(1-LFV[i]) : 1); } } else { sigma2_min = sigma2_def = 0; } bFreeEnergy = FALSE; for (i = 0; (i < nbonds); ) { itype = iatoms[i++]; ai = iatoms[i++]; aj = iatoms[i++]; gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp); /* Get parameters */ switch (ftype) { case F_LJ14: bFreeEnergy = (fr->efep != efepNO && ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B)); qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ; c6 = iparams[itype].lj14.c6A; c12 = iparams[itype].lj14.c12A; break; case F_LJC14_Q: qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq; c6 = iparams[itype].ljc14.c6; c12 = iparams[itype].ljc14.c12; break; case F_LJC_PAIRS_NB: qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac; c6 = iparams[itype].ljcnb.c6; c12 = iparams[itype].ljcnb.c12; break; default: /* Cannot happen since we called gmx_fatal() above in this case */ qq = c6 = c12 = 0; /* Keep compiler happy */ break; } /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors * included in the general nfbp array now. This means the tables are scaled down by the * same factor, so when we use the original c6/c12 parameters from iparams[] they must * be scaled up. */ c6 *= 6.0; c12 *= 12.0; /* Do we need to apply full periodic boundary conditions? 
*/ if (fr->bMolPBC == TRUE) { fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx); } else { fshift_index = CENTRAL; rvec_sub(x[ai], x[aj], dx); } r2 = norm2(dx); if (r2 >= fr->tab14.r*fr->tab14.r) { if (warned_rlimit == FALSE) { nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r); warned_rlimit = TRUE; } continue; } if (bFreeEnergy) { /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */ qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ; c6B = iparams[itype].lj14.c6B*6.0; c12B = iparams[itype].lj14.c12B*12.0; fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw, fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B, LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw, fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl); } else { /* Evaluate tabulated interaction without free energy */ fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw); } energygrp_elec[gid] += velec; energygrp_vdw[gid] += vvdw; svmul(fscal, dx, dx); /* Add the forces */ rvec_inc(f[ai], dx); rvec_dec(f[aj], dx); if (g) { /* Correct the shift forces using the graph */ ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt); fshift_index = IVEC2IS(dt); } if (fshift_index != CENTRAL) { rvec_inc(fshift[fshift_index], dx); rvec_dec(fshift[CENTRAL], dx); } } return 0.0; }
/**
 * Constructor
 *
 * Passes the identifier "translate" to the CShPlugin base and starts with
 * the level identifier set to GID(NULL).
 */
CShPluginTranslate::CShPluginTranslate(void)
	: CShPlugin(CShIdentifier("translate"))
	, m_levelIdentifier(GID(NULL))
{
	// Nothing to do: all members are set in the initializer list
}
void CShTPSPlayer::Initialize(const CShIdentifier & levelIdentifier, CShTPSGun * defaultGun) { if (!m_bInitialized) { m_bInitialized = true; // Load a sprite in 2D in the Sprite attribute m_pSprite = shNULL; m_pSprite = ShEntity2::Find(levelIdentifier, CShIdentifier(PLAYER_SPRITE_NAME)); float radius = CHARACTER_CONTROLLER_RADIUS_2D; // radius for the character controller if (shNULL == m_pSprite) // if no player sprite is on the map, one is created for 3D, to manage collision between invisible 2D stuff { m_pSprite = ShEntity2::Create(levelIdentifier, CShIdentifier("player_sprite_forced_2D"), GID(layer_default), CShIdentifier("tps"), CShIdentifier("player"), CShVector3(0.0f,0.0f,1.0f), CShEulerAngles(0.0f, 0.0f, 0.0f), CShVector3(1.0f, 1.0f, 1.0f)); } SH_ASSERT(shNULL != m_pSprite); m_pModel = shNULL; m_pModel = ShEntity3::Find(levelIdentifier, CShIdentifier(PLAYER_SPRITE_NAME)); if(shNULL != m_pModel) { m_3d = true; m_pAnimIdle = ShAnimation::Find(CShIdentifier(PLAYER_ANIM_IDLE)); SH_ASSERT(shNULL != m_pAnimIdle); m_pAnimRun = ShAnimation::Find(CShIdentifier(PLAYER_ANIM_RUN)); SH_ASSERT(shNULL != m_pAnimRun); /*m_pAnimAttack = ShAnimation::Find(CShIdentifier(PLAYER_ANIM_ATTACK)); SH_ASSERT(shNULL != m_pAnimAttack);*/ ShEntity3::AnimationPlay(m_pModel, m_pAnimIdle,true); radius= CHARACTER_CONTROLLER_RADIUS_3D; } else { m_3d = false; } if(m_3d) { ShObject::SetShow(m_pSprite, false); ShObject::SetShow(m_pModel, true); } else { ShObject::SetShow(m_pSprite, true); } CShTPSCharacter::Initialize(levelIdentifier, CShIdentifier(PLAYER_SPRITE_NAME), defaultGun); // Initialize the character controller with the level, the identifier, the position, the radius, the direction, the speed. 
ShCharacterController * pCharacterController = shNULL; pCharacterController = ShCharacterController::Create(levelIdentifier, CShIdentifier("character_controller_character_001"), m_Position, radius, m_Direction, m_Speed); m_pCharacterController = pCharacterController; SH_ASSERT(shNULL != m_pCharacterController); } else { Spawn(); } }
extern "C" magma_int_t magma_dgetrf_mgpu_work_amc_v3(magma_int_t num_gpus, magma_int_t m, magma_int_t n, double **dlA, magma_int_t dlA_LD, magma_int_t *ipiv, magma_int_t *info, /*workspace on the cpu side*/ double *AWORK, magma_int_t AWORK_LD, magma_int_t AWORK_n ) { /* -- MAGMA (version 1.5.0-beta3) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver November 2011 Purpose ======= DGETRF_REC_ASYNC computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. The technique used for the panel factorization is the parallel recursif LU (see lawn 259). The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n). This is the right-looking Level 3 BLAS version of the algorithm. Arguments ========= NUM_GPUS (input) INTEGER The number of GPUS to be used for the factorization. M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDA,N). On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. LDDA (input) INTEGER The leading dimension of the array A. LDDA >= max(1,M). IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. 
===================================================================== */ double c_one = MAGMA_D_ONE; double c_neg_one = MAGMA_D_NEG_ONE; int ONE = 1; magma_int_t iinfo, nb; magma_int_t mindim; magma_int_t nrows, ncols; //double *work; magma_int_t dm_max, dn_max; magma_int_t I, J, K, M, N, U_K, L; //magma_int_t A_m, A_n, A_N; //magma_int_t Am_max, An_max; //magma_int_t A_nb; //magma_int_t A_K; double **dlAT; magma_int_t dlAT_LD; double *dlAP_get[MagmaMaxGPUs]; //*dlAP_set[MagmaMaxGPUs] double *dlAP_set[MagmaMaxGPUs]; magma_int_t dlAP_LD; double *dlpanel[MagmaMaxGPUs]; magma_int_t dlpanel_LD; int *n_local, *nr_local; //magma_int_t nrows, ncols; magma_int_t gpu_nrows, gpu_ncols; int nbcores; /*Number of cores available for the whole factorization*/ int panel_num_threads; /*Number of threads for the panel*/ double dcpu; /*percentage of the matrix to allocate on the CPUs*/ int B_rows; double t1; /*Workspace*/ // magma_int_t AWORK_NMAX; // magma_int_t AWORK_m, AWORK_n, AWORK_N; /* Recommanded dimension in the workspace*/ int A_m, A_n, A_N, A_NMAX, A_LD; int A_NP1; double *A; amc_args_t *args; /*magma_event_t *A_event;*/ /*Control bucket*/ magma_queue_t mstream[MagmaMaxGPUs][3]; /*0: H2D, 1: compute, 2:D2H*/ int dd; // double *tmpdA; /* Check arguments */ *info = 0; if (m < 0) *info = -1; else if (n < 0) *info = -2; else if (dlA_LD < max(1,m)) *info = -4; else if (AWORK_LD < max(1,m)) *info = -5; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (m == 0 || n == 0) return *info; /*Get parameters*/ args = magma_amc_args_get_default(); nb= args->nb; nbcores = args->P; panel_num_threads = args->Pr; dcpu = args->dcpu; /* Check and fix parameters */ if(nb==0) nb = magma_get_dgetrf_nb(m) ;/*magma dgetrf block size*/ else nb = args->nb; if(nb>n) nb = n; if(panel_num_threads>nbcores) panel_num_threads = nbcores; /*check the buffer size*/ if(AWORK_n<nb){ printf("Not enough buffer. 
Should be greater than the block size: %d\n", nb); exit(1); } /* Compute the number of blocks columns to factorize*/ N = (int) ceil( (double) min(m, n) / nb); /* Compute the maximum number of panels we can store in the workspace*/ A_NMAX = (int) (AWORK_n/ nb); /*Compute the recommanded number of panels for the cpu part*/ A_N = NSplit(N, dcpu); /* Compute the recommanded number of columns for the cpu part*/ A_n = A_N*nb;//(int) ceil(n*dcpu); //if(A_n<nb) // A_n = nb;//make sure workspace has at least one block column /*Make sure we work with multiple of 32*/ /* if(A_n%32!=0) { A_n = ((A_n + 31)/32)*32; } */ /* Compute the recommanded number of panels for the cpu part*/ // A_N = (int) (A_n/ nb); /* Check if there are enough workspace. In case the user gave a workspace lower than the optimal*/ /* NOTE: using small workspace may reduce performance*/ if(A_N>A_NMAX){ #if (dbglevel >=1) printf("[DBG_WARNING] Resizing buffer to feet user preferences. Recommanded:%d, Max given:%d\n",A_N, A_NMAX); #endif A_N = A_NMAX; /*Make A_n a multiple of nb*/ A_n = A_N*nb; } A = AWORK; A_m = m; A_LD = AWORK_LD; #if (dbglevel >=1) /* Initialize the tracing*/ ca_dbg_trace_init(nbcores,num_gpus); //nbcores + 1 GPU #endif #if (dbglevel >=1) t1 = magma_wtime(); #endif /* create the streams */ //mstream = (magma_queue_t *) malloc(num_gpus*sizeof(magma_queue_t)); for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); //required magma_queue_create(&mstream[dd][0]); magma_queue_create(&mstream[dd][1]); magma_queue_create(&mstream[dd][2]); /*Set the stream for internal computations*/ //magmablasSetKernelStream(0); /*Use 0 instead of mstream[dd][1], MagmasetkernelStream is not thread safe*/ /*TODO: mae it safe*/ //task_dev_set_compute_stream(dd, mstream[dd][1]); magma_task_dev_set_compute_stream(dd, 0); //set to mstream 1 later } /* Matrix dimension */ dm_max = m; dn_max = n; /*Make sure m and n are multiple of 32*/ if(dm_max%32!=0) dm_max = ((dm_max + 31)/32)*32; if(dn_max%32!=0) dn_max = ((dn_max + 
31)/32)*32; /* local dimensions of the matrix for each GPU*/ n_local = (int *) malloc(num_gpus*sizeof(int)); /*This do no change during the execution*/ nr_local = (int *) malloc(num_gpus*sizeof(int)); /*Change after each update of the trailing submatrix*/ for(dd=0;dd<num_gpus;dd++){ n_local[dd] = numcols2p(dd, n, nb, num_gpus); //loc2p(dd, N, num_gpus)*nb; nr_local[dd] = n_local[dd]; } /*Allocate a workspace for the panels transposition*/ dlAP_LD = dm_max; //if(dAP_LD%32!=0) dAP_LD = ((dAP_LD + 31)/32)*32;/*Make dAP_LD multiple of 32*/ /// dlAP_set = (double **) malloc(num_gpus*sizeof(double*)); //dlAP_get = (double **) malloc(num_gpus*sizeof(double*)); for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); if (MAGMA_SUCCESS != magma_dmalloc( &dlAP_set[dd], dlAP_LD*nb)) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } /* if (MAGMA_SUCCESS != magma_dmalloc(&tmpdA, dlAP_LD*nb)) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } */ if ( magma_is_devptr(dlAP_set[dd] ) == 0 ) { fprintf( stderr, "ERROR: dlAP_set[dd] is host pointer.\n" ); //exit(1); } //cudaMemcpy(dlAP_set[dd],&tmpdA,sizeof(double*), cudaMemcpyDeviceToHost); #if (dbglevel==10) printf("0.4\n"); //ca_dbg_printMat_gpu(2, 2, dlAP_set[dd], dlAP_LD, "dlAP_set[dd] for testing"); //cudaMemcpy(&tmpdA, &dlAP_set[dd], sizeof(double*), cudaMemcpyHostToDevice); //ca_dbg_printMat_gpu(2, 2, tmpdA, dlAP_LD, "dlAP_set[dd] for testing"); //printf("0.5: int to continue"); scanf("%d", &I); #endif if (MAGMA_SUCCESS != magma_dmalloc(&dlAP_get[dd], dlAP_LD*nb)) { //magma_free(dlAP_set); //TODO: free all previous buffers *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } } /* Workspace for the panels */ // dlpanel = (double **) malloc(num_gpus*sizeof(double*)); for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); if (MAGMA_SUCCESS != magma_dmalloc(&dlpanel[dd], nb*dm_max)) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } } dlpanel_LD = nb; /*local matrix storage*/ dlAT = (double **) malloc(num_gpus*sizeof(double*)); dlAT_LD = n_local[0]; 
if(dlAT_LD%32!=0) dlAT_LD = ((dlAT_LD + 31)/32)*32; for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); if (MAGMA_SUCCESS != magma_dmalloc(&dlAT[dd], dlAT_LD*dm_max )) { for(J=0;J<dd;J++){ magma_setdevice(J); magma_free( dlAP_set[J]); magma_free( dlAP_get[J]); magma_free(dlpanel[J]); magma_free(dlAT[J]); } //free(dlAP_set); //free(dlAP_get); //free(dlpanel); free(dlAT); *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } } #if (dbglevel >=1) printf("[DBG] Time workspace memory alloc (dAP): %f\n",magma_wtime()-t1); t1 = magma_wtime(); #endif /*1. Transfer the first column blocks of the matrix from the GPU to the CPUs.*/ //magma_dgetmatrix(A_m, A_n, dA, dA_LD, A, A_LD); magma_dgetmatrix_1D_col_bcyclic(A_m, A_n, dlA, dlA_LD, A, A_LD, num_gpus, nb); #if (dbglevel >=1) printf("[DBG] Time First getmatrix: %f\n",magma_wtime()-t1); t1 = magma_wtime(); #endif #if (dbglevel==10) printf("1.0\n"); ca_dbg_printMat(A_m, A_n, A, A_LD,"A after first getMatrix"); /* for(dd=0;dd<num_gpus;dd++){ //Fill the matrix with zero for easy visualization of the matrix in debug mode for(I=0;I<dlAT_LD*dm_max;I++) dlAT[dd][I] = 0.0; } */ // ca_dbg_printMat_mgpu(num_gpus, m, n_local, dlAT, dlAT_LD,"matrix dAlT^T empty"); // ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"matrix dAT empty"); printf("2.0\n"); #endif /*Update the remaining number of columns on the GPUs.*/ for(dd=0;dd<num_gpus;dd++){ nr_local[dd] = nr_local[dd] - numcols2p(dd, A_n, nb, num_gpus); //;n_local[dd] - loc2p(dd, A_N, num_gpus)*nb; } #if (dbglevel==10) ca_dbg_printMat_mgpu(num_gpus, m, n_local, dlA, dlA_LD,"matrix dA to factorize"); printf("3.0\n"); #endif for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); //magmablasSetKernelStream(mstream[dd][1]); magmablas_dtranspose2(dlAT[dd], dlAT_LD, dlA[dd], dlA_LD, m, n_local[dd]); } /// for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); magma_task_dev_set_compute_stream(dd, mstream[dd][1]); } #if (dbglevel >=1) printf("[DBG] Time First transposition: 
%f\n",magma_wtime()-t1); t1 = magma_wtime(); #endif #if (dbglevel==10) //ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"matrix dAT to factorize"); /* dd = GID(A_N); magma_setdevice(dd); ca_dbg_printMat_transpose_gpu(nb, m, dlAT(0, A_N), dlAT_LD,"matrix dAT(0, A_N)"); magma_setdevice(0); ca_dbg_printMat_transpose_gpu(m, nb, dlA(0, A_N), dlA_LD,"matrix dA(0, A_N)"); */ printf("4.0\n"); printf("int to continue"); scanf("%d", &I); #endif /* #if (dbglevel==10) ca_dbg_printMat_transpose_mgpu(num_gpus, m, n_local, dlAT, dlAT_LD,"matrix dAT to factorize"); #endif */ /* Compute the maximun number of steps*/ mindim = min(m, n); M = (int) ceil( (double) m / nb); N = (int) ceil( (double) mindim / nb); /*N = n/nb*/ /* 3. Let the asynchronous algorithm begin*/ #if (dbglevel >=1) printf("Starting recursif code ... m:%d, n:%d, nb:%d, nbcores:%d, N:%d, A_N:%d\n", m, n, nb, nbcores, N, A_N); //Summary #endif /*Initialize the scheduler*/ magma_schedule_init(nbcores, num_gpus); K = 0; /*initialize parallel recursif panel environment*/ CORE_zgetrf_reclap_init(); magma_schedule_set_task_priority(INT_MAX-1); /*Schedule the first panel factorization*/ magma_insert_core_dgetrf_rec(A_m, nb, A(0,K), A_LD, ipiv(0), &iinfo, panel_num_threads, colptr(K)); //magma_insert_core_dgetrf(A_m, nb, A(0,K), A_LD, ipiv(0), &iinfo, colptr(K)); /*Transfer the factorized panel in the buffer of GPU (dlpanel)*/ for(dd=0;dd<num_gpus;dd++){ ///magma_insert_dev_dsetmatrix_transpose(dd, A_m, nb, A(0,K), A_LD, dlpanel(dd,K), dlpanel_LD, dlAP_set[dd], dlAP_LD, colptr(K), dlpanel[dd]); magma_insert_dev_dsetmatrix_async_transpose(dd, A_m, nb, A(0,K), A_LD, dlpanel(dd,K), dlpanel_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K), dlpanel(dd,K)); //dlpanel[dd] } #if (dbglevel==10) magma_schedule_barrier(); for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); ca_dbg_printMat_transpose_gpu(nb, m, dlpanel(dd,K), dlpanel_LD,"dlpanel[dd] after setmatrix_async"); //dlpanel[dd] } printf("4.5: int to 
continue"); scanf("%d", &I); #endif /*Transfer also the factorized panel on its right position in the final matrix (transposition included)*/ /*TODO: this may use cudaMemcpyDeviceToDevice and initiate the transfer from dlpanel*/ dd = GID(K); //magma_insert_dev_dsetmatrix_transpose(dd, A_m, nb, A(0,K), A_LD, dlAT(0,K), dlAT_LD, dlAP_set[dd], dlAP_LD, colptr(K), dlAT(0,K)); magma_insert_dev_dsetmatrix_async_transpose(dd, A_m, nb, A(0,K), A_LD, dlAT(0,K), dlAT_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K), dlAT(0,K)); #if (dbglevel==10) magma_schedule_barrier(); ca_dbg_printMat(m, nb, A(0,0), A_LD,"A(0,0)"); for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); ca_dbg_printMat_transpose_gpu(nb, m, dlpanel[dd], dlpanel_LD,"dlpanel[dd] after setmatrix to dlAT"); } ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"dlA"); printf("5.0: int to continue"); scanf("%d", &I); #endif for(K=0;K<=N-1;K++){ /*compute the new value of the cpu number of blocks*/ A_N = NSplit(N-K, dcpu); /*insert the coarse update of the trailing submatrix corresponding to panel K to the GPU, that is submatrix A[K+1:M, K+1+d-1:N]*/ //if(K==0) /*TODO: move outside loop*/ //{ /*NOTE: Here we work on the matrix transpose*/ /*Set the priority max for the GPU computations*/ magma_schedule_set_task_priority(INT_MAX); //// magma_schedule_set_task_priority(INT_MAX - N*K); gpu_nrows = m - (K+1)*nb;/// for(J=K+A_N;J<min(K+A_N+num_gpus,N);J++){ /*Determine the device which own the first column of the group of columns to update*/ dd = GID(J); /*Determine the number of columns to apply the update. 
*/ nr_local[dd] = numcols2p(dd, n - (K+1+A_N-1)*nb, nb, num_gpus); gpu_ncols = nr_local[dd]; //n - (K+1+A_N-1)*nb; if(gpu_ncols >0) { /*schedule a swap of the trailing submatrix in the gpus using ipiv[K]*/ /*dependency dAT((K+1)-1, (K+A_N)-1) = dAT(K, K+A_N-1) with previous dgemm*/ magma_insert_dev_dlaswp(dd, gpu_ncols, dlAT(K, J), dlAT_LD, ONE, nb, ipiv(K), ONE, dlAT(K, J-1)); /*non blocking*/ //printf("debug barrier\n"); //magma_schedule_barrier(); //&(dlpanel[dd][dlpanel_LD*nb*K]) magma_insert_dev_dtrsm(dd, MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit, gpu_ncols, nb, c_one, dlpanel(dd,K), dlpanel_LD, dlAT(K,J), dlAT_LD);/*non blocking*/ /* aij^T = aij^T - (lik.ukj)^T = aij^T - ukj^T.lik^T*/ //&(dlpanel[dd][dlpanel_LD*nb*(K+1)]) magma_insert_dev_dgemm(dd, MagmaNoTrans,MagmaNoTrans, gpu_ncols, gpu_nrows, nb, c_neg_one, dlAT(K,J), dlAT_LD, dlpanel(dd,K+1), dlpanel_LD, c_one, dlAT(K+1,J), dlAT_LD);/*non blocking*/ /*Transfer asynchronously one column (column K+A_N) from the GPU to the CPU to balance work*/ //// if(K+A_N<N) //// { ////ncols = min(nb, gpu_ncols); //////magma_schedule_set_task_priority(INT_MAX); ////magma_insert_dgetmatrix_transpose(gpu_nrows, ncols, dAT(K+1,K+A_N), dAT_LD, A(K+1,K+A_N), A_LD, dAP, dAP_LD, colptr(K+A_N)); //blocking //// } } } //} /*iterate over the rest of the columns to update the trailing submatrix on the cpu*/ for(J=K+1;J<=min(K+A_N-1, N-1);J++){ ncols = min(nb, n - J*nb); /*Set the priority max for column having the next panel (look ahead of deep 1), and process the rest of the update in a right looking way*/ if(J==K+1) magma_schedule_set_task_priority(INT_MAX -2 ); //// magma_schedule_set_task_priority(INT_MAX - N*K -1); else magma_schedule_set_task_priority(INT_MAX -3 - J );//- N*K //// magma_schedule_set_task_priority(INT_MAX - N*K -3 -J); //magma_schedule_set_task_priority(INT_MAX - J); /*dependency colptr(J): make sure column J is sent from GPU, and all previous update was done*/ magma_insert_core_dlaswp(ncols, A(K,J), 
A_LD, ONE, nb, ipiv(K), ONE, colptr(J)); magma_insert_core_dtrsm('L', 'L', 'N', 'U', nb, ncols, c_one, A(K,K), A_LD, A(K,J), A_LD, colptr(J)); /*Compute the number of blocs rows to group together before the update. To avoid scheduling overhead.*/ B_rows = (int) ceil((double) (M-K-1)/panel_num_threads); B_rows = max(B_rows,4); /*maximun of 4*/ //B_rows = max(B_rows,1); //printf("B_rows:%d\n",B_rows); for(I=K+1; I<=M-1; I+=B_rows){ nrows = min(B_rows*nb, m-I*nb); /*dep colptr(K):make sure the panel is not overwritten or swapped since dgemm use A[I,K]*/ /*dep colptr(J): Gather all dgemm on one column and create dependencies with previous dgemm and the next panel*/ magma_insert_core_dgemm('N','N', nrows, ncols, nb, c_neg_one, A(I,K), A_LD, A(K,J), A_LD, c_one, A(I,J), A_LD, colptr(K), colptr(J)); } if(J==K+1) { /*Look ahead and insert the next panel*/ nrows = m - (K+1)*nb; ncols = min(nb, n - (K+1)*nb); /*Schedule the next panel factorization with maximum priority*/ magma_schedule_set_task_priority(INT_MAX -1); ///magma_schedule_set_task_priority(0); //TEST: testing prio_0 //// magma_schedule_set_task_priority(INT_MAX - N*K - 2); magma_insert_core_dgetrf_rec(nrows, ncols, A(K+1,K+1), A_LD, ipiv(K+1), &iinfo, panel_num_threads, colptr(K+1)); // magma_insert_core_dgetrf(nrows, ncols, A(K+1,K+1), A_LD, ipiv(K+1), &iinfo, colptr(K+1)); /*Transfer the factorized panel in the buffer of GPU (dlpanel)*/ for(dd=0;dd<num_gpus;dd++){ //&(dlpanel[dd][dlpanel_LD*nb*(K+1)]) ///magma_insert_dev_dsetmatrix_transpose(dd, nrows, ncols, A(K+1, K+1), A_LD, dlpanel(dd, K+1), dlpanel_LD, dlAP_set[dd], dlAP_LD, colptr(K+1), dlpanel[dd]); magma_insert_dev_dsetmatrix_async_transpose(dd, nrows, ncols, A(K+1, K+1), A_LD, dlpanel(dd, K+1), dlpanel_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K+1), dlpanel(dd,K+1));//, dlpanel[dd] } /*Determine the upper part of the matrix done by the CPU on that column and send it to the GPU with the panel*/ U_K = max(0, K+1 - A_N +1); nrows = m - U_K*nb; 
///magma_schedule_set_task_priority(INT_MAX); /*Transfer the upper part of the matrix for that column and the factorized panel to the GPU*/ ///magma_insert_dsetmatrix_transpose(nrows, ncols, A(U_K, K+1), A_LD, dAT(U_K, K+1), dAT_LD, dAP, dAP_LD, A(K+1,K+1), dAT(K+1,K+1)); //magma_insert_dev_dsetmatrix_transpose(nrows, ncols, A(U_K, K+1), A_LD, dAT(U_K, K+1), dAT_LD, dAP_set, dAP_LD, colptr(K+1), dAT(K+1,K+1)); /*Transfer also the factorized panel on its right position in the final matrix (transposition included)*/ /*TODO: this may use cudaMemcpyDeviceToDevice and initiate the transfer from dlpanel*/ dd = GID(K+1); ///magma_insert_dev_dsetmatrix_transpose(dd, nrows, ncols, A(U_K, K+1), A_LD, dlAT(U_K,K+1), dlAT_LD, dlAP_set[dd], dlAP_LD, colptr(K+1), dlAT(K+1,K+1)); magma_insert_dev_dsetmatrix_async_transpose(dd, nrows, ncols, A(U_K, K+1), A_LD, dlAT(U_K,K+1), dlAT_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K+1), dlAT(0,K+1));/// } } /*compute the next number of blocks colums */ A_NP1 = NSplit(N-(K+1), dcpu) - NSplit(N-K, dcpu) + 1; /*Transfer asynchronously (A_NP1 - A_N) block column (column K+A_N) from the GPU to the CPU to balance work*/ /*Make sure this is inserted after all dgemm because it schedules to replace a current panel for the case A_N< N*/ for(L=K+A_N;L<K+A_N+A_NP1;L++) { if(L<N) { /*Determine the device which own column K+A_N*/ dd = GID(L); gpu_ncols = nr_local[dd]; ncols = min(nb, gpu_ncols); magma_schedule_set_task_priority(INT_MAX); ///magma_insert_dev_dgetmatrix_transpose(dd, gpu_nrows, ncols, dlAT(K+1,K+A_N), dlAT_LD, A(K+1,K+A_N), A_LD, dlAP_get[dd], dlAP_LD, colptr(K+A_N)); //blocking /*make sure the computations are done on stream 1 and send a block column on stream 2*/ magma_insert_dev_queue_sync(dd, mstream[dd][1], dlAT(K+1,L)); magma_insert_dev_dgetmatrix_async_transpose(dd, gpu_nrows, ncols, dlAT(K+1,L), dlAT_LD, A(K+1,L), A_LD, mstream[dd][2], dlAP_get[dd], dlAP_LD, colptr(L)); /*Update the remaining number of columns*/ //// 
nr_local[dd]-=nb; /*if A_N==1, there is no look-ahead, so insert the panel here*/ if((A_N==1) && (L==K+A_N)){ /*Look ahead and insert the next panel*/ nrows = m - (K+1)*nb; ncols = min(nb, n - (K+1)*nb); /*Schedule the next panel factorization with maximum priority*/ magma_schedule_set_task_priority(INT_MAX -1); ///magma_schedule_set_task_priority(0); //TEST: testing prio_0 //// magma_schedule_set_task_priority(INT_MAX - N*K - 2); magma_insert_core_dgetrf_rec(nrows, ncols, A(K+1,K+1), A_LD, ipiv(K+1), &iinfo, panel_num_threads, colptr(K+1)); //magma_insert_core_dgetrf(nrows, ncols, A(K+1,K+1), A_LD, ipiv(K+1), &iinfo, colptr(K+1)); /*Transfer the factorized panel in the buffer of GPU (dlpanel)*/ for(dd=0;dd<num_gpus;dd++){ //&(dlpanel[dd][dlpanel_LD*nb*(K+1)]) ///magma_insert_dev_dsetmatrix_transpose(dd, nrows, ncols, A(K+1, K+1), A_LD, dlpanel(dd, K+1), dlpanel_LD, dlAP_set[dd], dlAP_LD, colptr(K+1), dlpanel[dd]); magma_insert_dev_dsetmatrix_async_transpose(dd, nrows, ncols, A(K+1, K+1), A_LD, dlpanel(dd, K+1), dlpanel_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K+1), dlpanel(dd,K+1));//dlpanel[dd] } /*Determine the upper part of the matrix done by the CPU on that column and send it to the GPU with the panel*/ U_K = max(0, K+1 - A_N +1); nrows = m - U_K*nb; ///magma_schedule_set_task_priority(INT_MAX); /*Transfer the upper part of the matrix for that column and the factorized panel to the GPU*/ ///magma_insert_dsetmatrix_transpose(nrows, ncols, A(U_K, K+1), A_LD, dAT(U_K, K+1), dAT_LD, dAP, dAP_LD, A(K+1,K+1), dAT(K+1,K+1)); //magma_insert_dev_dsetmatrix_transpose(nrows, ncols, A(U_K, K+1), A_LD, dAT(U_K, K+1), dAT_LD, dAP_set, dAP_LD, colptr(K+1), dAT(K+1,K+1)); /*Transfer also the factorized panel on its right position in the final matrix (transposition included)*/ /*TODO: this may use cudaMemcpyDeviceToDevice and initiate the transfer from dlpanel*/ dd = GID(K+1); ///magma_insert_dev_dsetmatrix_transpose(dd, nrows, ncols, A(U_K, K+1), A_LD, dlAT(U_K,K+1), 
dlAT_LD, dlAP_set[dd], dlAP_LD, colptr(K+1), dlAT(K+1,K+1)); magma_insert_dev_dsetmatrix_async_transpose(dd, nrows, ncols, A(U_K, K+1), A_LD, dlAT(U_K,K+1), dlAT_LD, mstream[dd][0], dlAP_set[dd], dlAP_LD, colptr(K+1), dlAT(0,K+1));///dlAT(K+1,K+1) } } } #if (dbglevel==10) magma_schedule_barrier(); ca_dbg_printMat(m, A_n, A, A_LD,"A"); ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"dAT (Step K)"); nrows = m - K*nb; ncols = min(nb, n - K*nb); dd = GID(K); magma_setdevice(dd); ca_dbg_printMat_transpose_gpu(ncols, nrows, dlAT(K,K), dlAT_LD,"dAT(K,K)"); if(K<=5){ printf("Step K:%d done. Int to continue: ",K); scanf("%d", &I); } #endif } //Step K done /*Wait for all thread termination*/ magma_schedule_barrier(); /*make sure everything arrived*/ ///needed? for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); magma_queue_sync(mstream[dd][0]); magma_queue_sync(mstream[dd][1]); magma_queue_sync(mstream[dd][2]); } /*TODO: don't need quark here*/ /*Perform a sequence of left swap on the matrix corresponding to the different panel*/ for(K=1;K<=N-1;K++){ #if (dbglevel >=1) ca_trace_start(); #endif nrows = min(nb,m - K*nb); ncols = min(K*nb,n); for(dd=0;dd<=min(num_gpus-1, K-1);dd++){ gpu_ncols = numcols2p(dd, ncols, nb, num_gpus); J = dd; if(gpu_ncols>0){ magma_setdevice(dd); //pthread_mutex_lock(&mutex_compute_stream); magmablasSetKernelStream(mstream[dd][1]); magmablas_dlaswp(gpu_ncols, dlAT(K, J), dlAT_LD, ONE, nrows, ipiv(K), ONE); //pthread_mutex_lock(&mutex_compute_stream); } } #if (dbglevel >=1) ca_trace_end_1gpu('W'); #endif } #if (dbglevel==10) ca_dbg_printMat_transpose_mgpu(num_gpus, n_local, m, dlAT, dlAT_LD,"dAT after lswap"); #endif /*Shutdown the scheduler*/ magma_schedule_delete(); /*update permutation vector indexes*/ for(K=1;K<=N-1;K++){ nrows = min(nb, n-K*nb); for(J=0;J<=nrows-1;J++){ ipiv[K*nb+J] += K*nb; } } #if dbglevel>=1 printf("[DBG] Time Factorization:%f\n",magma_wtime()-t1); t1 = magma_wtime(); #endif /* 4. 
Transpose back the matrix in/out of place*/ for(dd=0;dd<num_gpus;dd++){ //n_local[dd] = numcols2p(dd, n, nb, num_gpus); //loc2p(dd, N, num_gpus)*nb; magma_setdevice(dd); magmablasSetKernelStream(mstream[dd][1]); magmablas_dtranspose2(dlA[dd], dlA_LD, dlAT[dd], dlAT_LD, n_local[dd], m); } for(dd=0;dd<num_gpus;dd++){ //needed magma_setdevice(dd); magmablasSetKernelStream(NULL); } #if dbglevel>=1 printf("[DBG] Time Final in/out of place transpose:%f\n",magma_wtime()-t1); t1 = magma_wtime(); #endif #if (dbglevel==10) ca_dbg_printMat_mgpu(num_gpus, m, n_local, dlA, dlA_LD,"dA = LU"); #endif for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); magma_queue_destroy(mstream[dd][0]); magma_queue_destroy(mstream[dd][1]); magma_queue_destroy(mstream[dd][2]); } //free(mstream); // printf("Step 4: time:%f\n",magma_wtime()-t1); // t1 = magma_wtime(); free(n_local); free(nr_local); // free(k_local); for(dd=0;dd<num_gpus;dd++){ magma_setdevice(dd); magma_free( dlAP_set[dd]); magma_free( dlAP_get[dd]); magma_free(dlpanel[dd]); magma_free(dlAT[dd]); } //free(dlAP_set); //free(dlAP_get); //free(dlpanel); free(dlAT); #if dbglevel>=1 printf("[DBG] Time memory free (dAP):%f\n",magma_wtime()-t1); t1 = magma_wtime(); #endif #if dbglevel>=1 /*Finalize the tracing*/ ca_dbg_trace_finalize(); printf("[DBG] Time llog:%f\n",magma_wtime()-t1); #endif return *info; } /* End of MAGMA_DGETRF_REC_ASYNC_WORK_GPU */
void upd_mdebin(t_mdebin *md, gmx_bool bDoDHDL, gmx_bool bSum, double time, real tmass, gmx_enerdata_t *enerd, t_state *state, t_lambda *fep, t_expanded *expand, matrix box, tensor svir, tensor fvir, tensor vir, tensor pres, gmx_ekindata_t *ekind, rvec mu_tot, gmx_constr_t constr) { int i, j, k, kk, n, gid; real crmsd[2], tmp6[6]; real bs[NTRICLBOXS], vol, dens, pv, enthalpy; real eee[egNR]; real ecopy[F_NRE]; double store_dhdl[efptNR]; real store_energy = 0; real tmp; /* Do NOT use the box in the state variable, but the separate box provided * as an argument. This is because we sometimes need to write the box from * the last timestep to match the trajectory frames. */ copy_energy(md, enerd->term, ecopy); add_ebin(md->ebin, md->ie, md->f_nre, ecopy, bSum); if (md->nCrmsd) { crmsd[0] = constr_rmsd(constr); add_ebin(md->ebin, md->iconrmsd, md->nCrmsd, crmsd, FALSE); } if (md->bDynBox) { int nboxs; if (md->bTricl) { bs[0] = box[XX][XX]; bs[1] = box[YY][YY]; bs[2] = box[ZZ][ZZ]; bs[3] = box[YY][XX]; bs[4] = box[ZZ][XX]; bs[5] = box[ZZ][YY]; nboxs = NTRICLBOXS; } else { bs[0] = box[XX][XX]; bs[1] = box[YY][YY]; bs[2] = box[ZZ][ZZ]; nboxs = NBOXS; } vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]; dens = (tmass*AMU)/(vol*NANO*NANO*NANO); add_ebin(md->ebin, md->ib, nboxs, bs, bSum); add_ebin(md->ebin, md->ivol, 1, &vol, bSum); add_ebin(md->ebin, md->idens, 1, &dens, bSum); if (md->bDiagPres) { /* This is pV (in kJ/mol). 
The pressure is the reference pressure, not the instantaneous pressure */ pv = vol*md->ref_p/PRESFAC; add_ebin(md->ebin, md->ipv, 1, &pv, bSum); enthalpy = pv + enerd->term[F_ETOT]; add_ebin(md->ebin, md->ienthalpy, 1, &enthalpy, bSum); } } if (md->bConstrVir) { add_ebin(md->ebin, md->isvir, 9, svir[0], bSum); add_ebin(md->ebin, md->ifvir, 9, fvir[0], bSum); } add_ebin(md->ebin, md->ivir, 9, vir[0], bSum); add_ebin(md->ebin, md->ipres, 9, pres[0], bSum); tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ]; add_ebin(md->ebin, md->isurft, 1, &tmp, bSum); if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK) { tmp6[0] = state->boxv[XX][XX]; tmp6[1] = state->boxv[YY][YY]; tmp6[2] = state->boxv[ZZ][ZZ]; tmp6[3] = state->boxv[YY][XX]; tmp6[4] = state->boxv[ZZ][XX]; tmp6[5] = state->boxv[ZZ][YY]; add_ebin(md->ebin, md->ipc, md->bTricl ? 6 : 3, tmp6, bSum); } if (md->bMu) { add_ebin(md->ebin, md->imu, 3, mu_tot, bSum); } if (ekind && ekind->cosacc.cos_accel != 0) { vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]; dens = (tmass*AMU)/(vol*NANO*NANO*NANO); add_ebin(md->ebin, md->ivcos, 1, &(ekind->cosacc.vcos), bSum); /* 1/viscosity, unit 1/(kg m^-1 s^-1) */ tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO) *dens*gmx::square(box[ZZ][ZZ]*NANO/(2*M_PI))); add_ebin(md->ebin, md->ivisc, 1, &tmp, bSum); } if (md->nE > 1) { n = 0; for (i = 0; (i < md->nEg); i++) { for (j = i; (j < md->nEg); j++) { gid = GID(i, j, md->nEg); for (k = kk = 0; (k < egNR); k++) { if (md->bEInd[k]) { eee[kk++] = enerd->grpp.ener[k][gid]; } } add_ebin(md->ebin, md->igrp[n], md->nEc, eee, bSum); n++; } } } if (ekind) { for (i = 0; (i < md->nTC); i++) { md->tmp_r[i] = ekind->tcstat[i].T; } add_ebin(md->ebin, md->itemp, md->nTC, md->tmp_r, bSum); if (md->etc == etcNOSEHOOVER) { /* whether to print Nose-Hoover chains: */ if (md->bPrintNHChains) { if (md->bNHC_trotter) { for (i = 0; (i < md->nTC); i++) { for (j = 0; j < md->nNHC; j++) { k = i*md->nNHC+j; md->tmp_r[2*k] = 
state->nosehoover_xi[k]; md->tmp_r[2*k+1] = state->nosehoover_vxi[k]; } } add_ebin(md->ebin, md->itc, md->mde_n, md->tmp_r, bSum); if (md->bMTTK) { for (i = 0; (i < md->nTCP); i++) { for (j = 0; j < md->nNHC; j++) { k = i*md->nNHC+j; md->tmp_r[2*k] = state->nhpres_xi[k]; md->tmp_r[2*k+1] = state->nhpres_vxi[k]; } } add_ebin(md->ebin, md->itcb, md->mdeb_n, md->tmp_r, bSum); } } else { for (i = 0; (i < md->nTC); i++) { md->tmp_r[2*i] = state->nosehoover_xi[i]; md->tmp_r[2*i+1] = state->nosehoover_vxi[i]; } add_ebin(md->ebin, md->itc, md->mde_n, md->tmp_r, bSum); } } } else if (md->etc == etcBERENDSEN || md->etc == etcYES || md->etc == etcVRESCALE) { for (i = 0; (i < md->nTC); i++) { md->tmp_r[i] = ekind->tcstat[i].lambda; } add_ebin(md->ebin, md->itc, md->nTC, md->tmp_r, bSum); } } if (ekind && md->nU > 1) { for (i = 0; (i < md->nU); i++) { copy_rvec(ekind->grpstat[i].u, md->tmp_v[i]); } add_ebin(md->ebin, md->iu, 3*md->nU, md->tmp_v[0], bSum); } ebin_increase_count(md->ebin, bSum); /* BAR + thermodynamic integration values */ if ((md->fp_dhdl || md->dhc) && bDoDHDL) { for (i = 0; i < enerd->n_lambda-1; i++) { /* zero for simulated tempering */ md->dE[i] = enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0]; if (md->temperatures != NULL) { /* MRS: is this right, given the way we have defined the exchange probabilities? */ /* is this even useful to have at all? 
*/ md->dE[i] += (md->temperatures[i]/ md->temperatures[state->fep_state]-1.0)* enerd->term[F_EKIN]; } } if (md->fp_dhdl) { fprintf(md->fp_dhdl, "%.4f", time); /* the current free energy state */ /* print the current state if we are doing expanded ensemble */ if (expand->elmcmove > elmcmoveNO) { fprintf(md->fp_dhdl, " %4d", state->fep_state); } /* total energy (for if the temperature changes */ if (fep->edHdLPrintEnergy != edHdLPrintEnergyNO) { switch (fep->edHdLPrintEnergy) { case edHdLPrintEnergyPOTENTIAL: store_energy = enerd->term[F_EPOT]; break; case edHdLPrintEnergyTOTAL: case edHdLPrintEnergyYES: default: store_energy = enerd->term[F_ETOT]; } fprintf(md->fp_dhdl, " %#.8g", store_energy); } if (fep->dhdl_derivatives == edhdlderivativesYES) { for (i = 0; i < efptNR; i++) { if (fep->separate_dvdl[i]) { /* assumes F_DVDL is first */ fprintf(md->fp_dhdl, " %#.8g", enerd->term[F_DVDL+i]); } } } for (i = fep->lambda_start_n; i < fep->lambda_stop_n; i++) { fprintf(md->fp_dhdl, " %#.8g", md->dE[i]); } if (md->bDynBox && md->bDiagPres && (md->epc != epcNO) && (enerd->n_lambda > 0) && (fep->init_lambda < 0)) { fprintf(md->fp_dhdl, " %#.8g", pv); /* PV term only needed when there are alternate state lambda and we're not in compatibility mode */ } fprintf(md->fp_dhdl, "\n"); /* and the binary free energy output */ } if (md->dhc && bDoDHDL) { int idhdl = 0; for (i = 0; i < efptNR; i++) { if (fep->separate_dvdl[i]) { /* assumes F_DVDL is first */ store_dhdl[idhdl] = enerd->term[F_DVDL+i]; idhdl += 1; } } store_energy = enerd->term[F_ETOT]; /* store_dh is dE */ mde_delta_h_coll_add_dh(md->dhc, (double)state->fep_state, store_energy, pv, store_dhdl, md->dE + fep->lambda_start_n, time); } } }
void upd_mdebin(t_mdebin *md,FILE *fp_dgdl, bool bSum, real tmass,int step,real time, gmx_enerdata_t *enerd, t_state *state, matrix box, tensor svir, tensor fvir, tensor vir, tensor pres, gmx_ekindata_t *ekind, rvec mu_tot, gmx_constr_t constr) { static real *ttt=NULL; static rvec *uuu=NULL; int i,j,k,kk,m,n,gid; real crmsd[2],bs[NBOXS],tmp6[6]; real tricl_bs[NTRICLBOXS]; real eee[egNR]; real ecopy[F_NRE]; real tmp; /* Do NOT use the box in the state variable, but the separate box provided * as an argument. This is because we sometimes need to write the box from * the last timestep to match the trajectory frames. */ copy_energy(enerd->term,ecopy); add_ebin(md->ebin,md->ie,f_nre,ecopy,bSum,step); if (nCrmsd) { crmsd[0] = constr_rmsd(constr,FALSE); if (nCrmsd > 1) crmsd[1] = constr_rmsd(constr,TRUE); add_ebin(md->ebin,md->iconrmsd,nCrmsd,crmsd,FALSE,0); } if (bDynBox || ((ekind != NULL) && (ekind->cosacc.cos_accel != 0))) { if(bTricl) { tricl_bs[0]=box[XX][XX]; tricl_bs[1]=box[YY][XX]; tricl_bs[2]=box[YY][YY]; tricl_bs[3]=box[ZZ][XX]; tricl_bs[4]=box[ZZ][YY]; tricl_bs[5]=box[ZZ][ZZ]; /* This is the volume */ tricl_bs[6]=tricl_bs[0]*tricl_bs[2]*tricl_bs[5]; /* This is the density */ tricl_bs[7] = (tmass*AMU)/(tricl_bs[6]*NANO*NANO*NANO); } else { for(m=0; (m<DIM); m++) bs[m]=box[m][m]; /* This is the volume */ bs[3] = bs[XX]*bs[YY]*bs[ZZ]; /* This is the density */ bs[4] = (tmass*AMU)/(bs[3]*NANO*NANO*NANO); } } if (bDynBox) { /* This is pV (in kJ/mol) */ if(bTricl) { tricl_bs[8] = tricl_bs[6]*enerd->term[F_PRES]/PRESFAC; add_ebin(md->ebin,md->ib,NTRICLBOXS,tricl_bs,bSum,step); } else { bs[5] = bs[3]*enerd->term[F_PRES]/PRESFAC; add_ebin(md->ebin,md->ib,NBOXS,bs,bSum,step); } } if (bConstrVir) { add_ebin(md->ebin,md->isvir,9,svir[0],bSum,step); add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum,step); } add_ebin(md->ebin,md->ivir,9,vir[0],bSum,step); add_ebin(md->ebin,md->ipres,9,pres[0],bSum,step); tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ]; 
add_ebin(md->ebin,md->isurft,1,&tmp,bSum,step); if (epc == epcPARRINELLORAHMAN) { tmp6[0] = state->boxv[XX][XX]; tmp6[1] = state->boxv[YY][YY]; tmp6[2] = state->boxv[ZZ][ZZ]; tmp6[3] = state->boxv[YY][XX]; tmp6[4] = state->boxv[ZZ][XX]; tmp6[5] = state->boxv[ZZ][YY]; add_ebin(md->ebin,md->ipc,bTricl ? 6 : 3,tmp6,bSum,step); } add_ebin(md->ebin,md->imu,3,mu_tot,bSum,step); if (ekind && ekind->cosacc.cos_accel != 0) { add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum,step); /* 1/viscosity, unit 1/(kg m^-1 s^-1) */ if(bTricl) tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO) *tricl_bs[7]*sqr(box[ZZ][ZZ]*NANO/(2*M_PI))); else tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO) *bs[4]*sqr(box[ZZ][ZZ]*NANO/(2*M_PI))); add_ebin(md->ebin,md->ivisc,1,&tmp,bSum,step); } if (md->nE > 1) { n=0; for(i=0; (i<md->nEg); i++) { for(j=i; (j<md->nEg); j++) { gid=GID(i,j,md->nEg); for(k=kk=0; (k<egNR); k++) { if (bEInd[k]) { eee[kk++] = enerd->grpp.ener[k][gid]; } } add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum,step); n++; } } } if (ekind) { if(ttt == NULL) snew(ttt,md->nTC); for(i=0; (i<md->nTC); i++) { ttt[i] = ekind->tcstat[i].T; } add_ebin(md->ebin,md->itemp,md->nTC,ttt,bSum,step); if (etc == etcNOSEHOOVER) { for(i=0; (i<md->nTC); i++) ttt[i] = state->nosehoover_xi[i]; add_ebin(md->ebin,md->itc,md->nTC,ttt,bSum,step); } else if (etc == etcBERENDSEN || etc == etcYES || etc == etcVRESCALE) { for(i=0; (i<md->nTC); i++) ttt[i] = ekind->tcstat[i].lambda; add_ebin(md->ebin,md->itc,md->nTC,ttt,bSum,step); } } if (ekind && md->nU > 1) { if (uuu == NULL) snew(uuu,md->nU); for(i=0; (i<md->nU); i++) copy_rvec(ekind->grpstat[i].u,uuu[i]); add_ebin(md->ebin,md->iu,3*md->nU,uuu[0],bSum,step); } if (fp_dgdl) fprintf(fp_dgdl,"%.4f %g\n", time, enerd->term[F_DVDL]+enerd->term[F_DKDL]+enerd->term[F_DGDL_CON]); }