/**
 * Registers one trace user event per compute object known to ComputeMap.
 *
 * For every compute index i in [0, numComputes()) a human-readable label is
 * built from the compute type and the index, and registered through
 * traceRegisterUserEvent() under the event id TRACE_COMPOBJ_IDOFFSET + i.
 *
 *  - Self nonbonded and LCPO computes also carry the patch id (pid(i,0)).
 *  - Pair nonbonded computes carry the absolute per-axis distance between
 *    the two patches' grid coordinates, where each coordinate is the patch
 *    index plus the grid size times the Lattice offset of the compute's
 *    transform.
 *
 * An unknown compute type is reported through NAMD_bug().
 *
 * The whole body is compiled only when TRACE_COMPUTE_OBJECTS is defined;
 * otherwise the function does nothing.
 */
void registerUserEventsForAllComputeObjs()
{
#ifdef TRACE_COMPUTE_OBJECTS
    ComputeMap *map = ComputeMap::Object();
    PatchMap *pmap = PatchMap::Object();

    // Sized so that even the longest label (pair type with four full-width
    // integers) fits; all writes below are bounded by sizeof(user_des).
    char user_des[128];

    int p1, p2;
    int adim, bdim, cdim;
    int t1, t2;
    int x1, y1, z1, x2, y2, z2;
    int dx, dy, dz;

    for (int i = 0; i < map->numComputes(); i++) {
        memset(user_des, 0, sizeof(user_des));

        switch ( map->type(i) ) {
        case computeNonbondedSelfType:
            snprintf(user_des, sizeof(user_des), "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
            break;
        case computeLCPOType:
            snprintf(user_des, sizeof(user_des), "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
            break;
        case computeNonbondedPairType:
            adim = pmap->gridsize_a();
            bdim = pmap->gridsize_b();
            cdim = pmap->gridsize_c();

            // Grid coordinates of the first patch, shifted by its transform.
            p1 = map->pid(i, 0);
            t1 = map->trans(i, 0);
            x1 = pmap->index_a(p1) + adim * Lattice::offset_a(t1);
            y1 = pmap->index_b(p1) + bdim * Lattice::offset_b(t1);
            z1 = pmap->index_c(p1) + cdim * Lattice::offset_c(t1);

            // Grid coordinates of the second patch, shifted by its transform.
            p2 = map->pid(i, 1);
            t2 = map->trans(i, 1);
            x2 = pmap->index_a(p2) + adim * Lattice::offset_a(t2);
            y2 = pmap->index_b(p2) + bdim * Lattice::offset_b(t2);
            z2 = pmap->index_c(p2) + cdim * Lattice::offset_c(t2);

            dx = abs(x1-x2);
            dy = abs(y1-y2);
            dz = abs(z1-z2);
            snprintf(user_des, sizeof(user_des), "computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
            break;
        case computeExclsType:
            snprintf(user_des, sizeof(user_des), "computeExclsType_%d", i);
            break;
        case computeBondsType:
            snprintf(user_des, sizeof(user_des), "computeBondsType_%d", i);
            break;
        case computeAnglesType:
            snprintf(user_des, sizeof(user_des), "computeAnglesType_%d", i);
            break;
        case computeDihedralsType:
            snprintf(user_des, sizeof(user_des), "computeDihedralsType_%d", i);
            break;
        case computeImpropersType:
            snprintf(user_des, sizeof(user_des), "computeImpropersType_%d", i);
            break;
        case computeTholeType:
            snprintf(user_des, sizeof(user_des), "computeTholeType_%d", i);
            break;
        case computeAnisoType:
            snprintf(user_des, sizeof(user_des), "computeAnisoType_%d", i);
            break;
        case computeCrosstermsType:
            snprintf(user_des, sizeof(user_des), "computeCrosstermsType_%d", i);
            break;
        case computeSelfExclsType:
            snprintf(user_des, sizeof(user_des), "computeSelfExclsType_%d", i);
            break;
        case computeSelfBondsType:
            snprintf(user_des, sizeof(user_des), "computeSelfBondsType_%d", i);
            break;
        case computeSelfAnglesType:
            snprintf(user_des, sizeof(user_des), "computeSelfAnglesType_%d", i);
            break;
        case computeSelfDihedralsType:
            snprintf(user_des, sizeof(user_des), "computeSelfDihedralsType_%d", i);
            break;
        case computeSelfImpropersType:
            snprintf(user_des, sizeof(user_des), "computeSelfImpropersType_%d", i);
            break;
        case computeSelfTholeType:
            snprintf(user_des, sizeof(user_des), "computeSelfTholeType_%d", i);
            break;
        case computeSelfAnisoType:
            snprintf(user_des, sizeof(user_des), "computeSelfAnisoType_%d", i);
            break;
        case computeSelfCrosstermsType:
            snprintf(user_des, sizeof(user_des), "computeSelfCrosstermsType_%d", i);
            break;
#ifdef DPMTA
        case computeDPMTAType:
            snprintf(user_des, sizeof(user_des), "computeDPMTAType_%d", i);
            break;
#endif
#ifdef DPME
        case computeDPMEType:
            snprintf(user_des, sizeof(user_des), "computeDPMEType_%d", i);
            break;
#endif
        case computePmeType:
            snprintf(user_des, sizeof(user_des), "computePMEType_%d", i);
            break;
        case computeEwaldType:
            snprintf(user_des, sizeof(user_des), "computeEwaldType_%d", i);
            break;
        case computeFullDirectType:
            snprintf(user_des, sizeof(user_des), "computeFullDirectType_%d", i);
            break;
        case computeGlobalType:
            snprintf(user_des, sizeof(user_des), "computeGlobalType_%d", i);
            break;
        case computeStirType:
            snprintf(user_des, sizeof(user_des), "computeStirType_%d", i);
            break;
        case computeExtType:
            snprintf(user_des, sizeof(user_des), "computeExtType_%d", i);
            break;
        case computeEFieldType:
            snprintf(user_des, sizeof(user_des), "computeEFieldType_%d", i);
            break;
        /* BEGIN gf */
        case computeGridForceType:
            snprintf(user_des, sizeof(user_des), "computeGridForceType_%d", i);
            break;
        /* END gf */
        case computeSphericalBCType:
            snprintf(user_des, sizeof(user_des), "computeSphericalBCType_%d", i);
            break;
        case computeCylindricalBCType:
            snprintf(user_des, sizeof(user_des), "computeCylindricalBCType_%d", i);
            break;
        case computeTclBCType:
            snprintf(user_des, sizeof(user_des), "computeTclBCType_%d", i);
            break;
        case computeRestraintsType:
            snprintf(user_des, sizeof(user_des), "computeRestraintsType_%d", i);
            break;
        case computeConsForceType:
            snprintf(user_des, sizeof(user_des), "computeConsForceType_%d", i);
            break;
        case computeConsTorqueType:
            snprintf(user_des, sizeof(user_des), "computeConsTorqueType_%d", i);
            break;
        default:
            NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
            break;
        }

        // Since the argument in traceRegisterUserEvent is supposed to be a
        // const string which will not be copied inside the function when a
        // new user event is created, the label has to be allocated on the
        // heap and is intentionally never freed here.
        const size_t user_des_len = strlen(user_des);
        char *user_des_cst = new char[user_des_len + 1];
        memcpy(user_des_cst, user_des, user_des_len + 1);  // includes the terminating NUL

        traceRegisterUserEvent(user_des_cst, TRACE_COMPOBJ_IDOFFSET+i);
    }
#endif
}
Main::Main(CkArgMsg* msg) { // Parse the command line (sets application parameters) parseCommandLine(msg->argc, msg->argv); delete msg; // Display a header that displays info about the run CkPrintf("MD Simulation\n" " Patch Grid: x:%d by y:%d by z:%d\n" " NumParticlesPerPatch: %d\n" " Simulation Steps: %d\n" #if USE_PROXY_PATCHES != 0 " Proxy Patches Enabled\n" #endif #if USE_ARRAY_SECTIONS != 0 " Array Sections Enabled\n" #endif " StepPerPrint: %d\n", numPatchesX, numPatchesY, numPatchesZ, numParticlesPerPatch, numStepsRemaining, STEPS_PER_PRINT ); if(sizeof(MD_FLOAT)==4) CkPrintf("Single Precision\n"); else if(sizeof(MD_FLOAT)==8) CkPrintf("Double Precision\n"); else CkPrintf("Precision %d bytes\n",sizeof(MD_FLOAT)); // DMK - DEBUG #if ENABLE_USER_EVENTS != 0 traceRegisterUserEvent("Patch::forceCheckIn_callback()", PROJ_USER_EVENT_PATCH_FORCECHECKIN_CALLBACK); traceRegisterUserEvent("Patch::integrate_callback()", PROJ_USER_EVENT_PATCH_INTEGRATE_CALLBACK); traceRegisterUserEvent("SelfCompute::doCalc_callback()", PROJ_USER_EVENT_SELFCOMPUTE_DOCALC_CALLBACK); traceRegisterUserEvent("PairCompute::doCalc_callback()", PROJ_USER_EVENT_PAIRCOMPUTE_DOCALC_CALLBACK); traceRegisterUserEvent("SelfCompute::doCalc() - Work", PROJ_USER_EVENT_SELFCOMPUTE_DOCALC_WORK); traceRegisterUserEvent("PairCompute::doCalc() - Work", PROJ_USER_EVENT_PAIRCOMPUTE_DOCALC_WORK); traceRegisterUserEvent("CmiMachineProgressImpl", PROJ_USER_EVENT_MACHINEPROGRESS); #endif // DMK - DEBUG #if COUNT_FLOPS != 0 globalFlopCount = 0; if (CkNumPes() != 1) { CkPrintf("ERROR: When COUNT_FLOPS is enabled, only a single processor should be used... 
Exiting...\n"); CkExit(); } #endif // Spread a proxy to this main chare object to all processors via a read-only mainProxy = thisProxy; // Create the patch array patchArrayProxy = CProxy_Patch::ckNew(numPatchesX, numPatchesY, numPatchesZ); // Create the self compute array selfComputeArrayProxy = CProxy_SelfCompute::ckNew(numPatchesX, numPatchesY, numPatchesZ); // Create the pair compute array #if ENABLE_STATIC_LOAD_BALANCING != 0 // NOTE : For now, this code has to be manually changed to match the nodelist file since there is no way to // pass this information into the program at runtime. In the future, this is something the runtime system // take care of in the ideal case. int numPEs = CkNumPes(); #define W_X86 ( 10) // 10 #define W_BLADE (125) // 100 #define W_PS3 ( 96) // 75 // NOTE: The peWeights should match the hetero nodelist file being used //int peWeights[13] = { W_X86, W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_BLADE, W_BLADE, W_BLADE }; int peWeights[13] = { W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_BLADE, W_BLADE, W_BLADE, W_BLADE }; //int peWeights[14] = { W_X86, W_X86, W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_PS3, W_BLADE, W_BLADE, W_BLADE, W_BLADE }; int peStats[13] = { 0 }; CkAssert(numPEs <= 13); int rValLimit = 0; for (int i = 0; i < numPEs; i++) { rValLimit += peWeights[i]; } #endif pairComputeArrayProxy = CProxy_PairCompute::ckNew(); const int numPatches = numPatchesX * numPatchesY * numPatchesZ; for (int p0 = 0; p0 < numPatches; p0++) { for (int p1 = p0 + 1; p1 < numPatches; p1++) { #if ENABLE_STATIC_LOAD_BALANCING != 0 int pe = 0; int rVal = rand() % rValLimit; for (int i = 0; i < numPEs; i++) { if (rVal < peWeights[i]) { pe = i; break; } rVal -= peWeights[i]; } pairComputeArrayProxy(p0, p1).insert(pe); peStats[pe]++; #else pairComputeArrayProxy(p0, p1).insert(); #endif } } pairComputeArrayProxy.doneInserting(); #if ENABLE_STATIC_LOAD_BALANCING != 0 int 
numPairComputes = 0; for (int i = 0; i < numPEs; i++) { numPairComputes += peStats[i]; } for (int i = 0; i < numPEs; i++) { CkPrintf("[STATS] :: peStats[%d] = %6d (%5.2f%%)\n", i, peStats[i], ((float)peStats[i]) / ((float)numPairComputes) * 10zero); } #endif // Start initialization (NOTE: Patch will initiate proxy patches directly if proxy patches are being used) selfComputeArrayProxy.init(numParticlesPerPatch); pairComputeArrayProxy.init(numParticlesPerPatch); #if USE_PROXY_PATCHES != 0 for (int x = 0; x < numPatchesX; x++) { for (int y = 0; y < numPatchesY; y++) { for (int z = 0; z < numPatchesZ; z++) { int patchIndex = PATCH_XYZ_TO_I(x, y, z); CProxy_ProxyPatch proxyPatchProxy = CProxy_ProxyPatch::ckNew(patchIndex); proxyPatchProxy.init(numParticlesPerPatch); patchArrayProxy(x, y, z).init(numParticlesPerPatch, proxyPatchProxy); } } } #else patchArrayProxy.init(numParticlesPerPatch); #endif }