/**
 * Builds the lookup table that assigns every chare of an X x Y x Z chare
 * array to a processor.
 *
 * The chare array is cut into equally sized bricks, one per processor
 * coordinate, and all chares of a brick are given the same PE. The table is
 * stored in `mapping`, indexed as mapping[ci*Y*Z + cj*Z + ck].
 *
 * @param x   number of chares in the first dimension
 * @param y   number of chares in the second dimension
 * @param z   number of chares in the third dimension
 * @param tx  processor-grid extent in X used by the block map
 * @param ty  processor-grid extent in Y used by the block map
 * @param tz  processor-grid extent in Z used by the block map
 *
 * With USE_TOPOMAP enabled the processor-grid extents are queried from
 * TopoManager and tx/ty/tz are ignored; otherwise the block map
 * (pe = i + j*dimX + k*dimX*dimY) is used.
 *
 * Entries of chares that are left over by the integer divisions below are
 * not written.
 */
ComputeMap::ComputeMap(int x, int y, int z, int tx, int ty, int tz) {
  X = x;
  Y = y;
  Z = z;
  mapping = new int[X*Y*Z];

  TopoManager tmgr;

  // Start from the caller-supplied block dimensions so these values are never
  // indeterminate, then let the topology manager override them when the
  // topology-aware map is selected.
  int dimX = tx;
  int dimY = ty;
  int dimZ = tz;
  int dimT = 1;
#if USE_TOPOMAP
  dimX = tmgr.getDimNX();
  dimY = tmgr.getDimNY();
  dimZ = tmgr.getDimNZ();
  dimT = tmgr.getDimNT();
#endif

  // we are assuming that the no. of chares in each dimension is a
  // multiple of the torus dimension
  int numCharesPerPeX = X / dimX;
  int numCharesPerPeY = Y / dimY;
  int numCharesPerPeZ = Z / dimZ;

  if (dimT < 2) {    // one core per node
    if (CkMyPe() == 0)
      CkPrintf("DATA: %d %d %d %d : %d %d %d\n", dimX, dimY, dimZ, dimT, numCharesPerPeX, numCharesPerPeY, numCharesPerPeZ);

    for (int i = 0; i < dimX; i++)
      for (int j = 0; j < dimY; j++)
        for (int k = 0; k < dimZ; k++) {
          // The target PE only depends on the processor coordinate, so it is
          // computed once per brick instead of once per chare.
#if USE_TOPOMAP
          const int pe = tmgr.coordinatesToRank(i, j, k);
#else
          const int pe = i + j*dimX + k*dimX*dimY;
#endif
          for (int ci = i*numCharesPerPeX; ci < (i+1)*numCharesPerPeX; ci++)
            for (int cj = j*numCharesPerPeY; cj < (j+1)*numCharesPerPeY; cj++)
              for (int ck = k*numCharesPerPeZ; ck < (k+1)*numCharesPerPeZ; ck++)
                mapping[ci*Y*Z + cj*Z + ck] = pe;
        }
  } else {           // multiple cores per node
    // In this case, we split the chares in the X dimension among the
    // cores on the same node.
    numCharesPerPeX /= dimT;

    if (CkMyPe() == 0)
      CkPrintf("%d %d %d : %d %d %d %d : %d %d %d \n", x, y, z, dimX, dimY, dimZ, dimT, numCharesPerPeX, numCharesPerPeY, numCharesPerPeZ);

    for (int i = 0; i < dimX; i++)
      for (int j = 0; j < dimY; j++)
        for (int k = 0; k < dimZ; k++)
          for (int l = 0; l < dimT; l++) {
            // One PE per (node coordinate, core) pair; hoisted out of the
            // per-chare loops because it does not depend on ci/cj/ck.
            const int pe = tmgr.coordinatesToRank(i, j, k, l);

            for (int ci = (dimT*i+l)*numCharesPerPeX; ci < (dimT*i+l+1)*numCharesPerPeX; ci++)
              for (int cj = j*numCharesPerPeY; cj < (j+1)*numCharesPerPeY; cj++)
                for (int ck = k*numCharesPerPeZ; ck < (k+1)*numCharesPerPeZ; ck++)
                  mapping[ci*Y*Z + cj*Z + ck] = pe;
          }
  }
}
void build_process_map(int size, int *smap, int *rmap, int *pmap, int file) { TopoManager tmgr; int pe, pe1, pe2, x, y, z1, t; int dimNX, dimNY, dimNZ, dimNT; dimNX = tmgr.getDimNX(); dimNY = tmgr.getDimNY(); dimNZ = tmgr.getDimNZ(); dimNT = tmgr.getDimNT(); int count = 0; for(int i=0; i<size; i++) { smap[i]=-1; rmap[i]=-1; pmap[i]=-1; } cout << "Loading Map" << endl; char name[50]; sprintf(name,"%d.map",file); ifstream mapFile(name); string line_s; while(mapFile.good() ){ #ifdef DEBUG cout << " > Loading " << name << endl; #endif int c1,c2,c3,c4,c5,c6; getline(mapFile,line_s); istringstream line(line_s); line >> c1 >> c2 >>c3 >> c4 >> c5>> c6; for(int i=0;i<dimNZ;i++) { pe = tmgr.coordinatesToRank(c1, c2, i, 0); pe1 = tmgr.coordinatesToRank(c4, c5, i, 0); smap[pe] = pe1; rmap[pe1] = pe; if(i==0) { pmap[pe] =1; pmap[pe1]=2; } } } dump_map(size,rmap); dump_map(size,smap); }
extern "C" void LrtsInitCpuTopo(char **argv) { static skt_ip_t myip; hostnameMsg *msg; double startT; int obtain_flag = 1; // default on int show_flag = 0; // default not show topology if (CmiMyRank() ==0) { topoLock = CmiCreateLock(); } #if __FAULT__ obtain_flag = 0; #endif if(CmiGetArgFlagDesc(argv,"+obtain_cpu_topology", "obtain cpu topology info")) obtain_flag = 1; if (CmiGetArgFlagDesc(argv,"+skip_cpu_topology", "skip the processof getting cpu topology info")) obtain_flag = 0; if(CmiGetArgFlagDesc(argv,"+show_cpu_topology", "Show cpu topology info")) show_flag = 1; #if CMK_BIGSIM_CHARM if (BgNodeRank() == 0) #endif { cpuTopoHandlerIdx = CmiRegisterHandler((CmiHandler)cpuTopoHandler); cpuTopoRecvHandlerIdx = CmiRegisterHandler((CmiHandler)cpuTopoRecvHandler); } if (!obtain_flag) { if (CmiMyRank() == 0) cpuTopo.sort(); CmiNodeAllBarrier(); CcdRaiseCondition(CcdTOPOLOGY_AVAIL); // call callbacks return; } if (CmiMyPe() == 0) { #if CMK_BIGSIM_CHARM if (BgNodeRank() == 0) #endif startT = CmiWallTimer(); } #if CMK_BIGSIM_CHARM if (BgNodeRank() == 0) { //int numPes = BgNumNodes()*BgGetNumWorkThread(); int numPes = cpuTopo.numPes = CkNumPes(); cpuTopo.nodeIDs = new int[numPes]; CpuTopology::supported = 1; int wth = BgGetNumWorkThread(); for (int i=0; i<numPes; i++) { int nid = i / wth; cpuTopo.nodeIDs[i] = nid; } cpuTopo.sort(); } return; #else #if CMK_USE_GM CmiBarrier(); #endif #if 0 if (gethostname(hostname, 999)!=0) { strcpy(hostname, ""); } #endif #if CMK_BLUEGENEL || CMK_BLUEGENEP if (CmiMyRank() == 0) { TopoManager tmgr; int numPes = cpuTopo.numPes = CmiNumPes(); cpuTopo.nodeIDs = new int[numPes]; CpuTopology::supported = 1; int x, y, z, t, nid; for(int i=0; i<numPes; i++) { tmgr.rankToCoordinates(i, x, y, z, t); nid = tmgr.coordinatesToRank(x, y, z, 0); cpuTopo.nodeIDs[i] = nid; } cpuTopo.sort(); if (CmiMyPe()==0) CmiPrintf("Charm++> Running on %d unique compute nodes (%d-way SMP).\n", cpuTopo.numNodes, CmiNumCores()); } CmiNodeAllBarrier(); #elif 
CMK_BLUEGENEQ if (CmiMyRank() == 0) { TopoManager tmgr; int numPes = cpuTopo.numPes = CmiNumPes(); cpuTopo.nodeIDs = new int[numPes]; CpuTopology::supported = 1; int a, b, c, d, e, t, nid; for(int i=0; i<numPes; i++) { tmgr.rankToCoordinates(i, a, b, c, d, e, t); nid = tmgr.coordinatesToRank(a, b, c, d, e, 0); cpuTopo.nodeIDs[i] = nid; } cpuTopo.sort(); if (CmiMyPe()==0) CmiPrintf("Charm++> Running on %d unique compute nodes (%d-way SMP).\n", cpuTopo.numNodes, CmiNumCores()); } CmiNodeAllBarrier(); #elif CMK_CRAYXT || CMK_CRAYXE || CMK_CRAYXC if(CmiMyRank() == 0) { int numPes = cpuTopo.numPes = CmiNumPes(); int numNodes = CmiNumNodes(); cpuTopo.nodeIDs = new int[numPes]; CpuTopology::supported = 1; int nid; for(int i=0; i<numPes; i++) { nid = getXTNodeID(CmiNodeOf(i), numNodes); cpuTopo.nodeIDs[i] = nid; } int prev = -1; nid = -1; // this assumes that all cores on a node have consecutive MPI rank IDs // and then changes nodeIDs to 0 to numNodes-1 for(int i=0; i<numPes; i++) { if(cpuTopo.nodeIDs[i] != prev) { prev = cpuTopo.nodeIDs[i]; cpuTopo.nodeIDs[i] = ++nid; } else cpuTopo.nodeIDs[i] = nid; } cpuTopo.sort(); if (CmiMyPe()==0) CmiPrintf("Charm++> Running on %d unique compute nodes (%d-way SMP).\n", cpuTopo.numNodes, CmiNumCores()); } CmiNodeAllBarrier(); #else bool topoInProgress = true; if (CmiMyPe() >= CmiNumPes()) { CmiNodeAllBarrier(); // comm thread waiting #if CMK_MACHINE_PROGRESS_DEFINED #if ! 
CMK_CRAYXT while (topoInProgress) { CmiNetworkProgress(); CmiLock(topoLock); topoInProgress = done < CmiMyNodeSize(); CmiUnlock(topoLock); } #endif #endif return; /* comm thread return */ } /* get my ip address */ if (CmiMyRank() == 0) { #if CMK_HAS_GETHOSTNAME && !CMK_BLUEGENEQ myip = skt_my_ip(); /* not thread safe, so only calls on rank 0 */ // fprintf(stderr, "[%d] IP is %d.%d.%d.%d\n", CmiMyPe(), myip.data[0],myip.data[1],myip.data[2],myip.data[3]); #elif CMK_BPROC myip = skt_innode_my_ip(); #else if (!CmiMyPe()) CmiPrintf("CmiInitCPUTopology Warning: Can not get unique name for the compute nodes. \n"); _noip = 1; #endif cpuTopo.numPes = CmiNumPes(); } CmiNodeAllBarrier(); if (_noip) return; /* prepare a msg to send */ msg = (hostnameMsg *)CmiAlloc(sizeof(hostnameMsg)+sizeof(_procInfo)); msg->n = 1; msg->procs = (_procInfo*)((char*)msg + sizeof(hostnameMsg)); CmiSetHandler((char *)msg, cpuTopoHandlerIdx); msg->procs[0].pe = CmiMyPe(); msg->procs[0].ip = myip; msg->procs[0].ncores = CmiNumCores(); msg->procs[0].rank = 0; msg->procs[0].nodeID = 0; CmiReduce(msg, sizeof(hostnameMsg)+sizeof(_procInfo), combineMessage); // blocking here while (topoInProgress) { CsdSchedulePoll(); CmiLock(topoLock); topoInProgress = done < CmiMyNodeSize(); CmiUnlock(topoLock); } if (CmiMyPe() == 0) { #if CMK_BIGSIM_CHARM if (BgNodeRank() == 0) #endif CmiPrintf("Charm++> cpu topology info is gathered in %.3f seconds.\n", CmiWallTimer()-startT); } #endif #endif /* __BIGSIM__ */ // now every one should have the node info CcdRaiseCondition(CcdTOPOLOGY_AVAIL); // call callbacks if (CmiMyPe() == 0 && show_flag) cpuTopo.print(); }
void build_process_map(int size, int *map, int dist, int numRG, int *mapRG) { TopoManager tmgr; int pe1, pe2, x, y, z, t; int dimNX, dimNY, dimNZ, dimNT; dimNX = tmgr.getDimNX(); dimNY = tmgr.getDimNY(); dimNZ = tmgr.getDimNZ(); dimNT = tmgr.getDimNT(); int count = 0; #if CREATE_JOBS for(int i=0; i<size; i++) map[i] = -1; // assumes a cubic partition such as 8 x 8 x 8 // inner brick is always used for(int i=0; i<dimNX; i++) for(int j=1; j<dimNY-1; j++) for(int k=1; k<dimNZ-1; k++) for(int l=0; l<dimNT; l++) { if(k == 2 || k == dimNZ-3) { pe1 = tmgr.coordinatesToRank(i, j, k, l); if(k == 2) pe2 = tmgr.coordinatesToRank(i, j, dimNZ-3, l); else pe2 = tmgr.coordinatesToRank(i, j, 2, l); map[pe1] = pe2; mapRG[count++] = pe1; printf("%d ", pe1); } } printf("\n"); if(dist == 1) { // outer brick is used only when dist == 1 for(int i=0; i<dimNX; i++) for(int j=0; j<dimNY; j++) for(int k=0; k<dimNZ; k++) for(int l=0; l<dimNT; l++) { if(j == 0 || j == dimNY-1 || k == 0 || k == dimNZ-1) { pe1 = tmgr.coordinatesToRank(i, j, k, l); pe2 = tmgr.coordinatesToRank(i, k, j, l); if(j == 0 && k == 0) pe2 = tmgr.coordinatesToRank(i, dimNY-1, dimNZ-1, l); else if(j == dimNY-1 && k == dimNZ-1) pe2 = tmgr.coordinatesToRank(i, 0, 0, l); map[pe1] = pe2; } } } #else for(int i=0; i<dimNX; i++) for(int j=0; j<dimNY; j++) for(int k=0; k<dimNZ; k++) for(int l=0; l<dimNT; l++) { pe1 = tmgr.coordinatesToRank(i, j, k, l); if( abs(dimNZ - 1 - 2*k) <= (2*dist+1) ) { pe2 = tmgr.coordinatesToRank(i, j, (dimNZ-1-k), l); map[pe1] = pe2; if(i==0 && j==0 && l==0) { printf("Hops %d [%d] [%d]\n", 2*dist+1, pe1, pe2); } if(k == dimNZ/2-1 || k == dimNZ/2) mapRG[count++] = pe1; } else map[pe1] = -1; } #endif printf("Barrier Process %d %d\n", count, numRG); check_map(size, map); }