/**
 * Extract an IDXL_Side_t into Roccom format.
 *
 * Appends to pconn, in order: the partner count, then for every partner
 * its pane number, its shared-node count, and each shared node index.
 *
 * @param is           Side to read partners and indices from.
 * @param bias         Offset added to every node index (on top of the +1
 *                     that makes them 1-based).
 * @param pconn        Output vector; entries are appended, never cleared.
 * @param paneFmChunk  Optional chunk-to-pane table; when NULL the pane
 *                     number is 1+chunk.
 */
static void getRoccomPconn(IDXL_Side_t is,int bias,CkVec<int> &pconn,const int *paneFmChunk)
{
  const int partnerCount = IDXL_Get_partners(is);
  pconn.push_back(partnerCount);

  for (int partner = 0; partner < partnerCount; partner++) {
    const int chunk = IDXL_Get_partner(is, partner);
    const int pane = paneFmChunk ? paneFmChunk[chunk] : 1 + chunk;
    pconn.push_back(pane);

    const int sharedCount = IDXL_Get_count(is, partner);
    pconn.push_back(sharedCount); /* number of shared nodes */

    for (int node = 0; node < sharedCount; node++) {
      /* nodes are 1-based */
      pconn.push_back(IDXL_Get_index(is, partner, node) + 1 + bias);
    }
  }
}
//Find next un-occupied global number: int nextUnoccupied(void) { while (occupiedBefore<occupied.size()) { if (occupied[occupiedBefore]==0) return occupiedBefore; else occupiedBefore++; } /* occupiedBefore==occupied.size(), so add to end of list */ occupied.push_back(1); return occupiedBefore; }
// find sender comms void HybridBaseLB::collectCommData(int objIdx, CkVec<LDCommData> &comms, int atlevel) { LevelData *lData = levelData[atlevel]; LDStats *statsData = lData->statsData; LDObjData &objData = statsData->objData[objIdx]; for (int com=0; com<statsData->n_comm; com++) { LDCommData &cdata = statsData->commData[com]; if (cdata.from_proc()) continue; if (cdata.sender.objID() == objData.objID() && cdata.sender.omID() == objData.omID()) comms.push_back(cdata); } }
// Read one clause from `in` into `lits` (which is emptied first).
// Integers are parsed until a 0 terminator is seen. For every literal the
// occurrence counter of its variable (|literal| - 1) is bumped in S, and
// the positive-occurrence counter as well when the literal is positive.
static void readClause(StreamBuffer& in, par_SolverState& S, CkVec<par_Lit>& lits)
{
  lits.removeAll();

  int literal;
  while ((literal = parseInt(in)) != 0) {
    const int varIdx = abs(literal) - 1;
    S.occurrence[varIdx]++;
    if (literal > 0) {
      S.positive_occurrence[varIdx]++;
    }
    lits.push_back(par_Lit(literal));
  }
}
/// Mark this entity's global numbers as used: void mark(FEM_Entity &src) { int l,len=src.size(); for (l=0;l<len;l++) { int g=src.getGlobalno(l); if (g!=-1) { while (occupied.size()<=g) { //Make room for this marker occupied.push_back(0); } //FIXME: make sure element global numbers aren't repeated // (tough because *node* global numbers may be repeated) occupied[g]=1; } } }
// Build the multicast section made of this patch's computes and register
// reduceForces on this patch element as the section's reduction client.
// Compiles to an empty function unless USE_SECTION_MULTICAST is defined.
void Patch::localCreateSection() {
#ifdef USE_SECTION_MULTICAST
  // One 6D array index per entry of computesList.
  CkVec<CkArrayIndex6D> sectionMembers;
  for (int nbr = 0; nbr < numNbrs; nbr++) {
    sectionMembers.push_back(CkArrayIndex6D(computesList[nbr][0],
                                            computesList[nbr][1],
                                            computesList[nbr][2],
                                            computesList[nbr][3],
                                            computesList[nbr][4],
                                            computesList[nbr][5]));
  }

  CkArrayID computesID = computeArray.ckGetArrayID();
  mCastSecProxy = CProxySection_Compute::ckNew(computesID, sectionMembers.getVec(), sectionMembers.size());

  // Delegate section traffic to the local multicast manager branch.
  CkMulticastMgr *mcastMgr = CProxy_CkMulticastMgr(mCastGrpID).ckLocalBranch();
  mCastSecProxy.ckSectionDelegate(mcastMgr);

  CkCallback *forcesReduced =
      new CkCallback(CkIndex_Patch::reduceForces(NULL),
                     thisProxy(thisIndex.x, thisIndex.y, thisIndex.z));
  mcastMgr->setReductionClient(mCastSecProxy, forcesReduced);
#endif
}
// find sender comms void HbmLB::collectCommData(int objIdx, CkVec<LDCommData> &comms) { #if CMK_LBDB_ON LevelData *lData = levelData[0]; LDObjData &objData = myStats.objData[objIdx]; for (int com=0; com<myStats.n_comm; com++) { LDCommData &cdata = myStats.commData[com]; if (cdata.from_proc()) continue; if (cdata.sender.objID() == objData.objID() && cdata.sender.omID() == objData.omID()) comms.push_back(cdata); } #endif }
// pick objects to migrate "t" amount of work void HbmLB::ReceiveMigrationDelta(double t, int lblevel, int fromlevel) { #if CMK_LBDB_ON int i; int atlevel = fromlevel-1; LevelData *lData = levelData[atlevel]; if (atlevel != 0) { thisProxy.ReceiveMigrationDelta(t, lblevel, atlevel, lData->nChildren, lData->children); return; } // I am leave, find objects to migrate CkVec<int> migs; CkVec<LDObjData> &objData = myStats.objData; for (i=0; i<myStats.n_objs; i++) { LDObjData &oData = objData[i]; if (oData.wallTime < t) { migs.push_back(i); t -= oData.wallTime; if (t == 0.0) break; } } int nmigs = migs.size(); // send a message to int matchPE = CkMyPe() ^ (1<<(lblevel-1)); DEBUGF(("[%d] migrating %d objs to %d at lblevel %d! \n", CkMyPe(),nmigs,matchPE,lblevel)); thisProxy[matchPE].ReceiveMigrationCount(nmigs, lblevel); // migrate objects for (i=0; i<nmigs; i++) { int idx = migs[i]-i; LDObjData &oData = objData[idx]; CkVec<LDCommData> comms; collectCommData(idx, comms); thisProxy[matchPE].ObjMigrated(oData, comms.getVec(), comms.size()); theLbdb->Migrate(oData.handle, matchPE); // TODO modify LDStats DEBUGF(("myStats.removeObject: %d, %d, %d\n", migs[i], i, objData.size())); myStats.removeObject(idx); } #endif }
// read from a file called "globals" static void readGlobals() { if (loaded) return; const char *fname = "globals"; printf("Loading globals from file \"%s\" ... \n", fname); FILE *gf = fopen(fname, "r"); if (gf == NULL) { CmiAbort("Failed to load globals, file may not exist!"); } while (!feof(gf)) { char name[1024]; fscanf(gf, "%s\n", name); _namelist.push_back(strdup(name)); } fclose(gf); loaded = 1; }
// Read whitespace-separated names from a file called "blacklist" and append
// a strdup'ed copy of each to _blacklist. Runs at most once (guarded by
// `loaded`). A missing file is not an error: PE 0 prints a warning and the
// function returns with `loaded` set.
static void readBlacklist()
{
  if (loaded) return;

  const char *fname = "blacklist";
  FILE *bl = fopen(fname, "r");
  if (bl == NULL) {
    if (CmiMyPe() == 0)
      printf("WARNING: Running swapglobals without blacklist, globals from libraries might be getting un-necessarily swapped\n");
    loaded = 1;
    return;
  }

  printf("Loading blacklist from file \"%s\" ... \n", fname);

  // Loop on the fscanf result rather than feof(): feof() only turns true
  // after a read has already failed, so the old loop pushed a stale or
  // uninitialised buffer for an empty file. The field width keeps an
  // over-long token from overrunning `name`.
  char name[512];
  while (fscanf(bl, "%511s", name) == 1) {
    _blacklist.push_back(strdup(name));
  }

  fclose(bl);
  loaded = 1;
}
void HybridBaseLB::GetObjsToMigrate(int toPe, double load, LDStats *stats, int atlevel, CkVec<LDCommData>& comms, CkVec<LDObjData>& objs) { // TODO: sort max => low for (int obj=stats->n_objs-1; obj>=0; obj--) { LDObjData &objData = stats->objData[obj]; if (!objData.migratable) continue; if (objData.wallTime <= load) { if (_lb_args.debug()>2) { CkPrintf("[%d] send obj: %d to PE %d (load: %f).\n", CkMyPe(), obj, toPe, objData.wallTime); } objs.push_back(objData); // send comm data collectCommData(obj, comms, atlevel); load -= objData.wallTime; CreateMigrationOutObjs(atlevel, stats, obj); stats->removeObject(obj); if (load <= 0.0) break; } } }
// return -1 when not supported int numUniqNodes() { #if 0 if (numNodes != 0) return numNodes; int n = 0; for (int i=0; i<CmiNumPes(); i++) if (nodeIDs[i] > n) n = nodeIDs[i]; numNodes = n+1; return numNodes; #else if (numNodes > 0) return numNodes; // already calculated CkVec<int> unodes; int i; for (i=0; i<numPes; i++) unodes.push_back(nodeIDs[i]); //unodes.bubbleSort(0, numPes-1); unodes.quickSort(); int last = -1; std::map<int, int> nodemap; // nodeIDs can be out of range of [0,numNodes] for (i=0; i<numPes; i++) { if (unodes[i] != last) { last=unodes[i]; nodemap[unodes[i]] = numNodes; numNodes++; } } if (numNodes == 0) { numNodes = CmiNumNodes(); numPes = CmiNumPes(); } else { // re-number nodeIDs, which may be necessary e.g. on BlueGene/P for (i=0; i<numPes; i++) nodeIDs[i] = nodemap[nodeIDs[i]]; CpuTopology::supported = 1; } return numNodes; #endif }
// Count the messages and bytes that cross PE boundaries under the current
// to_proc assignment.
//
// nmsgs  (out) reset to 0, then incremented by cdata.messages for every
//        counted sender/receiver pair on different PEs
// nbytes (out) same, with cdata.bytes
//
// For a multicast record (LD_OBJLIST_MSG) each distinct destination PE is
// counted once. Both outputs are left untouched when CMK_LBDB_ON is off.
void BaseLB::LDStats::computeNonlocalComm(int &nmsgs, int &nbytes)
{
#if CMK_LBDB_ON
  nmsgs = 0;
  nbytes = 0;

  makeCommHash();

  // Incremented per multicast record but never read in this function.
  int mcast_count = 0;
  for (int cidx=0; cidx < n_comm; cidx++) {
    LDCommData& cdata = commData[cidx];
    int senderPE, receiverPE;
    // Resolve the sending PE: directly for processor-originated records,
    // through the object hash and to_proc otherwise.
    if (cdata.from_proc())
      senderPE = cdata.src_proc;
    else {
      int idx = getHash(cdata.sender);
      if (idx == -1) continue;    // sender has just migrated?
      senderPE = to_proc[idx];
      CmiAssert(senderPE != -1);
    }
    CmiAssert(senderPE < nprocs() && senderPE >= 0);

    // find receiver: point-to-point and multicast two cases
    int receiver_type = cdata.receiver.get_type();
    if (receiver_type == LD_PROC_MSG || receiver_type == LD_OBJ_MSG) {
      if (receiver_type == LD_PROC_MSG)
        receiverPE = cdata.receiver.proc();
      else {  // LD_OBJ_MSG
        int idx = getHash(cdata.receiver.get_destObj());
        if (idx == -1) {    // receiver outside this domain
          // With complete_flag set the record is skipped; otherwise -1 is
          // used as receiver, which never equals a valid senderPE, so the
          // record is counted as non-local below.
          if (complete_flag) continue;
          else receiverPE = -1;
        }
        else {
          receiverPE = to_proc[idx];
          CmiAssert(receiverPE < nprocs() && receiverPE >= 0);
        }
      }
      if(senderPE != receiverPE) {
        nmsgs += cdata.messages;
        nbytes += cdata.bytes;
      }
    }
    else if (receiver_type == LD_OBJLIST_MSG) {
      int nobjs;
      LDObjKey *objs = cdata.receiver.get_destObjs(nobjs);
      mcast_count ++;
      // Destination PEs already handled for this multicast record.
      CkVec<int> pes;
      for (int i=0; i<nobjs; i++) {
        int idx = getHash(objs[i]);
        CmiAssert(idx != -1);
        if (idx == -1) continue;    // receiver has just been removed?
        receiverPE = to_proc[idx];
        CmiAssert(receiverPE < nprocs() && receiverPE >= 0);
        int exist = 0;
        for (int p=0; p<pes.size(); p++)
          if (receiverPE == pes[p]) { exist=1; break; }
        if (exist) continue;
        pes.push_back(receiverPE);
        if(senderPE != receiverPE) {
          nmsgs += cdata.messages;
          nbytes += cdata.bytes;
        }
      }
    }
  }   // end of for
#endif
}
/** Perform a Delaunay flip of the edge (n1, n2).
    Returns 1 when the flip was carried out and -1 when it was refused:
    either n1 or n2 is negative, or controlQualityF(n1,n2,n3,n4) returned
    true. The convexity of the quadrilateral formed by the two faces
    incident to edge (n1, n2) is assumed. n1 and n2 are assumed to be local
    to this chunk. An adjacency test is performed on n1 and n2 by searching
    for an element with edge [n1,n2].

    e1, e2            the two elements incident to the edge
    e1_n1/e1_n2/e1_n3 slots of the connectivity array that the new elements'
                      nodes are written to
    n3, n4            the nodes opposite the edge; a negative value is
                      replaced by the node found after eatIntoElement()
    locknodes         caller-provided array of at least 4 ints; receives
                      n1, n2, n3, n4 (entries 2 and 3 are refreshed when
                      n3/n4 get replaced)

          n3                 n3
           o                  o
          / \                /|\
         /   \              / | \
        /     \            /  |  \
       /       \          /   |   \
   n1 o---------o n2  n1 o    |    o n2
       \       /          \   |   /
        \     /            \  |  /
         \   /              \ | /
          \ /                \|/
           o                  o
          n4                 n4
*/
int FEM_Adapt::edge_flip_help(int e1, int e2, int n1, int n2, int e1_n1, int e1_n2, int e1_n3, int n3, int n4, int *locknodes)
{
  // numNodes/numElems are only referenced by the commented-out
  // FEM_Modify_Lock call below.
  int numNodes = 4;
  int numElems = 2;
  int lockelems[2];
  int elemConn[3];

  locknodes[0] = n1;
  locknodes[1] = n2;
  locknodes[2] = n3;
  locknodes[3] = n4;
  lockelems[0] = e1;
  lockelems[1] = e2;
  if(n1 < 0 || n2 < 0) {
    return -1;
  }
  int index = theMod->getIdx();
  // A true result from the quality control vetoes the flip.
  bool flag = theMod->fmAdaptAlgs->controlQualityF(n1,n2,n3,n4);
  if(flag) return -1;
  int e1Topurge = e1;
  int e2Topurge = e2;

#ifdef DEBUG_1
  CkPrintf("Flipping edge %d->%d on chunk %d\n", n1, n2, theMod->getfmUtil()->getIdx());
#endif
  //FEM_Modify_Lock(theMesh, locknodes, numNodes, lockelems, numElems);
  //if any of the two elements is remote, eat those
  if(n3 < 0) {
    e1Topurge = theMod->fmUtil->eatIntoElement(e1);
    theMesh->e2n_getAll(e1Topurge,elemConn);
    // n3 becomes the node of the returned element that is neither n1 nor n2.
    for(int i=0; i<3; i++) {
      if(elemConn[i]!=n1 && elemConn[i]!=n2) {
        n3 = elemConn[i];
      }
    }
    locknodes[2] = n3;
  }
  if(n4 < 0) {
    e2Topurge = theMod->fmUtil->eatIntoElement(e2);
    theMesh->e2n_getAll(e2Topurge,elemConn);
    // Same for n4 on the second element.
    for(int i=0; i<3; i++) {
      if(elemConn[i]!=n1 && elemConn[i]!=n2) {
        n4 = elemConn[i];
      }
    }
    locknodes[3] = n4;
  }

  FEM_remove_element(theMesh,e1Topurge,0,0);
  FEM_remove_element(theMesh,e2Topurge,0,0);
  // add n1, n3, n4
  elemConn[e1_n1] = n1;
  elemConn[e1_n2] = n4;
  elemConn[e1_n3] = n3;
  lockelems[0] = FEM_add_element(theMesh, elemConn, 3, 0, index);
  //the attributes should really be interpolated, i.e. on both new elems,
  //the values should be an average of the previous two elements
  theMod->fmUtil->copyElemData(0,e1Topurge,lockelems[0]);
  // add n2, n3, n4
  elemConn[e1_n1] = n4;
  elemConn[e1_n2] = n2;
  elemConn[e1_n3] = n3;
  lockelems[1] = FEM_add_element(theMesh, elemConn, 3, 0, index);
  theMod->fmUtil->copyElemData(0,e1Topurge,lockelems[1]); //both of the new elements copy from one element
  //purge the two elements
  FEM_purge_element(theMesh,e1Topurge,0);
  FEM_purge_element(theMesh,e2Topurge,0);

  //get rid of some unnecessary ghost node sends
  for(int i=0; i<4;i++) {
    int nodeToUpdate = -1;
    if(i==0) nodeToUpdate = n1;
    else if(i==1) nodeToUpdate = n2;
    else if(i==2) nodeToUpdate = n3;
    else if(i==3) nodeToUpdate = n4;
    //if any of the chunks sharing this node sends this as a ghost, then all of them have to
    //so find out the set of chunks I need to send this as a ghost to
    //collect info from each of the shared chunks, do a union of all these chunks
    //send this updated list to everyone.
    //if anyone needs to add or delete some ghosts, they will
    int *chkl, numchkl=0;
    CkVec<int> finalchkl;
    // Start the union with this chunk's own ghost-send list; chkl is
    // deleted here whenever numchkl > 0.
    theMod->fmUtil->findGhostSend(nodeToUpdate, chkl, numchkl);
    for(int j=0; j<numchkl; j++) {
      finalchkl.push_back(chkl[j]);
    }
    if(numchkl>0) delete[] chkl;

    // Snapshot the (chunk, index) pairs recorded for this node in the
    // shared list. chunks1/inds1 are only allocated when a record exists;
    // every later use is bounded by numchunks.
    const IDXL_Rec *irec=theMesh->node.shared.getRec(nodeToUpdate);
    int numchunks=0;
    int *chunks1, *inds1;
    if(irec) {
      numchunks = irec->getShared();
      chunks1 = new int[numchunks];
      inds1 = new int[numchunks];
      for(int j=0; j<numchunks; j++) {
        chunks1[j] = irec->getChk(j);
        inds1[j] = irec->getIdx(j);
      }
    }
    // Ask every sharing chunk for its list and merge without duplicates.
    for(int j=0; j<numchunks; j++) {
      findgsMsg *fmsg = meshMod[chunks1[j]].findghostsend(index,inds1[j]);
      if(fmsg->numchks>0) {
        for(int k=0; k<fmsg->numchks; k++) {
          bool shouldbeadded = true;
          for(int l=0; l<finalchkl.size(); l++) {
            if(fmsg->chunks[k]==finalchkl[l]) {
              shouldbeadded = false;
              break;
            }
          }
          if(shouldbeadded) finalchkl.push_back(fmsg->chunks[k]);
        }
      }
      delete fmsg;
    }

    // Flatten the union into a plain array.
    // NOTE(review): finall stays uninitialised when numfinall == 0 and is
    // still passed to UpdateGhostSend (with a count of 0).
    int *finall, numfinall=finalchkl.size();
    if(numfinall>0) finall = new int[numfinall];
    for(int j=0; j<numfinall; j++) finall[j] = finalchkl[j];
    finalchkl.free();

    // Apply the merged list locally, then send it to every sharing chunk.
    theMod->fmUtil->UpdateGhostSend(nodeToUpdate, finall, numfinall);
    for(int j=0; j<numchunks; j++) {
      verifyghostsendMsg *vmsg = new(numfinall)verifyghostsendMsg();
      vmsg->fromChk = index;
      vmsg->sharedIdx = inds1[j];
      vmsg->numchks = numfinall;
      for(int k=0; k<numfinall; k++) vmsg->chunks[k] = finall[k];
      meshMod[chunks1[j]].updateghostsend(vmsg);
    }
    if(numfinall>0) delete[] finall;
    if(numchunks>0) {
      delete[] chunks1;
      delete[] inds1;
    }
  }

  //make sure that it always comes here, don't return with unlocking
  return 1; //return newNode;
}
// Fill this LBInfo from `stats`.
//
// stats        load-balancing statistics to read
// count        number of PEs; the per-PE arrays are indexed 0..count-1
// considerComm when non-zero, per-PE communication overhead is estimated
//              and added to peLoads (and comLoads when that array is set)
//
// peLoads[pe] starts at the PE's bg_walltime and receives the wallTime of
// every object whose to_proc is that PE. bgLoads, objLoads and comLoads are
// only written when they are non-null. minObjLoad, maxObjLoad, msgCount and
// msgBytes are recomputed. The body is empty when CMK_LBDB_ON is off.
void LBInfo::getInfo(BaseLB::LDStats* stats, int count, int considerComm)
{
#if CMK_LBDB_ON
  int i, pe;

  CmiAssert(peLoads);

  clear();

  double alpha = _lb_args.alpha();
  double beeta = _lb_args.beeta();

  minObjLoad = 1.0e20;  // I suppose no object load is beyond this
  maxObjLoad = 0.0;

  msgCount = 0;
  msgBytes = 0;

  if (considerComm) stats->makeCommHash();

  // get background load
  if (bgLoads)
    for(pe = 0; pe < count; pe++)
      bgLoads[pe] = stats->procs[pe].bg_walltime;

  for(pe = 0; pe < count; pe++)
    peLoads[pe] = stats->procs[pe].bg_walltime;

  // Add each object's load to the PE it is assigned to.
  for(int obj = 0; obj < stats->n_objs; obj++)
  {
    int pe = stats->to_proc[obj];
    if (pe == -1) continue;     // this object is out
    CmiAssert(pe >=0 && pe < count);
    double oload = stats->objData[obj].wallTime;
    if (oload < minObjLoad) minObjLoad = oload;
    if (oload > maxObjLoad) maxObjLoad = oload;
    peLoads[pe] += oload;
    if (objLoads) objLoads[pe] += oload;
  }

  // handling of the communication overheads.
  if (considerComm) {
    int* msgSentCount = new int[count]; // # of messages sent by each PE
    int* msgRecvCount = new int[count]; // # of messages received by each PE
    int* byteSentCount = new int[count];// # of bytes sent by each PE
    int* byteRecvCount = new int[count];// # of bytes received by each PE
    for(i = 0; i < count; i++)
      msgSentCount[i] = msgRecvCount[i] = byteSentCount[i] = byteRecvCount[i] = 0;

    int mcast_count = 0;
    for (int cidx=0; cidx < stats->n_comm; cidx++) {
      LDCommData& cdata = stats->commData[cidx];
      int senderPE, receiverPE;
      // Resolve the sending PE: directly for processor-originated records,
      // through the object hash and to_proc otherwise.
      if (cdata.from_proc())
        senderPE = cdata.src_proc;
      else {
        int idx = stats->getHash(cdata.sender);
        if (idx == -1) continue;    // sender has just migrated?
        senderPE = stats->to_proc[idx];
        CmiAssert(senderPE != -1);
      }
      CmiAssert(senderPE < count && senderPE >= 0);

      // find receiver: point-to-point and multicast two cases
      int receiver_type = cdata.receiver.get_type();
      if (receiver_type == LD_PROC_MSG || receiver_type == LD_OBJ_MSG) {
        if (receiver_type == LD_PROC_MSG)
          receiverPE = cdata.receiver.proc();
        else {  // LD_OBJ_MSG
          int idx = stats->getHash(cdata.receiver.get_destObj());
          if (idx == -1) continue;    // receiver has just been removed?
          receiverPE = stats->to_proc[idx];
          CmiAssert(receiverPE != -1);
        }
        CmiAssert(receiverPE < count && receiverPE >= 0);
        // Only traffic between different PEs is charged.
        if(senderPE != receiverPE)
        {
          msgSentCount[senderPE] += cdata.messages;
          byteSentCount[senderPE] += cdata.bytes;
          msgRecvCount[receiverPE] += cdata.messages;
          byteRecvCount[receiverPE] += cdata.bytes;
        }
      }
      else if (receiver_type == LD_OBJLIST_MSG) {
        int nobjs;
        LDObjKey *objs = cdata.receiver.get_destObjs(nobjs);
        mcast_count ++;
        // Destination PEs already charged for this multicast record; each
        // distinct PE is charged once.
        CkVec<int> pes;
        for (i=0; i<nobjs; i++) {
          int idx = stats->getHash(objs[i]);
          CmiAssert(idx != -1);
          if (idx == -1) continue;    // receiver has just been removed?
          receiverPE = stats->to_proc[idx];
          CmiAssert(receiverPE < count && receiverPE >= 0);
          int exist = 0;
          for (int p=0; p<pes.size(); p++)
            if (receiverPE == pes[p]) { exist=1; break; }
          if (exist) continue;
          pes.push_back(receiverPE);
          if(senderPE != receiverPE)
          {
            msgSentCount[senderPE] += cdata.messages;
            byteSentCount[senderPE] += cdata.bytes;
            msgRecvCount[receiverPE] += cdata.messages;
            byteRecvCount[receiverPE] += cdata.bytes;
          }
        }
      }
    }   // end of for
    if (_lb_args.debug())
      CkPrintf("Number of MULTICAST: %d\n", mcast_count);

    // now for each processor, add to its load the send and receive overheads
    for(i = 0; i < count; i++)
    {
      // Sends are weighted by alpha (per message) and beeta (per byte);
      // receives by the PER_*_RECV_OVERHEAD constants.
      double comload = msgRecvCount[i]  * PER_MESSAGE_RECV_OVERHEAD +
                       msgSentCount[i]  * alpha +
                       byteRecvCount[i] * PER_BYTE_RECV_OVERHEAD +
                       byteSentCount[i] * beeta;
      peLoads[i] += comload;
      if (comLoads) comLoads[i] += comload;
      msgCount += msgRecvCount[i] + msgSentCount[i];
      msgBytes += byteRecvCount[i] + byteSentCount[i];
    }
    delete [] msgRecvCount;
    delete [] msgSentCount;
    delete [] byteRecvCount;
    delete [] byteSentCount;
  }
#endif
}
/** Given edge e:(n1, n2), remove the two elements (n1,n2,n3) and
    (n2,n1,n4) adjacent to e, and bisect e by adding node n5. Add elements
    (n1,n5,n3), (n5,n2,n3), (n5,n1,n4) and (n2,n5,n4); returns new node n5.
    Returns -1 without touching the mesh when controlQualityR(n1,n2,n3,n4)
    returns true.

    e1, e2            the two elements adjacent to the edge; e2 == -1 means
                      there is no second element
    e1_n1/e1_n2/e1_n3 connectivity slots used for the elements replacing e1
    e2_n1/e2_n2/e2_n3 connectivity slots used for the elements replacing e2
    n3, n4            the nodes opposite the edge in e1 and e2

          n3                 n3
           o                  o
          / \                /|\
         /   \              / | \
        /     \            /  |  \
       /       \          /   |n5 \
   n1 o---------o n2  n1 o----o----o n2
       \       /          \   |   /
        \     /            \  |  /
         \   /              \ | /
          \ /                \|/
           o                  o
          n4                 n4
*/
int FEM_Adapt::edge_bisect_help(int e1, int e2, int n1, int n2, int e1_n1, int e1_n2, int e1_n3, int e2_n1, int e2_n2, int e2_n3, int n3, int n4)
{
  int n5;
  // The num* counters and locknodes are only referenced by the
  // commented-out FEM_Modify_Lock call below.
  int numNodes = 4;
  int numElems = 2;
  int numNodesNew = 5;
  int numElemsNew = 4;
  int locknodes[5];
  int lockelems[4];
  int elemConn[3];

  locknodes[0] = n1;
  locknodes[1] = n2;
  locknodes[2] = n3;
  locknodes[3] = n4;
  locknodes[4] = -1;
  lockelems[0] = e1;
  lockelems[1] = e2;
  lockelems[2] = -1;
  lockelems[3] = -1;

  //FEM_Modify_Lock(theMesh, locknodes, numNodes, lockelems, numElems);

  int e1chunk=-1, e2chunk=-1, e3chunk=-1, e4chunk=-1, n5chunk=-1;
  int index = theMod->getIdx();

#ifdef DEBUG_1
  CkPrintf("Bisect edge %d->%d on chunk %d\n", n1, n2, theMod->getfmUtil()->getIdx());
#endif
  //verify quality
  bool flag = theMod->fmAdaptAlgs->controlQualityR(n1,n2,n3,n4);
  if(flag) return -1;

  //add node
  // Work out the chunk of each element: -1 when the element is absent, this
  // chunk for a non-negative id, and otherwise the single chunk listed in
  // the element's ghostRecv record.
  if(e1==-1) e1chunk=-1;
  else if(e1>=0) e1chunk=index;
  else {
    int ghostid = FEM_To_ghost_index(e1);
    const IDXL_Rec *irec = theMesh->elem[0].ghost->ghostRecv.getRec(ghostid);
    CkAssert(irec->getShared()==1);
    e1chunk = irec->getChk(0);
  }
  if(e2==-1) e2chunk=-1;
  else if(e2>=0) e2chunk=index;
  else {
    int ghostid = FEM_To_ghost_index(e2);
    const IDXL_Rec *irec = theMesh->elem[0].ghost->ghostRecv.getRec(ghostid);
    CkAssert(irec->getShared()==1);
    e2chunk = irec->getChk(0);
  }
  int adjnodes[2];
  adjnodes[0] = n1;
  adjnodes[1] = n2;
  int *chunks;
  int numChunks=0;
  int forceshared = 0;
  // One chunk is passed to FEM_add_node when both elements are on the same
  // chunk or one of them is missing; otherwise both chunks are passed.
  if(e1chunk==e2chunk || (e1chunk==-1 || e2chunk==-1)) {
    forceshared = -1;
    numChunks = 1;
    chunks = new int[1];
    if(e1chunk!=-1) chunks[0] = e1chunk;
    else chunks[0] = e2chunk;
  }
  else {
    numChunks = 2;
    chunks = new int[2];
    chunks[0] = e1chunk;
    chunks[1] = e2chunk;
  }
  n5 = FEM_add_node(theMesh,adjnodes,2,chunks,numChunks,forceshared);
  delete[] chunks;
  //lock this node immediately
  FEM_Modify_LockN(theMesh, n5, 0);

  //remove elements
  // The chunk values computed above are overwritten with what
  // FEM_remove_element returns.
  e1chunk = FEM_remove_element(theMesh, e1, 0);
  e2chunk = FEM_remove_element(theMesh, e2, 0);  // assumes intelligent behavior when no e2 exists

  // hmm... if e2 is a ghost and we remove it and create all the new elements
  // locally, then we don't really need to add a *shared* node
  //but we are not moving chunk boundaries for bisect
  // All three branches set e3chunk/e4chunk the same way; they differ only
  // in n5chunk, which is not read anywhere else in this function.
  if(e1chunk==-1 || e2chunk==-1) {
    //it is fine, let it continue
    e4chunk = e2chunk;
    e3chunk = e1chunk;
  }
  else if(e1chunk==e2chunk && e1chunk!=index) {
    n5chunk = e1chunk;
    e4chunk = e2chunk;
    e3chunk = e1chunk;
  }
  else {
    //there can be a lot of conditions, but we do not have to do anything special now
    n5chunk = -1;
    e4chunk = e2chunk;
    e3chunk = e1chunk;
  }

  // add n1, n5, n3
  elemConn[e1_n1] = n1;
  elemConn[e1_n2] = n5;
  elemConn[e1_n3] = n3;
  lockelems[0] = FEM_add_element(theMesh, elemConn, 3, 0, e1chunk);
  theMod->fmUtil->copyElemData(0,e1,lockelems[0]);
  // add n2, n5, n3
  elemConn[e1_n1] = n5;
  elemConn[e1_n2] = n2;
  elemConn[e1_n3] = n3;
  lockelems[1] = FEM_add_element(theMesh, elemConn, 3, 0, e3chunk);
  theMod->fmUtil->copyElemData(0,e1,lockelems[1]);
  if (e2 != -1) { // e2 exists
    // add n1, n5, n4
    elemConn[e2_n1] = n1;
    elemConn[e2_n2] = n5;
    elemConn[e2_n3] = n4;
    lockelems[2] = FEM_add_element(theMesh, elemConn, 3, 0, e2chunk);
    theMod->fmUtil->copyElemData(0,e2,lockelems[2]);
    // add n2, n5, n4
    elemConn[e2_n1] = n5;
    elemConn[e2_n2] = n2;
    elemConn[e2_n3] = n4;
    lockelems[3] = FEM_add_element(theMesh, elemConn, 3, 0, e4chunk);
    theMod->fmUtil->copyElemData(0,e2,lockelems[3]);
  }

  // NOTE(review): e2 is purged even when it is -1; presumably
  // FEM_purge_element tolerates that -- confirm.
  FEM_purge_element(theMesh,e1,0);
  FEM_purge_element(theMesh,e2,0);

  //get rid of some unnecessary ghost node sends
  for(int i=0; i<4;i++) {
    int nodeToUpdate = -1;
    if(i==0) nodeToUpdate = n1;
    else if(i==1) nodeToUpdate = n2;
    else if(i==2) nodeToUpdate = n3;
    else if(i==3) nodeToUpdate = n4;
    //if any of the chunks sharing this node sends this as a ghost, then all of them have to
    //so find out the set of chunks I need to send this as a ghost to
    //collect info from each of the shared chunks, do a union of all these chunks
    //send this updated list to everyone.
    //if anyone needs to add or delete some ghosts, they will
    int *chkl, numchkl=0;
    CkVec<int> finalchkl;
    // Start the union with this chunk's own ghost-send list; chkl is
    // deleted here whenever numchkl > 0.
    theMod->fmUtil->findGhostSend(nodeToUpdate, chkl, numchkl);
    for(int j=0; j<numchkl; j++) {
      finalchkl.push_back(chkl[j]);
    }
    if(numchkl>0) delete[] chkl;

    // Snapshot the (chunk, index) pairs recorded for this node in the
    // shared list. chunks1/inds1 are only allocated when a record exists;
    // every later use is bounded by numchunks.
    const IDXL_Rec *irec=theMesh->node.shared.getRec(nodeToUpdate);
    int numchunks=0;
    int *chunks1, *inds1;
    if(irec) {
      numchunks = irec->getShared();
      chunks1 = new int[numchunks];
      inds1 = new int[numchunks];
      for(int j=0; j<numchunks; j++) {
        chunks1[j] = irec->getChk(j);
        inds1[j] = irec->getIdx(j);
      }
    }
    // Ask every sharing chunk for its list and merge without duplicates.
    for(int j=0; j<numchunks; j++) {
      findgsMsg *fmsg = meshMod[chunks1[j]].findghostsend(index,inds1[j]);
      if(fmsg->numchks>0) {
        for(int k=0; k<fmsg->numchks; k++) {
          bool shouldbeadded = true;
          for(int l=0; l<finalchkl.size(); l++) {
            if(fmsg->chunks[k]==finalchkl[l]) {
              shouldbeadded = false;
              break;
            }
          }
          if(shouldbeadded) finalchkl.push_back(fmsg->chunks[k]);
        }
      }
      delete fmsg;
    }

    // Flatten the union into a plain array.
    // NOTE(review): finall stays uninitialised when numfinall == 0 and is
    // still passed to UpdateGhostSend (with a count of 0).
    int *finall, numfinall=finalchkl.size();
    if(numfinall>0) finall = new int[numfinall];
    for(int j=0; j<numfinall; j++) finall[j] = finalchkl[j];
    finalchkl.free();

    // Apply the merged list locally, then send it to every sharing chunk.
    theMod->fmUtil->UpdateGhostSend(nodeToUpdate, finall, numfinall);
    for(int j=0; j<numchunks; j++) {
      verifyghostsendMsg *vmsg = new(numfinall)verifyghostsendMsg();
      vmsg->fromChk = index;
      vmsg->sharedIdx = inds1[j];
      vmsg->numchks = numfinall;
      for(int k=0; k<numfinall; k++) vmsg->chunks[k] = finall[k];
      meshMod[chunks1[j]].updateghostsend(vmsg);
    }
    if(numfinall>0) delete[] finall;
    if(numchunks>0) {
      delete[] chunks1;
      delete[] inds1;
    }
  }

  FEM_Modify_UnlockN(theMesh, n5, 0);
  return n5;
}
// Greedy, communication-aware assignment of objects to processors.
//
// Steps, in order:
//   1. allocate and initialise assigned_array, object_graph and processors;
//   2. build the object communication graph from stats->commData;
//   3. place non-migratable objects on their current PE and put migratable
//      ones into a max-heap keyed by wallTime;
//   4. pop objects from that heap one by one; for each, compare the
//      least-loaded available PE with the PEs hosting objects it already
//      communicates with, and assign it to the cheapest one;
//   5. free all temporary storage.
//
// stats  load-balancing statistics; stats->assign() records each decision.
void GreedyCommLB::work(LDStats* stats)
{
  int pe,obj,com;
  ObjectRecord *x;
  int i;

  // NOTE(review): the format string has no conversion but CkMyPe() is
  // passed as an extra argument.
  if (_lb_args.debug()) CkPrintf("In GreedyCommLB strategy\n",CkMyPe());
  npe = stats->nprocs();
  nobj = stats->n_objs;

  // nmigobj is calculated as the number of migratable objects
  // ObjectHeap maxh is of size nmigobj
  nmigobj = stats->n_migrateobjs;

  stats->makeCommHash();

  assigned_array = new int[nobj];

  object_graph = new graph[nobj];

  init_data(assigned_array,object_graph,npe,nobj);

  // NOTE(review): the macro body ends in ';'. Its single use below is a
  // whole statement, so it only expands to an extra empty statement, but
  // the macro cannot be used inside an expression.
#define MAXDOUBLE   1e10;

  // processor heap
  processors = new processorInfo[npe];
  for (int p=0; p<npe; p++) {
    processors[p].Id = p;
    processors[p].backgroundLoad = stats->procs[p].bg_walltime;
    processors[p].computeLoad = 0;
    processors[p].pe_speed = stats->procs[p].pe_speed;
    // Unavailable PEs get a very large load; available PEs start at 0 or at
    // their background load.
    if (!stats->procs[p].available) {
      processors[p].load = MAXDOUBLE;
    }
    else {
      processors[p].load = 0;
      if (!_lb_args.ignoreBgLoad())
        processors[p].load = processors[p].backgroundLoad;
    }
  }

  // assign communication graph
  for(com =0; com< stats->n_comm;com++) {
    int xcoord=0,ycoord=0;
    LDCommData &commData = stats->commData[com];
    if((!commData.from_proc())&&(commData.recv_type()==LD_OBJ_MSG))
    {
      xcoord = stats->getHash(commData.sender);
      ycoord = stats->getHash(commData.receiver.get_destObj());
      // The else below binds to the inner if: an unresolved endpoint is
      // either skipped or fatal, depending on the flags.
      if((xcoord == -1)||(ycoord == -1))
        if (_lb_args.ignoreBgLoad() || stats->complete_flag==0) continue;
        else CkAbort("Error in search\n");
      add_graph(xcoord,ycoord,commData.bytes, commData.messages);
    }
    else if (commData.recv_type()==LD_OBJLIST_MSG) {
      int nobjs;
      LDObjKey *objs = commData.receiver.get_destObjs(nobjs);
      xcoord = stats->getHash(commData.sender);
      for (int i=0; i<nobjs; i++) {
        ycoord = stats->getHash(objs[i]);
        // Same dangling-else shape as above, keyed on migObjOnly().
        if((xcoord == -1)||(ycoord == -1))
          if (_lb_args.migObjOnly()) continue;
          else CkAbort("Error in search\n");
        //printf("Multicast: %d => %d %d %d\n", xcoord, ycoord, commData.bytes, commData.messages);
        add_graph(xcoord,ycoord,commData.bytes, commData.messages);
      }
    }
  }

  // only build heap with migratable objects,
  // mapping nonmigratable objects to the same processors
  ObjectHeap maxh(nmigobj+1);
  for(obj=0; obj < stats->n_objs; obj++) {
    LDObjData &objData = stats->objData[obj];
    int onpe = stats->from_proc[obj];
    if (!objData.migratable) {
      if (!stats->procs[onpe].available) {
        CmiAbort("Load balancer is not be able to move a nonmigratable object out of an unavailable processor.\n");
      }
      alloc(onpe, obj, objData.wallTime);
      update(stats, obj, onpe);      // update communication cost on other pes
    }
    else {
      x = new ObjectRecord;
      x->id = obj;
      x->pos = obj;
      x->val = objData.wallTime;
      x->pe = onpe;
      maxh.insert(x);
    }
  }

  // Min-heap over the available processors.
  minHeap *lightProcessors = new minHeap(npe);
  for (i=0; i<npe; i++)
    if (stats->procs[i].available)
      lightProcessors->insert((InfoRecord *) &(processors[i]));

  int id,maxid,minpe=0;
  double temp,total_time,min_temp;
  // for(pe=0;pe < count;pe++)
  //   CkPrintf("avail for %d = %d\n",pe,stats[pe].available);

  // Per-PE communication cost of the object being placed; reset to 0 for
  // the touched PEs at the end of every iteration.
  double *pe_comm = new double[npe];
  for (int i=0; i<npe; i++) pe_comm[i] = 0.0;

  for(id = 0;id<nmigobj;id++){
    x = maxh.deleteMax();

    maxid = x->id;

    // Baseline candidate: the PE at the top of the min-heap.
    processorInfo *donor = (processorInfo *) lightProcessors->deleteMin();
    CmiAssert(donor);
    int first_avail_pe = donor->Id;
    temp = compute_com(stats, maxid, first_avail_pe);
    min_temp = temp;
    //total_time = temp + alloc_array[first_avail_pe][nobj];
    total_time = temp + donor->load;
    minpe = first_avail_pe;

    // search all procs for best
    // optimization: only search processors that it communicates
    // and the minimum of all others
    CkVec<int> commPes;
    graph * ptr = object_graph[maxid].next;

    // find out all processors that this obj communicates
    double commload = 0.0;      // total comm load
    for(int com=0;(com<2*nobj)&&(ptr != NULL);com++,ptr=ptr->next){
      int destObj = ptr->id;
      if(assigned_array[destObj] == 0)  // this obj has not been assigned
        continue;
      int destPe = stats->to_proc[destObj];
      if(stats->procs[destPe].available == 0) continue;

      double cload = alpha*ptr->nmsg + beeta*ptr->data;
      pe_comm[destPe] += cload;
      commload += cload;

      int exist = 0;
      for (int pp=0; pp<commPes.size(); pp++)
        if (destPe == commPes[pp]) { exist=1; break; }    // duplicated
      if (!exist) commPes.push_back(destPe);
    }

    int k;
    for(k = 0; k < commPes.size(); k++){
      pe = commPes[k];
      processorInfo *commpe = (processorInfo *) &processors[pe];

      // Placing the object on `pe` saves the traffic it exchanges with
      // objects already living there.
      temp = commload - pe_comm[pe];

      //CkPrintf("check id = %d, processor = %d,com = %lf, pro = %lf, comp=%lf\n", maxid,pe,temp,alloc_array[pe][nobj],total_time);
      if(total_time > (temp + commpe->load)){
        minpe = pe;
        total_time = temp + commpe->load;
        min_temp = temp;
      }
    }
    /* CkPrintf("check id = %d, processor = %d, obj = %lf com = %lf, pro = %lf, comp=%lf\n", maxid,minpe,x->load,min_temp,alloc_array[minpe][nobj],total_time); */

    // CkPrintf("before 2nd alloc\n");
    stats->assign(maxid, minpe);

    alloc(minpe, maxid, x->val + min_temp);

    // now that maxid assigned to minpe, update other pes load
    update(stats, maxid, minpe);

    // update heap
    lightProcessors->insert(donor);
    for(k = 0; k < commPes.size(); k++) {
      pe = commPes[k];
      processorInfo *commpe = (processorInfo *) &processors[pe];
      lightProcessors->update(commpe);
      pe_comm[pe] = 0.0;      // clear
    }

    delete x;
  }

  // free up memory
  delete [] pe_comm;

  delete [] processors;
  delete [] assigned_array;

  delete lightProcessors;

  // Each object_graph entry heads a linked list; only the nodes after the
  // head are individually deleted, the heads go with the array.
  for(int oindex= 0; oindex < nobj; oindex++){
    graph * ptr = &object_graph[oindex];
    ptr = ptr->next;

    while(ptr != NULL){
      graph *cur = ptr;
      ptr = ptr->next;
      delete cur;
    }
  }
  delete [] object_graph;
}
void readinput(char* filename) { FILE *file; char line[128]; char variable[64]; char value[64]; file = fopen(filename, "r"); if(file == NULL) { printf("file read error %s\n", filename); CkExit(); } /* parse the header lines to get the number of vertices*/ while(fgets(line, 128, file) != NULL) { if(strncmp(line, "DIMENSION", 9) == 0) { sscanf(line, "%s : %s", variable, value); verticesNum = atoi(value); }else if(strncmp(line, "EDGE_DATA_SECTION", 17) == 0) { break; } } vector<int> verticeNbs; verticeNbs.resize(verticesNum); /* get the edges, src dest */ int src, dest; int previous=-1; int countptr = 0; int edgeNum = 0; while(fgets(line, sizeof line, file) != NULL && strncmp(line, "-1", 2) != 0) { edgeNum += 1; sscanf(line, "%d %d", &src, &dest); if(src != previous) { inputGraph.push_back(src-1); //CkPrintf("\nSource: %d %d:", src, inputGraph[countptr]); previous = src; countptr = inputGraph.size(); inputGraph.push_back(1); }else { inputGraph[countptr]++; } //CkPrintf(" %d ", dest); inputGraph.push_back(dest-1); } #ifdef YHDEBUG CkPrintf("\n"); for(int i=0; i<inputGraph.size(); i++) { CkPrintf(" %d ", inputGraph[i]); } CkPrintf("+++++++++++++===\n"); #endif fclose(file); }
// Append a new LBDBEntry, constructed from the five arguments in the order
// given, to the lbtables list.
void addEntry(const char *name, LBCreateFn fn, LBAllocFn afn, const char *help, int shown) {
  lbtables.push_back(LBDBEntry(name, fn, afn, help, shown));
}
// Append `name` to the compile_lbs list.
// NOTE(review): presumably only the pointer is stored, so `name` would have
// to outlive the list -- confirm against the element type of compile_lbs.
void addCompiletimeBalancer(const char *name) {
  compile_lbs.push_back(name);
}
void HybridBaseLB::CollectInfo(Location *loc, int n, int fromlevel) { int atlevel = fromlevel + 1; LevelData *lData = levelData[atlevel]; lData->info_recved++; CkVec<Location> &matchedObjs = lData->matchedObjs; std::map<LDObjKey, int> &unmatchedObjs = lData->unmatchedObjs; // sort into matched and unmatched list #if 0 for (int i=0; i<n; i++) { // search and see if we have answer, put to matched // store in unknown int found = 0; for (int obj=0; obj<unmatchedObjs.size(); obj++) { if (loc[i].key == unmatchedObjs[obj].key) { // answer must exist CmiAssert(unmatchedObjs[obj].loc != -1 || loc[i].loc != -1); if (unmatchedObjs[obj].loc == -1) unmatchedObjs[obj].loc = loc[i].loc; matchedObjs.push_back(unmatchedObjs[obj]); unmatchedObjs.remove(obj); found = 1; break; } } if (!found) unmatchedObjs.push_back(loc[i]); } #else for (int i=0; i<n; i++) { std::map<LDObjKey, int>::iterator iter = unmatchedObjs.find(loc[i].key); if (iter != unmatchedObjs.end()) { CmiAssert(iter->second != -1 || loc[i].loc != -1); if (loc[i].loc == -1) loc[i].loc = iter->second; matchedObjs.push_back(loc[i]); unmatchedObjs.erase(iter); } else unmatchedObjs[loc[i].key] = loc[i].loc; } #endif DEBUGF(("[%d] level %d has %d unmatched and %d matched. 
\n", CkMyPe(), atlevel, unmatchedObjs.size(), matchedObjs.size())); if (lData->info_recved == lData->nChildren) { lData->info_recved = 0; if (_lb_args.debug() > 1) CkPrintf("[%d] CollectInfo at level %d started at %f\n", CkMyPe(), atlevel, CkWallTimer()); if (lData->parent != -1) { // send only unmatched ones up the tree CkVec<Location> unmatchedbuf; for(std::map<LDObjKey, int>::const_iterator it = unmatchedObjs.begin(); it != unmatchedObjs.end(); ++it) { unmatchedbuf.push_back(Location(it->first, it->second)); } thisProxy[lData->parent].CollectInfo(unmatchedbuf.getVec(), unmatchedbuf.size(), atlevel); } else { // root // we should have all answers now CmiAssert(unmatchedObjs.size() == 0); // start send match list down thisProxy.PropagateInfo(matchedObjs.getVec(), matchedObjs.size(), atlevel, lData->nChildren, lData->children); lData->statsData->clear(); } } }
// Append `name` to the runtime_lbs list.
// NOTE(review): presumably only the pointer is stored, so `name` would have
// to outlive the list -- confirm against the element type of runtime_lbs.
void addRuntimeBalancer(const char *name) {
  runtime_lbs.push_back(name);
}
void FEM_REFINE2D_Split(int meshID,int nodeID,double *coord,int elemID,double *desiredAreas,int sparseID){ int nnodes = FEM_Mesh_get_length(meshID,nodeID); int nelems = FEM_Mesh_get_length(meshID,elemID); int actual_nodes = nnodes, actual_elems = nelems; FEM_Refine_Operation_Data refine_data; refine_data.meshID = meshID; refine_data.nodeID = nodeID; refine_data.sparseID = sparseID; refine_data.elemID = elemID; refine_data.cur_nodes = FEM_Mesh_get_length(meshID,nodeID); /*Copy the cordinates of the nodes into a vector, the cordinates of the new nodes will be inserted into this vector and will be used to sort all the nodes on the basis of the distance from origin */ CkVec<double> coordVec; for(int i=0;i<nnodes*2;i++){ coordVec.push_back(coord[i]); } refine_data.coordVec = &coordVec; refine_data.coord = coord; /*find out the attributes of the node */ FEM_Entity *e=refine_data.node = FEM_Entity_lookup(meshID,nodeID,"REFINE2D_Mesh"); CkVec<FEM_Attribute *> *attrs = refine_data.attrs = e->getAttrVec(); /* FEM_DataAttribute *boundaryAttr = (FEM_DataAttribute *)e->lookup(FEM_BOUNDARY,"split"); if(boundaryAttr != NULL){ AllocTable2d<int> &boundaryTable = boundaryAttr->getInt(); printf(" Node Boundary flags \n"); for(int i=0;i<nnodes;i++){ printf("Node %d flag %d \n",i,boundaryTable[i][0]); } } */ FEM_Entity *elem = refine_data.elem = FEM_Entity_lookup(meshID,elemID,"REFIN2D_Mesh_elem"); CkVec<FEM_Attribute *> *elemattrs = refine_data.elemattrs = elem->getAttrVec(); FEM_Attribute *connAttr = elem->lookup(FEM_CONN,"REFINE2D_Mesh"); if(connAttr == NULL){ CkAbort("Grrrr element without connectivity \n"); } AllocTable2d<int> &connTable = ((FEM_IndexAttribute *)connAttr)->get(); refine_data.connTable = &connTable; //hashtable to store the new node number as a function of the two old numbers CkHashtableT<intdual,int> newnodes(nnodes); refine_data.newnodes = &newnodes; /* Get the FEM_BOUNDARY data of sparse elements and load it into a hashtable indexed by the 2 node ids that make 
up the edge. The data in the hashtable is the index number of the sparse element */ FEM_Entity *sparse; CkVec<FEM_Attribute *> *sparseattrs; FEM_Attribute *sparseConnAttr, *sparseBoundaryAttr; AllocTable2d<int> *sparseConnTable, *sparseBoundaryTable; CkHashtableT<intdual,int> nodes2sparse(nelems); refine_data.nodes2sparse = &nodes2sparse; if(sparseID != -1){ sparse = refine_data.sparse = FEM_Entity_lookup(meshID,sparseID,"REFINE2D_Mesh_sparse"); refine_data.sparseattrs = sparseattrs = sparse->getAttrVec(); refine_data.sparseConnAttr = sparseConnAttr = sparse->lookup(FEM_CONN,"REFINE2D_Mesh_sparse"); sparseConnTable = &(((FEM_IndexAttribute *)sparseConnAttr)->get()); refine_data.sparseBoundaryAttr = sparseBoundaryAttr = sparse->lookup(FEM_BOUNDARY,"REFINE2D_Mesh_sparse"); if(sparseBoundaryAttr == NULL){ CkAbort("Specified sparse elements without boundary conditions"); } FEM_DataAttribute *validEdgeAttribute = (FEM_DataAttribute *)sparse->lookup(FEM_VALID,"REFINE2D_Mesh_sparse"); if(validEdgeAttribute){ refine_data.validEdge = &(validEdgeAttribute->getInt()); }else{ refine_data.validEdge = NULL; } /* since the default value in the hashtable is 0, to distinguish between uninserted keys and the sparse element with index 0, the index of the sparse elements is incremented by 1 while inserting. 
*/ // printf("[%d] Sparse elements\n",FEM_My_partition()); for(int j=0;j<sparse->size();j++){ if(refine_data.validEdge == NULL || (*(refine_data.validEdge))[j][0]){ int *cdata = (*sparseConnTable)[j]; // printf("%d < %d,%d > \n",j,cdata[0],cdata[1]); nodes2sparse.put(intdual(cdata[0],cdata[1])) = j+1; } } }else{ printf("Edge boundary conditions not passed into FEM_REFINE2D_Split \n"); } //count the actual number of nodes and elements if(refine_data.node->lookup(FEM_VALID,"refine2D_splilt") != NULL){ AllocTable2d<int> &validNodeTable = ((FEM_DataAttribute *)(refine_data.node->lookup(FEM_VALID,"refine2D_splilt")))->getInt(); actual_nodes = countValidEntities(validNodeTable.getData(),nnodes); } if(refine_data.elem->lookup(FEM_VALID,"refine2D_splilt") != NULL){ AllocTable2d<int> &validElemTable = ((FEM_DataAttribute *)(refine_data.elem->lookup(FEM_VALID,"refine2D_splilt")))->getInt(); actual_elems = countValidEntities(validElemTable.getData(),nelems); } DEBUGINT(printf("%d %d \n",nnodes,nelems)); REFINE2D_Split(actual_nodes,coord,actual_elems,desiredAreas,&refine_data); int nSplits= refine_data.nSplits = REFINE2D_Get_Split_Length(); DEBUGINT(printf("called REFINE2D_Split nSplits = %d \n",nSplits)); if(nSplits == 0){ return; } for(int split = 0;split < nSplits;split++){ refineData op; REFINE2D_Get_Split(split,&op); FEM_Refine_Operation(&refine_data,op); } DEBUGINT(printf("Cordinate list length %d according to FEM %d\n",coordVec.size()/2,FEM_Mesh_get_length(meshID,nodeID))); IDXL_Sort_2d(FEM_Comm_shared(meshID,nodeID),coordVec.getVec()); int read = FEM_Mesh_is_get(meshID) ; assert(read); }