/* * Map objects to PE for load balance. It takes in a min heap of objects which * can be transferred and finds suitable receiver PEs. The mapping is stored in * obj_no and the corresponding entry in obj_pe_no indicates the receiver PE. */ void DistributedLB::MapObjsToPe(minHeap &objs, CkVec<int> &obj_no, CkVec<int> &obj_pe_no) { int p_id; double p_load; int rand_pe; // While my load is more than the threshold, try to transfer objs while (my_load > (thr_avg)) { // If there is only one object, then nothing can be done to balance it. if (objs_count < 2) break; // Flag to indicate whether successful in finding a transfer bool success = false; // Get the smallest object InfoRecord* obj = objs.deleteMin(); // No more objects to retrieve if (obj == 0) break; // If transferring this object makes this PE underloaded, then don't // transfer if ((my_load - obj->load) < (thr_avg)) { break; } // Pick random PE based on the probability and the find is successful only // if on transferring the object, that PE does not become overloaded do { rand_pe = PickRandReceiverPeIdx(); if (rand_pe == -1) break; p_id = pe_no[rand_pe]; p_load = loads[rand_pe]; if ((p_load + obj->load) < avg_load) { success = true; } kMaxTrials--; } while (!success && (kMaxTrials > 0)); // No successful in finding a suitable PE to transfer the object if (!success) { break; } // Found an object and a suitable PE to transfer it to. Decrement the obj // count and update the loads. obj_no.insertAtEnd(obj->Id); obj_pe_no.insertAtEnd(p_id); objs_count--; loads[rand_pe] += obj->load; my_load -= obj->load; // Send information to the receiver PE about this obj. This is necessary for // ack as well as finding out how many objs are migrating in thisProxy[p_id].InformMigration(obj->Id, CkMyPe(), my_stats->objData[obj->Id].wallTime, false); // This object is assigned, so we delete it from the heap delete obj; } }
// generate migrate message from stats->from_proc and to_proc LBMigrateMsg * CentralLB::createMigrateMsg(LDStats* stats) { int i; CkVec<MigrateInfo*> migrateInfo; for (i=0; i<stats->n_objs; i++) { LDObjData &objData = stats->objData[i]; int frompe = stats->from_proc[i]; int tope = stats->to_proc[i]; if (frompe != tope) { // CkPrintf("[%d] Obj %d migrating from %d to %d\n", // CkMyPe(),obj,pe,dest); MigrateInfo *migrateMe = new MigrateInfo; migrateMe->obj = objData.handle; migrateMe->from_pe = frompe; migrateMe->to_pe = tope; migrateMe->async_arrival = objData.asyncArrival; migrateInfo.insertAtEnd(migrateMe); } } int migrate_count=migrateInfo.length(); LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg; msg->n_moves = migrate_count; for(i=0; i < migrate_count; i++) { MigrateInfo* item = (MigrateInfo*) migrateInfo[i]; msg->moves[i] = *item; delete item; migrateInfo[i] = 0; } return msg; }
void ComlibSectionInfo::getNodeLocalIndices(int nindices, CkArrayIndex *idxlist, CkArrayID &destArrayID, CkVec<CkArrayIndex> &idx_vec){ int acount = 0; idx_vec.resize(0); CkArray *a = (CkArray *)_localBranch(destArrayID); for(acount = 0; acount < nindices; acount++){ //int p = ComlibGetLastKnown(destArrayID, idxlist[acount]); int p = a->lastKnown(idxlist[acount]); if(p == CkMyPe()) idx_vec.insertAtEnd(idxlist[acount]); } }
// Neighborhood load-balancing strategy: if this PE is overloaded relative to
// the (speed-scaled) average of its unvacated neighbors, or is vacating,
// greedily move the largest objects to the lightest-loaded neighbors.
// Returns a LBMigrateMsg describing the chosen migrations (empty when
// balanced); returns NULL when LB support is compiled out.
LBMigrateMsg* WSLB::Strategy(WSLB::LDStats* stats, int count)
{
#if CMK_LBDB_ON
  // CkPrintf("[%d] Strategy starting\n",CkMyPe());
  // Compute the average load to see if we are overloaded relative
  // to our neighbors
  const double load_factor = 1.05;  // pessimism factor applied to moved objects
  double objload;

  // My own load is wall time minus idle time.
  double myload = myStats.total_walltime - myStats.idletime;
  double avgload = myload;
  int unvacated_neighbors = 0;
  int i;
  for(i=0; i < count; i++) {
    // If the neighbor is vacating, skip him
    if (stats[i].vacate_me)
      continue;

    // Scale times we need appropriately for relative proc speeds.
    // NOTE(review): scale divides by stats[i].usage — presumably nonzero for
    // an unvacated neighbor; confirm a zero-usage neighbor cannot occur here.
    double hisload = stats[i].total_walltime - stats[i].idletime;
    const double hisusage = stats[i].usage;

    const double scale =  (myStats.proc_speed * usage)
      / (stats[i].proc_speed * hisusage);

    hisload *= scale;
    stats[i].total_walltime *= scale;
    stats[i].idletime *= scale;

    // CkPrintf("PE %d %d hisload = %f hisusage = %f\n",
    //          CkMyPe(),i,hisload,hisusage);
    avgload += hisload;
    unvacated_neighbors++;
  }
  if (vacate && unvacated_neighbors == 0)
    CkPrintf("[%d] ALL NEIGHBORS WANT TO VACATE!!!\n",CkMyPe());
  // Average over the neighborhood including myself.
  avgload /= (unvacated_neighbors+1);

  CkVec<MigrateInfo*> migrateInfo;

  // If we want to vacate, we always dump our load, otherwise
  // only if we are overloaded
  if (vacate || myload > avgload) {
    // CkPrintf("[%d] OVERLOAD My load is %f, average load is %f\n",
    //          CkMyPe(),myload,avgload);

    // First, build heaps of other processors and my objects
    // Then assign objects to other processors until either
    //   - The smallest remaining object would put me below average, or
    //   - I only have 1 object left, or
    //   - The smallest remaining object would put someone else
    //     above average

    // Build heaps: min-heap of candidate receiver PEs by load.
    minHeap procs(count);
    for(i=0; i < count; i++) {
      // If all my neighbors vacate, I won't have anyone to give work
      // to
      if (!stats[i].vacate_me) {
        InfoRecord* item = new InfoRecord;
        item->load = stats[i].total_walltime - stats[i].idletime;
        item->Id = stats[i].from_pe;
        procs.insert(item);
      }
    }

    // Max-heap of my objects by wall time, so we hand off the biggest first.
    maxHeap objs(myStats.obj_data_sz);
    for(i=0; i < myStats.obj_data_sz; i++) {
      InfoRecord* item = new InfoRecord;
      item->load = myStats.objData[i].wallTime;
      item->Id = i;
      objs.insert(item);
    }

    int objs_here = myStats.obj_data_sz;
    do {
      // if (objs_here <= 1) break;  // For now, always leave 1 object

      InfoRecord* p;
      InfoRecord* obj;

      // Get the lightest-loaded processor
      p = procs.deleteMin();
      if (p == 0) {
        // CkPrintf("[%d] No destination PE found!\n",CkMyPe());
        break;
      }

      // Get the biggest object that fits on this processor.
      bool objfound = false;
      do {
        obj = objs.deleteMax();
        if (obj == 0) break;

        // Cost of the object on the receiver, inflated by load_factor.
        objload = load_factor * obj->load;

        double new_p_load = p->load + objload;
        double my_new_load = myload - objload;
        // If we're vacating, the biggest object is always good.
        // Otherwise, only take it if it doesn't produce overload
        if (vacate || new_p_load < my_new_load) {
          objfound = true;
        } else {
          // This object is too big, so throw it away
          // CkPrintf("[%d] Can't move object w/ load %f to proc %d load %f %f\n",
          //          CkMyPe(),obj->load,p->Id,p->load,avgload);
          delete obj;
        }
      } while (!objfound);

      if (!objfound) {
        // CkPrintf("[%d] No suitable object found!\n",CkMyPe());
        break;
      }

      const int me = CkMyPe();
      // Apparently we can give this object to this processor
      if (_lb_args.debug())
        CkPrintf("[%d] Obj %d of %d migrating from %d to %d\n",
                 CkMyPe(),obj->Id,myStats.obj_data_sz,me,p->Id);

      // Record the migration decision.
      MigrateInfo* migrateMe = new MigrateInfo;
      migrateMe->obj = myStats.objData[obj->Id].handle;
      migrateMe->from_pe = me;
      migrateMe->to_pe = p->Id;
      migrateInfo.insertAtEnd(migrateMe);

      objs_here--;

      // We may want to assign more to this processor, so lets
      // update it and put it back in the heap
      p->load += objload;
      myload -= objload;
      procs.insert(p);

      // This object is assigned, so we delete it from the heap
      delete obj;

    } while(vacate || myload > avgload);

    // Now empty out the heaps (free the remaining records).
    InfoRecord* p;
    while (NULL!=(p=procs.deleteMin()))
      delete p;
    InfoRecord* obj;
    while (NULL!=(obj=objs.deleteMax()))
      delete obj;
  }

  // Now build the message to actually perform the migrations
  int migrate_count=migrateInfo.length();
  // if (migrate_count) {
  //   CkPrintf("PE %d: Sent away %d of %d objects\n",
  //            CkMyPe(),migrate_count,myStats.obj_data_sz);
  // }
  LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg;
  msg->n_moves = migrate_count;
  for(i=0; i < migrate_count; i++) {
    MigrateInfo* item = (MigrateInfo*) migrateInfo[i];
    msg->moves[i] = *item;     // copy the record into the varsize message
    delete item;               // then free the temporary
    migrateInfo[i] = 0;
  }
  return msg;
#else
  return NULL;
#endif
}
/**
 * This function implements a strategy similar to the one used in the
 * centralized case in NamdCentLB: it builds the compute/patch/processor
 * arrays for this group level, runs the selected torus/refinement balancer,
 * and converts the resulting placement into a migrate message.
 *
 * @param stats  group-level load statistics for the PEs at this level
 * @return migrate message describing the chosen compute moves
 */
CLBMigrateMsg* NamdHybridLB::GrpLevelStrategy(LDStats* stats) {
  int numProcessors = stats->nprocs();  // number of processors at group level
  int numPatches = PatchMap::Object()->numPatches();
  ComputeMap *computeMap = ComputeMap::Object();
  const int numComputes = computeMap->numComputes();  // NOTE(review): unused below
  const int numGroupComputes = stats->n_migrateobjs;
  const SimParameters* simParams = Node::Object()->simParameters;

  // Lazily allocate the balancer working arrays (freed at the end of this
  // call and reset to NULL, so they are re-created on each invocation).
  if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
  // these data structures are global and need to be distributed
  if ( ! patchArray ) patchArray = new patchInfo[numPatches];
  if ( ! computeArray ) computeArray = new computeInfo[numGroupComputes];
  if ( ! from_procs ) from_procs = new int[numGroupComputes];

  // Populate the arrays from stats; returns how many computes may move.
  int nMoveableComputes = buildData(stats);
  CmiAssert(nMoveableComputes <= numGroupComputes);

#if LDB_DEBUG
#define DUMP_LDBDATA 1
#define LOAD_LDBDATA 1
#endif

#if DUMP_LDBDATA
  dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
#elif LOAD_LDBDATA
  loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
  // CkExit();
#endif

  // Aggregate statistics: average/max compute load and available PE count.
  double averageLoad = 0.;
  double avgCompute;
  double maxCompute;
  int maxComputeId;
  int numPesAvailable;
  {
    int i;
    double total = 0.;
    maxCompute = 0.;
    int maxi = 0;
    for (i=0; i<nMoveableComputes; i++) {
      double load = computeArray[i].load;
      total += load;
      if ( load > maxCompute ) { maxCompute = load;  maxi = i; }
    }
    avgCompute = total / nMoveableComputes;
    maxComputeId = computeArray[maxi].handle.id.id[0];

    int P = stats->nprocs();
    numPesAvailable = 0;
    for (i=0; i<P; i++) {
      if (processorArray[i].available) {
        ++numPesAvailable;
        total += processorArray[i].backgroundLoad;
      }
    }
    if (numPesAvailable == 0)
      NAMD_die("No processors available for load balancing!\n");

    // Average load includes compute plus background load of available PEs.
    averageLoad = total/numPesAvailable;
  }

  // On step 1 only: count splittable computes (those with partitions) and
  // track the largest unsplittable compute's load.
  int i_split = 0;
  double maxUnsplit = 0.;

  if ( step() == 1 ) {
    for (int i=0; i<nMoveableComputes; i++) {
      const int cid = computeArray[i].handle.id.id[0];
      if ( computeMap->numPartitions(cid) == 0 ) {
        const double load = computeArray[i].load;
        if ( load > maxUnsplit ) maxUnsplit = load;
        continue;
      }
      ++i_split;
    }
  }

  {
    // Always send the summary message to PE 0; the cid/load payload is only
    // filled on step 1 (i_split is 0 otherwise).
    SplitComputesMsg *msg = new(i_split,i_split) SplitComputesMsg;
    msg->maxUnsplit = maxUnsplit;
    msg->averageLoad = averageLoad;
    msg->avgCompute = avgCompute;
    msg->maxCompute = maxCompute;
    msg->maxComputeId = maxComputeId;
    msg->nMoveableComputes = nMoveableComputes;
    msg->numPesAvailable = numPesAvailable;
    msg->n = i_split;

    if ( step() == 1 ) {
      i_split = 0;
      for (int i=0; i<nMoveableComputes; i++) {
        // Keep every compute where it is on the splitting-only step.
        computeArray[i].processor = computeArray[i].oldProcessor;
        const int cid = computeArray[i].handle.id.id[0];
        if ( computeMap->numPartitions(cid) == 0 ) {
          continue;
        }
        msg->cid[i_split] = cid;
        msg->load[i_split] = computeArray[i].load;
        ++i_split;
      }
    }

    thisProxy[0].splitComputes(msg);
  }

  // Dispatch to the configured placement strategy. Step 1 does compute
  // splitting only; LDBSTRAT_OLD is rejected outright (the calls after the
  // NAMD_die are dead code kept for reference).
  if ( step() == 1 ) {
    // compute splitting only
  } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) { // default
    if (step() < 4)
      TorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
    else
      RefineTorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors, 1);
  } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
    TorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
  } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
    RefineTorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors, 1);
  } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
    NAMD_die("Old load balancer strategy is not compatible with hybrid balancer.");
    if (step() < 4)
      Alg7(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
    else
      RefineOnly(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
  }

#if LDB_DEBUG && USE_TOPOMAP
  // Diagnostic: total topology hops between patches and their proxies.
  TopoManager tmgr;
  int pe1, pe2, pe3, hops=0;
  /* This is double counting the hops
  for(int i=0; i<nMoveableComputes; i++) {
    pe1 = computeArray[i].processor;
    pe2 = patchArray[computeArray[i].patch1].processor;
    pe3 = patchArray[computeArray[i].patch2].processor;
    hops += tmgr.getHopsBetweenRanks(pe1, pe2);
    if(computeArray[i].patch1 != computeArray[i].patch2)
      hops += tmgr.getHopsBetweenRanks(pe1, pe3);
  }*/
  for (int i=0; i<numPatches; i++) {
    //int num = patchArray[i].proxiesOn.numElements();
    pe1 = patchArray[i].processor;
    Iterator nextProc;
    processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
    while (p) {
      pe2 = p->Id;
      hops += tmgr.getHopsBetweenRanks(pe1, pe2);
      p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
    }
  }
  CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
#endif

#if DUMP_LDBDATA
  dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
#elif LOAD_LDBDATA
  dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
  // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
  // CkExit();
#endif

  // For error checking:
  // Count up computes, to see if somebody doesn't have any computes
  int i;
#if 0
  int* computeCount = new int[numProcessors];
  for(i=0; i<numProcessors; i++)
    computeCount[i]=0;
  for(i=0; i<nMoveableComputes; i++)
    computeCount[computeArray[i].processor]++;
  for(i=0; i<numProcessors; i++) {
    if (computeCount[i]==0)
      iout << iINFO <<"Warning: Processor " << i << " has NO moveable computes.\n" << endi;
  }
  delete [] computeCount;
#endif

  // Translate the new placement into MigrateInfo records. from_procs holds
  // group-relative source PEs; stats->procs[0].pe is the base of this group.
  CkVec<MigrateInfo *> migrateInfo;
  for(i=0;i<nMoveableComputes;i++) {
    if (computeArray[i].processor != from_procs[i]+stats->procs[0].pe) {
      /* CkPrintf("[%d] Obj %d migrating from %d (%d) to %d\n",
                 CkMyPe(),computeArray[i].handle.id.id[0],
                 from_procs[i], computeArray[i].oldProcessor,
                 computeArray[i].processor); */
      MigrateInfo *migrateMe = new MigrateInfo;
      migrateMe->obj = computeArray[i].handle;
      //migrateMe->from_pe = computeArray[i].oldProcessor;
      // A from_proc equal to numProcessors means the source is outside this
      // group; encode it as -1.
      int frompe = from_procs[i];
      if (frompe == numProcessors)
        frompe = -1;
      else
        frompe = frompe + stats->procs[0].pe;
      migrateMe->from_pe = frompe;
      migrateMe->to_pe = computeArray[i].processor;
      if (frompe == -1) {
        // don't know yet which processor this compute belongs to, but
        // inform receiver
        LDObjData obj;
        obj.handle = computeArray[i].handle;
        thisProxy[computeArray[i].processor].ObjMigrated(obj, NULL, 0, currentLevel-1);
      }
      migrateInfo.insertAtEnd(migrateMe);

      // sneak in updates to ComputeMap
      computeMap->setNewNode(computeArray[i].handle.id.id[0],
                             computeArray[i].processor);
    }
  }
  // CkPrintf("LOAD BALANCING READY %d\n",CkMyPe());

  LBMigrateMsg* msg;
  msg = createMigrateMsg(migrateInfo, numProcessors);

  // Record per-PE loads and the PE range for later summary/statistics.
  // NOTE(review): peLoads is allocated each call — presumably freed by the
  // consumer; confirm there is no leak across LB steps.
  peLoads = new double [numProcessors];
  startPE = processorArray[0].Id;
  endPE = processorArray[numProcessors-1].Id;
  // CkPrintf("[%d] numProcessors=%d, %d to %d\n",CkMyPe(),numProcessors,processorArray[0].Id,processorArray[numProcessors-1].Id);
  for (i=0; i<numProcessors; i++) {
    peLoads[i] = processorArray[i].load;
  }

  // Release the working arrays; NULLing them triggers re-allocation on the
  // next call (see the lazy allocation at the top).
  delete [] from_procs;
  delete [] processorArray;
  delete [] patchArray;
  delete [] computeArray;

  from_procs = NULL;
  processorArray = NULL;
  patchArray = NULL;
  computeArray = NULL;

  return msg;
}
// Communication-aware neighborhood strategy: when overloaded relative to the
// (speed-scaled) neighborhood average, move objects towards their
// "communication center" — the comm-volume-weighted mean coordinate offset of
// their message destinations on the machine topology. Returns a migrate
// message (NULL when LB support is compiled out).
LBMigrateMsg* NeighborCommLB::Strategy(NborBaseLB::LDStats* stats, int n_nbrs)
{
  // Local debug switches (compiled-in, default off).
  bool _lb_debug=0;
  bool _lb_debug1=0;
  bool _lb_debug2=0;
#if CMK_LBDB_ON
  // CkPrintf("[%d] Strategy starting\n",CkMyPe());
  // Compute the average load to see if we are overloaded relative
  // to our neighbors
  double myload = myStats.total_walltime - myStats.idletime;
  double avgload = myload;
  int i;
  if (_lb_debug)
    CkPrintf("[%d] Neighbor Count = %d\n", CkMyPe(), n_nbrs);
  for(i=0; i < n_nbrs; i++) {
    // Scale times we need appropriately for relative proc speeds
    const double scale = ((double)myStats.pe_speed)
      / stats[i].pe_speed;

    stats[i].total_walltime *= scale;
    stats[i].idletime *= scale;

    avgload += (stats[i].total_walltime - stats[i].idletime);
  }
  // Average over the neighborhood including myself.
  avgload /= (n_nbrs + 1);

  CkVec<MigrateInfo*> migrateInfo;

  if (_lb_debug)
    CkPrintf("[%d] My load is %lf\n", CkMyPe(),myload);
  if (myload > avgload) {
    if (_lb_debug1)
      CkPrintf("[%d] OVERLOAD My load is %lf average load is %lf\n",
               CkMyPe(), myload, avgload);

    // First of all, explore the topology and get dimension
    LBTopology* topo;
    {
      LBtopoFn topofn;
      topofn = LBTopoLookup(_lbtopo);
      if (topofn == NULL) {
        char str[1024];
        CmiPrintf("NeighborCommLB> Fatal error: Unknown topology: %s. Choose from:\n", _lbtopo);
        printoutTopo();
        sprintf(str, "NeighborCommLB> Fatal error: Unknown topology: %s", _lbtopo);
        CmiAbort(str);
      }
      topo = topofn(CkNumPes());
    }
    int dimension = topo->get_dimension();
    if (_lb_debug2)
      CkPrintf("[%d] Topology dimension = %d\n", CkMyPe(), dimension);
    if (dimension == -1) {
      char str[1024];
      CmiPrintf("NeighborCommLB> Fatal error: Unsupported topology: %s. Only some of the following are supported:\n", _lbtopo);
      printoutTopo();
      sprintf(str, "NeighborCommLB> Fatal error: Unsupported topology: %s", _lbtopo);
      CmiAbort(str);
    }

    // Position of this processor
    int *myProc = new int[dimension];
    topo->get_processor_coordinates(myStats.from_pe, myProc);
    if (_lb_debug2) {
      char temp[1000];
      char* now=temp;
      sprintf(now, "[%d] Coordinates = [", CkMyPe());
      now += strlen(now);
      for(i=0;i<dimension;i++) {
        sprintf(now, "%d ", myProc[i]);
        now +=strlen(now);
      }
      sprintf(now, "]\n");
      now += strlen(now);
      // NOTE(review): temp is used as the format string; if it ever contained
      // '%' this would misbehave — CkPrintf("%s", temp) would be safer.
      CkPrintf(temp);
    }

    // Then calculate the communication center of each object
    // The communication center is relative to myProc
    double **commcenter = new double*[myStats.n_objs];
    double *commamount = new double[myStats.n_objs];
    if(_lb_debug1) {
      CkPrintf("[%d] Number of Objs = %d \n", CkMyPe(), myStats.n_objs);
    }
    {
      memset(commamount, 0, sizeof(double)*myStats.n_objs);
      for(i=0; i<myStats.n_objs;i++) {
        commcenter[i] = new double[dimension];
        memset(commcenter[i], 0, sizeof(double)*dimension);
      }
      //coordinates of procs
      int *destProc = new int[dimension];
      int *diff = new int[dimension];

      //for each comm entry
      for(i=0; i<myStats.n_comm;i++) {
        int j;
        //for each object
        //TODO use hashtable to accelerate
        for(j=0; j<myStats.n_objs;j++)
          if((myStats.objData[j].handle.omhandle.id == myStats.commData[i].sender.omId)
             && (myStats.objData[j].handle.id == myStats.commData[i].sender.objId)) {
            // Estimated cost of this comm record for the sending object.
            double comm= PER_MESSAGE_SEND_OVERHEAD * myStats.commData[i].messages
              + PER_BYTE_SEND_OVERHEAD * myStats.commData[i].bytes;
            commamount[j] += comm;
            int dest_pe = myStats.commData[i].receiver.lastKnown();
            if(dest_pe==-1) continue;  // receiver location unknown; skip
            topo->get_processor_coordinates(dest_pe, destProc);
            topo->coordinate_difference(myProc, destProc, diff);
            int k;
            for(k=0;k<dimension;k++) {
              commcenter[j][k] += diff[k] * comm;
            }
          }
      }

      // Normalize each center by the object's total comm amount.
      for(i=0; i<myStats.n_objs;i++)
        if (commamount[i]>0) {
          int k;
          double ratio = 1.0 /commamount[i];
          for(k=0;k<dimension;k++)
            commcenter[i][k] *= ratio;
        } else {
          //if no communication, set commcenter to myself
          int k;
          for(k=0;k<dimension;k++)
            commcenter[i][k] = 0;
        }

      delete [] destProc;
      delete [] diff;
    }
    if(_lb_debug2) {
      for(i=0;i<myStats.n_objs;i++) {
        char temp[1000];
        char* now=temp;
        sprintf(now, "[%d] Objs [%d] Load = %lf Comm Amount = %lf ",
                CkMyPe(), i, myStats.objData[i].wallTime, commamount[i] );
        now += strlen(now);
        sprintf(now, "Comm Center = [");
        now += strlen(now);
        int j;
        for(j=0;j<dimension;j++) {
          sprintf(now, "%lf ", commcenter[i][j]);
          now += strlen(now);
        }
        sprintf(now, "]\n");
        now += strlen(now);
        // NOTE(review): same format-string caveat as above.
        CkPrintf(temp);
      }
    }

    // First, build heaps of my objects
    // Then assign objects to the least loaded other processors until either
    //   - The smallest remaining object would put me below average, or
    //   - I only have 1 object left, or
    //   - The smallest remaining object would put someone else
    //     above average
    // Note: Object can only move towards its communication center!

    // My neighbors: id, scaled load, and coordinate offset from me.
    typedef struct _procInfo{
      int id;
      double load;
      int* difference;
    } procInfo;

    if(_lb_debug2) {
      CkPrintf("[%d] Querying neighborhood topology...\n", CkMyPe() );
    }
    procInfo* neighbors = new procInfo[n_nbrs];
    {
      int *destProc = new int[dimension];
      for(i=0; i < n_nbrs; i++) {
        neighbors[i].id = stats[i].from_pe;
        neighbors[i].load = stats[i].total_walltime - stats[i].idletime;
        neighbors[i].difference = new int[dimension];
        topo->get_processor_coordinates(neighbors[i].id, destProc);
        topo->coordinate_difference(myProc, destProc, neighbors[i].difference);
      }
      delete[] destProc;
    }
    if(_lb_debug2) {
      CkPrintf("[%d] Building obj heap...\n", CkMyPe() );
    }
    // My objects: build heaps (largest object first).
    maxHeap objs(myStats.n_objs);
    double totalObjLoad=0.0;
    for(i=0; i < myStats.n_objs; i++) {
      InfoRecord* item = new InfoRecord;
      item->load = myStats.objData[i].wallTime;
      totalObjLoad += item->load;
      item->Id = i;
      objs.insert(item);
    }
    if(_lb_debug2) {
      CkPrintf("[%d] Beginning distributing objects...\n", CkMyPe() );
    }
    // for each object, pick the least-loaded neighbor that (a) would still be
    // lighter than me after the move and (b) lies in the direction of the
    // object's communication center (or the object has no communication).
    while(objs.numElements()>0) {
      InfoRecord* obj;
      obj = objs.deleteMax();
      int bestDest = -1;
      for(i = 0; i < n_nbrs; i++)
        if(neighbors[i].load +obj->load < myload - obj->load
           && (bestDest==-1 || neighbors[i].load < neighbors[bestDest].load)) {
          // Dot product of comm center with the neighbor's direction.
          double dotsum=0;
          int j;
          for(j=0; j<dimension; j++)
            dotsum += (commcenter[obj->Id][j] * neighbors[i].difference[j]);
          if(myload - avgload < totalObjLoad || dotsum>0.5
             || (dotsum>0 && objs.numElements()==0) || commamount[obj->Id]==0) {
            bestDest = i;
          }
        }
      // Best place for the object
      if(bestDest != -1) {
        if(_lb_debug1) {
          CkPrintf("[%d] Obj[%d] will move to Proc[%d]\n",
                   CkMyPe(), obj->Id, neighbors[bestDest].id);
        }
        //Migrate it
        MigrateInfo* migrateMe = new MigrateInfo;
        migrateMe->obj = myStats.objData[obj->Id].handle;
        migrateMe->from_pe = myStats.from_pe;
        migrateMe->to_pe = neighbors[bestDest].id;
        migrateInfo.insertAtEnd(migrateMe);

        //Modify loads
        myload -= obj->load;
        neighbors[bestDest].load += obj->load;
      }
      totalObjLoad -= obj->load;
      delete obj;
    }
    if(_lb_debug2) {
      CkPrintf("[%d] Clearing Up...\n", CkMyPe());
    }
    // Free all per-neighbor and per-object scratch storage.
    for(i=0; i<n_nbrs; i++) {
      delete[] neighbors[i].difference;
    }
    delete[] neighbors;
    delete[] myProc;
    for(i=0;i<myStats.n_objs;i++) {
      delete[] commcenter[i];
    }
    delete[] commcenter;
    delete[] commamount;
  }
  if(_lb_debug2) {
    CkPrintf("[%d] Generating result...\n", CkMyPe());
  }
  // Now build the message to actually perform the migrations
  int migrate_count=migrateInfo.length();
  // if (migrate_count > 0) {
  //   CkPrintf("PE %d migrating %d elements\n",CkMyPe(),migrate_count);
  // }
  LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg;
  msg->n_moves = migrate_count;
  for(i=0; i < migrate_count; i++) {
    MigrateInfo* item = (MigrateInfo*) migrateInfo[i];
    msg->moves[i] = *item;     // copy into the varsize message
    delete item;               // then free the temporary
    migrateInfo[i] = 0;
  }
  return msg;
#else
  return NULL;
#endif
}
LBMigrateMsg * HybridBaseLB::createMigrateMsg(LDStats* stats) { #if CMK_LBDB_ON int i; LevelData *lData = levelData[currentLevel]; CkVec<MigrateInfo*> migrateInfo; // stats contains all objects that belong to this group // outObjs contains objects that are migrated out for (i=0; i<stats->n_objs; i++) { LDObjData &objData = stats->objData[i]; int frompe = stats->from_proc[i]; int tope = stats->to_proc[i]; CmiAssert(tope != -1); if (frompe != tope) { // CkPrintf("[%d] Obj %d migrating from %d to %d\n", // CkMyPe(),obj,pe,dest); #if 0 // delay until a summary is printed if (frompe == lData->nChildren) { frompe = -1; CmiAssert(tope != -1 && tope != lData->nChildren); } else frompe = lData->children[frompe]; if (tope != -1) { CmiAssert(tope < lData->nChildren); tope = lData->children[tope]; } #endif MigrateInfo *migrateMe = new MigrateInfo; migrateMe->obj = objData.handle; migrateMe->from_pe = frompe; migrateMe->to_pe = tope; migrateMe->async_arrival = objData.asyncArrival; migrateInfo.insertAtEnd(migrateMe); } else CmiAssert(frompe != lData->nChildren); } // merge outgoing objs CkVec<MigrationRecord> &outObjs = lData->outObjs; for (i=0; i<outObjs.size(); i++) { MigrateInfo *migrateMe = new MigrateInfo; migrateMe->obj = outObjs[i].handle; migrateMe->from_pe = outObjs[i].fromPe; migrateMe->to_pe = -1; // migrateMe->async_arrival = objData.asyncArrival; migrateInfo.insertAtEnd(migrateMe); } // construct migration message int migrate_count=migrateInfo.length(); DEBUGF(("[%d] level: %d has %d migrations. 
\n", CkMyPe(), currentLevel, migrate_count)); // ignore avail_vector, etc for now //LBMigrateMsg * msg = new(migrate_count,count,count,0) LBMigrateMsg; LBMigrateMsg * msg = new(migrate_count,0,0,0) LBMigrateMsg; msg->level = currentLevel; msg->n_moves = migrate_count; for(i=0; i < migrate_count; i++) { MigrateInfo* item = (MigrateInfo*) migrateInfo[i]; msg->moves[i] = *item; delete item; migrateInfo[i] = 0; DEBUGF(("[%d] obj (%d %d %d %d) migrate from %d to %d\n", CkMyPe(), item->obj.objID().id[0], item->obj.objID().id[1], item->obj.objID().id[2], item->obj.objID().id[3], item->from_pe, item->to_pe)); } if (_lb_args.printSummary()) printSummary(stats, stats->nprocs()); // translate relative pe number to its real number for(i=0; i < migrate_count; i++) { MigrateInfo* move = &msg->moves[i]; if (move->to_pe != -1) { if (move->from_pe == lData->nChildren) { // an object from outside group move->from_pe = -1; CmiAssert(move->to_pe != -1 && move->to_pe != lData->nChildren); } else move->from_pe = lData->children[move->from_pe]; CmiAssert(move->to_pe < lData->nChildren); move->to_pe = lData->children[move->to_pe]; } } return msg; #else return NULL; #endif }