// generate migrate message from stats->from_proc and to_proc LBMigrateMsg * CentralLB::createMigrateMsg(LDStats* stats) { int i; CkVec<MigrateInfo*> migrateInfo; for (i=0; i<stats->n_objs; i++) { LDObjData &objData = stats->objData[i]; int frompe = stats->from_proc[i]; int tope = stats->to_proc[i]; if (frompe != tope) { // CkPrintf("[%d] Obj %d migrating from %d to %d\n", // CkMyPe(),obj,pe,dest); MigrateInfo *migrateMe = new MigrateInfo; migrateMe->obj = objData.handle; migrateMe->from_pe = frompe; migrateMe->to_pe = tope; migrateMe->async_arrival = objData.asyncArrival; migrateInfo.insertAtEnd(migrateMe); } } int migrate_count=migrateInfo.length(); LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg; msg->n_moves = migrate_count; for(i=0; i < migrate_count; i++) { MigrateInfo* item = (MigrateInfo*) migrateInfo[i]; msg->moves[i] = *item; delete item; migrateInfo[i] = 0; } return msg; }
/**
 * Look up the allocation function registered for a load balancer name.
 *
 * Only the part of `name` before the first ':' or ',' is compared; if
 * neither delimiter is present the whole string is used (the same rule
 * search() applies).
 *
 * @param name  NUL-terminated balancer name, optionally followed by
 *              ':' or ',' and further text.
 * @return the `afn` of the first table entry whose name matches on that
 *         many leading characters, or NULL if none matches.
 */
LBAllocFn getLBAllocFn(const char *name)
{
  const char *ptr = strpbrk(name, ":,");
  // strpbrk() returns NULL when no delimiter exists; fall back to the full
  // length instead of computing NULL - name.
  const int slen = (ptr != NULL) ? static_cast<int>(ptr - name)
                                 : static_cast<int>(strlen(name));
  for (int i=0; i<lbtables.length(); i++) {
    if (0==strncmp(name, lbtables[i].name, slen))
      return lbtables[i].afn;
  }
  return NULL;
}
/**
 * Find the creation function registered for a load balancer name.
 *
 * The comparison covers the characters of `name` that precede the first
 * ':' or ',' (or the entire string when neither occurs).
 *
 * @return the `cfn` of the first matching table entry, or NULL.
 */
LBCreateFn search(const char *name)
{
  char *delim = strpbrk((char *)name, ":,");

  int prefixLen;
  if (delim == NULL)
    prefixLen = strlen(name);
  else
    prefixLen = delim - name;

  int idx = 0;
  while (idx < lbtables.length()) {
    if (strncmp(name, lbtables[idx].name, prefixLen) == 0)
      return lbtables[idx].cfn;
    ++idx;
  }
  return NULL;
}
void displayLBs() { CmiPrintf("\nAvailable load balancers:\n"); for (int i=0; i<lbtables.length(); i++) { LBDBEntry &entry = lbtables[i]; if (entry.shown) CmiPrintf("* %s: %s\n", entry.name, entry.help); } CmiPrintf("\n"); }
// Function that receives a set of particles and updates the // forces of them into the local set void Patch::receiveParticles(CkVec<Particle> &updates) { updateCount++; for( int i=0; i < updates.length(); i++) { incomingParticles.push_back(updates[i]); } // if all the incoming particle updates have been received, we must check // whether to proceed with next step if(updateCount == numNbrs ) { updateCount = 0; incomingFlag = true; checkNextStep(); } }
/*
 * The taskGraphArray element that actually does the work.
 *
 * Construction records the solver and the result callback, allocates one
 * slot per dependency, sends a data request to each dependency, and then
 * calls tryToSolve() (relevant right away when there are no dependencies).
 */
taskGraphArray::taskGraphArray(
  CkVec<CkArrayIndex> deps,
  taskGraphSolver *data,
  CkCallback returnResults
) : Waiting() {
  // Basic state
  Self = data;
  ReturnResults = returnResults;
  isSolved = false;

  // Bookkeeping for the data we are waiting on
  DepsReceived = 0;
  DepsCount = deps.length();
  DepsData = new taskGraphSolver*[DepsCount];

  // Request data from every element we depend on
  CProxy_taskGraphArray neighbor(thisArrayID);
  int d = 0;
  while (d < DepsCount) {
    neighbor(deps[d]).requestData(thisIndexMax);
    ++d;
  }

  // With nothing to wait for we may already be solvable
  tryToSolve();
}
/**
 * Decide which local objects to hand to neighboring processors.
 *
 * The local load (total_walltime - idletime) is compared against the
 * average over this processor and all non-vacating neighbors, after each
 * neighbor's times are rescaled by relative processor speed and usage.
 * If this processor is vacating, or is above the average, objects are
 * taken largest-first and given to the currently least-loaded neighbor.
 *
 * @param stats  array of `count` neighbor statistics; total_walltime and
 *               idletime of non-vacating entries are rescaled in place.
 * @param count  number of entries in `stats`.
 * @return a newly allocated LBMigrateMsg listing the chosen moves (possibly
 *         none), or NULL when CMK_LBDB_ON is not set.
 */
LBMigrateMsg* WSLB::Strategy(WSLB::LDStats* stats, int count)
{
#if CMK_LBDB_ON
  //  CkPrintf("[%d] Strategy starting\n",CkMyPe());
  // Compute the average load to see if we are overloaded relative
  // to our neighbors
  const double load_factor = 1.05;
  double objload;

  double myload = myStats.total_walltime - myStats.idletime;
  double avgload = myload;
  int unvacated_neighbors = 0;
  int i;
  for(i=0; i < count; i++) {
    // If the neighbor is vacating, skip him
    if (stats[i].vacate_me)
      continue;

    // Scale times we need appropriately for relative proc speeds
    double hisload = stats[i].total_walltime - stats[i].idletime;
    const double hisusage = stats[i].usage;

    const double scale = (myStats.proc_speed * usage)
      / (stats[i].proc_speed * hisusage);

    hisload *= scale;
    stats[i].total_walltime *= scale;
    stats[i].idletime *= scale;

    //    CkPrintf("PE %d %d hisload = %f hisusage = %f\n",
    //             CkMyPe(),i,hisload,hisusage);
    avgload += hisload;
    unvacated_neighbors++;
  }
  if (vacate && unvacated_neighbors == 0)
    CkPrintf("[%d] ALL NEIGHBORS WANT TO VACATE!!!\n",CkMyPe());

  avgload /= (unvacated_neighbors+1);

  CkVec<MigrateInfo*> migrateInfo;

  // If we want to vacate, we always dump our load, otherwise
  // only if we are overloaded

  if (vacate || myload > avgload) {
    //    CkPrintf("[%d] OVERLOAD My load is %f, average load is %f\n",
    //             CkMyPe(),myload,avgload);

    // First, build heaps of other processors and my objects
    // Then assign objects to other processors until either
    //   - The smallest remaining object would put me below average, or
    //   - I only have 1 object left, or
    //   - The smallest remaining object would put someone else
    //     above average

    // Build heaps
    minHeap procs(count);
    for(i=0; i < count; i++) {
      // If all my neighbors vacate, I won't have anyone to give work
      // to
      if (!stats[i].vacate_me) {
        InfoRecord* item = new InfoRecord;
        item->load = stats[i].total_walltime - stats[i].idletime;
        item->Id = stats[i].from_pe;
        procs.insert(item);
      }
    }

    maxHeap objs(myStats.obj_data_sz);
    for(i=0; i < myStats.obj_data_sz; i++) {
      InfoRecord* item = new InfoRecord;
      item->load = myStats.objData[i].wallTime;
      item->Id = i;
      objs.insert(item);
    }

    int objs_here = myStats.obj_data_sz;
    do {
      //      if (objs_here <= 1) break;  // For now, always leave 1 object

      InfoRecord* p;
      InfoRecord* obj;

      // Get the lightest-loaded processor
      p = procs.deleteMin();
      if (p == 0) {
        //      CkPrintf("[%d] No destination PE found!\n",CkMyPe());
        break;
      }

      // Get the biggest object
      bool objfound = false;
      do {
        obj = objs.deleteMax();
        if (obj == 0) break;

        // The object's cost is inflated by load_factor when estimating
        // the effect of moving it.
        objload = load_factor * obj->load;

        double new_p_load = p->load + objload;
        double my_new_load = myload - objload;

        // If we're vacating, the biggest object is always good.
        // Otherwise, only take it if it doesn't produce overload
        if (vacate || new_p_load < my_new_load) {
          objfound = true;
        } else {
          // This object is too big, so throw it away
          //      CkPrintf("[%d] Can't move object w/ load %f to proc %d load %f %f\n",
          //               CkMyPe(),obj->load,p->Id,p->load,avgload);
          delete obj;
        }
      } while (!objfound);

      if (!objfound) {
        //      CkPrintf("[%d] No suitable object found!\n",CkMyPe());
        // p was removed from the heap above and will not be re-inserted,
        // so the drain loop below would never see it: free it here.
        delete p;
        break;
      }

      const int me = CkMyPe();
      // Apparently we can give this object to this processor
      if (_lb_args.debug())
        CkPrintf("[%d] Obj %d of %d migrating from %d to %d\n",
                 CkMyPe(),obj->Id,myStats.obj_data_sz,me,p->Id);

      MigrateInfo* migrateMe = new MigrateInfo;
      migrateMe->obj = myStats.objData[obj->Id].handle;
      migrateMe->from_pe = me;
      migrateMe->to_pe = p->Id;
      migrateInfo.insertAtEnd(migrateMe);

      objs_here--;

      // We may want to assign more to this processor, so lets
      // update it and put it back in the heap
      p->load += objload;
      myload -= objload;
      procs.insert(p);

      // This object is assigned, so we delete it from the heap
      delete obj;

    } while(vacate || myload > avgload);

    // Now empty out the heaps
    InfoRecord* p;
    while (NULL!=(p=procs.deleteMin()))
      delete p;

    InfoRecord* obj;
    while (NULL!=(obj=objs.deleteMax()))
      delete obj;
  }

  // Now build the message to actually perform the migrations
  int migrate_count=migrateInfo.length();
  //  if (migrate_count) {
  //    CkPrintf("PE %d: Sent away %d of %d objects\n",
  //             CkMyPe(),migrate_count,myStats.obj_data_sz);
  //  }
  LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg;
  msg->n_moves = migrate_count;
  for(i=0; i < migrate_count; i++) {
    MigrateInfo* item = (MigrateInfo*) migrateInfo[i];
    msg->moves[i] = *item;
    delete item;
    migrateInfo[i] = 0;
  }

  return msg;
#else
  return NULL;
#endif
}
LBMigrateMsg* NeighborCommLB::Strategy(NborBaseLB::LDStats* stats, int n_nbrs) { bool _lb_debug=0; bool _lb_debug1=0; bool _lb_debug2=0; #if CMK_LBDB_ON // CkPrintf("[%d] Strategy starting\n",CkMyPe()); // Compute the average load to see if we are overloaded relative // to our neighbors double myload = myStats.total_walltime - myStats.idletime; double avgload = myload; int i; if (_lb_debug) CkPrintf("[%d] Neighbor Count = %d\n", CkMyPe(), n_nbrs); for(i=0; i < n_nbrs; i++) { // Scale times we need appropriately for relative proc speeds const double scale = ((double)myStats.pe_speed) / stats[i].pe_speed; stats[i].total_walltime *= scale; stats[i].idletime *= scale; avgload += (stats[i].total_walltime - stats[i].idletime); } avgload /= (n_nbrs + 1); CkVec<MigrateInfo*> migrateInfo; if (_lb_debug) CkPrintf("[%d] My load is %lf\n", CkMyPe(),myload); if (myload > avgload) { if (_lb_debug1) CkPrintf("[%d] OVERLOAD My load is %lf average load is %lf\n", CkMyPe(), myload, avgload); // First of all, explore the topology and get dimension LBTopology* topo; { LBtopoFn topofn; topofn = LBTopoLookup(_lbtopo); if (topofn == NULL) { char str[1024]; CmiPrintf("NeighborCommLB> Fatal error: Unknown topology: %s. Choose from:\n", _lbtopo); printoutTopo(); sprintf(str, "NeighborCommLB> Fatal error: Unknown topology: %s", _lbtopo); CmiAbort(str); } topo = topofn(CkNumPes()); } int dimension = topo->get_dimension(); if (_lb_debug2) CkPrintf("[%d] Topology dimension = %d\n", CkMyPe(), dimension); if (dimension == -1) { char str[1024]; CmiPrintf("NeighborCommLB> Fatal error: Unsupported topology: %s. 
Only some of the following are supported:\n", _lbtopo); printoutTopo(); sprintf(str, "NeighborCommLB> Fatal error: Unsupported topology: %s", _lbtopo); CmiAbort(str); } // Position of this processor int *myProc = new int[dimension]; topo->get_processor_coordinates(myStats.from_pe, myProc); if (_lb_debug2) { char temp[1000]; char* now=temp; sprintf(now, "[%d] Coordinates = [", CkMyPe()); now += strlen(now); for(i=0;i<dimension;i++) { sprintf(now, "%d ", myProc[i]); now +=strlen(now); } sprintf(now, "]\n"); now += strlen(now); CkPrintf(temp); } // Then calculate the communication center of each object // The communication center is relative to myProc double **commcenter = new double*[myStats.n_objs]; double *commamount = new double[myStats.n_objs]; if(_lb_debug1) { CkPrintf("[%d] Number of Objs = %d \n", CkMyPe(), myStats.n_objs); } { memset(commamount, 0, sizeof(double)*myStats.n_objs); for(i=0; i<myStats.n_objs;i++) { commcenter[i] = new double[dimension]; memset(commcenter[i], 0, sizeof(double)*dimension); } //coordinates of procs int *destProc = new int[dimension]; int *diff = new int[dimension]; //for each comm entry for(i=0; i<myStats.n_comm;i++) { int j; //for each object //TODO use hashtable to accelerate for(j=0; j<myStats.n_objs;j++) if((myStats.objData[j].handle.omhandle.id == myStats.commData[i].sender.omId) && (myStats.objData[j].handle.id == myStats.commData[i].sender.objId)) { double comm= PER_MESSAGE_SEND_OVERHEAD * myStats.commData[i].messages + PER_BYTE_SEND_OVERHEAD * myStats.commData[i].bytes; commamount[j] += comm; int dest_pe = myStats.commData[i].receiver.lastKnown(); if(dest_pe==-1) continue; topo->get_processor_coordinates(dest_pe, destProc); topo->coordinate_difference(myProc, destProc, diff); int k; for(k=0;k<dimension;k++) { commcenter[j][k] += diff[k] * comm; } } } for(i=0; i<myStats.n_objs;i++) if (commamount[i]>0) { int k; double ratio = 1.0 /commamount[i]; for(k=0;k<dimension;k++) commcenter[i][k] *= ratio; } else { //if no 
communication, set commcenter to myself int k; for(k=0;k<dimension;k++) commcenter[i][k] = 0; } delete [] destProc; delete [] diff; } if(_lb_debug2) { for(i=0;i<myStats.n_objs;i++) { char temp[1000]; char* now=temp; sprintf(now, "[%d] Objs [%d] Load = %lf Comm Amount = %lf ", CkMyPe(), i, myStats.objData[i].wallTime, commamount[i] ); now += strlen(now); sprintf(now, "Comm Center = ["); now += strlen(now); int j; for(j=0;j<dimension;j++) { sprintf(now, "%lf ", commcenter[i][j]); now += strlen(now); } sprintf(now, "]\n"); now += strlen(now); CkPrintf(temp); } } // First, build heaps of my objects // Then assign objects to the least loaded other processors until either // - The smallest remaining object would put me below average, or // - I only have 1 object left, or // - The smallest remaining object would put someone else // above average // Note: Object can only move towards its communication center! // My neighbors: typedef struct _procInfo{ int id; double load; int* difference; } procInfo; if(_lb_debug2) { CkPrintf("[%d] Querying neighborhood topology...\n", CkMyPe() ); } procInfo* neighbors = new procInfo[n_nbrs]; { int *destProc = new int[dimension]; for(i=0; i < n_nbrs; i++) { neighbors[i].id = stats[i].from_pe; neighbors[i].load = stats[i].total_walltime - stats[i].idletime; neighbors[i].difference = new int[dimension]; topo->get_processor_coordinates(neighbors[i].id, destProc); topo->coordinate_difference(myProc, destProc, neighbors[i].difference); } delete[] destProc; } if(_lb_debug2) { CkPrintf("[%d] Building obj heap...\n", CkMyPe() ); } // My objects: build heaps maxHeap objs(myStats.n_objs); double totalObjLoad=0.0; for(i=0; i < myStats.n_objs; i++) { InfoRecord* item = new InfoRecord; item->load = myStats.objData[i].wallTime; totalObjLoad += item->load; item->Id = i; objs.insert(item); } if(_lb_debug2) { CkPrintf("[%d] Beginning distributing objects...\n", CkMyPe() ); } // for each object while(objs.numElements()>0) { InfoRecord* obj; obj = 
objs.deleteMax(); int bestDest = -1; for(i = 0; i < n_nbrs; i++) if(neighbors[i].load +obj->load < myload - obj->load && (bestDest==-1 || neighbors[i].load < neighbors[bestDest].load)) { double dotsum=0; int j; for(j=0; j<dimension; j++) dotsum += (commcenter[obj->Id][j] * neighbors[i].difference[j]); if(myload - avgload < totalObjLoad || dotsum>0.5 || (dotsum>0 && objs.numElements()==0) || commamount[obj->Id]==0) { bestDest = i; } } // Best place for the object if(bestDest != -1) { if(_lb_debug1) { CkPrintf("[%d] Obj[%d] will move to Proc[%d]\n", CkMyPe(), obj->Id, neighbors[bestDest].id); } //Migrate it MigrateInfo* migrateMe = new MigrateInfo; migrateMe->obj = myStats.objData[obj->Id].handle; migrateMe->from_pe = myStats.from_pe; migrateMe->to_pe = neighbors[bestDest].id; migrateInfo.insertAtEnd(migrateMe); //Modify loads myload -= obj->load; neighbors[bestDest].load += obj->load; } totalObjLoad -= obj->load; delete obj; } if(_lb_debug2) { CkPrintf("[%d] Clearing Up...\n", CkMyPe()); } for(i=0; i<n_nbrs; i++) { delete[] neighbors[i].difference; } delete[] neighbors; delete[] myProc; for(i=0;i<myStats.n_objs;i++) { delete[] commcenter[i]; } delete[] commcenter; delete[] commamount; } if(_lb_debug2) { CkPrintf("[%d] Generating result...\n", CkMyPe()); } // Now build the message to actually perform the migrations int migrate_count=migrateInfo.length(); // if (migrate_count > 0) { // CkPrintf("PE %d migrating %d elements\n",CkMyPe(),migrate_count); // } LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg; msg->n_moves = migrate_count; for(i=0; i < migrate_count; i++) { MigrateInfo* item = (MigrateInfo*) migrateInfo[i]; msg->moves[i] = *item; delete item; migrateInfo[i] = 0; } return msg; #else return NULL; #endif }
LBMigrateMsg * HybridBaseLB::createMigrateMsg(LDStats* stats) { #if CMK_LBDB_ON int i; LevelData *lData = levelData[currentLevel]; CkVec<MigrateInfo*> migrateInfo; // stats contains all objects that belong to this group // outObjs contains objects that are migrated out for (i=0; i<stats->n_objs; i++) { LDObjData &objData = stats->objData[i]; int frompe = stats->from_proc[i]; int tope = stats->to_proc[i]; CmiAssert(tope != -1); if (frompe != tope) { // CkPrintf("[%d] Obj %d migrating from %d to %d\n", // CkMyPe(),obj,pe,dest); #if 0 // delay until a summary is printed if (frompe == lData->nChildren) { frompe = -1; CmiAssert(tope != -1 && tope != lData->nChildren); } else frompe = lData->children[frompe]; if (tope != -1) { CmiAssert(tope < lData->nChildren); tope = lData->children[tope]; } #endif MigrateInfo *migrateMe = new MigrateInfo; migrateMe->obj = objData.handle; migrateMe->from_pe = frompe; migrateMe->to_pe = tope; migrateMe->async_arrival = objData.asyncArrival; migrateInfo.insertAtEnd(migrateMe); } else CmiAssert(frompe != lData->nChildren); } // merge outgoing objs CkVec<MigrationRecord> &outObjs = lData->outObjs; for (i=0; i<outObjs.size(); i++) { MigrateInfo *migrateMe = new MigrateInfo; migrateMe->obj = outObjs[i].handle; migrateMe->from_pe = outObjs[i].fromPe; migrateMe->to_pe = -1; // migrateMe->async_arrival = objData.asyncArrival; migrateInfo.insertAtEnd(migrateMe); } // construct migration message int migrate_count=migrateInfo.length(); DEBUGF(("[%d] level: %d has %d migrations. 
\n", CkMyPe(), currentLevel, migrate_count)); // ignore avail_vector, etc for now //LBMigrateMsg * msg = new(migrate_count,count,count,0) LBMigrateMsg; LBMigrateMsg * msg = new(migrate_count,0,0,0) LBMigrateMsg; msg->level = currentLevel; msg->n_moves = migrate_count; for(i=0; i < migrate_count; i++) { MigrateInfo* item = (MigrateInfo*) migrateInfo[i]; msg->moves[i] = *item; delete item; migrateInfo[i] = 0; DEBUGF(("[%d] obj (%d %d %d %d) migrate from %d to %d\n", CkMyPe(), item->obj.objID().id[0], item->obj.objID().id[1], item->obj.objID().id[2], item->obj.objID().id[3], item->from_pe, item->to_pe)); } if (_lb_args.printSummary()) printSummary(stats, stats->nprocs()); // translate relative pe number to its real number for(i=0; i < migrate_count; i++) { MigrateInfo* move = &msg->moves[i]; if (move->to_pe != -1) { if (move->from_pe == lData->nChildren) { // an object from outside group move->from_pe = -1; CmiAssert(move->to_pe != -1 && move->to_pe != lData->nChildren); } else move->from_pe = lData->children[move->from_pe]; CmiAssert(move->to_pe < lData->nChildren); move->to_pe = lData->children[move->to_pe]; } } return msg; #else return NULL; #endif }