void Refiner::Refine(int count, BaseLB::LDStats* stats, int* cur_p, int* new_p) { // CkPrintf("[%d] Refiner strategy\n",CkMyPe()); P = count; numComputes = stats->n_objs; computes = new computeInfo[numComputes]; processors = new processorInfo[count]; create(count, stats, cur_p); int i; for (i=0; i<numComputes; i++) assign((computeInfo *) &(computes[i]), (processorInfo *) &(processors[computes[i].oldProcessor])); removeComputes(); computeAverage(); if (_lb_args.debug()>2) { CkPrintf("Old PE load (bg load): "); for (i=0; i<count; i++) CkPrintf("%d:%f(%f) ", i, processors[i].load, processors[i].backgroundLoad); CkPrintf("\n"); } // Perform multi refine but reset it to the original state before changing the // refinement load balancing threshold. multirefine(true); int nmoves = 0; for (int pe=0; pe < P; pe++) { Iterator nextCompute; nextCompute.id = 0; computeInfo *c = (computeInfo *) processors[pe].computeSet->iterator((Iterator *)&nextCompute); while(c) { new_p[c->Id] = c->processor; if (new_p[c->Id] != cur_p[c->Id]) nmoves++; // if (c->oldProcessor != c->processor) // CkPrintf("Refiner::Refine: from %d to %d\n", c->oldProcessor, c->processor); nextCompute.id++; c = (computeInfo *) processors[pe].computeSet-> next((Iterator *)&nextCompute); } } if (_lb_args.debug()>2) { CkPrintf("New PE load: "); for (i=0; i<count; i++) CkPrintf("%f ", processors[i].load); CkPrintf("\n"); } if (_lb_args.debug()>1) CkPrintf("Refiner: moving %d obejcts. \n", nmoves); delete [] computes; delete [] processors; }
void Alg7::strategy() { // double bestSize0, bestSize1, bestSize2; computeInfo *c; int numAssigned; processorInfo* goodP[3][3][2]; // goodP[# of real patches][# of proxies] processorInfo* poorP[3][3][2]; // fallback option double startTime = CmiWallTimer(); // iout << iINFO << "calling makeHeaps. \n"; adjustBackgroundLoadAndComputeAverage(); makeHeaps(); // iout << iINFO << "Before assignment\n" << endi; // printLoads(); /* int numOverloaded = 0; for (int ip=0; ip<P; ip++) { if ( processors[ip].backgroundLoad > averageLoad ) ++numOverloaded; } if ( numOverloaded ) { iout << iWARN << numOverloaded << " processors are overloaded due to background load.\n" << endi; } */ numAssigned = 0; // for (int i=0; i<numPatches; i++) // { std::cout << "(" << patches[i].Id << "," << patches[i].processor ;} overLoad = 1.2; for (int ic=0; ic<numComputes; ic++) { // place computes w/ patches on heavily background loaded nodes first // place pair before self, because self is more flexible c = (computeInfo *) computeBgPairHeap->deleteMax(); if ( ! c ) c = (computeInfo *) computeBgSelfHeap->deleteMax(); if ( ! c ) c = (computeInfo *) computePairHeap->deleteMax(); if ( ! c ) c = (computeInfo *) computeSelfHeap->deleteMax(); if (c->processor != -1) continue; // skip to the next compute; if ( ! c ) NAMD_bug("Alg7: computesHeap empty!"); int i,j,k; for(i=0;i<3;i++) for(j=0;j<3;j++) { for(k=0;k<2;k++) { goodP[i][j][k]=0; poorP[i][j][k]=0; } } // first try for at least one proxy { Iterator nextProc; processorInfo *p; p = &processors[patches[c->patch1].processor]; togrid(goodP, poorP, p, c); p = &processors[patches[c->patch2].processor]; togrid(goodP, poorP, p, c); p = (processorInfo *)patches[c->patch1]. proxiesOn.iterator((Iterator *)&nextProc); while (p) { togrid(goodP, poorP, p, c); p = (processorInfo *)patches[c->patch1]. proxiesOn.next((Iterator*)&nextProc); } p = (processorInfo *)patches[c->patch2]. 
proxiesOn.iterator((Iterator *)&nextProc); while (p) { togrid(goodP, poorP, p, c); p = (processorInfo *)patches[c->patch2]. proxiesOn.next((Iterator*)&nextProc); } p = 0; // prefer to place compute with existing proxies over home patches if ((p = goodP[0][2][0]) // No home, two proxies || (p = goodP[1][1][0]) // One home, one proxy || (p = goodP[2][0][0]) // Two home, no proxies || (p = goodP[0][1][0]) // No home, one proxy || (p = goodP[1][0][0]) // One home, no proxies || (p = goodP[0][0][0]) // No home, no proxies || (p = goodP[0][1][1]) // No home, one proxy || (p = goodP[1][0][1]) // One home, no proxies || (p = goodP[0][0][1]) // No home, no proxies ) { assign(c,p); numAssigned++; continue; } } // no luck, do it the long way heapIterator nextProcessor; processorInfo *p = (processorInfo *) pes->iterator((heapIterator *) &nextProcessor); while (p) { togrid(goodP, poorP, p, c); p = (processorInfo *) pes->next(&nextProcessor); } // if (numAssigned >= 0) { Else is commented out below p = 0; // prefer to place compute with existing proxies over home patches if ((p = goodP[0][2][0]) // No home, two proxies || (p = goodP[1][1][0]) // One home, one proxy || (p = goodP[2][0][0]) // Two home, no proxies || (p = goodP[0][1][0]) // No home, one proxy || (p = goodP[1][0][0]) // One home, no proxies || (p = goodP[0][0][0]) // No home, no proxies || (p = goodP[0][1][1]) // No home, one proxy || (p = goodP[1][0][1]) // One home, no proxies || (p = goodP[0][0][1]) // No home, no proxies ) { assign(c,p); numAssigned++; } else if ( // overloaded processors (p = poorP[0][2][0]) // No home, two proxies || (p = poorP[1][1][0]) // One home, one proxy || (p = poorP[2][0][0]) // Two home, no proxies || (p = poorP[0][1][0]) // No home, one proxy || (p = poorP[1][0][0]) // One home, no proxies || (p = poorP[0][0][0]) // No home, no proxies || (p = poorP[0][1][1]) // No home, one proxy || (p = poorP[1][0][1]) // One home, no proxies || (p = poorP[0][0][1]) // No home, no proxies ) { 
//iout << iWARN << "overload assign to " << p->Id << "\n" << endi; assign(c,p); numAssigned++; } else { NAMD_bug("*** Alg 7 No receiver found 1 ***"); break; } } printLoads(); if ( computeMax() <= origMaxLoad ) { // binary-search refinement procedure multirefine(1.05); printLoads(); } }
/**
 * Refine an existing object-to-processor mapping (RefinerTemp variant).
 *
 * The whole body is compiled only when TEMP_LDB is defined; otherwise the
 * function does nothing and new_p is left untouched.
 *
 * With TEMP_LDB defined it allocates the working arrays `computes`
 * (stats->n_objs entries) and `processors` (count entries), lets create()
 * populate them, places every compute on its `oldProcessor`, runs
 * multirefine(), and copies the resulting placement into new_p. Both working
 * arrays are released before returning.
 *
 * @param count  number of processors; stored in P and used as the length of
 *               `processors`.
 * @param stats  load-balancing statistics; `n_objs` is read here and the
 *               pointer is forwarded to create().
 * @param cur_p  current processor per object, indexed by the object's Id;
 *               forwarded to create() and compared with the new placement to
 *               count moves.
 * @param new_p  output array: new_p[Id] is set to the processor chosen for
 *               each compute found in some processor's computeSet.
 */
void RefinerTemp::Refine(int count, BaseLB::LDStats* stats, int* cur_p, int* new_p)
{
#ifdef TEMP_LDB
  //  CkPrintf("[%d] RefinerTemp strategy\n",CkMyPe());

  P = count;
  numComputes = stats->n_objs;
  computes = new computeInfo[numComputes];
  processors = new processorInfo[count];

  create(count, stats, cur_p);

  // Start from the current placement: each compute goes to the processor
  // recorded in its oldProcessor field.
  for (int i = 0; i < numComputes; i++) {
    int ind1 = computes[i].id.getID()[0];
    int ind2 = computes[i].id.getID()[1];
    // NOTE(review): unconditional trace for the object whose first two ID
    // words are (0, 48); looks like leftover debugging output — confirm
    // whether it should be gated on _lb_args.debug() or removed.
    if (ind1 == 0 && ind2 == 48)
      CkPrintf("----- assigning oldproc%d load:%f\n",computes[i].oldProcessor,computes[i].load);
    assign((computeInfo *) &(computes[i]),
           (processorInfo *) &(processors[computes[i].oldProcessor]));
  }

  removeComputes();

  computeAverage();

  if (_lb_args.debug() > 2) {
    CkPrintf("Old PE load (bg load): ");
    for (int i = 0; i < count; i++)
      CkPrintf("%d:%f(%f) ", i, processors[i].load, processors[i].backgroundLoad);
    CkPrintf("\n");
  }

  multirefine();

  // Walk every processor's compute set, publish the new placement, and count
  // how many objects ended up on a different processor than before.
  int nmoves = 0;
  for (int pe = 0; pe < P; pe++) {
    Iterator nextCompute;
    nextCompute.id = 0;
    computeInfo *c = (computeInfo *)
      processors[pe].computeSet->iterator(&nextCompute);
    while (c) {
      new_p[c->Id] = c->processor;
      if (new_p[c->Id] != cur_p[c->Id]) nmoves++;
      // if (c->oldProcessor != c->processor)
      //   CkPrintf("RefinerTemp::Refine: from %d to %d\n", c->oldProcessor, c->processor);
      nextCompute.id++;
      c = (computeInfo *) processors[pe].computeSet->next(&nextCompute);
    }
  }

  if (_lb_args.debug() > 2) {
    CkPrintf("New PE load: ");
    for (int i = 0; i < count; i++) CkPrintf("%f ", processors[i].load);
    CkPrintf("\n");
  }
  if (_lb_args.debug() > 1)
    CkPrintf("RefinerTemp: moving %d objects. \n", nmoves);

  // Release the working arrays and clear the members so they do not dangle
  // once this call returns (delete [] on a null pointer is a no-op).
  delete [] computes;
  computes = 0;
  delete [] processors;
  processors = 0;
#endif
}