Exemplo n.º 1
0
void Refiner::Refine(int count, BaseLB::LDStats* stats, 
		     int* cur_p, int* new_p)
{
  //  CkPrintf("[%d] Refiner strategy\n",CkMyPe());

  P = count;
  numComputes = stats->n_objs;
  computes = new computeInfo[numComputes];
  processors = new processorInfo[count];

  create(count, stats, cur_p);

  int i;
  for (i=0; i<numComputes; i++)
    assign((computeInfo *) &(computes[i]),
           (processorInfo *) &(processors[computes[i].oldProcessor]));

  removeComputes();

  computeAverage();

  if (_lb_args.debug()>2)  {
    CkPrintf("Old PE load (bg load): ");
    for (i=0; i<count; i++) CkPrintf("%d:%f(%f) ", i, processors[i].load, processors[i].backgroundLoad);
    CkPrintf("\n");
  }

  // Perform multi refine but reset it to the original state before changing the
  // refinement load balancing threshold.
  multirefine(true);

  int nmoves = 0;
  for (int pe=0; pe < P; pe++) {
    Iterator nextCompute;
    nextCompute.id = 0;
    computeInfo *c = (computeInfo *)
      processors[pe].computeSet->iterator((Iterator *)&nextCompute);
    while(c) {
      new_p[c->Id] = c->processor;
      if (new_p[c->Id] != cur_p[c->Id]) nmoves++;
//      if (c->oldProcessor != c->processor)
//      CkPrintf("Refiner::Refine: from %d to %d\n", c->oldProcessor, c->processor);
      nextCompute.id++;
      c = (computeInfo *) processors[pe].computeSet->
	             next((Iterator *)&nextCompute);
    }
  }
  if (_lb_args.debug()>2)  {
    CkPrintf("New PE load: ");
    for (i=0; i<count; i++) CkPrintf("%f ", processors[i].load);
    CkPrintf("\n");
  }
  if (_lb_args.debug()>1) 
    CkPrintf("Refiner: moving %d obejcts. \n", nmoves);
  delete [] computes;
  delete [] processors;
}
Exemplo n.º 2
0
void Alg7::strategy()
{
  // double bestSize0, bestSize1, bestSize2;
  computeInfo *c;
  int numAssigned;
  processorInfo* goodP[3][3][2];  // goodP[# of real patches][# of proxies]
  processorInfo* poorP[3][3][2];  // fallback option

  double startTime = CmiWallTimer();

  // iout << iINFO << "calling makeHeaps. \n";
  adjustBackgroundLoadAndComputeAverage();
  makeHeaps();
  // iout << iINFO << "Before assignment\n" << endi;
  // printLoads();

  /*
  int numOverloaded = 0;
  for (int ip=0; ip<P; ip++) {
    if ( processors[ip].backgroundLoad > averageLoad ) ++numOverloaded;
  }
  if ( numOverloaded ) {
    iout << iWARN << numOverloaded
      << " processors are overloaded due to background load.\n" << endi;
  }
  */
	      
  numAssigned = 0;

  //   for (int i=0; i<numPatches; i++)
  //     { std::cout << "(" << patches[i].Id << "," << patches[i].processor ;}
  overLoad = 1.2;
  for (int ic=0; ic<numComputes; ic++) {

    // place computes w/ patches on heavily background loaded nodes first
    // place pair before self, because self is more flexible
    c = (computeInfo *) computeBgPairHeap->deleteMax();
    if ( ! c ) c = (computeInfo *) computeBgSelfHeap->deleteMax();
    if ( ! c ) c = (computeInfo *) computePairHeap->deleteMax();
    if ( ! c ) c = (computeInfo *) computeSelfHeap->deleteMax();

    if (c->processor != -1) continue; // skip to the next compute;

    if ( ! c ) NAMD_bug("Alg7: computesHeap empty!");
    int i,j,k;
    for(i=0;i<3;i++)
      for(j=0;j<3;j++) {
        for(k=0;k<2;k++) {
	  goodP[i][j][k]=0;
	  poorP[i][j][k]=0;
        }
      }

    // first try for at least one proxy
    {
      Iterator nextProc;
      processorInfo *p;

      p = &processors[patches[c->patch1].processor];
      togrid(goodP, poorP, p, c);

      p = &processors[patches[c->patch2].processor];
      togrid(goodP, poorP, p, c);

      p = (processorInfo *)patches[c->patch1].
                            proxiesOn.iterator((Iterator *)&nextProc);
      while (p) {
        togrid(goodP, poorP, p, c);
        p = (processorInfo *)patches[c->patch1].
                            proxiesOn.next((Iterator*)&nextProc);
      }

      p = (processorInfo *)patches[c->patch2].
                            proxiesOn.iterator((Iterator *)&nextProc);
      while (p) {
        togrid(goodP, poorP, p, c);
        p = (processorInfo *)patches[c->patch2].
                            proxiesOn.next((Iterator*)&nextProc);
      }
      p = 0;
      // prefer to place compute with existing proxies over home patches
      if ((p = goodP[0][2][0])    // No home, two proxies
       || (p = goodP[1][1][0])    // One home, one proxy
       || (p = goodP[2][0][0])    // Two home, no proxies
       || (p = goodP[0][1][0])    // No home, one proxy
       || (p = goodP[1][0][0])    // One home, no proxies
       || (p = goodP[0][0][0])    // No home, no proxies
       || (p = goodP[0][1][1])    // No home, one proxy
       || (p = goodP[1][0][1])    // One home, no proxies
       || (p = goodP[0][0][1])    // No home, no proxies
         ) {
        assign(c,p); numAssigned++;
        continue;
      }
    }

    // no luck, do it the long way

    heapIterator nextProcessor;
    processorInfo *p = (processorInfo *) 
      pes->iterator((heapIterator *) &nextProcessor);
    while (p) {
      togrid(goodP, poorP, p, c);
      p = (processorInfo *) pes->next(&nextProcessor);
    }

    //    if (numAssigned >= 0) {  Else is commented out below

    p = 0;
      // prefer to place compute with existing proxies over home patches
      if ((p = goodP[0][2][0])    // No home, two proxies
       || (p = goodP[1][1][0])    // One home, one proxy
       || (p = goodP[2][0][0])    // Two home, no proxies
       || (p = goodP[0][1][0])    // No home, one proxy
       || (p = goodP[1][0][0])    // One home, no proxies
       || (p = goodP[0][0][0])    // No home, no proxies
       || (p = goodP[0][1][1])    // No home, one proxy
       || (p = goodP[1][0][1])    // One home, no proxies
       || (p = goodP[0][0][1])    // No home, no proxies
       ) {
      assign(c,p); numAssigned++;
   } else if (   // overloaded processors
          (p = poorP[0][2][0])    // No home, two proxies
       || (p = poorP[1][1][0])    // One home, one proxy
       || (p = poorP[2][0][0])    // Two home, no proxies
       || (p = poorP[0][1][0])    // No home, one proxy
       || (p = poorP[1][0][0])    // One home, no proxies
       || (p = poorP[0][0][0])    // No home, no proxies
       || (p = poorP[0][1][1])    // No home, one proxy
       || (p = poorP[1][0][1])    // One home, no proxies
       || (p = poorP[0][0][1])    // No home, no proxies
       ) {
      //iout << iWARN << "overload assign to " << p->Id << "\n" << endi;
      assign(c,p); numAssigned++;
    } else {
      NAMD_bug("*** Alg 7 No receiver found 1 ***");
      break;
    }
  }

  printLoads();

  if ( computeMax() <= origMaxLoad ) {
    // binary-search refinement procedure
    multirefine(1.05);
    printLoads();
  }

}
Exemplo n.º 3
0
// Refine an existing object-to-processor placement (RefinerTemp variant).
// The whole body is compiled only when TEMP_LDB is defined; otherwise the
// function does nothing.
//
//   count  - number of processors; stored in P and used to size the
//            processors array.
//   stats  - load-balancing statistics; stats->n_objs gives the number of
//            computes and the pointer is forwarded to create().
//   cur_p  - current placement; forwarded to create() and indexed by compute
//            Id when counting how many objects moved.
//   new_p  - output; new_p[Id] receives the processor of every compute found
//            in some processor's computeSet once refinement has finished.
void RefinerTemp::Refine(int count, BaseLB::LDStats* stats, 
		     int* cur_p, int* new_p)
{
#ifdef TEMP_LDB
  //  CkPrintf("[%d] RefinerTemp strategy\n",CkMyPe());

  P = count;
  numComputes = stats->n_objs;
  computes = new computeInfo[numComputes];
  processors = new processorInfo[count];

  create(count, stats, cur_p);

  // Start from the existing layout: attach every compute to the processor
  // recorded in its oldProcessor field.
  for (int idx = 0; idx < numComputes; ++idx) {
    const int ind1 = computes[idx].id.getID()[0];
    const int ind2 = computes[idx].id.getID()[1];
    // Hard-coded trace for the object whose first two ID components are
    // 0 and 48; printed regardless of the debug level.
    if (ind1 == 0 && ind2 == 48) {
      CkPrintf("----- assigning oldproc%d load:%f\n",
               computes[idx].oldProcessor, computes[idx].load);
    }

    computeInfo *obj = (computeInfo *) &(computes[idx]);
    processorInfo *home =
      (processorInfo *) &(processors[computes[idx].oldProcessor]);
    assign(obj, home);
  }
  removeComputes();

  computeAverage();

  if (_lb_args.debug() > 2) {
    CkPrintf("Old PE load (bg load): ");
    for (int pe = 0; pe < count; ++pe) {
      CkPrintf("%d:%f(%f) ", pe, processors[pe].load,
               processors[pe].backgroundLoad);
    }
    CkPrintf("\n");
  }

  multirefine();

  // Walk each processor's compute set, publish the final placement through
  // new_p and count the computes whose processor differs from cur_p.
  int moved = 0;
  for (int pe = 0; pe < P; ++pe) {
    Iterator cursor;
    cursor.id = 0;
    for (computeInfo *obj = (computeInfo *)
           processors[pe].computeSet->iterator(&cursor);
         obj != 0;
         obj = (computeInfo *) processors[pe].computeSet->next(&cursor)) {
      new_p[obj->Id] = obj->processor;
      if (cur_p[obj->Id] != new_p[obj->Id]) ++moved;
//      if (obj->oldProcessor != obj->processor)
//      CkPrintf("RefinerTemp::Refine: from %d to %d\n", obj->oldProcessor, obj->processor);
      cursor.id++;  // advanced by hand before asking the set for the next entry
    }
  }

  if (_lb_args.debug() > 2) {
    CkPrintf("New PE load: ");
    for (int pe = 0; pe < count; ++pe) {
      CkPrintf("%f ", processors[pe].load);
    }
    CkPrintf("\n");
  }
  if (_lb_args.debug() > 1) {
    CkPrintf("RefinerTemp: moving %d obejcts. \n", moved);
  }

  delete [] computes;
  delete [] processors;
#endif
}