Exemple #1
0
  /// Pick the PE for the array element with index `i`.
  ///
  /// An index made of exactly one integer is placed round-robin on that
  /// integer; any other index is placed using its hash code. In both cases
  /// the result is offset by shiftPE. The array handle is not consulted.
  int procNum(int arrayHdl, const CkArrayIndex &i)
  {
#if 1
    // Round-robin placement for single-integer indices.
    const bool singleInt = (i.nInts == 1);
    if (singleInt)
      return shiftPE + ((i.data()[0]) % numPE);
#endif
    // All remaining indices: offset the hash code, reduce it modulo a big
    // prime, then fold the result onto the available PEs.
    const unsigned int folded = (i.hash() + 739) % 1280107;
    return shiftPE + (folded % numPE);
  }
Exemple #2
0
void
LinearMap::populateInitial( int, CkArrayIndex& idx, void *msg, CkArrMgr *mgr )
//******************************************************************************
// Create initial set of array elements based on linear distribution
//! \details Every PE walks the requested element ids and inserts those that
//!   fall into its own [lower, upper) chunk; the last PE additionally takes
//!   the remainder of the division by the number of PEs. The creation message
//!   is copied for each inserted element and released before returning on
//!   every path, including the one where no elements are requested.
//! \param[in] idx Charm++ array index object containing the number of initial
//!   array elements to be created
//! \param[in] msg Charm++ message to use for array element creation
//! \param[in] mgr Array manager to use
//! \author J. Bakosi
//******************************************************************************
{
  int nelem = *idx.data();      // number of array elements requested
  if (nelem == 0) {
    // No initial elements requested. The normal path below frees msg once it
    // is done, so release it here as well instead of leaking it.
    CkFreeMsg( msg );
    return;
  }

  // Half-open range of element ids assigned to this PE
  auto lower = CkMyPe() * m_chunksize;
  auto upper = lower + m_chunksize;
  // The last PE also picks up the elements left over by the division
  auto remainder = nelem % CkNumPes();
  if (remainder && CkMyPe() == CkNumPes()-1) upper += remainder;

  for (int e=0; e<nelem; ++e)
    if (e >= lower && e < upper)
      mgr->insertInitial( e, CkCopyMsg(&msg) );   // each element gets a copy

  mgr->doneInserting();
  CkFreeMsg( msg );
}
Exemple #3
0
// Print out an array index to this string as decimal fields
// separated by underscores.
//
// idx  - index whose nInts integer fields are printed in order.
// dest - output buffer; receives a NUL-terminated string. The buffer size is
//        not passed in, so the caller must provide enough room for every
//        field, the separators and the terminator.
//
// An index with no integer fields produces the empty string.
void printIndex(const CkArrayIndex &idx,char *dest) {
	// Terminate up front so dest is a valid string even when the loop
	// below never runs (nInts==0).
	*dest='\0';
	const int *idxData=idx.data();
	for (int i=0;i<idx.nInts;i++) {
		// sprintf reports how many characters it wrote; advance by that
		// amount rather than rescanning the output with strlen.
		int written=sprintf(dest,"%s%d",i==0?"":"_", idxData[i]);
		if (written>0) dest+=written;
	}
}
Exemple #4
0
	void forwardMsg(int ep,const CkArrayIndex &idx,
			const CkArrayID &a,
			int nBytes,char *env)
	{
		ckout<<"DelegateMgr> Recv message for "<<idx.data()[0]<<endl;
		//Have to allocate a new message because of Charm++'s 
		// weird allocate rules:
		envelope *msg=(envelope *)CmiAlloc(nBytes);
		memcpy(msg,env,nBytes);
		CkUnpackMessage(&msg);
		CProxy_CkArray ap(msg->getsetArrayMgr());
		ap.ckLocalBranch()->deliver((CkMessage *)EnvToUsr(msg),CkDeliver_inline);
	}
Exemple #5
0
	/// Delegated array send: deliver message m to element idx of array a.
	///
	/// If the element's last known PE is this PE, the message is queued on
	/// the local array manager. Otherwise the message is packed, passed to
	/// forwardMsg as raw bytes, and then freed. pd is not used.
	virtual void ArraySend(CkDelegateData *pd,int ep,void *m,const CkArrayIndex &idx,CkArrayID a)
	{
		CkArray *localMgr=CProxy_CkArray(a).ckLocalBranch();
		const int destPE=localMgr->lastKnown(idx);
		if (destPE!=CkMyPe())
		{ //Element lives elsewhere: forward the packed bytes
			ckout<<"DelegateMgr> Sending message for "<<idx.data()[0]<<" to "<<destPE<<endl;
			envelope *packed=UsrToEnv(m);
			CkPackMessage(&packed);
			forwardMsg(ep,idx,a,packed->getTotalsize(),(char *)packed);
			// NOTE(review): CkPackMessage takes the envelope by address and
			// may presumably replace it; if so, m could be stale by the
			// time it is freed here -- confirm against the Charm++ sources.
			CkFreeMsg(m);
			return;
		}
		//Element is local: queue the message for it
		localMgr->deliver((CkMessage *)m, CkDeliver_queue);
	}
Exemple #6
0
int
LinearMap::procNum( int, const CkArrayIndex& idx )
//******************************************************************************
//  Compute the home PE of an array element under the linear distribution
//! \details The element id is divided by the chunk size; a result at or
//!   beyond the number of PEs is clamped to the last PE.
//! \param[in] idx Charm++ array index object holding the id of the array
//!   element that needs a PE
//! \return PE assigned
//! \author J. Bakosi
//******************************************************************************
{
  const int elem = *idx.data();   // id of the element being placed
  const auto npes = CkNumPes();

  auto pe = elem / m_chunksize;
  // Ids past the last full chunk all land on the last PE
  if (pe >= npes) pe = npes-1;

  Assert( pe < npes, "Assigned PE (" + std::to_string(pe) +
          ") larger than NumPEs (" + std::to_string(npes) + ")" );

  return pe;
}
Exemple #7
0
// Return the entry of the flattened `mapping` table selected by the first
// three integers of idx, combined as x*Y*Z + y*Z + z.
int ComputeMap::procNum(int, const CkArrayIndex &idx) {
  const int *coords = idx.data();
  const int x = coords[0];
  const int y = coords[1];
  const int z = coords[2];
  return mapping[x*Y*Z + y*Z + z];
}
Exemple #8
0
// Compute a new object-to-processor assignment and store it in
// stats->to_proc.
//
// Steps, in order:
//   1. Build computeLoad[] (3D coordinates and load) for every migratable
//      object, plus one VecArray per direction that is then sorted.
//   2. Count the available processors, optionally dropping those whose
//      background load is not below the average, to get the partition count.
//   3. Set up top_partition from the bounding box and the total load, then
//      split it with rec_divide() and map partitions to nodes.
//   4. Cross-check the per-partition object counts and write the resulting
//      PE of every migratable object to stats->to_proc.
//
// stats - load balancing statistics; read for object and processor data,
//         written through to_proc. Non-migratable objects keep their PE.
//
// When there are no migratable objects, every object keeps its current PE
// and the function returns before any partitioning is attempted.
//
// The whole body is compiled only when CMK_LBDB_ON is set.
void OrbLB::work(LDStats* stats)
{
#if CMK_LBDB_ON
  int i,j;

  statsData = stats;

  P = stats->nprocs();

  // calculate total number of migratable objects
  nObjs = stats->n_migrateobjs;
#ifdef DEBUG
  CmiPrintf("ORB: num objects:%d\n", nObjs);
#endif

  // create computeLoad and calculate tentative computes coordinates
  computeLoad = new ComputeLoad[nObjs];
  for (i=XDIR; i<=ZDIR; i++) vArray[i] = new VecArray[nObjs];

  // v[0] = XDIR  v[1] = YDIR v[2] = ZDIR
  // vArray[XDIR] is an array holding the x vector for all computes
  int objIdx = 0;
  for (i=0; i<stats->n_objs; i++) {
    LDObjData &odata = stats->objData[i];
    if (odata.migratable == 0) continue;
    computeLoad[objIdx].id = objIdx;
#if CMK_LB_USER_DATA
    // Coordinates come either from the user data attached to the object
    // (an array index) or from the object id itself.
    int x, y, z;
    if (use_udata) {
      CkArrayIndex *idx =
        (CkArrayIndex *)odata.getUserData(CkpvAccess(_lb_obj_index));
      x = idx->data()[0];
      y = idx->data()[1];
      z = idx->data()[2];
    } else {
      x = odata.objID().id[0];
      y = odata.objID().id[1];
      z = odata.objID().id[2];
    }
    computeLoad[objIdx].v[XDIR] = x;
    computeLoad[objIdx].v[YDIR] = y;
    computeLoad[objIdx].v[ZDIR] = z;
#else
    computeLoad[objIdx].v[XDIR] = odata.objID().id[0];
    computeLoad[objIdx].v[YDIR] = odata.objID().id[1];
    computeLoad[objIdx].v[ZDIR] = odata.objID().id[2];
#endif
#if CMK_LB_CPUTIMER
    computeLoad[objIdx].load = _lb_args.useCpuTime()?odata.cpuTime:odata.wallTime;
#else
    computeLoad[objIdx].load = odata.wallTime;
#endif
    computeLoad[objIdx].refno = 0;
    computeLoad[objIdx].partition = NULL;
    for (int k=XDIR; k<=ZDIR; k++) {
        vArray[k][objIdx].id = objIdx;
        vArray[k][objIdx].v = computeLoad[objIdx].v[k];
    }
#ifdef DEBUG
    CmiPrintf("Object %d: %d %d %d load:%f\n", objIdx, computeLoad[objIdx].v[XDIR], computeLoad[objIdx].v[YDIR], computeLoad[objIdx].v[ZDIR], computeLoad[objIdx].load);
#endif
    objIdx ++;
  }
  CmiAssert(nObjs == objIdx);

  if (nObjs == 0) {
    // Nothing is migratable: the bounding-box setup below reads
    // computeLoad[0], which does not exist. Keep every object where it is,
    // release the scratch arrays and stop.
    for (int obj=0; obj<stats->n_objs; obj++)
      stats->to_proc[obj] = stats->from_proc[obj];
    delete [] computeLoad;
    for (i=XDIR; i<=ZDIR; i++) delete [] vArray[i];
    return;
  }

  double t = CkWallTimer();

  quicksort(XDIR);
  quicksort(YDIR);
  quicksort(ZDIR);
#ifdef DEBUG
  CmiPrintf("qsort time: %f\n", CkWallTimer() - t);
#endif

  // one partition per available processor (may shrink below)
  npartition = 0;
  for (i=0; i<P; i++)
    if (stats->procs[i].available == true) npartition++;
  partitions = new Partition[npartition];

  // Seed the total with object 0's load: the scan below starts at index 1
  // because object 0 already supplies the initial bounding-box values, and
  // its load must not be left out of the total.
  double totalLoad = computeLoad[0].load;
  int minx, miny, minz, maxx, maxy, maxz;
  minx = maxx= computeLoad[0].v[XDIR];
  miny = maxy= computeLoad[0].v[YDIR];
  minz = maxz= computeLoad[0].v[ZDIR];
  for (i=1; i<nObjs; i++) {
    totalLoad += computeLoad[i].load;
    if (computeLoad[i].v[XDIR] < minx) minx = computeLoad[i].v[XDIR];
    else if (computeLoad[i].v[XDIR] > maxx) maxx = computeLoad[i].v[XDIR];
    if (computeLoad[i].v[YDIR] < miny) miny = computeLoad[i].v[YDIR];
    else if (computeLoad[i].v[YDIR] > maxy) maxy = computeLoad[i].v[YDIR];
    if (computeLoad[i].v[ZDIR] < minz) minz = computeLoad[i].v[ZDIR];
    else if (computeLoad[i].v[ZDIR] > maxz) maxz = computeLoad[i].v[ZDIR];
  }

  // the top partition spans the bounding box of all migratable objects
  top_partition.origin[XDIR] = minx;
  top_partition.origin[YDIR] = miny;
  top_partition.origin[ZDIR] = minz;
  top_partition.corner[XDIR] = maxx;
  top_partition.corner[YDIR] = maxy; 
  top_partition.corner[ZDIR] = maxz;

  top_partition.refno = 0;
  top_partition.load = 0.0;
  top_partition.count = nObjs;

  // if we take background load into account
  if (!_lb_args.ignoreBgLoad()) {
    top_partition.bkpes.resize(0);
    double total = totalLoad;
    for (i=0; i<P; i++) {
      if (!stats->procs[i].available) continue;
      double bkload = stats->procs[i].bg_walltime;
      total += bkload;
    }
    double averageLoad = total / npartition;
    // only processors whose background load is below the average get objects
    for (i=0; i<P; i++) {
      if (!stats->procs[i].available) continue;
      double bkload = stats->procs[i].bg_walltime;
      if (bkload < averageLoad) top_partition.bkpes.push_back(i);
      else CkPrintf("OrbLB Info> PE %d with %f background load will have 0 object.\n", i, bkload);
    }
    npartition = top_partition.bkpes.size();
    // formally add these bg load to total load
    for (i=0; i<npartition; i++) 
      totalLoad += stats->procs[top_partition.bkpes[i]].bg_walltime; 
    if (_lb_args.debug()>=2) {
      CkPrintf("BG load: ");
      for (i=0; i<P; i++)  CkPrintf(" %f", stats->procs[i].bg_walltime);
      CkPrintf("\n");
      CkPrintf("Partition BG load: ");
      for (i=0; i<npartition; i++)  CkPrintf(" %f", stats->procs[top_partition.bkpes[i]].bg_walltime);
      CkPrintf("\n");
    }
  }

  top_partition.load = totalLoad;

  currentp = 0;
  refno = 0;

  // recursively divide
  rec_divide(npartition, top_partition);

  // mapping partitions to nodes
  mapPartitionsToNodes();

  // this is for sanity check
  int *num = new int[P];
  for (i=0; i<P; i++) num[i] = 0;

  // link every object to the partition carrying the same refno and count
  // how many objects each partition received
  for (i=0; i<nObjs; i++)
  {
    for (j=0; j<npartition; j++)
      if (computeLoad[i].refno == partitions[j].refno)   {
        computeLoad[i].partition = partitions+j;
        num[j] ++;
    }
    CmiAssert(computeLoad[i].partition != NULL);
  }

  for (i=0; i<npartition; i++)
    if (num[i] != partitions[i].count) 
      CmiAbort("OrbLB: Compute counts don't agree!\n");

  delete [] num;

  // Save output
  // objIdx walks computeLoad[] in step with the migratable objects, in the
  // same order in which the table was filled above.
  objIdx = 0;
  for(int obj=0;obj<stats->n_objs;obj++) {
      stats->to_proc[obj] = stats->from_proc[obj];
      LDObjData &odata = stats->objData[obj];
      if (odata.migratable == 0) { continue; }
      int frompe = stats->from_proc[obj];
      int tope = computeLoad[objIdx].partition->node;
      if (frompe != tope) {
        if (_lb_args.debug() >= 3) {
              CkPrintf("[%d] Obj %d migrating from %d to %d\n",
                     CkMyPe(),obj,frompe,tope);
        }
        stats->to_proc[obj] = tope;
      }
      objIdx ++;
  }

  // free memory
  delete [] computeLoad;
  for (i=0; i<3; i++) delete [] vArray[i];
  delete [] partitions;

  if (_lb_args.debug() >= 1)
    CkPrintf("OrbLB finished time: %fs\n", CkWallTimer() - t);
#endif
}
Exemple #9
0
    /// Look up the PE stored in pmap for this element, using the first
    /// integer of its index as the table position. The array handle
    /// argument is not used.
    int procNum(int , const CkArrayIndex &element) 
    {
	const int *indexData = element.data();
	return pmap[indexData[0]];
    }