int procNum(int arrayHdl, const CkArrayIndex &i) { #if 1 if (i.nInts==1) { //Map 1D integer indices in simple round-robin fashion return shiftPE+((i.data()[0])%numPE); } else #endif { //Map other indices based on their hash code, mod a big prime. unsigned int hash=(i.hash()+739)%1280107; return shiftPE+(hash % numPE); } }
void LinearMap::populateInitial( int, CkArrayIndex& idx, void *msg, CkArrMgr *mgr ) //****************************************************************************** // Create initial set of array elements based on linear distribution //! \param[in] idx Charm++ array index object containing the number of initial //! array elements to be created //! \param[in] msg Charm++ messsage to use for array element creation //! \param[in] mgr Array manager to use //! \author J. Bakosi //****************************************************************************** { int nelem = *idx.data(); // number of array elements requested if (nelem == 0) return; // no initial elements requested auto lower = CkMyPe() * m_chunksize; auto upper = lower + m_chunksize; auto remainder = nelem % CkNumPes(); if (remainder && CkMyPe() == CkNumPes()-1) upper += remainder; for (int e=0; e<nelem; ++e) if (e >= lower && e < upper) mgr->insertInitial( e, CkCopyMsg(&msg) ); mgr->doneInserting(); CkFreeMsg( msg ); }
// Print out an array index to this string as decimal fields // separated by underscores. void printIndex(const CkArrayIndex &idx,char *dest) { const int *idxData=idx.data(); for (int i=0;i<idx.nInts;i++) { sprintf(dest,"%s%d",i==0?"":"_", idxData[i]); dest+=strlen(dest); } }
void forwardMsg(int ep,const CkArrayIndex &idx, const CkArrayID &a, int nBytes,char *env) { ckout<<"DelegateMgr> Recv message for "<<idx.data()[0]<<endl; //Have to allocate a new message because of Charm++'s // weird allocate rules: envelope *msg=(envelope *)CmiAlloc(nBytes); memcpy(msg,env,nBytes); CkUnpackMessage(&msg); CProxy_CkArray ap(msg->getsetArrayMgr()); ap.ckLocalBranch()->deliver((CkMessage *)EnvToUsr(msg),CkDeliver_inline); }
// Delegated array send: deliver locally when the element's last known PE is
// this one, otherwise pack the message and pass it on through forwardMsg.
//  pd:  delegate data (not consulted here)
//  ep:  entry point, passed through to forwardMsg
//  m:   user message to send
//  idx: index of the destination element
//  a:   id of the destination array
virtual void ArraySend(CkDelegateData *pd,int ep,void *m,const CkArrayIndex &idx,CkArrayID a)
{
	CkArray *localMgr=CProxy_CkArray(a).ckLocalBranch();
	const int destPE=localMgr->lastKnown(idx);

	if (destPE==CkMyPe())
	{
		// Destination is on this PE: queue the message with the local manager.
		localMgr->deliver((CkMessage *)m, CkDeliver_queue);
		return;
	}

	// Destination is elsewhere: pack the envelope and forward its raw bytes.
	ckout<<"DelegateMgr> Sending message for "<<idx.data()[0]<<" to "<<destPE<<endl;
	envelope *packed=UsrToEnv(m);
	CkPackMessage(&packed);
	forwardMsg(ep,idx,a,packed->getTotalsize(),(char *)packed);
	// NOTE(review): m is freed as it was before packing; confirm that
	// CkPackMessage never replaces the underlying buffer for these messages.
	CkFreeMsg(m);
}
int LinearMap::procNum( int, const CkArrayIndex& idx ) //****************************************************************************** // Return the home processor number for the array element for linear // distribution //! \param[in] idx Charm++ array index object containing the array element index //! to assign a PE to //! \return PE assigned //! \author J. Bakosi //****************************************************************************** { int elem = *idx.data(); // array element we assign PE for auto pe = elem / m_chunksize; if (pe >= CkNumPes()) pe = CkNumPes()-1; Assert( pe < CkNumPes(), "Assigned PE (" + std::to_string(pe) + ") larger than NumPEs (" + std::to_string(CkNumPes()) + ")" ); return pe; }
// Look up the home PE of a three-field index in the mapping table.
// The fields (x, y, z) are flattened to x*Y*Z + y*Z + z, and the entry at
// that position of mapping is returned.
int ComputeMap::procNum(int, const CkArrayIndex &idx)
{
  const int *coords = idx.data();
  const int x = coords[0];
  const int y = coords[1];
  const int z = coords[2];

  const auto flat = x*Y*Z + y*Z + z;
  return mapping[flat];
}
// Orthogonal recursive bisection load balancing strategy.
//  stats: load database statistics for this step. Object and processor data
//         are read from it, and stats->to_proc[] is filled with the chosen
//         destination PE of every object (non-migratable objects, and
//         objects that do not move, keep their from_proc value).
// Steps: gather the coordinates and load of every migratable object, sort
// the objects along each direction, build the top-level partition, divide
// it recursively among the usable processors, map partitions to nodes and
// finally record the resulting placement.
// The whole body is compiled only when CMK_LBDB_ON is set.
void OrbLB::work(LDStats* stats)
{
#if CMK_LBDB_ON
  int i,j;

  statsData = stats;
  P = stats->nprocs();

  // calculate total number of migratable objects
  nObjs = stats->n_migrateobjs;
#ifdef DEBUG
  CmiPrintf("ORB: num objects:%d\n", nObjs);
#endif

  // create computeLoad and calculate tentative computes coordinates
  computeLoad = new ComputeLoad[nObjs];
  for (i=XDIR; i<=ZDIR; i++) vArray[i] = new VecArray[nObjs];

  // v[0] = XDIR  v[1] = YDIR v[2] = ZDIR
  // vArray[XDIR] is an array holding the x vector for all computes
  int objIdx = 0;
  for (i=0; i<stats->n_objs; i++) {
    LDObjData &odata = stats->objData[i];
    if (odata.migratable == 0) continue;
    computeLoad[objIdx].id = objIdx;
#if CMK_LB_USER_DATA
    int x, y, z;
    if (use_udata) {
      // coordinates come from the array index stored as LB user data
      CkArrayIndex *idx =
        (CkArrayIndex *)odata.getUserData(CkpvAccess(_lb_obj_index));
      x = idx->data()[0];
      y = idx->data()[1];
      z = idx->data()[2];
    } else {
      x = odata.objID().id[0];
      y = odata.objID().id[1];
      z = odata.objID().id[2];
    }
    computeLoad[objIdx].v[XDIR] = x;
    computeLoad[objIdx].v[YDIR] = y;
    computeLoad[objIdx].v[ZDIR] = z;
#else
    computeLoad[objIdx].v[XDIR] = odata.objID().id[0];
    computeLoad[objIdx].v[YDIR] = odata.objID().id[1];
    computeLoad[objIdx].v[ZDIR] = odata.objID().id[2];
#endif
#if CMK_LB_CPUTIMER
    computeLoad[objIdx].load = _lb_args.useCpuTime()?odata.cpuTime:odata.wallTime;
#else
    computeLoad[objIdx].load = odata.wallTime;
#endif
    computeLoad[objIdx].refno = 0;
    computeLoad[objIdx].partition = NULL;
    for (int k=XDIR; k<=ZDIR; k++) {
      vArray[k][objIdx].id = objIdx;
      vArray[k][objIdx].v = computeLoad[objIdx].v[k];
    }
#ifdef DEBUG
    CmiPrintf("Object %d: %d %d %d load:%f\n", objIdx, computeLoad[objIdx].v[XDIR], computeLoad[objIdx].v[YDIR], computeLoad[objIdx].v[ZDIR], computeLoad[objIdx].load);
#endif
    objIdx ++;
  }
  CmiAssert(nObjs == objIdx);

  double t = CkWallTimer();

  quicksort(XDIR);
  quicksort(YDIR);
  quicksort(ZDIR);
#ifdef DEBUG
  CmiPrintf("qsort time: %f\n", CkWallTimer() - t);
#endif

  // one partition per available processor
  npartition = 0;
  for (i=0; i<P; i++)
    if (stats->procs[i].available == true) npartition++;
  partitions = new Partition[npartition];

  // Total load and bounding box of all migratable objects. The bounds are
  // seeded from object 0 only when it exists, and the loop starts at 0 so
  // that the load of object 0 is part of the total as well.
  double totalLoad = 0.0;
  int minx = 0, miny = 0, minz = 0, maxx = 0, maxy = 0, maxz = 0;
  if (nObjs > 0) {
    minx = maxx = computeLoad[0].v[XDIR];
    miny = maxy = computeLoad[0].v[YDIR];
    minz = maxz = computeLoad[0].v[ZDIR];
  }
  for (i=0; i<nObjs; i++) {
    totalLoad += computeLoad[i].load;
    if (computeLoad[i].v[XDIR] < minx) minx = computeLoad[i].v[XDIR];
    else if (computeLoad[i].v[XDIR] > maxx) maxx = computeLoad[i].v[XDIR];
    if (computeLoad[i].v[YDIR] < miny) miny = computeLoad[i].v[YDIR];
    else if (computeLoad[i].v[YDIR] > maxy) maxy = computeLoad[i].v[YDIR];
    if (computeLoad[i].v[ZDIR] < minz) minz = computeLoad[i].v[ZDIR];
    else if (computeLoad[i].v[ZDIR] > maxz) maxz = computeLoad[i].v[ZDIR];
  }

  top_partition.origin[XDIR] = minx;
  top_partition.origin[YDIR] = miny;
  top_partition.origin[ZDIR] = minz;
  top_partition.corner[XDIR] = maxx;
  top_partition.corner[YDIR] = maxy;
  top_partition.corner[ZDIR] = maxz;

  top_partition.refno = 0;
  top_partition.load = 0.0;
  top_partition.count = nObjs;

  // if we take background load into account
  if (!_lb_args.ignoreBgLoad()) {
    top_partition.bkpes.resize(0);
    double total = totalLoad;
    for (i=0; i<P; i++) {
      if (!stats->procs[i].available) continue;
      double bkload = stats->procs[i].bg_walltime;
      total += bkload;
    }
    double averageLoad = total / npartition;
    // only processors whose background load is below the average take objects
    for (i=0; i<P; i++) {
      if (!stats->procs[i].available) continue;
      double bkload = stats->procs[i].bg_walltime;
      if (bkload < averageLoad) top_partition.bkpes.push_back(i);
      else CkPrintf("OrbLB Info> PE %d with %f background load will have 0 object.\n", i, bkload);
    }
    npartition = top_partition.bkpes.size();
    // formally add these bg load to total load
    for (i=0; i<npartition; i++)
      totalLoad += stats->procs[top_partition.bkpes[i]].bg_walltime;
    if (_lb_args.debug()>=2) {
      CkPrintf("BG load: ");
      for (i=0; i<P; i++) CkPrintf(" %f", stats->procs[i].bg_walltime);
      CkPrintf("\n");
      CkPrintf("Partition BG load: ");
      for (i=0; i<npartition; i++) CkPrintf(" %f", stats->procs[top_partition.bkpes[i]].bg_walltime);
      CkPrintf("\n");
    }
  }

  top_partition.load = totalLoad;

  currentp = 0;
  refno = 0;

  // recursively divide
  rec_divide(npartition, top_partition);

  // mapping partitions to nodes
  mapPartitionsToNodes();

  // this is for sanity check
  int *num = new int[P];
  for (i=0; i<P; i++) num[i] = 0;

  for (i=0; i<nObjs; i++)
  {
    for (j=0; j<npartition; j++)
      if (computeLoad[i].refno == partitions[j].refno) {
        computeLoad[i].partition = partitions+j;
        num[j] ++;
      }
    CmiAssert(computeLoad[i].partition != NULL);
  }

  for (i=0; i<npartition; i++)
    if (num[i] != partitions[i].count)
      CmiAbort("OrbLB: Compute counts don't agree!\n");

  delete [] num;

  // Save output
  objIdx = 0;
  for(int obj=0;obj<stats->n_objs;obj++) {
    stats->to_proc[obj] = stats->from_proc[obj];
    LDObjData &odata = stats->objData[obj];
    if (odata.migratable == 0) { continue; }
    int frompe = stats->from_proc[obj];
    int tope = computeLoad[objIdx].partition->node;
    if (frompe != tope) {
      if (_lb_args.debug() >= 3) {
        CkPrintf("[%d] Obj %d migrating from %d to %d\n",
                 CkMyPe(),obj,frompe,tope);
      }
      stats->to_proc[obj] = tope;
    }
    objIdx ++;
  }

  // free memory
  delete [] computeLoad;
  for (i=0; i<3; i++) delete [] vArray[i];
  delete [] partitions;

  if (_lb_args.debug() >= 1)
    CkPrintf("OrbLB finished time: %fs\n", CkWallTimer() - t);
#endif
}
// Return the PE stored in pmap at the position given by the first
// integer field of the element's index.
int procNum(int , const CkArrayIndex &element)
{
  const int slot = element.data()[0];
  return pmap[slot];
}