// generate migrate message from stats->from_proc and to_proc LBMigrateMsg * CentralLB::createMigrateMsg(LDStats* stats) { int i; CkVec<MigrateInfo*> migrateInfo; for (i=0; i<stats->n_objs; i++) { LDObjData &objData = stats->objData[i]; int frompe = stats->from_proc[i]; int tope = stats->to_proc[i]; if (frompe != tope) { // CkPrintf("[%d] Obj %d migrating from %d to %d\n", // CkMyPe(),obj,pe,dest); MigrateInfo *migrateMe = new MigrateInfo; migrateMe->obj = objData.handle; migrateMe->from_pe = frompe; migrateMe->to_pe = tope; migrateMe->async_arrival = objData.asyncArrival; migrateInfo.insertAtEnd(migrateMe); } } int migrate_count=migrateInfo.length(); LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg; msg->n_moves = migrate_count; for(i=0; i < migrate_count; i++) { MigrateInfo* item = (MigrateInfo*) migrateInfo[i]; msg->moves[i] = *item; delete item; migrateInfo[i] = 0; } return msg; }
/**
 * Main-chare constructor of the table-update test.
 *
 * Command line: <N> <numElementsPerPe>
 *   argv[1]  N, the exponent used to size the table (2^N words are split
 *            between the chares of one PE)
 *   argv[2]  number of Updater chares created per PE
 *
 * Sizes the table, prints the configuration, publishes the driver proxy and
 * creates the Updater array. Aborts on missing or unusable arguments.
 */
TestDriver(CkArgMsg* args) {
  // Both arguments are mandatory; argv[] must not be read past argc.
  if (args->argc < 3)
    CkAbort("Usage: <N> <numElementsPerPe>\n");

  N = atoi(args->argv[1]);
  numElementsPerPe = atoll(args->argv[2]);

  // Shifting by a negative or too-large count is undefined.
  if (N < 0 || N >= (int)(sizeof(long) * 8 - 1))
    CkAbort("N is out of range for the table size computation\n");
  // A non-positive chare count would divide by zero just below.
  if (numElementsPerPe <= 0)
    CkAbort("Number of chares per PE must be positive\n");

  localTableSize = (1l << N) / numElementsPerPe;
  if (!localTableSize)
    CkAbort("Table size is too small, or number of chares is too large\n");
  tableSize = localTableSize * CkNumPes() * numElementsPerPe;

  CkPrintf("Global table size   = 2^%d * %d = %lld words\n",
           N, CkNumPes(), tableSize);
  CkPrintf("Number of processors = %d\nNumber of updates = %lld\n",
           CkNumPes(), 4 * tableSize);

  driverProxy = thishandle;
  // Create the chares storing and updating the global table
  updater_array = CProxy_Updater::ckNew(CkNumPes() * numElementsPerPe);

  int dims[2] = {CkNumNodes(), CkNumPes() / CkNumNodes()};
  CkPrintf("Aggregation topology: %d %d\n", dims[0], dims[1]);

  // Instantiate communication library group with a handle to the client
  //aggregator =
  //  CProxy_ArrayMeshStreamer<dtype, int, Updater, SimpleMeshRouter>
  //  ::ckNew(numMsgsBuffered, 2, dims, updater_array, 1);

  delete args;
}
void Main::save_temp(int SIZE, double *number, int row_number){ int i,j; double (*temp)[matrix_size] = (double (*)[matrix_size]) result; for(i = 0; i < matrix_size/CkNumPes(); i++){ for(j = 0; j < matrix_size; j++) { temp[i+row_number*matrix_size/CkNumPes()][j] = number[i*matrix_size+j]; } } doneCount ++; //CkPrintf("donecount: %d ",doneCount); if (doneCount == CkNumPes()){ /* for (i = 0; i < matrix_size; i++) { CkPrintf("tulemus: "); for (j = 0; j < matrix_size; j++) { CkPrintf(" %.1f ",temp[i][j]); } CkPrintf("\n"); } */ CkExit(); } }
void Sync::openSync(void) { int reportPe = 1; while ( 2 * reportPe < CkNumPes() ) reportPe *= 2; step = -1; useSync = 1; useProxySync = 0; if (useSync) { // if use proxy spanning tree, proxy sync is forced if (!useProxySync && (proxySendSpanning || proxyRecvSpanning) && PatchMap::Object()->numPatches() < 4 * CkNumPes() ) { // If on BG/P, useProxySync should not be turned on for better performance #if ! (CMK_BLUEGENEQ || CMK_BLUEGENEP) // CmiPrintf("[%d] useProxySync is turned on. \n", CkMyPe()); useProxySync = 1; #endif } #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) // immediate messages can be processed by any PE if (CkMyNodeSize() > 2) useProxySync = 0; #endif // no proxies on this node, no need to use proxy sync. if (useProxySync && ProxyMgr::Object()->numProxies() == 0) { // CmiPrintf("[%d] useProxySync is turned off because no proxy. \n", CkMyPe()); useProxySync = 0; } // if no proxy sync and no home patch, then disable home patch sync as well if (!useProxySync && PatchMap::Object()->numHomePatches() == 0) useSync = 0; } if(CkMyPe() == reportPe) iout << iINFO << "useSync: " << useSync << " useProxySync: " << useProxySync << "\n" << endi; }
/**
 * Extracts from a migration message the part that concerns processor p.
 *
 * The result holds only the moves whose from_pe or to_pe equals p, plus the
 * level and next_lb fields. The per-processor arrays (avail_vector and
 * expectedLoad) are copied only when at least one avail_vector entry of the
 * source is zero; otherwise the result is allocated without room for them.
 *
 * @param m  full migration message
 * @param p  processor the extract is built for
 * @return newly allocated LBMigrateMsg
 */
LBMigrateMsg * CentralLB::extractMigrateMsg(LBMigrateMsg *m, int p)
{
  const int numPes = CkNumPes();

  // Size the result: count the moves that touch p ...
  int relevant = 0;
  for (int k = 0; k < m->n_moves; k++) {
    const MigrateInfo &mv = m->moves[k];
    if (mv.from_pe == p || mv.to_pe == p)
      relevant++;
  }
  // ... and find out whether any processor is marked unavailable.
  int unavailable = 0;
  for (int pe = 0; pe < numPes; pe++) {
    if (!m->avail_vector[pe])
      unavailable++;
  }

  LBMigrateMsg* msg = unavailable
      ? new(relevant,numPes,numPes,0) LBMigrateMsg
      : new(relevant,0,0,0) LBMigrateMsg;
  msg->n_moves = relevant;
  msg->level = m->level;
  msg->next_lb = m->next_lb;

  // Copy the selected moves, preserving their order.
  int out = 0;
  for (int k = 0; k < m->n_moves; k++) {
    const MigrateInfo &mv = m->moves[k];
    if (mv.from_pe == p || mv.to_pe == p)
      msg->moves[out++] = mv;
  }

  // copy processor data
  if (unavailable) {
    for (int pe = 0; pe < numPes; pe++) {
      msg->avail_vector[pe] = m->avail_vector[pe];
      msg->expectedLoad[pe] = m->expectedLoad[pe];
    }
  }
  return msg;
}
/**
 * Maps a PE to the PE that receives messages on behalf of its node.
 *
 * The node-aware remapping is compiled out by the `#if 1` below, so the
 * function currently returns its argument unchanged. The disabled branch
 * rounds pe down to a multiple of the number of PEs per node.
 *
 * @param pe  destination PE
 * @return PE the message should be sent to
 */
inline int getPErepresentingNodeContainingPE(int pe){
#if 1
  return pe;
#else

#if USE_CONTROL_POINTS
  std::vector<int> candidates;
  candidates.push_back(1);
  if (CkNumPes() >= 2) candidates.push_back(2);
  if (CkNumPes() >= 4) candidates.push_back(4);
  if (CkNumPes() >= 8) candidates.push_back(8);
  int pes_per_node = controlPoint("Number of PEs per Node", candidates);
#else
  int pes_per_node = 1;
#endif

  // The PE_PER_NODES environment variable overrides the value chosen above.
  const char *fromEnv = getenv("PE_PER_NODES");
  if (fromEnv != NULL)
    pes_per_node = CkNumPes()/atoi(fromEnv);

  if (pes_per_node <= 1 || pes_per_node > CkNumPes())
    return pe;

  ComlibPrintf("NODE AWARE Sending a message to a representative of the node instead of its real owner\n");
  return pe - (pe % pes_per_node);
#endif
}
Main(CkArgMsg* m) { #if CMK_BLUEGENEL BGLPersonality bgl_p; int i = rts_get_personality(&bgl_p, sizeof(BGLPersonality)); #elif CMK_BLUEGENEP DCMF_Hardware_t bgp_hwt; DCMF_Hardware(&bgp_hwt); #elif XT3_TOPOLOGY XT3TorusManager xt3tm; #elif XT4_TOPOLOGY || XT5_TOPOLOGY XTTorusManager xttm; #endif mainProxy = thishandle; CkPrintf("Testing TopoManager .... \n"); TopoManager tmgr; CkPrintf("Torus Size [%d] [%d] [%d] [%d]\n", tmgr.getDimNX(), tmgr.getDimNY(), tmgr.getDimNZ(), tmgr.getDimNT()); #if CMK_BLUEGENEP CkPrintf("Torus Size [%d] [%d] [%d] [%d]\n", bgp_hwt.xSize, bgp_hwt.ySize, bgp_hwt.zSize, bgp_hwt.tSize); #endif int x, y, z, t; for(int i=0; i<CkNumPes(); i++) { tmgr.rankToCoordinates(i, x, y, z, t); CkPrintf("---- Processor %d ---> x %d y %d z %d t %d\n", i, x, y, z, t); #if CMK_BLUEGENEL unsigned int tmp_t, tmp_x, tmp_y, tmp_z; rts_coordinatesForRank(i, &tmp_x, &tmp_y, &tmp_z, &tmp_t); CkPrintf("Real Processor %d ---> x %d y %d z %d t %d\n", i, tmp_x, tmp_y, tmp_z, tmp_t); #elif CMK_BLUEGENEP unsigned int tmp_t, tmp_x, tmp_y, tmp_z; #if (DCMF_VERSION_MAJOR >= 3) DCMF_NetworkCoord_t nc; DCMF_Messager_rank2network(i, DCMF_DEFAULT_NETWORK, &nc); tmp_x = nc.torus.x; tmp_y = nc.torus.y; tmp_z = nc.torus.z; tmp_t = nc.torus.t; #else DCMF_Messager_rank2torus(c, &tmp_x, &tmp_y, &tmp_z, &tmp_t); #endif CkPrintf("Real Processor %d ---> x %d y %d z %d t %d\n", i, tmp_x, tmp_y, tmp_z, tmp_t); #elif XT3_TOPOLOGY int tmp_t, tmp_x, tmp_y, tmp_z; xt3tm.realRankToCoordinates(i, tmp_x, tmp_y, tmp_z, tmp_t); CkPrintf("Real Processor %d ---> x %d y %d z %d t %d\n", i, tmp_x, tmp_y, tmp_z, tmp_t); #elif XT4_TOPOLOGY || XT5_TOPOLOGY int tmp_t, tmp_x, tmp_y, tmp_z; xttm.realRankToCoordinates(i, tmp_x, tmp_y, tmp_z, tmp_t); CkPrintf("Real Processor %d ---> x %d y %d z %d t %d\n", i, tmp_x, tmp_y, tmp_z, tmp_t); #endif } // end of for loop int size = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ(); CkPrintf("Torus Contiguity Metric %d : %d [%f] \n", size, 
CkNumPes()/tmgr.getDimNT(), (float)(CkNumPes())/(tmgr.getDimNT()*size) ); CkExit(); };
/**
 * Starts the migration test.
 *
 * With fewer than two processors the test reports an error and finishes
 * immediately; otherwise it creates 10 + 2*CkNumPes() mig_Element chares.
 */
void migration_init(void)
{
  if (CkNumPes() < 2) {
    CkError("migration: requires at least 2 processors.\n");
    megatest_finish();
    return;
  }

  const int numElements = 10 + (CkNumPes() * 2);
  CProxy_mig_Element::ckNew(numElements);
}
/**
 * Builds this processor's load-statistics message and stores it in statsMsg.
 *
 * The message carries the timing figures, the processor speed, and the
 * object and communication records obtained from theLbdb. On the processor
 * equal to cur_ld_balancer it also carries the availability vector and the
 * next balancer id. The whole body is compiled only when CMK_LBDB_ON is set.
 */
void CentralLB::BuildStatsMsg()
{
#if CMK_LBDB_ON
  // build and send stats
  const int numObjRecords = theLbdb->GetObjDataSz();
  const int numCommRecords = theLbdb->GetCommDataSz();
  int npes = CkNumPes();

  CLBStatsMsg* msg = new CLBStatsMsg(numObjRecords, numCommRecords);
  _MEMCHECK(msg);
  msg->from_pe = CkMyPe();
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  msg->step = step();
#endif

#if CMK_LB_CPUTIMER
  theLbdb->GetTime(&msg->total_walltime,&msg->total_cputime,
                   &msg->idletime, &msg->bg_walltime,&msg->bg_cputime);
#else
  // Without the CPU timer the wall-time fields are passed in the CPU-time
  // argument positions as well.
  theLbdb->GetTime(&msg->total_walltime,&msg->total_walltime,
                   &msg->idletime, &msg->bg_walltime,&msg->bg_walltime);
#endif

#if defined(TEMP_LDB)
  float mytemp=getTemp(CkMyPe()%physicalCoresPerNode);
  int freq=cpufreq_sysfs_read (CkMyPe()%logicalCoresPerNode);
  msg->pe_temp=mytemp;
  msg->pe_speed=freq;
#else
  msg->pe_speed = myspeed;
#endif

  DEBUGF(("Processor %d Total time (wall,cpu) = %f %f Idle = %f Bg = %f %f\n", CkMyPe(),msg->total_walltime,msg->total_cputime,msg->idletime,msg->bg_walltime,msg->bg_cputime));

  // Object records first, then communication records.
  msg->n_objs = numObjRecords;
  theLbdb->GetObjData(msg->objData);
  msg->n_comm = numCommRecords;
  theLbdb->GetCommData(msg->commData);
  DEBUGF(("PE %d BuildStatsMsg %d objs, %d comm\n",CkMyPe(),msg->n_objs,msg->n_comm));

  if (CkMyPe() == cur_ld_balancer) {
    msg->avail_vector = new char[npes];
    LBDatabaseObj()->get_avail_vector(msg->avail_vector);
    msg->next_lb = LBDatabaseObj()->new_lbbalancer();
  }

  // A previously built message must have been consumed already.
  CmiAssert(statsMsg == NULL);
  statsMsg = msg;
#endif
}
/** * Creates the chare array for the hybrid load balancer. */ void CreateNamdHybridLB() { CProxy_NamdHybridLB::ckNew(); // creating an array to store the loads of all processors // to be used with proxy spanning tree if (CkMyPe() == 0 && cpuloads == NULL) { cpuloads = new double[CkNumPes()]; CmiAssert(cpuloads != NULL); for (int i=0; i<CkNumPes(); i++) cpuloads[i] = 0.0; } }
/**
 * Main-chare constructor of the Jacobi benchmark.
 *
 * Command line: array_height array_width block_height block_width [iterations]
 *   argv[1], argv[2]  dimensions of the whole array
 *   argv[3], argv[4]  dimensions of the block owned by one chare
 *   argv[5]           optional iteration count (default 200)
 *
 * Creates one Jacobi chare per block and starts the first iteration.
 */
Main(CkArgMsg* m) {
  // argv[1] through argv[4] are all read below, so argc must be at least 5.
  if (m->argc < 5) {
    CkPrintf("%s [array_height] [array_width] [block_height] [block_width] [iterations]\n", m->argv[0]);
    CkAbort("Abort");
  }

  // Per-processor accumulators, zeroed.
  totalTime = new float[CkNumPes()];
  totalObjs = new int[CkNumPes()];
  for(int i=0;i<CkNumPes();i++)
  {
    totalTime[i] = 0.0;
    totalObjs[i] = 0;
  }

  totalIterTime = 0.0;
  lbdOverhead = 0.0;

  // set iteration counter to zero
  iterations=0;

  // store the main proxy
  mainProxy = thisProxy;

  array_height = atoi(m->argv[1]);
  array_width = atoi(m->argv[2]);
  block_height = atoi(m->argv[3]);
  block_width = atoi(m->argv[4]);

  // Both block dimensions are used as divisors below.
  if (block_height <= 0 || block_width <= 0)
    CkAbort("block dimensions must be positive");
  if (array_width < block_width || array_width % block_width != 0)
    CkAbort("array_size % block_size != 0!");

  num_chare_rows = array_height / block_height;
  num_chare_cols = array_width / block_width;

  total_iterations = 200;
  if (m->argc > 5) {
    total_iterations = atoi(m->argv[5]);
  }

  // Create new array of worker chares
  array = CProxy_Jacobi::ckNew(num_chare_cols, num_chare_rows);

  // save the total number of worker chares we have in this simulation
  num_chares = num_chare_rows*num_chare_cols;

  //Start the computation
  perIterStartTime = CkWallTimer();
  progStartTime = CkWallTimer();
  recieve_count = 0;
  array.begin_iteration();
}
int main(int argc,char **argv) { int rank; /* process id */ int p; /* number of processes */ MPI_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &rank); MPI_Comm_size( MPI_COMM_WORLD, &p ); MPI_Barrier(MPI_COMM_WORLD); if(p >= 1){ CkPrintf("\nbegin migrating\n"); for (int step=0; step<=CkNumPes(); ++step) { if (rank == 1) { int destination_pe = (CkMyPe() + 1) % CkNumPes(); CkPrintf("Trying to migrate partition %d from pe %d to %d\n", rank, CkMyPe(), destination_pe); //fflush(stdout); CkAssert(destination_pe >= 0); int migrate_test = CkMyPe(); printf("Entering TCHARM_Migrate_to, " "FEM_My_partition is %d, " "CkMyPe() is %d, migrate_test is %d\n", rank, CkMyPe(), migrate_test); //fflush(stdout); AMPI_Migrateto(destination_pe); printf("Leaving TCHARM_Migrate_to, " "FEM_My_partition is %d, " "CkMyPe() is %d, migrate_test is %d\n", rank, CkMyPe(), migrate_test); //fflush(stdout); } MPI_Barrier(MPI_COMM_WORLD); CkPrintf("Done with step %d\n", step); //fflush(stdout); } MPI_Barrier(MPI_COMM_WORLD); CkPrintf("done migrating\n"); MPI_Barrier(MPI_COMM_WORLD); } if (rank==0) CkPrintf("All tests passed\n"); MPI_Finalize(); return 0; }
/**
 * Sets numChildren for tree node n.
 *
 * Nodes up to and including (CkNumPes()-1-arity)/arity get `arity` children,
 * the node after that gets whatever remains, and all later nodes get none.
 * With arity == 0 every node has no children.
 *
 * @param n  node whose child count is computed
 */
void SpanningTree::calcNumChildren(int n)
{
  numChildren = 0;
  if (arity == 0)
    return;

  const int lastFull = (CkNumPes()-1-arity)/arity;
  const int partial = lastFull+1;

  if (n <= lastFull)
    numChildren = arity;
  else if (n == partial)
    numChildren = CkNumPes()-1-partial*arity;
  else
    numChildren = 0;
}
void Main::ready() { int i; readyCount += 1; int BLOCK_SIZE = matrix_size*matrix_size/CkNumPes(); double (*split_y)[BLOCK_SIZE] = (double (*)[BLOCK_SIZE]) split_bycol; if (readyCount == CkNumPes()) { // calculate CkPrintf("Start calculation\n"); for (i = 0; i < CkNumPes(); i++) { object_array[i].multiply(matrix_size * matrix_size/CkNumPes(), split_y[i],i); } } }
/**
 * Main-chare constructor.
 *
 * Command line: <argv[1]> <argv[2]>, both integers; nElements and nChares
 * are set to CkNumPes() times the respective value. Aborts when either
 * argument is missing.
 */
Main(CkArgMsg *m) {
  // Both arguments are read unconditionally below.
  if (m->argc < 3)
    CkAbort("Usage: <elements per PE> <chares per PE>\n");

  const int elementsFactor = atoi(m->argv[1]);
  const int charesFactor = atoi(m->argv[2]);
  CkPrintf("%d:Hallo: argv[1] = %d, argv[2] = %d!\n", CmiMyPe(), elementsFactor, charesFactor);

  nElements = CkNumPes() * elementsFactor;
  nChares = CkNumPes() * charesFactor;
  mainProxy = thisProxy;

  //!!
  alltoall_proxy = new Pingping_uChareArray(nElements, nChares);
  alltoall_proxy->init();
  //thisProxy.init();

  delete m;
}
/**
 * Constructs the hybrid load balancer base.
 *
 * Registers the barrier receiver and the migration notifier with theLbdb,
 * picks the tree shape from the processor count (two levels for up to 4
 * processors, three levels otherwise; with 4096 or more processors the
 * stats strategy becomes SHRINK), and resets the bookkeeping fields.
 * The body is compiled only when CMK_LBDB_ON is set.
 */
HybridBaseLB::HybridBaseLB(const CkLBOptions &opt): CBase_HybridBaseLB(opt)
{
#if CMK_LBDB_ON
  lbname = (char *)"HybridBaseLB";
  thisProxy = CProxy_HybridBaseLB(thisgroup);
  receiver = theLbdb->
    AddLocalBarrierReceiver((LDBarrierFn)(staticAtSync), (void*)(this));
  notifier = theLbdb->getLBDB()->
    NotifyMigrated((LDMigratedFn)(staticMigrated), (void*)(this));

  // defines topology
  statsStrategy = FULL;
  if (CkNumPes() > 4) {
    tree = new ThreeLevelTree;
    if (CkNumPes() >= 4096)
      statsStrategy = SHRINK;
  }
  else {
    tree = new TwoLevelTree;
  }
  if (CkMyPe() == 0)
    CkPrintf("%s: %s is created.\n", lbname, tree->name());

  // Bookkeeping starts from a clean state.
  currentLevel = 0;
  foundNeighbors = 0;
  future_migrates_expected = -1;
  vector_n_moves = 0;

  maxLoad = 0.0;
  maxCpuLoad = 0.0;
  totalLoad = 0.0;
  maxCommCount = 0;
  maxCommBytes = 0.0;
  maxMem = 0.0;

  if (_lb_args.statsOn())
    theLbdb->CollectStatsOn();

  group1_created = 0; // base class need to call initTree()
#endif
}
// Insert an object in the list in the firstEmpty slot, expanding the list // size if necessary int lbObjects::Insert(int sync, int index, sim *myPtr) { int idx, i; if (numObjs < size) { // insert in empty space idx = firstEmpty; if (firstEmpty == numSpaces) // all spaces occupied up to end of list numSpaces++; // use a previously unused space objs[idx].index = index; objs[idx].present = 1; objs[idx].sync = sync; objs[idx].localObjPtr = myPtr; numObjs++; for (i=firstEmpty+1; i<size; i++) // reset firstEmpty if (objs[i].present == 0) { firstEmpty = i; break; } } else { // no free spaces; expand objs firstEmpty = size; // this is where firstEmpty will be after expansion size += 50; // expand by 50 if (!(objs = (lbObjectNode *)realloc(objs, size * sizeof(lbObjectNode)))) { CkPrintf("ERROR: lbObjects::Insert: OUT OF MEMORY!\n"); CkExit(); } for (i=firstEmpty; i<size; i++) { // initialize new slots to empty objs[i].present = 0; objs[i].eet = objs[i].ne = objs[i].execPrio = 0; objs[i].rbOh = 0.0; objs[i].comm = (int *)malloc(CkNumPes()*sizeof(int)); for (int j=0; j<CkNumPes(); j++) objs[i].comm[j] = 0; objs[i].totalComm = objs[i].localComm = objs[i].remoteComm = objs[i].maxComm = 0; objs[i].maxCommPE = -1; } idx = firstEmpty; // insert new object at firstEmpty objs[idx].index = index; objs[idx].present = 1; objs[idx].sync = sync; objs[idx].localObjPtr = myPtr; numObjs++; numSpaces++; firstEmpty++; } return idx; }
// Entry point of Charm++ application Main::Main(CkArgMsg* msg) { int i, j, k, l; numParts = DEFAULT_PARTICLES; m = DEFAULT_M; n = DEFAULT_N; L = DEFAULT_L; radius = DEFAULT_RADIUS; finalStepCount = DEFAULT_FINALSTEPCOUNT; delete msg; checkInCount = 0; mainProxy = thisProxy; // initializing the cell 2D array cellArray = CProxy_Cell::ckNew(m,n); // initializing the interaction 4D array interactionArray = CProxy_Interaction::ckNew(); // For Round Robin insertion int numPes = CkNumPes(); int currPE = -1; for (int x = 0; x < m ; x++ ) { for (int y = 0; y < n; y++ ) { // self interaction interactionArray( x, y, x, y ).insert( (currPE++) % numPes ); // (x,y) and (x+1,y) pair (x == m-1) ? (i=0, k=x) : (i=x, k=x+1); interactionArray( i, y, k, y ).insert( (currPE++) % numPes ); // (x,y) and (x,y+1) pair (y == n-1) ? (j=0, l=y) : (j=y, l=y+1); interactionArray( x, j, x, l ).insert( (currPE++) % numPes ); // (x,y) and (x+1,y+1) pair, Irrespective of y (x == m-1) ? ( i=0, k=x, j=(y+1)%n, l=y ) : (i=x, k=x+1, j=y, l=(y+1)%n ); interactionArray( i, j, k, l ).insert( (currPE++) % numPes ); // (x,y) and (x-1,y+1) pair (x == 0) ? ( i=x, k=(x-1+m)%m, j=y, l=(y+1)%n ) : (i=x-1, k=x, j=(y+1)%n, l=y ); interactionArray( i, j, k, l ).insert( (currPE++) % numPes ); } } interactionArray.doneInserting(); // setup liveviz CkCallback c(CkIndex_Cell::requestNextFrame(0),cellArray); liveVizConfig cfg(liveVizConfig::pix_color,true); liveVizInit(cfg,cellArray,c); sleep(1); cellArray.start(); }
/**
 * Main-chare constructor of the Hello test.
 *
 * Reads the optional element count from argv[1] (default 5), creates the
 * Hello and Hello2 arrays with the same sparse 3D indices (37, y, 2y+1) and
 * sends the first message to element (37,0,1) of each.
 */
Main(CkArgMsg* m)
{
  count = 2;

  //Process command-line arguments
  nElements = (m->argc > 1) ? atoi(m->argv[1]) : 5;
  delete m;

  //Start the computation
  CkPrintf("Running Hello on %d processors for %d elements\n",
           CkNumPes(),nElements);
  mainProxy = thisProxy;

  //Allocate elements scattered down a sparse 3D line
  CProxy_Hello arr = CProxy_Hello::ckNew();
  for (int row = 0; row < nElements; row++) {
    arr(37, row, 2*row+1).insert();
  }
  arr.doneInserting();

  // Same index set for the second array.
  CProxy_Hello2 arr2 = CProxy_Hello2::ckNew();
  for (int row = 0; row < nElements; row++) {
    arr2(37, row, 2*row+1).insert();
  }
  arr2.doneInserting();

  arr(37,0,1).SayHi(17);
  arr2(37,0,1).SayHi2(17);
};
/**
 * Handles a section multicast: checks the message payload, contributes an
 * array of CompleteMsgSize ints (each equal to thisIndex) to a fragmented
 * sum reduction over the section, then migrates this element to the next PE.
 *
 * @param m  multicast message; deleted here
 */
void SayHi(HiMsg *m)
{
  redNo ++;
  CmiAssert(m->data[0] == 22 && m->data[1] == 28);

  CkGetSectionInfo(sid, m);

  CkMulticastMgr *mg = CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch();
  int dataSize = (int)(CompleteMsgSize);
  int* data = new int [dataSize];
  int fragSize = dataSize/NumFrags;
  CkAssert (0 != fragSize);
  for (int i=0; i<dataSize; i++) {
    data [i] = thisIndex;
  }

  CkCallback cb(CkIndex_Hello::cb_client(NULL), CkArrayIndex1D(0), thisProxy);
  mg->contribute(dataSize*sizeof(int), data,CkReduction::sum_int, sid, cb, fragSize*sizeof(int));
  // The buffer was allocated above and is not referenced again, so release
  // it here instead of leaking it on every call.
  // NOTE(review): relies on contribute() having copied the payload by the
  // time it returns — confirm against the CkMulticastMgr documentation.
  delete [] data;

  //    data[0] = thisIndex+2;
  //    data[1] = thisIndex+2;
  //    mg->contribute(2*sizeof(int), &data,CkReduction::max_int, sid, sizeof(int));
  //    data[0] = thisIndex+1;
  //    data[1] = thisIndex+1;
  //    mg->contribute(2*sizeof(int), &data,CkReduction::product_int, sid, sizeof(int));
  delete m;
  if (1)
    ckMigrate((CkMyPe()+1)%CkNumPes());
}
// build a complete data from bufferred messages // not used when USE_REDUCTION = 1 void CentralLB::buildStats() { statsData->nprocs() = stats_msg_count; // allocate space statsData->objData.resize(statsData->n_objs); statsData->from_proc.resize(statsData->n_objs); statsData->to_proc.resize(statsData->n_objs); statsData->commData.resize(statsData->n_comm); int nobj = 0; int ncom = 0; int nmigobj = 0; // copy all data in individule message to this big structure for (int pe=0; pe<CkNumPes(); pe++) { int i; CLBStatsMsg *msg = statsMsgsList[pe]; if(msg == NULL) continue; for (i=0; i<msg->n_objs; i++) { statsData->from_proc[nobj] = statsData->to_proc[nobj] = pe; statsData->objData[nobj] = msg->objData[i]; if (msg->objData[i].migratable) nmigobj++; nobj++; } for (i=0; i<msg->n_comm; i++) { statsData->commData[ncom] = msg->commData[i]; ncom++; } // free the memory delete msg; statsMsgsList[pe]=0; } statsData->n_migrateobjs = nmigobj; }
// called on every processor void CentralLB::SendStats() { #if CMK_LBDB_ON CmiAssert(statsMsg != NULL); reduction_started = 0; #if USE_LDB_SPANNING_TREE if(CkNumPes()>1024) { if (CkMyPe() == cur_ld_balancer) thisProxy[CkMyPe()].ReceiveStats(statsMsg); else thisProxy[CkMyPe()].ReceiveStatsViaTree(statsMsg); } else #endif { DEBUGF(("[%d] calling ReceiveStats on step %d \n",CmiMyPe(),step())); thisProxy[cur_ld_balancer].ReceiveStats(statsMsg); } statsMsg = NULL; #ifdef __BIGSIM__ BgEndStreaming(); #endif { // enfore the barrier to wait until centralLB says no LDOMHandle h; h.id.id.idx = 0; theLbdb->getLBDB()->RegisteringObjects(h); } #endif }
/**
 * Chooses the tree arity from the processor count and computes this
 * processor's parent and number of children.
 *
 * The arity is the smallest integer a with 1 + a + a*a >= CkNumPes(),
 * obtained by solving 1 + a + a*a = CkNumPes() and rounding up.
 */
SpanningTree::SpanningTree()
{
  // Twice the positive root of a*a + a + 1 - P = 0, i.e. sqrt(4P - 3) - 1.
  const double twiceRoot = sqrt(CkNumPes()*4.0-3.0) - 1;
  arity = (int)ceil(twiceRoot/2);

  calcParent(CkMyPe());
  calcNumChildren(CkMyPe());
}
void IntermediateCall() { CkPrintf("Intermediate %d of %d \n", CkMyPe(), CkNumPes()); //CkPrintf("Message: %s arrived at element %d\n", m->msg, CkMyPe()); // assert(strcmp(m->msg,"|This is a short broadcast message|") == 0); mainProxy.Intermediate(); }
/**
 * Constructs a worker from its configuration message and schedules its
 * first small-work event.
 *
 * @param m  configuration; copied into the object and then deleted
 */
worker::worker(WorkerData *m)
{
  // Copy the configuration out of the message before releasing it.
  numObjs = m->numObjs;
  numMsgs = m->numMsgs;
  msgSize = m->msgSize;
  grainSize = m->grainSize;
  granularity = m->granularity;
  density = m->density;

  for (int slot = 0; slot < 100; slot++) {
    data[slot] = 0;
  }
  sent = 0;
  totalObjs = numObjs * CkNumPes();
  localDensity = ((double)density)/((double)totalObjs);
  delete m;

  // First event: a zeroed message with no sender, delivered at offset 0.
  SmallWorkMsg *first = new SmallWorkMsg;
  memset(first->data, 0, SM_MSG_SZ*sizeof(int));
  first->fromPE = -1;
  POSE_invoke(workSmall(first), worker, parent->thisIndex, 0);
}
/**
 * Counts one notification; after CkNumPes() of them the counter is reset
 * and the next broadcast test is started.
 */
void Intermediate(){
  nDone++;
  if (nDone != CkNumPes())
    return;

  nDone = 0;
  broadcastProxy.TestBroadcast();
}
/**
 * Main-chare constructor of the AllReduce test.
 *
 * argv[1] gives the number of array elements. Sets the size parameters,
 * allocates and zeroes the per-size timing array, creates the AllReduce
 * array and starts the first round with baseSize.
 */
main::main(CkArgMsg* m)
{
  //Start the computation
  mainProxy = thishandle;

  //Process command-line arguments
  if (m->argc < 2) {
    CkPrintf("Needs number of array elements\n");
    CkExit();
  }
  units = atoi(m->argv[1]);

  allredSize = 4194304;   // 4 MB size
  baseSize = 262144;
  currentSize = baseSize;
  sizeInd = 0;
  numItr = 10;
  sizesNo = 5;
  iterNo = 0;

  timeForEach = new double[sizesNo];
  for (int s = 0; s < sizesNo; s++) {
    timeForEach[s] = 0.0;
  }

  arr = CProxy_AllReduce::ckNew(thisProxy, units);

  CkPrintf("AllReduce for %d pes on %d units for %d size\n",
           CkNumPes(),units,allredSize);

  arr.init();
  startTime = CkWallTimer();
  arr.dowork(baseSize);
}
// called on PE 0 void PVT::resumeAfterLB(eventMsg *m) { static int count = 0; count ++; if (count != CkNumPes()) { CkFreeMsg(m); return; } count = 0; #ifndef CMK_OPTIMIZE if(pose_config.stats) localStats->TimerStart(GVT_TIMER); #endif if (parLBInProgress) { CkPrintf("POSE: load balancing complete on processor %d at GVT=%lld sim time=%.1f sec\n", CkMyPe(), estGVT, CmiWallTimer() + parStartTime); parLBInProgress = 0; parLastLBGVT = estGVT; } CkFreeMsg(m); CProxy_PVT p(ThePVT); startPhaseActive = 0; prioBcMsg *startMsg = new (8*sizeof(int)) prioBcMsg; startMsg->bc = 1; *((int *)CkPriorityPtr(startMsg)) = 0; CkSetQueueing(startMsg, CK_QUEUEING_IFIFO); p[CkMyPe()].startPhase(startMsg); #ifndef CMK_OPTIMIZE if(pose_config.stats) localStats->TimerStop(); #endif }
/**
 * Main-chare constructor of the each-to-many test.
 *
 * The element count comes from argv[1] when present and defaults to
 * 4*CkNumPes(). Creates the array, registers an EachToManyMulticastStrategy
 * in which every element is both a source and a destination, and starts the
 * test.
 */
Main(CkArgMsg *m)
{
  nDone = 0;
  nPesDone = 0;

  nElements = (m->argc > 1) ? atoi(m->argv[1]) : CkNumPes()*4;
  delete m;

  mainProxy = thishandle;

  // Index list covering every element, used for both sides of the strategy.
  CkArrayIndex *everyIndex = new CkArrayIndex[nElements];
  for (int e = 0; e < nElements; e++) {
    CkArrayIndex1D oneD = e;
    everyIndex[e] = oneD;
  }

  // Create the array
  eachToManyArrayProxy = CProxy_EachToManyArray::ckNew(nElements);

  // Create a strategy
  Strategy *multicastStrategy =
      new EachToManyMulticastStrategy(USE_DIRECT, eachToManyArrayProxy, eachToManyArrayProxy,
                                      nElements, everyIndex, nElements, everyIndex);
  stratEachToManyArray = ComlibRegister(multicastStrategy);

  eachToManyArrayProxy.TestEachToMany();

  delete [] everyIndex;
}
/// Basic Constructor GVT::GVT() { #ifdef VERBOSE_DEBUG CkPrintf("[%d] constructing GVT\n",CkMyPe()); #endif optGVT = POSE_UnsetTS, conGVT = POSE_UnsetTS; done=0; SRs = NULL; startOffset = 0; gvtIterationCount = 0; #ifndef CMK_OPTIMIZE localStats = (localStat *)CkLocalBranch(theLocalStats); #endif #ifndef SEQUENTIAL_POSE if(pose_config.lb_on) nextLBstart = pose_config.lb_skip - 1; #endif estGVT = lastEarliest = inactiveTime = POSE_UnsetTS; lastSends = lastRecvs = inactive = 0; reportsExpected = 1; if (CkNumPes() >= 2) reportsExpected = 2; // CkPrintf("GVT expects %d reports!\n", reportsExpected); if (CkMyPe() == 0) { // start the PVT phase of the GVT algorithm CProxy_PVT p(ThePVT); prioBcMsg *startMsg = new (8*sizeof(int)) prioBcMsg; startMsg->bc = 1; *((int *)CkPriorityPtr(startMsg)) = 0; CkSetQueueing(startMsg, CK_QUEUEING_IFIFO); p.startPhase(startMsg); // broadcast PVT calculation to all PVT branches } }