/// /// Feedback main method /// void Main::StellarFeedback(double dTime, double dDelta) { if(verbosity) CkPrintf("Stellar Feedback ... \n"); double startTime = CkWallTimer(); CkReductionMsg *msgFeedback; treeProxy.Feedback(*(param.feedback), dTime, dDelta, CkCallbackResumeThread((void*&)msgFeedback)); double *dFeedback = (double *)msgFeedback->getData(); if(verbosity) { CkPrintf("Feedback totals: mass, energy, metalicity\n"); for(int i = 0; i < NFEEDBACKS; i++){ CkPrintf("feedback %d: %g %g %g\n", i, dFeedback[i*3], dFeedback[i*3 + 1], dFeedback[i*3] != 0.0 ? dFeedback[i*3 + 2]/dFeedback[i*3] : 0.0); } } delete msgFeedback; CkReductionMsg *msgChk; treeProxy.massMetalsEnergyCheck(1, CkCallbackResumeThread((void*&)msgChk)); if(verbosity) CkPrintf("Distribute Stellar Feedback ... "); // Need to build tree since we just did addDelParticle. // treeProxy.buildTree(bucketSize, CkCallbackResumeThread()); DistStellarFeedbackSmoothParams pDSFB(TYPE_GAS, 0, param.csm, dTime, param.dConstGamma, param.feedback); double dfBall2OverSoft2 = 4.0*param.dhMinOverSoft*param.dhMinOverSoft; treeProxy.startSmooth(&pDSFB, 0, param.feedback->nSmoothFeedback, dfBall2OverSoft2, CkCallbackResumeThread()); treeProxy.finishNodeCache(CkCallbackResumeThread()); CkPrintf("Stellar Feedback Calculated, Wallclock %f secs\n", CkWallTimer() - startTime); CkReductionMsg *msgChk2; treeProxy.massMetalsEnergyCheck(0, CkCallbackResumeThread((void*&)msgChk2)); double *dTotals = (double *)msgChk->getData(); double *dTotals2 = (double *)msgChk2->getData(); int i; for(i = 0; i < 5; i++) { std::string labels[5] = {"Mass", "Metals", "Oxygen", "Iron", "Energy"}; if(verbosity > 1) CkPrintf("Total %s: %g\n", labels[i].c_str(), dTotals[i]); if(fabs(dTotals[i] - dTotals2[i]) > 1e-12*(dTotals[i])) { CkError("ERROR: %s not conserved: %.15e != %.15e!\n", labels[i].c_str(), dTotals[i], dTotals2[i]); } } delete msgChk; delete msgChk2; }
void worker::doStuff() { int i; double timer; WorkMsg *nm; // generate remote events timer = CkWallTimer(); for (i=0; i<numMsgs; i++) { nm = new (msgSize/sizeof(int), 0) WorkMsg; memset(nm->data, 0, msgSize); wArray[(thisIndex+2)%(CkNumPes()*2)].work(nm); //CkPrintf("%d on %d sending %d th remote work to %d\n", thisIndex, CkMyPe(), i, (thisIndex+2)%(CkNumPes()*2)); } timer = CkWallTimer() - timer; rsum += timer; // generate a local event timer = CkWallTimer(); for (i=0; i<numMsgs; i++) { nm = new (msgSize/sizeof(int), 0) WorkMsg; memset(nm->data, 0, msgSize); if (thisIndex%2 == 0) { wArray[thisIndex+1].work(nm); //CkPrintf("%d on %d sending %d th local work to %d\n", thisIndex, CkMyPe(), i, thisIndex+1); } else { wArray[thisIndex-1].work(nm); //CkPrintf("%d on %d sending %d th local work to %d\n", thisIndex, CkMyPe(), i, thisIndex-1); } } timer = CkWallTimer() - timer; lsum += timer; }
// Main chare constructor: expects exactly two arguments (<nsamples> and
// <nchares>), creates the piPart array with nc elements and starts the
// computation on it.
main::main(CkArgMsg * m)
{
  const bool haveBothArgs = (m->argc == 3);
  if (!haveBothArgs) {
    CkPrintf("Usage: pgm <nsamples> <nchares>\n");
    CkAbort("");
  }

  ns = atoi(m->argv[1]);
  nc = atoi(m->argv[2]);
  delete m;

  starttime = CkWallTimer();

  //FIXME
  //CkGroupID gid = CkCreatePropMap();
  //CProxy_piPart arr = CProxy_piPart::ckNew(nc, gid);
  CProxy_piPart arr = CProxy_piPart::ckNew(nc);
  CkPrintf("At time %lf, array created.\n", (CkWallTimer()-starttime));

#if USE_REDUCTION
  arr.setReductionClient(reductionHandler,(void *)NULL);
#endif

  arr.compute(ns);

  // Bookkeeping read later by the result handler.
  responders = nc;
  count = 0;
  mainhandle = thishandle; // readonly initialization

  CkPrintf("At time %lf, main exits.\n", (CkWallTimer()-starttime));
}
// Reduction client: prints the elapsed time since startTime, verifies that
// the reduced integer equals 0 + 1 + ... + (sectionSize-1), and then either
// notifies the main chare (no reductions remaining) or sends the next
// SayHi message. The reduction message is deleted before returning.
void cb_client(CkReductionMsg *msg)
{
  endTime = CkWallTimer ();
  CmiPrintf("%e\n", endTime-startTime);

  // check correctness
  int expected = 0;
  for (int i=0; i<sectionSize; i++) expected += i;

  const int actual = *(int *)msg->getData();
  if (actual != expected) {
    CmiPrintf("Expected: %d acual:%d\n", expected, actual);
    CmiAbort("reduction result is wrong!");
  }

  cnt.reductionsRemaining--;
  if (cnt.reductionsRemaining<=0) {
    CProxy_main mproxy(mid);
    mproxy.maindone();
  }
  else {
    HiMsg *hiMsg = new (2, 0) HiMsg;
    hiMsg->data[0] = 22;
    hiMsg->data[1] = 28;
    startTime = CkWallTimer ();
    mcp.SayHi(hiMsg);
  }
  // Both outcomes count as one completed reduction.
  cnt.reductionNo++;

  delete msg;
}
void maindone(void) { niter++; if(niter == START_TRACE_ITER) _traceControl.startTrace(); if(niter == END_TRACE_ITER) _traceControl.endTrace(); if(niter == iterations) { double pingTimer = CkWallTimer() - start_time; CkPrintf("Pingping %d\t\t %d \t\t%d \t\t%d \t\t%d \t\t%.1f\n", nodeIndex, CharesPerPE, PEsPerNode, PEsPerNode * CharesPerPE, payload, 1000*1000*pingTimer/(iterations-WARM_UP)); if(nodeIndex == CkNumNodes() -1) CkExit(); else { niter = 0; for(int i=0; i<PEsPerNode * CharesPerPE; i++) arr1[i].start(nodeIndex); } nodeIndex++; }else { if(niter == WARM_UP) start_time = CkWallTimer(); for(int i=0; i<PEsPerNode * CharesPerPE; i++) arr1[i].start(nodeIndex); } };
void Main::done() { numIterations++; if(numIterations == 1) { firstTime = CkWallTimer(); #if USE_CKDIRECT CkPrintf("FIRST ITER TIME %f secs\n", firstTime - setupTime); #else CkPrintf("FIRST ITER TIME %f secs\n", firstTime - startTime); #endif compute.resetArrays(); } else { if(numIterations == NUM_ITER) { endTime[numIterations-2] = CkWallTimer() - firstTime; double sum = 0; for(int i=0; i<NUM_ITER-1; i++) sum += endTime[i]; #if USE_CKDIRECT CkPrintf("AVG TIME %f secs\n", sum/(NUM_ITER-1)); #else CkPrintf("AVG TIME %f secs\n", sum/(NUM_ITER-1)); #endif CkExit(); } else { endTime[numIterations-2] = CkWallTimer() - firstTime; compute.resetArrays(); } } }
// Fills simResults->lbinfo via getPredictedLoadWithMsg() and prints how long
// that call took. Asserts that simResults is non-null and sized for `count`
// processors.
void CentralLB::findSimResults(LDStats* stats, int count, LBMigrateMsg* msg, LBSimulation* simResults)
{
    CkAssert(simResults != NULL && count == simResults->numPes);

    // estimate the new loads of the processors. As a first approximation,
    // this is the sum of the cpu times of the objects on that processor
    const double predictionBegin = CkWallTimer();
    getPredictedLoadWithMsg(stats, count, msg, simResults->lbinfo, 1);
    const double predictionSecs = CkWallTimer() - predictionBegin;

    CmiPrintf("getPredictedLoad finished in %fs\n", predictionSecs);
}
// Main chare constructor for the 2D Jacobi run.
//
// Command line: array_height array_width block_height block_width
// [iterations]. The four sizes are required; iterations defaults to 200.
// Creates a num_chare_cols x num_chare_rows Jacobi array and starts the
// first iteration.
Main(CkArgMsg* m)
{
    // Per-PE accumulators, zero-initialised.
    totalTime = new float[CkNumPes()];
    totalObjs = new int[CkNumPes()];
    for(int i=0;i<CkNumPes();i++)
    {
        totalTime[i] = 0.0;
        totalObjs[i] = 0;
    }

    // argv[1]..argv[4] are all read below, so at least four user arguments
    // must be present.
    if (m->argc < 5) {
        CkPrintf("%s [array_height] [array_width] [block_height] [block_width] [iterations]\n", m->argv[0]);
        CkAbort("Abort");
    }

    totalIterTime = 0.0;
    lbdOverhead = 0.0;

    // set iteration counter to zero
    iterations=0;

    // store the main proxy
    mainProxy = thisProxy;

    array_height = atoi(m->argv[1]);
    array_width = atoi(m->argv[2]);
    block_height = atoi(m->argv[3]);
    block_width = atoi(m->argv[4]);

    // Both block sizes are used as divisors below.
    if (block_height <= 0 || block_width <= 0)
        CkAbort("block sizes must be positive!");
    if (array_width < block_width || array_width % block_width != 0)
        CkAbort("array_size % block_size != 0!");

    num_chare_rows = array_height / block_height;
    num_chare_cols = array_width / block_width;

    // print info
    //CkPrintf("Running Jacobi on %d processors with (%d,%d) elements\n", CkNumPes(), num_chare_rows, num_chare_cols);

    total_iterations = 200;
    if (m->argc > 5) {
        total_iterations = atoi(m->argv[5]);
    }

    // Create new array of worker chares
    array = CProxy_Jacobi::ckNew(num_chare_cols, num_chare_rows);

    // save the total number of worker chares we have in this simulation
    num_chares = num_chare_rows*num_chare_cols;

    //Start the computation
    perIterStartTime = CkWallTimer();
    progStartTime = CkWallTimer();
    recieve_count = 0;
    array.begin_iteration();
}
void SayHi(int hiNo) { if(hiNo <2 ) startTimer = CkWallTimer(); else if(hiNo >= numIterations) { double time = CkWallTimer() - startTimer; CkPrintf(" migration cost total : %f sec single migration cost: %f us\n", time, time/(hiNo-1)*1000000); CkExit(); } //CkPrintf("executing %d %d\n", CkMyPe(), hiNo); thisProxy[thisIndex].SayHi(hiNo+1); migrateMe(1-CkMyPe()); }
// Called at the end of every iteration. Prints the first-iteration time on
// the first call; on the NUM_ITER-th call prints the time since firstTime
// divided by NUM_ITER-1 and exits. Otherwise resets the arrays for the next
// iteration.
void Main::done()
{
  ++numIterations;

  const bool isFirst = (numIterations == 1);
  if (isFirst) {
    firstTime = CkWallTimer();
    CkPrintf("FIRST ITER TIME %f secs\n", firstTime - startTime);
  }

  if (numIterations != NUM_ITER) {
    compute.resetArrays();
    return;
  }

  endTime = CkWallTimer();
  CkPrintf("AVG TIME %f secs\n", (endTime - firstTime)/(NUM_ITER-1));
  CkExit();
}
// called on a parent node void HybridBaseLB::NotifyObjectMigrationDone(int fromlevel) { int atlevel = fromlevel + 1; LevelData *lData = levelData[atlevel]; lData->mig_reported ++; if (lData->mig_reported == lData->nChildren) { lData->mig_reported = 0; // start load balancing at this level if (atlevel > 1) { // I am done at the level, propagate load balancing to next level thisProxy.Loadbalancing(atlevel-1, lData->nChildren, lData->children); } else { // atlevel = 1 if (_lb_args.debug() > 1) CkPrintf("[%d] NotifyObjectMigrationDone at level %d started at %f\n", CkMyPe(), atlevel, CkWallTimer()); DummyMsg *m = new (8*sizeof(int)) DummyMsg; *((int *)CkPriorityPtr(m)) = -100-atlevel; CkSetQueueing(m, CK_QUEUEING_IFIFO); thisProxy.StartCollectInfo(m, lData->nChildren, lData->children); } } }
// Main chare constructor for the ping benchmark. Reads up to four optional
// arguments (payload, PEs, CharesPerPE, iterations), creates the Ping1
// array, prints the coordinates of the first PE of each node, and starts the
// first PEsPerNode*CharesPerPE elements.
main(CkArgMsg* m)
{
    const int nArgs = m->argc;
    if(nArgs < 5)
        CkPrintf("Usage: payload PEs CharesPerPE iteration\n");

    // Defaults, then per-argument overrides.
    niter = 0;
    iterations=NITER;
    payload=PAYLOAD;
    if(nArgs>1) totalPayload=atoi(m->argv[1]);
    if(nArgs>2) PEsPerNode = atoi(m->argv[2]);
    if(nArgs>3) CharesPerPE = atoi(m->argv[3]);
    if(nArgs>4) iterations=atoi(m->argv[4]);

    // Per-chare payload is the total split across PEs and chares per PE.
    payload = totalPayload/PEsPerNode/CharesPerPE;

    mainProxy = thishandle;
    arr1 = CProxy_Ping1::ckNew(CkNumNodes()* PEsPerNode * CharesPerPE );
    start_time = CkWallTimer();
    nodeIndex = 1;

    // One line per node, using the node's first PE.
    TopoManager topo;
    int cx, cy, cz, ct;
    for(int pe=0; pe<CmiNumPes(); pe+=CmiMyNodeSize())
    {
        topo.rankToCoordinates(pe, cx, cy, cz, ct);
        CkPrintf(" %d [%d:%d:%d:%d]\n", pe, cx, cy, cz, ct);
    }

    CkPrintf("NodeIndex Chares Workers NoOfMsgs Bytes Total Time(us)\n");
    _traceControl = CProxy_TraceControl::ckNew();

    for(int c=0; c<PEsPerNode * CharesPerPE; c++)
        arr1[c].start(nodeIndex);

    delete m;
};
// Completion callback: records the end time, prints the number of uchares,
// the number of messages sent (N*(N-1), in millions) and the resulting
// message rate, then exits.
void done()
{
    endt = CkWallTimer();

    // Multiply in double so the product cannot overflow an integer type.
    const double millionsOfMsgs =
        ((double)(N_uChares) * (double)(N_uChares-1)) / 1000000;

    CkPrintf("Main: done\n");
    CkPrintf("\tnumber of uchares %ld, number of sent messages %f*10^6\n",
             N_uChares, millionsOfMsgs);
    // "%.4f" (four decimals); the previous "%f.4" printed the value followed
    // by a literal ".4".
    CkPrintf("\t%f secs, %.4f MMsg/s \n",
             endt-startt, millionsOfMsgs/(endt-startt));
    CkExit();
}
// Main chare constructor for the all-reduce benchmark. Requires the number
// of array elements as the first argument, initialises the size/iteration
// bookkeeping, creates the AllReduce array and starts the first round with
// baseSize.
main::main(CkArgMsg* m)
{
    //Process command-line arguments
    //Start the computation
    mainProxy = thishandle;

    if(m->argc<2)
    {
        CkPrintf("Needs number of array elements\n");
        CkExit();
        // Do not fall through to atoi(m->argv[1]) if CkExit() returns.
        return;
    }
    units=atoi(m->argv[1]);

    // 4 MB size
    allredSize= 4194304; //atoi(m->argv[2]);
    baseSize = 262144;
    currentSize = baseSize;
    sizeInd = 0;
    numItr = 10;
    sizesNo = 5;
    iterNo = 0;

    // One accumulated time per message size.
    timeForEach = new double[sizesNo];
    for(int i=0; i<sizesNo; i++)
        timeForEach[i] = 0.0;

    arr = CProxy_AllReduce::ckNew(thisProxy, units);
    CkPrintf("AllReduce for %d pes on %d units for %d size\n",
             CkNumPes(),units,allredSize);

    arr.init();
    startTime = CkWallTimer();
    arr.dowork(baseSize);
}
// Ends a load-balancing step: advances the step counter, clears level 0's
// data, optionally prints timing (PE 0, debug) and a load summary
// (printSummary), then clears the loads and resumes the clients.
// The whole body is compiled only when CMK_LBDB_ON is set.
void HbmLB::ResumeClients(int balancing)
{
#if CMK_LBDB_ON
  DEBUGF(("[%d] ResumeClients. \n", CkMyPe()));

  theLbdb->incStep();

  // reset
  levelData[0]->clear();

  if (balancing && CkMyPe() == 0) {
    const double finished_at = CkWallTimer();
    if (_lb_args.debug())
      CkPrintf("[%s] Load balancing step %d finished at %f duration %f\n",
               lbName(), step()-1, finished_at, finished_at - start_lb_time);
  }

  if (balancing && _lb_args.printSummary()) {
    int count = 1;
    LBInfo info(count);
    LDStats *stats = &myStats;
    info.getInfo(stats, count, 0);   // no comm cost

    LBRealType mLoad, mCpuLoad, totalLoad;
    info.getSummary(mLoad, mCpuLoad, totalLoad);

    int nmsgs, nbytes;
    stats->computeNonlocalComm(nmsgs, nbytes);

    CkPrintf("[%d] Load with %d objs: max (with comm): %f max (obj only): %f total: %f on %d processors at step %d useMem: %fKB nonlocal: %d %.2fKB.\n",
             CkMyPe(), stats->n_objs, mLoad, mCpuLoad, totalLoad, count,
             step()-1, (1.0*useMem())/1024, nmsgs, nbytes/1024.0);
    thisProxy[0].reportLBQulity(mLoad, mCpuLoad, totalLoad, nmsgs, 1.0*nbytes/1024.0);
  }

  // zero out stats
  theLbdb->ClearLoads();

  theLbdb->ResumeClients();
#endif
}
// Sync point for WSLB. PE 0 records and prints the start time. If no
// balancing is due (or no migration messages are expected) the step is
// finished immediately; otherwise the local stats are sent to the neighbors
// and this PE marks itself ready.
void WSLB::AtSync()
{
#if CMK_LBDB_ON
  // CkPrintf("[%d] WSLB At Sync step %d!!!!\n",CkMyPe(),mystep);

  if (CkMyPe() == 0) {
    start_lb_time = CkWallTimer();
    CkPrintf("Load balancing step %d starting at %f\n",
             step(),start_lb_time);
  }

  if (neighbor_pes == 0)
    FindNeighbors();

  const bool nothingToDo = !QueryBalanceNow(step()) || mig_msgs_expected == 0;
  if (nothingToDo) {
    MigrationDone();
    return;
  }

  WSLBStatsMsg* statsMsg = AssembleStats();
  thisProxy.ReceiveStats(statsMsg,mig_msgs_expected,neighbor_pes);

  // Tell our own node that we are ready
  ReceiveStats((WSLBStatsMsg*)0);
#endif
}
void LBMachineUtil::Clear() { total_walltime = 0.0; #if CMK_LB_CPUTIMER total_cputime = 0.0; #endif if (state == off) { start_totalwall = -1.; #if CMK_LB_CPUTIMER start_totalcpu = -1.; #endif } else { const double cur_wall = CkWallTimer(); #if CMK_LB_CPUTIMER const double cur_cpu = CkCpuTimer(); #endif start_totalwall = cur_wall; #if CMK_LB_CPUTIMER start_totalcpu = cur_cpu; #endif } total_idletime = 0.0; start_idle = -1.; }
/*
  migration of objects contains two different kinds:
  (1) objects want to make a barrier for migration completion
      (waitForBarrier is true)
      migrationDone() to finish and resumeClients
  (2) objects don't need a barrier
  However, next load balancing can only happen when both migrations complete

  This function is the rendezvous: it acts only on its second call, when it
  records the migration cost, resets the bookkeeping, releases the local
  barrier and switches to the next load balancer.
*/
void CentralLB::CheckMigrationComplete()
{
#if CMK_LBDB_ON
  lbdone ++;
  if (lbdone != 2) return;   // the other kind of migration is still pending

  const double finished_at = CkWallTimer();
  const double lb_duration = finished_at - start_lb_time;
  if (_lb_args.debug() && CkMyPe()==0) {
    CkPrintf("CharmLB> %s: PE [%d] step %d finished at %f duration %f s\n\n",
             lbname, cur_ld_balancer, step()-1, finished_at, finished_at-start_lb_time);
  }
  theLbdb->SetMigrationCost(lb_duration);

  lbdone = 0;
  future_migrates_expected = -1;
  future_migrates_completed = 0;

  DEBUGF(("[%d] Migration Complete\n", CkMyPe()));

  // release local barrier so that the next load balancer can go
  LDOMHandle omHandle;
  omHandle.id.id.idx = 0;
  theLbdb->getLBDB()->DoneRegisteringObjects(omHandle);

  // switch to the next load balancer in the list
  // subtle: called from Migrated() may result in Migrated() called in next LB
  theLbdb->nextLoadbalancer(seqno);
#endif
}
void LBMachineUtil::StatsOn() { const double cur_wall = CkWallTimer(); #if CMK_LB_CPUTIMER const double cur_cpu = CkCpuTimer(); #endif if (state == off) { #if ! CMK_BIGSIM_CHARM cancel_idleStart=CcdCallOnConditionKeep( CcdPROCESSOR_BEGIN_IDLE,(CcdVoidFn)staticIdleStart,(void *)this); cancel_idleEnd=CcdCallOnConditionKeep( CcdPROCESSOR_END_IDLE,(CcdVoidFn)staticIdleEnd,(void *)this); #endif state = on; } if (start_totalwall != -1.) { total_walltime += (cur_wall - start_totalwall); #if CMK_LB_CPUTIMER total_cputime += (cur_cpu - start_totalcpu); #endif } start_totalwall = cur_wall; #if CMK_LB_CPUTIMER start_totalcpu = cur_cpu; #endif }
// Begins a load-balancing step on this PE: records the start time on the
// balancer PE, builds the stats message, and either contributes the object
// and communication counts to a sum reduction targeting ReceiveCounts on
// element 0 (USE_REDUCTION) or sends the stats directly.
void CentralLB::ProcessAtSync()
{
#if CMK_LBDB_ON
  // reduction in progress
  if (reduction_started) return;

  CmiAssert(CmiNodeAlive(CkMyPe()));

  const bool onBalancerPe = (CkMyPe() == cur_ld_balancer);
  if (onBalancerPe) {
    start_lb_time = CkWallTimer();
  }

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  initMlogLBStep(thisgroup);
#endif

  // build message
  BuildStatsMsg();

#if USE_REDUCTION
  // reduction to get total number of objects and comm
  // so that processor 0 can pre-allocate load balancing database
  int objAndCommSizes[2];
  objAndCommSizes[0] = theLbdb->GetObjDataSz();
  objAndCommSizes[1] = theLbdb->GetCommDataSz();

  CkCallback countsReady(CkIndex_CentralLB::ReceiveCounts((CkReductionMsg*)NULL),
                         thisProxy[0]);
  contribute(2*sizeof(int), objAndCommSizes, CkReduction::sum_int, countsReady);
  reduction_started = 1;
#else
  SendStats();
#endif
#endif
}
// Resumes the clients after a load-balancing step.
//
// When `balancing` is non-zero it also notifies Comlib (non-fault-tolerant
// builds only) and calls CheckMigrationComplete(); that call is repeated
// when no future migrations are outstanding (none expected, or all expected
// ones completed).
// The whole body is compiled only when CMK_LBDB_ON is set.
void CentralLB::ResumeClients(int balancing)
{
#if CMK_LBDB_ON
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  resumeCount++;
  globalResumeCount = resumeCount;
#endif
  DEBUGF(("[%d] Resuming clients. balancing:%d.\n",CkMyPe(),balancing));

  // A block that only sampled CkWallTimer() into an unused local was removed
  // here.

#if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
  if (balancing) ComlibNotifyMigrationDone();
#endif

  theLbdb->ResumeClients();

  if (balancing) {
    CheckMigrationComplete();
    if (future_migrates_expected == 0 ||
        future_migrates_expected == future_migrates_completed) {
      CheckMigrationComplete();
    }
  }
#endif
}
// default load balancing strategy LBMigrateMsg* CentralLB::Strategy(LDStats* stats) { #if CMK_LBDB_ON double strat_start_time = CkWallTimer(); if (_lb_args.debug()) CkPrintf("CharmLB> %s: PE [%d] strategy starting at %f\n", lbname, cur_ld_balancer, strat_start_time); work(stats); if (_lb_args.debug()>2) { CkPrintf("CharmLB> Obj Map:\n"); for (int i=0; i<stats->n_objs; i++) CkPrintf("%d ", stats->to_proc[i]); CkPrintf("\n"); } LBMigrateMsg *msg = createMigrateMsg(stats); /* Extra feature for MetaBalancer if (_lb_args.metaLbOn()) { int clients = CkNumPes(); LBInfo info(clients); getPredictedLoadWithMsg(stats, clients, msg, info, 0); LBRealType mLoad, mCpuLoad, totalLoad, totalLoadWComm; info.getSummary(mLoad, mCpuLoad, totalLoad); theLbdb->UpdateDataAfterLB(mLoad, mCpuLoad, totalLoad/clients); } */ if (_lb_args.debug()) { double strat_end_time = CkWallTimer(); envelope *env = UsrToEnv(msg); double lbdbMemsize = LBDatabase::Object()->useMem()/1000; CkPrintf("CharmLB> %s: PE [%d] Memory: LBManager: %d KB CentralLB: %d KB\n", lbname, cur_ld_balancer, (int)lbdbMemsize, (int)(useMem()/1000)); CkPrintf("CharmLB> %s: PE [%d] #Objects migrating: %d, LBMigrateMsg size: %.2f MB\n", lbname, cur_ld_balancer, msg->n_moves, env->getTotalsize()/1024.0/1024.0); CkPrintf("CharmLB> %s: PE [%d] strategy finished at %f duration %f s\n", lbname, cur_ld_balancer, strat_end_time, strat_end_time-strat_start_time); theLbdb->SetStrategyCost(strat_end_time - strat_start_time); } return msg; #else return NULL; #endif }
// Main chare constructor for the major/minor Jacobi run. Requires two size
// arguments (major and minor element counts) plus an optional iteration
// count (default 200), creates the Jacobi and Minor arrays, and starts the
// first iteration on array[0] and on arrayMin[0..6]. When PRIOR is defined
// the two groups are started with priorities -100 and 100 respectively.
Main (CkArgMsg * m)
{
  const bool tooFewArgs = (m->argc < 3);
  if (tooFewArgs)
    {
      CkPrintf ("%s [array_size] [block_size]\n", m->argv[0]);
      CkAbort ("Abort");
    }

  f = fopen ("temp.out", "w");

  // set iteration counter to zero
  iterations = 0;

  // store the main proxy
  mainProxy = thisProxy;
  freqProxy = CProxy_ProcFreq::ckNew ();

  // Defaults, immediately replaced by the command-line values.
  majElements = minElements = 8;
  majElements = atoi (m->argv[1]);
  minElements = atoi (m->argv[2]);

  // print info
  total_iterations = 200;
  if (m->argc > 3)
    total_iterations = atoi (m->argv[3]);

  // Create new array of worker chares
  array = CProxy_Jacobi::ckNew (majElements, majElements);
  arrayMin = CProxy_Minor::ckNew (minElements, minElements);
  minorProxy = arrayMin;

  CkPrintf ("************** majorElements=%d minorElements=%d iterations=%d ********************\n",
            majElements, minElements, total_iterations);

  // save the total number of worker chares we have in this simulation
  num_chares = majElements + minElements;

  //Start the computation
  startTime = CkWallTimer ();
  start = startTime;
  recieve_count = 0;

#ifdef PRIOR
  opts = new CkEntryOptions ();
  opts1 = new CkEntryOptions ();
  opts->setPriority (-100);
  opts1->setPriority (100);

  array[0].begin_iteration (1, opts);
  for (int minorIdx = 0; minorIdx < 7; minorIdx++)
    arrayMin[minorIdx].begin_iteration (1, opts1);
  // arrayMin.begin_iteration(1,opts1);
#else
  array[0].begin_iteration (1);
  for (int minorIdx = 0; minorIdx < 7; minorIdx++)
    arrayMin[minorIdx].begin_iteration (1);
#endif
  // arrayMin.begin_iteration(1,opts1);
}
// Records the start time, runs the uchare set, then loops forever flushing
// it, pausing 0.1 s between flushes. Both proxy calls block this thread
// until their callback fires (CkCallbackResumeThread).
/*entry*/ void run()
{
    startt = CkWallTimer();
    uchare_set_proxy.run(CkCallbackResumeThread());

    while(1) {
        //CkPrintf("Main: run resumed...\n");
        uchare_set_proxy.flush(CkCallbackResumeThread());
        // sleep() takes whole seconds, so sleep(0.1) was truncated to
        // sleep(0) and never paused; usleep() expresses the 0.1 s interval.
        usleep(100000);
    }
}
// Main chare constructor for the 3D Jacobi run.
//
// Accepted command lines (user arguments after the program name):
//   array_size block_size [maxiterations]
//   array_size_X array_size_Y array_size_Z
//       block_size_X block_size_Y block_size_Z [maxiterations]
// Any other argument count prints the usage text and aborts. Each array
// dimension must be a positive multiple of the matching block dimension.
// Creates the Jacobi chare array and starts it.
Main(CkArgMsg* m)
{
    // Only 2, 3, 6 or 7 user arguments are meaningful. Counts in between
    // would read argv entries that were not supplied.
    const int nArgs = m->argc;
    const bool cubicForm = (nArgs == 3 || nArgs == 4);
    const bool perDimForm = (nArgs == 7 || nArgs == 8);
    if (!cubicForm && !perDimForm) {
        CkPrintf("%s [array_size] [block_size]\n", m->argv[0]);
        CkPrintf("OR %s [array_size] [block_size] maxiterations\n", m->argv[0]);
        CkPrintf("OR %s [array_size_X] [array_size_Y] [array_size_Z] [block_size_X] [block_size_Y] [block_size_Z]\n", m->argv[0]);
        CkPrintf("OR %s [array_size_X] [array_size_Y] [array_size_Z] [block_size_X] [block_size_Y] [block_size_Z] maxiterations\n", m->argv[0]);
        CkAbort("Abort");
    }

    iterations = 0;

    // store the main proxy
    mainProxy = thisProxy;

    if (cubicForm) {
        arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]);
        blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]);
    }
    else {
        arrayDimX = atoi(m->argv[1]);
        arrayDimY = atoi(m->argv[2]);
        arrayDimZ = atoi(m->argv[3]);
        blockDimX = atoi(m->argv[4]);
        blockDimY = atoi(m->argv[5]);
        blockDimZ = atoi(m->argv[6]);
    }

    maxiterations = MAX_ITER;
    if(nArgs==4)
        maxiterations = atoi(m->argv[3]);
    if(nArgs==8)
        maxiterations = atoi(m->argv[7]);

    // Block sizes are used as divisors below.
    if (blockDimX <= 0 || blockDimY <= 0 || blockDimZ <= 0)
        CkAbort("block sizes must be positive!");
    if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0)
        CkAbort("array_size_X % block_size_X != 0!");
    if (arrayDimY < blockDimY || arrayDimY % blockDimY != 0)
        CkAbort("array_size_Y % block_size_Y != 0!");
    if (arrayDimZ < blockDimZ || arrayDimZ % blockDimZ != 0)
        CkAbort("array_size_Z % block_size_Z != 0!");

    numChareX = arrayDimX / blockDimX;
    numChareY = arrayDimY / blockDimY;
    numChareZ = arrayDimZ / blockDimZ;

    // print info
    CkPrintf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n");
    CkPrintf("Running Jacobi on %d processors with (%d, %d, %d) chares\n", CkNumPes(), numChareX, numChareY, numChareZ);
    CkPrintf("Array Dimensions: %d %d %d\n", arrayDimX, arrayDimY, arrayDimZ);
    CkPrintf("Block Dimensions: %d %d %d\n", blockDimX, blockDimY, blockDimZ);

    // Create new array of worker chares
    array = CProxy_Jacobi::ckNew(numChareX, numChareY, numChareZ);

    //Start the computation
    array.run();
    startTime = CkWallTimer();
}
void doPageRank() { PageRankGraph::Proxy & g = graph->getProxy(); double update_walltime = CkWallTimer() - starttime; CkPrintf("Initialization completed:\n"); CkPrintf("CPU time used = %.6f seconds\n", update_walltime); starttime = CkWallTimer(); for (int i = 0; i < iters; i++) { CkPrintf("PageRank step %d:\n", i); // do pagerank step initilization g.doPageRankStep_init(); // wait for current step to be done CkStartQD(CkCallbackResumeThread()); // do pagerank step g.doPageRankStep_update(); // wait for current step to be done CkStartQD(CkCallbackResumeThread()); } startVerificationPhase(); }
int main(int argc, char * argv[]) { void *baseAddress[MAX_PROCESSORS]; char *local; int thisImage; int iter = 100, size; double startTime, endTime; int i; // initialize ARMCI_Init(); ARMCI_Myid(&thisImage); // allocate data (collective operation) ARMCI_Malloc(baseAddress, MAX_BUF_SIZE*sizeof(char)); local = (char *)ARMCI_Malloc_local(MAX_BUF_SIZE*sizeof(char)); ARMCI_Barrier(); ARMCI_Migrate(); if (thisImage == 0) { for(size = 1; size <= MAX_BUF_SIZE; size = size<<1){ startTime = CkWallTimer(); for(i = 0; i < iter; i++){ ARMCI_Put(local, baseAddress[1], size, 1); } ARMCI_Fence(1); endTime = CkWallTimer(); printf("%d: %f us\n", size, (endTime-startTime)*1000); } ARMCI_Barrier(); } else if (thisImage == 1) { ARMCI_Barrier(); } ARMCI_Free(baseAddress[thisImage]); ARMCI_Free_local(local); // finalize ARMCI_Finalize(); return 0; }
void done() { double update_walltime = CkWallTimer() - starttime; //double gteps = 1e-9 * globalNubScannedVertices * 1.0/update_walltime; CkPrintf("[Final] CPU time used = %.6f seconds\n", update_walltime); //CkPrintf("Scanned vertices = %lld (%.0f%%)\n", globalNubScannedVertices, (double)globalNubScannedVertices*100/opts.N); //CkPrintf("%.9f Billion(10^9) Traversed edges per second [GTEP/s]\n", gteps); //CkPrintf("%.9f Billion(10^9) Traversed edges/PE per second [GTEP/s]\n", // gteps / CkNumPes()); //g.print(); CkStartQD(CkIndex_TestDriver::exit(), &thishandle); }
// Announces the run, records the start time, and launches the all-to-all
// proxy with Main::done() as the completion callback before starting it.
/*entry*/ void start()
{
    CkPrintf("Main: run calculations...\n");
    CkPrintf("Main: start...\n");

    startt = CkWallTimer();

    alltoall_proxy->run(
        CkCallback(CkIndex_Main::done(), thisProxy));

    //for (int i = 0; i < N_uChares; i++)
    //    (*hello_proxy)[i]->ping(0);

    alltoall_proxy->start();
    //alltoall_proxy->flush();
}
void main::results(int cnt) { count += cnt; #if ! USE_REDUCTION if (0 == --responders) #endif { endtime = CkWallTimer(); CkPrintf("At time %lf, pi=: %f \n", (endtime-starttime), 4.0*count/(ns*nc)); CkExit(); } }