void communicate(int iters, bool useTram) { GroupMeshStreamer<DataItem, Participant, SimpleMeshRouter> *localStreamer; if (useTram) { localStreamer = aggregator.ckLocalBranch(); } int ctr = 0; for (int i = 0; i < iters; i++) { for (int j=0; j<CkNumPes(); j++) { if (useTram) { localStreamer->insertData(myItem, neighbors[j]); } else { allToAllGroup[neighbors[j]].receive(myItem); ctr++; } } if (!useTram) { if (ctr == 1024) { ctr = 0; CthYield(); } } } if (useTram) { localStreamer->done(); } else { contribute(CkCallback(CkReductionTarget(Main, allDone), mainProxy)); } }
/* Add `val` to the shared counter `*var` under `mutex`.
 * The read-modify-write is deliberately split by a random CthYield() in the
 * middle: if the mutex did not actually provide mutual exclusion, another
 * thread could run in the gap and its update would be lost — so this doubles
 * as a stress test of the Cpthread mutex implementation. */
void posixth_add(Cpthread_mutex_t *mutex, int *var, int val) {
  Cpthread_mutex_lock(mutex);
  int snapshot = *var;            /* read ... */
  if (CrnRand() & 1) CthYield();  /* ... yield mid-update to stress the lock ... */
  *var = snapshot + val;          /* ... then write back */
  Cpthread_mutex_unlock(mutex);
}
void *posixth_top(void *x) { Cpthread_t t; void *result; int n; if (CrnRand()&1) CthYield(); errck(Cpthread_create(&t, &CpvAccess(joinable), posixth_fib, (void*)6)); if (CrnRand()&1) CthYield(); errck(Cpthread_join(t, &result)); if (CrnRand()&1) CthYield(); if ((size_t)result != 8) posixth_fail(); if (CrnRand()&1) CthYield(); posixth_add(&CpvAccess(total_mutex), &CpvAccess(total), (size_t)result); if (CrnRand()&1) CthYield(); posixth_add(&CpvAccess(fibs_mutex), &CpvAccess(fibs), -1); if (CrnRand()&1) CthYield(); if (CpvAccess(fibs)==0) errck(Cpthread_cond_signal(&CpvAccess(donecond))); if (CrnRand()&1) CthYield(); }
// Refinement driver for this chunk: repeatedly sweeps the local element array,
// refining every element that needs it, until a full sweep makes no changes.
// `modified` is the fixed-point flag (assumed nonzero on entry — presumably set
// by whoever scheduled the refinement; TODO confirm against callers), and
// `refineInProgress` is cleared on exit so the refine loop can be restarted.
void chunk::refiningElements() { int i; CkPrintf("[tmr] Chunk %d: refiningElements\n", cid); while (modified) { // continue trying to refine elements until nothing changes during
// a refinement cycle
i = 0; modified = 0; CkPrintf("[tmr] Chunk %d: Entering internal refinement loop...\n", cid); while (i < numElements) { // loop through the elements
if (theElements[i].getCachedArea() < 0.0) // no cached area yet
theElements[i].calculateArea();
// An element is refined when it has no outstanding dependency AND
// (its current area is at or above a non-negative target, or it was
// specially requested, or it has a pending request), OR when it is
// responding to a refinement request from another element.
if ((!theElements[i].hasDependency() && (((theElements[i].getTargetArea() <= theElements[i].getCachedArea()) && (theElements[i].getTargetArea() >= 0.0)) || theElements[i].isSpecialRequest() || theElements[i].isPendingRequest())) || theElements[i].isRequestResponse()) { // the element either needs refining or has been asked to
// refine or has asked someone else to refine
//CkPrintf("[tmr] Chunk %d: Element %d: hasdep? %c target=%f current=%f spcReq? %c pend? %c reqResp? %c\n", cid, i, (theElements[i].hasDependency() ? 'y' : 'n'), theElements[i].getTargetArea(), theElements[i].getCachedArea(), (theElements[i].isSpecialRequest() ? 'y' : 'n'), (theElements[i].isPendingRequest() ? 'y' : 'n'), (theElements[i].isRequestResponse() ? 'y' : 'n'));
modified = 1; // something's bound to change
theElements[i].refine(); // refine the element
} i++; }
//if (CkMyPe() == 0) for (int j=0; j<numChunks; j++) mesh[j].print();
adjustMesh(); // apply structural updates accumulated during this sweep
CthYield(); // give other chunks on the same PE a chance
} // nothing is in need of refinement; turn refine loop off
refineInProgress = 0; CkPrintf("[tmr] Chunk %d: DONE refiningElements\n", cid); }
void *posixth_fib(void *np) { Cpthread_t t1, t2; void *r1, *r2; CmiIntPtr n = (size_t)np, total; if (n<2) { if (CrnRand()&1) CthYield(); posixth_add(&CpvAccess(leaves_mutex), &CpvAccess(leaves), 1); return (void*)n; } if (CrnRand()&1) CthYield(); errck(Cpthread_create(&t1, &CpvAccess(joinable), posixth_fib, (void*)(n-1))); if (CrnRand()&1) CthYield(); errck(Cpthread_create(&t2, &CpvAccess(joinable), posixth_fib, (void*)(n-2))); if (CrnRand()&1) CthYield(); errck(Cpthread_join(t1, &r1)); if (CrnRand()&1) CthYield(); errck(Cpthread_join(t2, &r2)); if (CrnRand()&1) CthYield(); total = ((size_t)r1) + ((size_t)r2); return (void*)total; }
// Install a remote edge reference on local element `elem`, cooperatively
// blocking (via CthYield) until the element array has been allocated
// (sizeElements becomes nonzero) before touching it.
void chunk::addRemoteEdge(int elem, int localEdge, edgeRef er)
{
  for (;;) {
    if (sizeElements != 0) break;  // element storage is ready
    CthYield();                    // let other chares on this PE run
  }
  theElements[elem].updateEdge(localEdge, er);
}
// Acquire the exclusive mesh-adjustment lock: spin (yielding to other chares
// on this PE each pass) until meshLock reads 0, then claim it by setting 1.
void chunk::adjustLock()
{
  for (; meshLock != 0; CthYield())
    ;          // wait for all other holders to release
  meshLock = 1; // exclusive hold
}
// the following methods are for run-time additions and modifications // to the chunk components void chunk::accessLock() { while (meshExpandFlag) CthYield(); meshLock--; }
void posixth_main(int argc, char **argv) { Cpthread_mutex_t dummymutex; int i; Cpthread_t t; if (CrnRand()&1) CthYield(); errck(Cpthread_attr_init(&CpvAccess(joinable))); if (CrnRand()&1) CthYield(); errck(Cpthread_attr_setdetachstate(&CpvAccess(joinable),CPTHREAD_CREATE_JOINABLE)); if (CrnRand()&1) CthYield(); errck(Cpthread_attr_init(&CpvAccess(detached))); if (CrnRand()&1) CthYield(); errck(Cpthread_attr_setdetachstate(&CpvAccess(detached),CPTHREAD_CREATE_DETACHED)); if (CrnRand()&1) CthYield(); errck(Cpthread_mutexattr_init(&CpvAccess(mutexattrs))); if (CrnRand()&1) CthYield(); errck(Cpthread_condattr_init(&CpvAccess(condattrs))); if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_init(&CpvAccess(total_mutex), &CpvAccess(mutexattrs))); if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_init(&CpvAccess(leaves_mutex), &CpvAccess(mutexattrs))); if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_init(&CpvAccess(fibs_mutex), &CpvAccess(mutexattrs))); if (CrnRand()&1) CthYield(); errck(Cpthread_cond_init(&CpvAccess(donecond), &CpvAccess(condattrs))); if (CrnRand()&1) CthYield(); CpvAccess(total) = 0; CpvAccess(fibs) = 20; CpvAccess(leaves) = 0; for (i=0; i<20; i++) { if (CrnRand()&1) CthYield(); Cpthread_create(&t, &CpvAccess(detached), posixth_top, 0); } if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_init(&dummymutex, &CpvAccess(mutexattrs))); if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_lock(&dummymutex)); if (CrnRand()&1) CthYield(); errck(Cpthread_cond_wait(&CpvAccess(donecond), &dummymutex)); if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_unlock(&dummymutex)); if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_destroy(&dummymutex)); if (CrnRand()&1) CthYield(); if (CpvAccess(total)!=160) posixth_fail(); if (CpvAccess(leaves)!=260) posixth_fail(); if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_destroy(&CpvAccess(total_mutex))); if (CrnRand()&1) CthYield(); errck(Cpthread_mutex_destroy(&CpvAccess(leaves_mutex))); if (CrnRand()&1) CthYield(); 
errck(Cpthread_mutex_destroy(&CpvAccess(fibs_mutex))); if (CrnRand()&1) CthYield(); errck(Cpthread_cond_destroy(&CpvAccess(donecond))); if (CrnRand()&1) CthYield(); Cpm_megacon_ack(CpmSend(0)); }
/** threadInfo methods */
// Main loop of a BigSim communication thread.  Never returns: it drains the
// node's incoming-message buffer forever, suspending itself on the node's
// comm-thread queue whenever no message is available.  SMALL_WORK messages are
// processed inline on this thread; everything else is routed to a worker
// thread's queue (affinity messages to the specific thread, ANYTHREAD messages
// to the node-level queue).
void commThreadInfo::run()
{
  // Register the handler used to resume BigSim threads (stored per-PE).
  CpvAccess(CthResumeBigSimThreadIdx) = BgRegisterHandler((BgHandler)CthResumeNormalThread);
  tSTARTTIME = CmiWallTimer();
  // One-time node startup: only the first comm thread to run performs it.
  if (!tSTARTED) {
    tSTARTED = 1;
    // InitHandlerTable();
    BgNodeStart(BgGetArgc(), BgGetArgv());
    /* bnv should be initialized */
  }
  threadQueue *commQ = myNode->commThQ;
  //int recvd=0; //for debugging only
  for (;;) {
    char *msg = getFullBuffer();
    if (!msg) {
      // Nothing to do: park this thread on the comm queue and suspend until
      // new data arrives; resume re-enters the loop via `continue`.
      // tCURRTIME += (CmiWallTimer()-tSTARTTIME);
      commQ->enq(CthSelf());
      DEBUGF(("[%d] comm thread suspend.\n", BgMyNode()));
      CthSuspend();
      DEBUGF(("[%d] comm thread assume.\n", BgMyNode()));
      // tSTARTTIME = CmiWallTimer();
      continue;
    }
    DEBUGF(("[%d] comm thread has a msg.\n", BgMyNode()));
    //printf("on node %d, comm thread process a msg %p with type %d\n", BgMyNode(), msg, CmiBgMsgType(msg));
    /* schedule a worker thread, if small work do it itself */
    if (CmiBgMsgType(msg) == SMALL_WORK) {
      // Advance simulated time to the message's receive time before handling.
      if (CmiBgMsgRecvTime(msg) > tCURRTIME) tCURRTIME = CmiBgMsgRecvTime(msg);
      // tSTARTTIME = CmiWallTimer();
      /* call user registered handler function */
      BgProcessMessage(this, msg);
    }
    else {
#if BIGSIM_TIMING
      correctMsgTime(msg);
#endif
      //recvd++;
      //DEBUGM(4, ("[N%d] C[%d] will add a msg (handler=%d | cnt=%d", BgMyNode(), id, CmiBgMsgHandle(msg), recvd));
      int msgLen = CmiBgMsgLength(msg);
      DEBUGM(4, (" | len: %d | type: %d | node id: %d | src pe: %d\n" , msgLen, CmiBgMsgType(msg), CmiBgMsgNodeID(msg), CmiBgMsgSrcPe(msg)));
      // Route to a worker: ANYTHREAD goes to the node queue, otherwise to the
      // designated thread's affinity queue.
      if (CmiBgMsgThreadID(msg) == ANYTHREAD) {
        DEBUGF(("anythread, call addBgNodeMessage\n"));
        addBgNodeMessage(msg); /* non-affinity message */
        DEBUGM(4, ("The message is added to node\n\n"));
      }
      else {
        DEBUGF(("[N%d] affinity msg, call addBgThreadMessage to tID:%d\n", BgMyNode(), CmiBgMsgThreadID(msg)));
        addBgThreadMessage(msg, CmiBgMsgThreadID(msg));
        DEBUGM(4, ("The message is added to thread(%d)\n\n", CmiBgMsgThreadID(msg)));
      }
    }
    /* let other communication thread do their jobs */
    // tCURRTIME += (CmiWallTimer()-tSTARTTIME);
    if (!schedule_flag)
      CthYield();
    tSTARTTIME = CmiWallTimer();
  }
}
//The original version of scheduler
// BigSim worker-thread scheduler loop.  Picks the next message to execute by
// comparing the simulated receive times at the heads of two queues — the
// node-wide queue (q1) and this thread's affinity queue (q2) — and runs the
// earlier one.  Suspends when both queues are empty.  Returns after `count`
// messages have been processed (count < 0 never matches, i.e. run forever),
// or breaks out when CsdStopFlag changes, decrementing it on exit.
// NOTE(review): messages are peeked (q[0]) first and dequeued only after
// selection — the comments below say this delayed-deq protocol is deliberate
// (handler may trap back into the scheduler); preserve the ordering.
void workThreadInfo::scheduler(int count)
{
  ckMsgQueue &q1 = myNode->nodeQ;          // node-wide (ANYTHREAD) messages
  ckMsgQueue &q2 = myNode->affinityQ[id];  // messages bound to this thread
  int cycle = CsdStopFlag;                 // snapshot to detect CsdExitScheduler
  int recvd = 0;                           // messages processed so far
  for (;;) {
    char *msg=NULL;
    int e1 = q1.isEmpty();
    int e2 = q2.isEmpty();
    int fromQ2 = 0;  // remembers which queue the chosen message came from
    // delay the deq of msg from affinity queue
    // not deq from nodeQ assuming no interrupt in the handler
    if (e1 && !e2) { msg = q2[0]; fromQ2 = 1;}
    // else if (e2 && !e1) { msg = q1.deq(); }
    else if (e2 && !e1) { msg = q1[0]; }
    else if (!e1 && !e2) {
      // Both non-empty: execute whichever head has the earlier simulated
      // receive time (ties go to the affinity queue).
      if (CmiBgMsgRecvTime(q1[0]) < CmiBgMsgRecvTime(q2[0])) {
        // msg = q1.deq();
        msg = q1[0];
      }
      else { msg = q2[0]; fromQ2 = 1; }
    }
    /* if no msg is ready, go back to sleep */
    if ( msg == NULL ) {
      // tCURRTIME += (CmiWallTimer()-tSTARTTIME);
      DEBUGM(4,("N[%d] work thread %d has no msg and go to sleep!\n", BgMyNode(), id));
      if (watcher) watcher->replay();
#if BIGSIM_OUT_OF_CORE && BIGSIM_OOC_PREFETCH
      if(bgUseOutOfCore){
        //thread scheduling point!!
        // Remove self from the out-of-core scheduling queue before sleeping.
        workThreadInfo *thisThd = schedWorkThds->pop();
        //CmiPrintf("thisThd=%p, actualThd=%p, equal=%d qsize=%d\n", thisThd, this, thisThd==this, schedWorkThds->size());
        assert(thisThd==this);
      }
#endif
      CthSuspend();
      DEBUGM(4, ("N[%d] work thread %d awakened!\n", BgMyNode(), id));
      continue;
    }
#if BIGSIM_TIMING
    correctMsgTime(msg);
#if THROTTLE_WORK
    // Time-correction throttle: if the message is further ahead of GVT than
    // the leash allows, requeue this thread with a priority derived from the
    // message's timestamp instead of executing now.
    if (correctTimeLog) {
      if (CmiBgMsgRecvTime(msg) > gvt+ BG_LEASH) {
        double nextT = CmiBgMsgRecvTime(msg);
        int prio = (int)(nextT*PRIO_FACTOR)+1;
        if (prio < 0) {
          CmiPrintf("PRIO_FACTOR %e is too small. \n", PRIO_FACTOR);
          CmiAbort("BigSim time correction abort!\n");
        }
        //CmiPrintf("Thread %d YieldPrio: %g gvt: %g leash: %g\n", id, nextT, gvt, BG_LEASH);
        CthYieldPrio(CQS_QUEUEING_IFIFO, sizeof(int), (unsigned int*)&prio);
        continue;
      }
    }
#endif
#endif /* TIMING */
    DEBUGM(2, ("[N%d] work thread T%d has a msg with recvT:%e msgId:%d.\n", BgMyNode(), id, CmiBgMsgRecvTime(msg), CmiBgMsgID(msg)));
    //if (tMYNODEID==0)
    //CmiPrintf("[%d] recvT: %e\n", tMYNODEID, CmiBgMsgRecvTime(msg));
    // Advance this thread's simulated clock to the message's receive time.
    if (CmiBgMsgRecvTime(msg) > currTime) {
      tCURRTIME = CmiBgMsgRecvTime(msg);
    }
#if 1
    // Dequeue the selected message now (the alternative — dequeue after
    // processing — is the disabled #if 0 block further down).
    if (fromQ2 == 1) q2.deq();
    else q1.deq();
#endif
    recvd ++;
    DEBUGM(4, ("[N%d] W[%d] will process a msg (handler=%d | cnt=%d", BgMyNode(), id, CmiBgMsgHandle(msg), recvd));
    int msgLen = CmiBgMsgLength(msg);
    DEBUGM(4, (" | len: %d | type: %d | node id: %d | src pe: %d\n" , msgLen, CmiBgMsgType(msg), CmiBgMsgNodeID(msg), CmiBgMsgSrcPe(msg)));
    for(int msgIndex=CmiBlueGeneMsgHeaderSizeBytes-1; msgIndex<msgLen; msgIndex++)
      DEBUGM(2, ("%d,", msg[msgIndex]));
    DEBUGM(2,("\n"));
    DEBUGM(4, ("[N%d] W[%d] now has %d msgs from own queue and %d from affinity before processing msg\n", BgMyNode(), id, q1.length(), q2.length()));
    //CmiMemoryCheck();
    // BgProcessMessage may trap into scheduler
    if(bgUseOutOfCore){
#if 0
      // Older hand-rolled out-of-core path, kept disabled for reference.
      if(startOutOfCore){
        DEBUGM(4, ("to execute in ooc mode\n"));
        if(isCoreOnDisk) this->broughtIntoMem();
        BgProcessMessage(this, msg);
        //startOutOfCore may be changed in processing this msg (AMPI_Init)
        if(startOOCChanged){
          //indicate AMPI_Init is called and before it is finished, out-of-core is not executed
          //just to track the 0->1 change phase (which means MPI_Init is finished)
          //the 1->0 phase is not tracked because "startOutOfCore" is unset so that
          //the next processing of a msg will not go into this part of code
          startOOCChanged=0;
        }else{
          //if(!isCoreOnDisk) { //the condition is added for virtual process
          this->takenOutofMem();
          //}
        }
      }else{
        DEBUGM(4, ("to execute not in ooc mode\n"));
        if(isCoreOnDisk) {
          CmiAbort("This should never be entered!\n");
          this->broughtIntoMem();
        }
        //put before processing msg since thread may be scheduled during processing the msg
        BgProcessMessage(this, msg);
      }
#else
      //schedWorkThds->print();
      // Current out-of-core path: ensure this thread's state is in memory,
      // then (optionally) prefetch the next scheduled thread's core.
      bgOutOfCoreSchedule(this);
      BG_ENTRYSTART(msg);
#if BIGSIM_OOC_PREFETCH
#if !BIGSIM_OOC_NOPREFETCH_AIO
      //do prefetch here for the next different thread in queue (schedWorkThds)
      assert(schedWorkThds->peek(0)==this);
      for(int offset=1; offset<schedWorkThds->size(); offset++) {
        workThreadInfo *nThd = schedWorkThds->peek(offset);
        //if nThd's core has been dumped to disk, then we could prefetch its core.
        //otherwise, it is the first time for the thread to process a message, thus
        //no need to resort to disk to find its core
        if(nThd!=this && !checkThreadInCore(nThd) && nThd->isCoreOnDisk && oocPrefetchSpace->occupiedThd==NULL) {
          oocPrefetchSpace->newPrefetch(nThd);
        }
      }
#endif
#endif
      BgProcessMessage(this, msg);
#endif
    }else{
      DEBUGM(4, ("to execute not in ooc mode\n"));
      BG_ENTRYSTART(msg);
      BgProcessMessage(this, msg);
    }
    DEBUGM(4, ("[N%d] W[%d] now has %d msgs from own queue and %d from affinity after processing msg\n\n", BgMyNode(), id, q1.length(), q2.length()));
    BG_ENTRYEND();
    // counter of processed real mesgs
    stateCounters.realMsgProcCnt++;
    // NOTE: I forgot why I delayed the dequeue after processing it
#if 0
    if (fromQ2 == 1) q2.deq();
    else q1.deq();
#endif
    //recvd ++;
    //DEBUGF(("[N%d] work thread T%d finish a msg.\n", BgMyNode(), id));
    //CmiPrintf("[N%d] work thread T%d finish a msg (msg=%s, cnt=%d).\n", BgMyNode(), id, msg, recvd);
    // Processed the requested number of messages: return to caller.
    if ( recvd == count) return;
    // Scheduler-exit was requested while we were processing: leave the loop.
    if (cycle != CsdStopFlag) break;
    /* let other work thread do their jobs */
    if (schedule_flag) {
      DEBUGF(("[N%d] work thread T%d suspend when done - %d to go.\n", BgMyNode(), tMYID, q2.length()));
      CthSuspend();
      DEBUGF(("[N%d] work thread T%d awakened here.\n", BgMyNode(), id));
    }
    else {
#if BIGSIM_OUT_OF_CORE && BIGSIM_OOC_PREFETCH
      //thread scheduling point!!
      //Suspend and put itself back to the end of the queue
      if(bgUseOutOfCore){
        workThreadInfo *thisThd = schedWorkThds->pop();
        //CmiPrintf("thisThd=%p, actualThd=%p, equal=%d qsize=%d\n", thisThd, this, thisThd==this, schedWorkThds->size());
        assert(thisThd==this);
        schedWorkThds->push(this);
      }
#endif
      CthYield();
    }
  }
  // Consume one level of the stop request on the way out.
  CsdStopFlag --;
}