CmiStartFn mymain(int argc, char** argv) { if(CmiMyRank() == CmiMyNodeSize()) return 0; CpvInitialize(int,msgSize); CpvInitialize(int,cycleNum); CpvInitialize(int,sizeNum); CpvAccess(sizeNum) = 1; CpvAccess(msgSize)= CmiMsgHeaderSizeBytes + 8; CpvInitialize(int,exitHandler); CpvAccess(exitHandler) = CmiRegisterHandler((CmiHandler) exitHandlerFunc); CpvInitialize(int,node0Handler); CpvAccess(node0Handler) = CmiRegisterHandler((CmiHandler) node0HandlerFunc); CpvInitialize(int,node1Handler); CpvAccess(node1Handler) = CmiRegisterHandler((CmiHandler) node1HandlerFunc); CpvInitialize(int,ackHandler); CpvAccess(ackHandler) = CmiRegisterHandler((CmiHandler) ackHandlerFunc); CpvInitialize(double,startTime); CpvInitialize(double,endTime); CpvInitialize(double, IdleStartTime); CpvInitialize(double, IdleTime); CpvInitialize(int,ackCount); CpvAccess(ackCount) = 0; CpvInitialize(int,twoway); CpvAccess(twoway) = 0; CcdCallOnConditionKeep(CcdPROCESSOR_BEGIN_IDLE, ApplIdleStart, NULL); CcdCallOnConditionKeep(CcdPROCESSOR_END_IDLE, ApplIdleEnd, NULL); if(argc > 1) CpvAccess(twoway) = atoi(argv[1]); if(CmiMyPe() == 0) { if(!CpvAccess(twoway)) CmiPrintf("Starting Pingpong with oneway traffic \n"); else CmiPrintf("Starting Pingpong with twoway traffic\n"); } if ((CmiMyPe() < CmiNumPes()/2) || CpvAccess(twoway)) startPingpong(); return 0; }
/*
 * Handler collecting the trial results of the "proc" test.
 *
 * First call (isSingle set): the message holds the result of the sequential
 * run.  Its time and PI estimate are recorded, the counters are reset and
 * the message is re-broadcast to every PE with an equal share of NTRIALS.
 *
 * Later calls: each message is one PE's share of the parallel run.  When
 * all CmiNumPes() reports are in, the parallel time and PI estimate are
 * computed, the speedup Tseq/Tpar is printed as the "actual" processor
 * count, and an acknowledgement is sent to PE 0.
 */
static void collectNumbers(ProcMsg *msg)
{
  int estimatedPes;
  EmptyMsg ack;

  if (CpvAccess(isSingle)) {
    /* sequential phase done: record it and launch the parallel phase */
    CpvAccess(Time1) = CmiWallTimer() - CpvAccess(Time1);
    CpvAccess(seqPI) = 4.0 * msg->success / NTRIALS;
    CpvAccess(isSingle) = 0;
    CpvAccess(nreported) = 0;
    CpvAccess(success) = 0;

    msg->success = NTRIALS/CmiNumPes();
    CmiSetHandler(msg, CpvAccess(trial_handler));
    CmiSyncBroadcastAll(sizeof(ProcMsg), msg);
    CpvAccess(TimeN) = CmiWallTimer();
    printf("if\n");
    return;
  }

  printf("else\n");
  CpvAccess(nreported) += 1;
  CpvAccess(success) += msg->success;

  /* keep waiting until every PE has reported */
  if (CpvAccess(nreported) != CmiNumPes())
    return;

  CpvAccess(TimeN) = CmiWallTimer() - CpvAccess(TimeN);
  CpvAccess(parPI) = 4.0 * CpvAccess(success) / NTRIALS;
  estimatedPes = iround(CpvAccess(Time1)/CpvAccess(TimeN));

  CmiPrintf("[proc] Tseq = %le seconds, Tpar = %le seconds\n",
            CpvAccess(Time1), CpvAccess(TimeN));
  CmiPrintf("[proc] CmiNumPes() reported %d processors\n", CmiNumPes());
  CmiPrintf("[proc] But actual number of processors is %d\n", estimatedPes);
  CmiPrintf("[proc] FYI, appox PI (seq) = %lf\n",CpvAccess(seqPI));
  CmiPrintf("[proc] FYI, appox PI (par) = %lf\n",CpvAccess(parPI));

  CmiSetHandler(&ack, CpvAccess(ack_handler));
  CmiSyncSend(0, sizeof(EmptyMsg), &ack);
  printf("else if\n");
}
/**
 * Pack/unpack the tunable parameters of the streaming strategy.
 *
 * After the base-class state, PERIOD, bufferMax, msgSizeMax, bufSizeMax and
 * idleFlush are serialized.  On a packing or unpacking pass the per-PE
 * buffers and counters are (re)allocated and zeroed and the periodic flush
 * is registered; any other pass stops after the scalar fields.
 */
void StreamingStrategy::pup(PUP::er &p)
{
    Strategy::pup(p);

    p | PERIOD;
    p | bufferMax;
    p | msgSizeMax;
    // p | shortMsgPackingFlag;   -- deliberately not serialized
    p | bufSizeMax;
    p | idleFlush;
    // p | streaming_handler_id;  -- deliberately not serialized

    // Neither packing nor unpacking: nothing else to set up.
    if (!p.isPacking() && !p.isUnpacking())
        return;

    const int peCount = CmiNumPes();
    streamingMsgBuf   = new CkQ<MessageHolder *>[peCount];
    streamingMsgCount = new int[peCount];
    bufSize           = new int[peCount];
    for (int pe = 0; pe < peCount; ++pe) {
        streamingMsgCount[pe] = 0;
        bufSize[pe] = 0;
    }

    // packing is done once in processor 0, unpacking is done once in all
    // processors except 0, so the flush gets registered once per processor
    registerFlush();
}
void CldAverageHandler(struct loadmsg *msg) { peinfo *pinf = &(CpvAccess(peinf)); double load = CldEstimate(); double average = (msg->load_total / CmiNumPes()); int rebalance; if (load < (average+10) * 1.2) rebalance=0; else rebalance = (int)(load - average); if (DEBUGGING_OUTPUT) CmiPrintf("PE %d load=%6d average=%6d rebalance=%d\n", CmiMyPe(), CldEstimate(), (int)average, rebalance); pinf->rebalance = rebalance; CmiFree(msg); CcdCallFnAfter((CcdVoidFn)CldInitiateReduction, 0, CYCLE_MILLISECONDS); }
/*
 * One hop of the simple ring test.
 *
 * Verifies that the first ten payload entries still hold 0..9 (calling
 * ringsimple_fail() on any mismatch).  While hops remain, the counter is
 * decremented and the message is forwarded to the next PE in the ring;
 * on the last hop an acknowledgement is sent to PE 0 and the message is
 * freed.
 */
void ringsimple_hop(ringmsg *msg)
{
  int slot = 0;

  while (slot < 10) {
    if (msg->data[slot] != slot)
      ringsimple_fail();
    slot++;
  }

  if (!msg->hops) {
    /* ring completed */
    Cpm_megacon_ack(CpmSend(0));
    CmiFree(msg);
    return;
  }

  msg->hops--;
  CmiSyncSendAndFree((CmiMyPe()+1) % CmiNumPes(), sizeof(ringmsg), msg);
}
// initialize the stat table internals void StatTable::init(int argc) { char** counterNames = CpvAccess(_counterNames); char** counterDesc = CpvAccess(_counterDesc); if (argc > numStats_) { delete [] stats_; stats_ = new Statistics[argc]; _MEMCHECK(stats_); numStats_ = argc; } for (int i=0; i<argc; i++) { DEBUGF(("%d/%d DEBUG: %d name %s\n desc %s\n", CmiMyPe(), CmiNumPes(), i, name[i], desc[i])); stats_[i].name = counterNames[i]; stats_[i].desc = counterDesc[i]; } clear(); }
/*
 * Pingpong handler on the initiating side.
 *
 * Counts one completed round trip.  Once nCycles round trips are done the
 * end time is stamped and pingpongFinished() takes over the message;
 * otherwise the message is re-targeted at node1Handler, stamped with the
 * current message size and sent to the mirror PE
 * (CmiNumPes() - CmiMyPe() - 1).
 *
 * Always returns 0.
 */
CmiHandler node0HandlerFunc(char *msg)
{
    int *payload;
    int partner;

    CpvAccess(cycleNum) += 1;

    if (CpvAccess(cycleNum) == nCycles) {
        CpvAccess(endTime) = CmiWallTimer();
        pingpongFinished(msg);
        return 0;
    }

    CmiSetHandler(msg,CpvAccess(node1Handler));
    payload = (int *)(msg+CmiMsgHeaderSizeBytes);
    *payload = CpvAccess(msgSize);

    partner = CmiNumPes() - CmiMyPe() - 1;
    CmiSyncSendAndFree(partner,CpvAccess(msgSize),msg);
    return 0;
}
static void *call_startfn(void *vindex) { size_t index = (size_t)vindex; #if CMK_HAS_TLS_VARIABLES && !CMK_NOT_USE_TLS_THREAD if (index<_Cmi_mynodesize) CmiStateInit(index+Cmi_nodestart, index, &Cmi_mystate); else CmiStateInit(_Cmi_mynode+CmiNumPes(),_Cmi_mynodesize,&Cmi_mystate); Cmi_state_vector[index] = &Cmi_mystate; #else CmiState state = Cmi_state_vector + index; pthread_setspecific(Cmi_state_key, state); #endif ConverseRunPE(0); if(CharmLibInterOperate) { while(1) { if(!_cleanUp) { StartInteropScheduler(); CmiNodeAllBarrier(); } else { if (CmiMyRank() == CmiMyNodeSize()) { while (1) { CommunicationServerThread(5); } } else { CsdScheduler(-1); } break; } } } #if 0 if (index<_Cmi_mynodesize) ConverseRunPE(0); /*Regular worker thread*/ else { /*Communication thread*/ CommunicationServerInit(); if (Cmi_charmrun_fd!=-1) while (1) CommunicationServer(5,COM_SERVER_FROM_SMP); } #endif return 0; }
/// Method invoked upon receipt of an acknowledgement of table received void *comlibTableReceivedHandler(void *msg) { if (CmiMyPe() == 0) { // CmiPrintf("Num acks to go: %d\n",CkpvAccess(conv_com_object).acksReceived); if (--CkpvAccess(conv_com_object).acksReceived == 0) { CkpvAccess(conv_com_object).tableReady(); // reset acksReceived for the second step //CmiPrintf("All acks received, broadcasting message to table_received\n"); CkpvAccess(conv_com_object).acksReceived = CmiNumPes() - 1; CmiSyncBroadcastAndFree(CmiReservedHeaderSize, (char*)msg); } else { CmiFree(msg); } } else { CkpvAccess(conv_com_object).tableReady(); CmiSetHandler(msg, CkpvAccess(comlib_ready)); CmiSyncSendAndFree(0, CmiReservedHeaderSize, (char*)msg); } return NULL; }
static void bcast_central(void *msg) { EmptyMsg emsg; ptimemsg tmsg = (ptimemsg)msg; CmiAssert(CmiMyPe() == 0); if(CpvAccess(currentPe) == 0) { CpvAccess(lasttime) = tmsg->time - CpvAccess(starttime) + CpvAccess(timediff)[tmsg->srcpe]; } else if((tmsg->time - CpvAccess(starttime) + CpvAccess(timediff)[tmsg->srcpe]) > CpvAccess(lasttime)) { CpvAccess(lasttime) = tmsg->time - CpvAccess(starttime) + CpvAccess(timediff)[tmsg->srcpe]; } CmiFree(msg); CpvAccess(currentPe)++; if(CpvAccess(currentPe) == CmiNumPes()) { sizes[CpvAccess(nextidx)].time += CpvAccess(lasttime); CpvAccess(numiter)++; if(CpvAccess(numiter)<sizes[CpvAccess(nextidx)].numiter) { msg = CmiAlloc(CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size); CpvAccess(currentPe) = 0; CmiSetHandler(msg, CpvAccess(bcast_reply)); CpvAccess(starttime) = CmiWallTimer(); CmiSyncBroadcastAllAndFree(CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size, msg); } else { CpvAccess(numiter) = 0; CpvAccess(nextidx)++; if(sizes[CpvAccess(nextidx)].size == (-1)) { print_results("CmiSyncBroadcastAllAndFree"); CmiSetHandler(&emsg, CpvAccess(ack_handler)); CmiSyncSend(0, sizeof(EmptyMsg), &emsg); return; } else { msg = CmiAlloc(CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size); CpvAccess(currentPe) = 0; CmiSetHandler(msg, CpvAccess(bcast_reply)); CpvAccess(starttime) = CmiWallTimer(); CmiSyncBroadcastAllAndFree(CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size, msg); } } } }
/*
 * Called on each processor when the rank table broadcast arrives.
 *
 * The message carries two int arrays of CmiNumPes() entries right behind
 * the rankMsg header: first the core ranks, then the node ids.  The array
 * pointers inside the message are re-based to this copy, this PE's entry is
 * looked up and the PE is pinned to that core.  Failure to pin aborts the
 * run.  The message is freed before returning.
 */
static void cpuAffinityRecvHandler(void *msg)
{
  rankMsg *m = (rankMsg *)msg;
  int *rankTable = (int *)((char*)m + sizeof(rankMsg));
  int myrank, mynode;

  m->ranks = rankTable;
  m->nodes = rankTable + CmiNumPes();   /* node ids follow the ranks */

  myrank = m->ranks[CmiMyPe()];
  mynode = m->nodes[CmiMyPe()];

  if (CmiSetCPUAffinity(myrank) == -1) {
    CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
    CmiAbort("set cpu affinity abort!\n");
  }
  else {
    DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank, mynode));
  }

  CmiFree(m);
}
static void CmiStartThreads(char **argv) { int i,tocreate; DWORD threadID; HANDLE thr; CmiMemLock_lock=CmiCreateLock(); comm_mutex = CmiCreateLock(); barrier_mutex = CmiCreateLock(); #ifdef CMK_NO_ASM_AVAILABLE cmiMemoryLock = CmiCreateLock(); if (CmiMyNode()==0) printf("Charm++ warning> fences and atomic operations not available in native assembly\n"); #endif Cmi_state_key = TlsAlloc(); if(Cmi_state_key == 0xFFFFFFFF) PerrorExit("TlsAlloc main"); Cmi_state_vector = (CmiState)calloc(_Cmi_mynodesize+1, sizeof(struct CmiStateStruct)); for (i=0; i<_Cmi_mynodesize; i++) CmiStateInit(i+Cmi_nodestart, i, CmiGetStateN(i)); /*Create a fake state structure for the comm. thread*/ /* CmiStateInit(-1,_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize)); */ CmiStateInit(_Cmi_mynode+CmiNumPes(),_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize)); #if CMK_MULTICORE || CMK_SMP_NO_COMMTHD if (!Cmi_commthread) tocreate = _Cmi_mynodesize-1; else #endif tocreate = _Cmi_mynodesize; for (i=1; i<=tocreate; i++) { if((thr = CreateThread(NULL, 0, call_startfn, (LPVOID)i, 0, &threadID)) == NULL) PerrorExit("CreateThread"); CloseHandle(thr); } if(TlsSetValue(Cmi_state_key, (LPVOID)Cmi_state_vector) == 0) PerrorExit("TlsSetValue"); }
/*
 * Allocate and initialise the per-peer message ordering tables of *info.
 *
 * One slot is kept per PE (plus one per node when the comm thread takes
 * part in broadcast processing).  Sequence numbers, out-of-order buffers
 * and max offsets start at zero; every window starts at INIT_WINDOW_SIZE.
 */
static void initMsgOrderInfo(MsgOrderInfo *info)
{
    int slot;
    int slots = CmiNumPes();
#if CMK_SMP && CMK_OFFLOAD_BCAST_PROCESS
    /* the comm thread will also access such info */
    slots += CmiNumNodes();
#endif

    /* allocate the five tables ... */
    info->nextMsgSeqNo     = malloc(slots*sizeof(int));
    memset(info->nextMsgSeqNo, 0, slots*sizeof(int));

    info->expectedMsgSeqNo = malloc(slots*sizeof(int));
    memset(info->expectedMsgSeqNo, 0, slots*sizeof(int));

    info->oooMsgBuffer     = malloc(slots*sizeof(void **));
    memset(info->oooMsgBuffer, 0, slots*sizeof(void **));

    info->oooMaxOffset     = malloc(slots*sizeof(unsigned char));
    memset(info->oooMaxOffset, 0, slots*sizeof(unsigned char));

    /* ... the window sizes are the only table with a non-zero start value */
    info->CUR_WINDOW_SIZE  = malloc(slots*sizeof(unsigned char));
    slot = 0;
    while (slot < slots) {
        info->CUR_WINDOW_SIZE[slot] = INIT_WINDOW_SIZE;
        slot++;
    }
}
/* on PE 0 */ static void sync_starter(void *msg) { EmptyMsg emsg; ptimemsg tmsg = (ptimemsg)msg; double midTime = (CmiWallTimer() + CpvAccess(lasttime))/2; CpvAccess(timediff)[CpvAccess(currentPe)] = midTime - tmsg->time; CmiFree(msg); CpvAccess(currentPe)++; if(CpvAccess(currentPe) < CmiNumPes()) { CmiSetHandler(&emsg, CpvAccess(sync_reply)); CpvAccess(lasttime) = CmiWallTimer(); CmiSyncSend(CpvAccess(currentPe), sizeof(EmptyMsg), &emsg); } else { msg = CmiAlloc(CmiMsgHeaderSizeBytes+sizes[0].size); CmiSetHandler(msg, CpvAccess(bcast_reply)); CpvAccess(currentPe) = 0; CpvAccess(starttime) = CmiWallTimer(); CmiSyncBroadcastAllAndFree(CmiMsgHeaderSizeBytes+sizes[0].size, msg); } }
void CldHopHandler(char *msg) { peinfo *pinf = &(CpvAccess(peinf)); int len, queueing, priobits; unsigned int *prioptr; CldInfoFn ifn; CldPackFn pfn; int pe; if (pinf->rebalance) { /* do pe = ((lrand48()&0x7FFFFFFF)%CmiNumPes()); */ do pe = ((CrnRand()&0x7FFFFFFF)%CmiNumPes()); while (pe == pinf->mype); ifn = (CldInfoFn)CmiHandlerToFunction(CmiGetInfo(msg)); ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr); if (pfn && CmiNodeOf(pe) != CmiMyNode()) { pfn(&msg); ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr); } CmiSyncSendAndFree(pe, len, msg); pinf->rebalance--; } else { CmiSetHandler(msg, CmiGetXHandler(msg)); CmiHandleMessage(msg); } }
/// Flush the streaming buffer of every destination PE, in PE order.
void StreamingStrategy::periodicFlush()
{
    const int peCount = CmiNumPes();
    int pe = 0;
    while (pe < peCount) {
        flushPE(pe);
        ++pe;
    }
}
void CmiInitCPUAffinity(char **argv) { static skt_ip_t myip; int ret, i, exclude; hostnameMsg *msg; char *pemap = NULL; char *commap = NULL; char *pemapfile = NULL; int show_affinity_flag; int affinity_flag = CmiGetArgFlagDesc(argv,"+setcpuaffinity", "set cpu affinity"); while (CmiGetArgIntDesc(argv,"+excludecore", &exclude, "avoid core when setting cpuaffinity")) { if (CmiMyRank() == 0) add_exclude(exclude); affinity_flag = 1; } if (CmiGetArgStringDesc(argv, "+pemapfile", &pemapfile, "define pe to core mapping file")) { FILE *fp; char buf[128]; pemap = (char*)malloc(1024); fp = fopen(pemapfile, "r"); if (fp == NULL) CmiAbort("pemapfile does not exist"); while (!feof(fp)) { if (fgets(buf, 128, fp)) { if (buf[strlen(buf)-1] == '\n') buf[strlen(buf)-1] = 0; strcat(pemap, buf); } } fclose(fp); if (CmiMyPe()==0) CmiPrintf("Charm++> read from pemap file '%s': %s\n", pemapfile, pemap); } CmiGetArgStringDesc(argv, "+pemap", &pemap, "define pe to core mapping"); if (pemap!=NULL && excludecount>0) CmiAbort("Charm++> +pemap can not be used with +excludecore.\n"); CmiGetArgStringDesc(argv, "+commap", &commap, "define comm threads to core mapping"); if (pemap!=NULL || commap!=NULL) affinity_flag = 1; show_affinity_flag = CmiGetArgFlagDesc(argv,"+showcpuaffinity", "print cpu affinity"); cpuAffinityHandlerIdx = CmiRegisterHandler((CmiHandler)cpuAffinityHandler); cpuAffinityRecvHandlerIdx = CmiRegisterHandler((CmiHandler)cpuAffinityRecvHandler); if (CmiMyRank() ==0) { affLock = CmiCreateLock(); } #if CMK_BLUEGENEP || CMK_BLUEGENEQ if(affinity_flag){ affinity_flag = 0; if(CmiMyPe()==0) CmiPrintf("Charm++> cpu affinity setting is not needed on Blue Gene, thus ignored.\n"); } if(show_affinity_flag){ show_affinity_flag = 0; if(CmiMyPe()==0) CmiPrintf("Charm++> printing cpu affinity is not supported on Blue Gene.\n"); } #endif if (!affinity_flag) { if (show_affinity_flag) CmiPrintCPUAffinity(); return; } if (CmiMyPe() == 0) { CmiPrintf("Charm++> cpu affinity enabled. 
\n"); if (excludecount > 0) { CmiPrintf("Charm++> cpuaffinity excludes core: %d", excludecore[0]); for (i=1; i<excludecount; i++) CmiPrintf(" %d", excludecore[i]); CmiPrintf(".\n"); } if (pemap!=NULL) CmiPrintf("Charm++> cpuaffinity PE-core map : %s\n", pemap); } if (CmiMyPe() >= CmiNumPes()) { /* this is comm thread */ /* comm thread either can float around, or pin down to the last rank. however it seems to be reportedly slower if it is floating */ CmiNodeAllBarrier(); if (commap != NULL) { int mycore = search_pemap(commap, CmiMyPeGlobal()-CmiNumPesGlobal()); if(CmiMyPe()-CmiNumPes()==0) printf("Charm++> set comm %d on node %d to core #%d\n", CmiMyPe()-CmiNumPes(), CmiMyNode(), mycore); if (-1 == CmiSetCPUAffinity(mycore)) CmiAbort("set_cpu_affinity abort!"); CmiNodeAllBarrier(); if (show_affinity_flag) CmiPrintCPUAffinity(); return; /* comm thread return */ } else { /* if (CmiSetCPUAffinity(CmiNumCores()-1) == -1) CmiAbort("set_cpu_affinity abort!"); */ #if !CMK_CRAYXT && !CMK_CRAYXE && !CMK_CRAYXC && !CMK_BLUEGENEQ if (pemap == NULL) { #if CMK_MACHINE_PROGRESS_DEFINED while (affinity_doneflag < CmiMyNodeSize()) CmiNetworkProgress(); #else #if CMK_SMP #error "Machine progress call needs to be implemented for cpu affinity!" #endif #endif } #endif #if CMK_CRAYXT || CMK_CRAYXE || CMK_CRAYXC /* if both pemap and commmap are NULL, will compute one */ if (pemap != NULL) #endif { CmiNodeAllBarrier(); if (show_affinity_flag) CmiPrintCPUAffinity(); return; /* comm thread return */ } } } if (pemap != NULL && CmiMyPe()<CmiNumPes()) { /* work thread */ int mycore = search_pemap(pemap, CmiMyPeGlobal()); if(show_affinity_flag) CmiPrintf("Charm++> set PE %d on node %d to core #%d\n", CmiMyPe(), CmiMyNode(), mycore); if (mycore >= CmiNumCores()) { CmiPrintf("Error> Invalid core number %d, only have %d cores (0-%d) on the node. 
\n", mycore, CmiNumCores(), CmiNumCores()-1); CmiAbort("Invalid core number"); } if (CmiSetCPUAffinity(mycore) == -1) CmiAbort("set_cpu_affinity abort!"); CmiNodeAllBarrier(); CmiNodeAllBarrier(); /* if (show_affinity_flag) CmiPrintCPUAffinity(); */ return; } #if CMK_CRAYXT || CMK_CRAYXE || CMK_CRAYXC { int numCores = CmiNumCores(); int myid = getXTNodeID(CmiMyNodeGlobal(), CmiNumNodesGlobal()); int myrank; int pe, mype = CmiMyPeGlobal(); int node = CmiMyNodeGlobal(); int nnodes = 0; #if CMK_SMP if (CmiMyPe() >= CmiNumPes()) { /* this is comm thread */ int node = CmiMyPe() - CmiNumPes(); mype = CmiGetPeGlobal(CmiNodeFirst(node) + CmiMyNodeSize() - 1, CmiMyPartition()); /* last pe on SMP node */ node = CmiGetNodeGlobal(node, CmiMyPartition()); } #endif pe = mype - 1; while (pe >= 0) { int n = CmiNodeOf(pe); if (n != node) { nnodes++; node = n; } if (getXTNodeID(n, CmiNumNodesGlobal()) != myid) break; pe --; } CmiAssert(numCores > 0); myrank = (mype - pe - 1 + nnodes)%numCores; #if CMK_SMP if (CmiMyPe() >= CmiNumPes()) myrank = (myrank + 1)%numCores; #endif if (-1 != CmiSetCPUAffinity(myrank)) { DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank, mynode)); } else{ CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe()); CmiAbort("set cpu affinity abort!\n"); } } if (CmiMyPe() < CmiNumPes()) CmiNodeAllBarrier(); CmiNodeAllBarrier(); #else /* get my ip address */ if (CmiMyRank() == 0) { #if CMK_HAS_GETHOSTNAME myip = skt_my_ip(); /* not thread safe, so only calls on rank 0 */ #else CmiAbort("Can not get unique name for the compute nodes. \n"); #endif } CmiNodeAllBarrier(); /* prepare a msg to send */ msg = (hostnameMsg *)CmiAlloc(sizeof(hostnameMsg)); CmiSetHandler((char *)msg, cpuAffinityHandlerIdx); msg->pe = CmiMyPe(); msg->ip = myip; msg->ncores = CmiNumCores(); DEBUGP(("PE %d's node has %d number of cores. 
\n", CmiMyPe(), msg->ncores)); msg->rank = 0; CmiSyncSendAndFree(0, sizeof(hostnameMsg), (void *)msg); if (CmiMyPe() == 0) { int i; hostTable = CmmNew(); rankmsg = (rankMsg *)CmiAlloc(sizeof(rankMsg)+CmiNumPes()*sizeof(int)*2); CmiSetHandler((char *)rankmsg, cpuAffinityRecvHandlerIdx); rankmsg->ranks = (int *)((char*)rankmsg + sizeof(rankMsg)); rankmsg->nodes = (int *)((char*)rankmsg + sizeof(rankMsg) + CmiNumPes()*sizeof(int)); for (i=0; i<CmiNumPes(); i++) { rankmsg->ranks[i] = 0; rankmsg->nodes[i] = -1; } for (i=0; i<CmiNumPes(); i++) CmiDeliverSpecificMsg(cpuAffinityHandlerIdx); } /* receive broadcast from PE 0 */ CmiDeliverSpecificMsg(cpuAffinityRecvHandlerIdx); CmiLock(affLock); affinity_doneflag++; CmiUnlock(affLock); CmiNodeAllBarrier(); #endif if (show_affinity_flag) CmiPrintCPUAffinity(); }
//! process command line arguments! void TraceCounter::traceInit(char **argv) { CpvInitialize(CountLogPool*, _logPool); CpvInitialize(char*, _logName); CpvInitialize(double, version); CpvInitialize(char**, _counterNames); CpvInitialize(char**, _counterDesc); CpvInitialize(int, _numCounters); CpvInitialize(int, _reductionID); CpvAccess(_logName) = (char *) malloc(strlen(argv[0])+1); _MEMCHECK(CpvAccess(_logName)); strcpy(CpvAccess(_logName), argv[0]); CpvAccess(version) = VER; int i; // parse command line args char* counters = NULL; commandLine_ = NULL; bool badArg = false; int numCounters = 0; if (CmiGetArgStringDesc(argv, "+counters", &counters, "Measure these performance counters")) { if (CmiMyPe()==0) { CmiPrintf("Counters: %s\n", counters); } int offset = 0; int limit = strlen(counters); char* ptr = counters; while (offset < limit && (ptr = strtok(&counters[offset], ",")) != NULL) { offset += strlen(ptr)+1; ptr = &ptr[strlen(ptr)+1]; numCounters++; } if (CmiMyPe()==0) { CmiPrintf("There are %d counters\n", numCounters); } commandLine_ = new CounterArg[numCounters]; ptr = counters; for (i=0; i<numCounters; i++) { commandLine_[i].arg = ptr; if (!matchArg(&commandLine_[i])) { if (CmiMyPe()==0) { CmiPrintf("Bad arg: [%s]\n", ptr); } badArg = true; } ptr = &ptr[strlen(ptr)+1]; } } commandLineSz_ = numCounters; // check to see if args are valid, output if not if (badArg || CmiGetArgFlagDesc(argv, "+count-help", "List available performance counters")) { if (CmiMyPe() == 0) { printHelp(); } ConverseExit(); return; } else if (counters == NULL) { if (CmiMyPe() == 0) { usage(); } ConverseExit(); return; } // get optional command line args overview_ = CmiGetArgFlag(argv, "+count-overview"); switchRandom_ = CmiGetArgFlag(argv, "+count-switchrandom"); switchByPhase_ = CmiGetArgFlag(argv, "+count-switchbyphase"); noLog_ = CmiGetArgFlag(argv, "+count-nolog"); writeByPhase_ = CmiGetArgFlag(argv, "+count-writebyphase"); char* logName = NULL; if (CmiGetArgString(argv, 
"+count-logname", &logName)) { CpvAccess(_logName) = logName; if (noLog_) { if (CkMyPe()==0) { CmiPrintf("+count-logname and +count-nolog are MUTUALLY EXCLUSIVE\n"); usage(); CmiAbort(""); } } } if (switchByPhase_ && overview_) { if (CkMyPe()==0) { CmiPrintf( "+count-switchbyphase and +count-overview are MUTUALLY EXCLUSIVE\n" "+count-overview automatically switches by phase.\n"); usage(); CmiAbort(""); } } if (writeByPhase_ && noLog_) { if (CkMyPe()==0) { CmiPrintf("+count-writebyphase and +count-nolog are MUTUALLY EXCLUSIVE\n"); usage(); CmiAbort(""); } } // parse through commandLine_, figure out which belongs on which list (1 vs 2) CounterArg* last1 = NULL; CounterArg* last2 = NULL; CounterArg* tmp = NULL; counter1Sz_ = counter2Sz_ = 0; for (i=0; i<commandLineSz_; i++) { tmp = &commandLine_[i]; if (tmp->code < NUM_COUNTER_ARGS/2) { if (counter1_ == NULL) { counter1_ = tmp; last1 = counter1_; } else { last1->next = tmp; last1 = tmp; } counter1Sz_++; } else { if (counter2_ == NULL) { counter2_ = tmp; last2 = counter2_; } else { last2->next = tmp; last2 = tmp; } counter2Sz_++; } } if (counter1_ == NULL) { printHelp(); if (CmiMyPe()==0) { CmiPrintf("\nMust specify some counters with code < %d\n", NUM_COUNTER_ARGS/2); } ConverseExit(); } if (counter2_ == NULL) { printHelp(); if (CmiMyPe()==0) { CmiPrintf("\nMust specify some counters with code >= %d\n", NUM_COUNTER_ARGS/2); } ConverseExit(); } last1->next = counter1_; last2->next = counter2_; // all args valid, now set up logging if (CmiMyPe() == 0) { CmiPrintf("Running with tracemode=counter and args:\n"); // print out counter1 set tmp = counter1_; i = 0; do { CmiPrintf(" <counter1-%d>=%d %s %s\n", i, tmp->code, tmp->arg, tmp->desc); tmp = tmp->next; i++; } while (tmp != counter1_); // print out counter2 set tmp = counter2_; i = 0; do { CmiPrintf(" <counter2-%d>=%d %s %s\n", i, tmp->code, tmp->arg, tmp->desc); tmp = tmp->next; i++; } while (tmp != counter2_); CmiPrintf( "+count-overview %d\n+count-switchrandom %d\n" 
"+count-switchbyphase %d\n+count-nolog %d\n" "+count-logname %s\n+count-writebyphase %d\n", overview_, switchRandom_, switchByPhase_, noLog_, logName, writeByPhase_); } // DEBUGF((" DEBUG: Counter1=%d Counter2=%d\n", counter1_, counter2_)); CpvAccess(_logPool) = new CountLogPool(); // allocate names so can do reduction/analysis on the fly char** counterNames = new char*[counter1Sz_+counter2Sz_]; char** counterDesc = new char*[counter1Sz_+counter2Sz_]; tmp = counter1_; for (i=0; i<counter1Sz_; i++) { tmp->index = i; counterNames[i] = tmp->arg; counterDesc[i] = tmp->desc; tmp = tmp->next; } tmp = counter2_; for (i=0; i<counter2Sz_; i++) { tmp->index = counter1Sz_+i; counterNames[counter1Sz_+i] = tmp->arg; counterDesc[counter1Sz_+i] = tmp->desc; tmp = tmp->next; } CpvAccess(_counterNames) = counterNames; CpvAccess(_counterDesc) = counterDesc; CpvAccess(_numCounters) = numCounters; // don't erase counterNames or counterDesc, // the reduction client will do it on the final reduction _MEMCHECK(CpvAccess(_logPool)); CpvAccess(_logPool)->init(numCounters); DEBUGF(("%d/%d DEBUG: Created _logPool at %08x\n", CmiMyPe(), CmiNumPes(), CpvAccess(_logPool))); }
void CmiInitMemAffinity(char **argv) { int i; int policy=-1; /*step1: parsing args maffinity, mempol and nodemap (nodemap is optional)*/ int maffinity_flag = CmiGetArgFlagDesc(argv, "+maffinity", "memory affinity"); /*the node here refers to the nodes that are seen by libnuma on a phy node*/ /*nodemap is a string of ints separated by ","*/ char *nodemap = NULL; char *mpol = NULL; CmiGetArgStringDesc(argv, "+memnodemap", &nodemap, "define memory node mapping"); CmiGetArgStringDesc(argv, "+mempol", &mpol, "define memory policy {bind, preferred or interleave} "); if (!maffinity_flag) return; /*Currently skip the communication thread*/ /** * Note: the cpu affinity of comm thread may not be set * if "commap" is not specified. This is why the following * code regarding the comm thd needs to be put before * the codes that checks whether cpu affinity is set * or not */ if (CmiMyPe() >= CmiNumPes()) { CmiNodeAllBarrier(); return; } /*step2: checking whether the required cpu affinity has been set*/ if (CpvInitialized(myCPUAffToCore) && CpvAccess(myCPUAffToCore)==-1) { if (CmiMyPe()==0) CmiPrintf("Charm++> memory affinity disabled because cpu affinity is not enabled!\n"); CmiNodeAllBarrier(); return; } if (CmiMyPe()==0) { CmiPrintf("Charm++> memory affinity enabled! \n"); } /*Select memory policy*/ if (mpol==NULL) { CmiAbort("Memory policy must be specified!\n"); } if (strcmp(mpol, "interleave")==0) policy = MPOL_INTERLEAVE; else if (strcmp(mpol, "preferred")==0) policy = MPOL_PREFERRED; else if (strcmp(mpol, "bind")==0) policy = MPOL_BIND; else { CmiPrintf("Error> Invalid memory policy :%s\n", mpol); CmiAbort("Invalid memory policy!"); } /** * step3: check whether nodemap is NULL or not * step 3a): nodemap is not NULL * step 3b): nodemap is NULL, set memory policy according to the result * of cpu affinity settings. 
*/ if (nodemap!=NULL) { int *nodemapArr = NULL; int nodemapArrSize = 1; int prevIntStart,j; int curnid; for (i=0; i<strlen((const char *)nodemap); i++) { if (nodemap[i]==',') nodemapArrSize++; } nodemapArr = malloc(nodemapArrSize*sizeof(int)); prevIntStart=j=0; for (i=0; i<strlen((const char *)nodemap); i++) { if (nodemap[i]==',') { curnid = atoi(nodemap+prevIntStart); if (curnid >= CmiNumNUMANodes()) { CmiPrintf("Error> Invalid node number %d, only have %d nodes (0-%d) on the machine. \n", curnid, CmiNumNUMANodes(), CmiNumNUMANodes()-1); CmiAbort("Invalid node number!"); } nodemapArr[j++] = curnid; prevIntStart=i+1; } } /*record the last nid after the last comma*/ curnid = atoi(nodemap+prevIntStart); if (curnid >= CmiNumNUMANodes()) { CmiPrintf("Error> Invalid node number %d, only have %d nodes (0-%d) on the machine. \n", curnid, CmiNumNUMANodes(), CmiNumNUMANodes()-1); CmiAbort("Invalid node number!"); } nodemapArr[j] = curnid; int myPhyRank = CpvAccess(myCPUAffToCore); int myMemNid = nodemapArr[myPhyRank%nodemapArrSize]; int retval = -1; if (policy==MPOL_INTERLEAVE) { retval = CmiSetMemAffinity(policy, nodemapArr, nodemapArrSize); } else { retval = CmiSetMemAffinity(policy, &myMemNid, 1); } if (retval<0) { CmiAbort("set_mempolicy error w/ mem nodemap"); } } else { /*use the affinity map set by the cpu affinity*/ int myPhyRank = CpvAccess(myCPUAffToCore); /*get the NUMA node id from myPhyRank (a core id)*/ int myMemNid = getNUMANidByRank(myPhyRank); int retval=-1; if (policy==MPOL_INTERLEAVE) { int totalNUMANodes = CmiNumNUMANodes(); int *nids = (int *)malloc(totalNUMANodes*sizeof(int)); for (i=0; i<totalNUMANodes; i++) nids[i] = i; retval = CmiSetMemAffinity(policy, nids, totalNUMANodes); free(nids); } else { retval = CmiSetMemAffinity(policy, &myMemNid, 1); } if (retval<0) { CmiAbort("set_mempolicy error w/o mem nodemap"); } } /*print_mem_affinity();*/ CmiNodeAllBarrier(); }
//! do a reduction across processors to calculate the total count for //! each count, and if the count has flops, etc, then calc the //! the flops/s, etc... void StatTable::doReduction(int phase, double idleTime) { DEBUGF(("%d/%d DEBUG: StatTable::doReduction()\n", CmiMyPe(), CmiNumPes(), this)); // see above (NUM_EXTRA_PERF) for the fields in the message int msgSize = ALIGN8(CmiMsgHeaderSizeBytes)+ sizeof(double)*(2*numStats_+NUM_EXTRA_PERF); char *msg = (char *)CmiAlloc(msgSize); double* reduction = (double*)(msg+ALIGN8(CmiMsgHeaderSizeBytes)); // calculate flops/s, l1%, l2%, tlb% if it's there char** counterNames = CpvAccess(_counterNames); int GR_FLOPS = -1; double flopsRate = -1.0; int LOAD = -1; double loadRate = -1.0; int STORE = -1; double storeRate = -1.0; int L1_DMISS = -1; double l1Rate = -1.0; int L2_DMISS = -1; double l2Rate = -1.0; int TLB_MISS = -1; double tlbRate = -1.0; int i, j; for (i=0; i<2*numStats_+NUM_EXTRA_PERF; i++) { reduction[i] = 0.0; } for (i=0; i<numStats_; i++) { for (int j=0; j<MAX_ENTRIES; j++) { reduction[2*i] += stats_[i].numCalled[j]*stats_[i].avgCount[j]; reduction[2*i+1] += stats_[i].totTime[j]; } if (strcmp(counterNames[i], "GR_FLOPS")==0) { GR_FLOPS = i; } else if (strcmp(counterNames[i], "LOAD")==0) { LOAD = i; } else if (strcmp(counterNames[i], "STORE")==0) { STORE = i; } else if (strcmp(counterNames[i], "L1_DMISS")==0) { L1_DMISS = i; } else if (strcmp(counterNames[i], "L2_DMISS")==0) { L2_DMISS = i; } else if (strcmp(counterNames[i], "TLB_MISS")==0) { TLB_MISS = i; } } if (CmiMyPe()==0) { reduction[2*numStats_] = phase; } reduction[2*numStats_+1] = idleTime; // -1 for the rest of the calc values reduction[2*numStats_+2] = -1.0; reduction[2*numStats_+3] = -1.0; reduction[2*numStats_+4] = -1.0; reduction[2*numStats_+5] = -1.0; // calculate flops/s, l1%, l2%, tlb% if it's there double* rate = NULL; int index; for (i=0; i<6; i++) { switch (i) { case 0: rate = &flopsRate; index = GR_FLOPS; break; case 1: rate = &loadRate; 
index = LOAD; break; case 2: rate = &storeRate; index = STORE; break; case 3: rate = &l1Rate; index = L1_DMISS; break; case 4: rate = &l2Rate; index = L2_DMISS; break; case 5: rate = &tlbRate; index = TLB_MISS; break; } if (index >= 0 && reduction[2*index+1] > 0.0) { // if we have the counter AND it's times were non-zero *rate = reduction[2*index]/reduction[2*index+1]; } } // store rates if there if (GR_FLOPS >= 0) { reduction[2*numStats_+2] = flopsRate; } if (LOAD >= 0 && STORE >= 0) { double memRate = loadRate + storeRate; if (L1_DMISS >= 0 & memRate > 0) { reduction[2*numStats_+3] = l1Rate / memRate; } if (L2_DMISS >= 0 & memRate > 0) { reduction[2*numStats_+4] = l2Rate / memRate; } if (TLB_MISS >= 0 & memRate > 0) { reduction[2*numStats_+5] = tlbRate / memRate; } } // send the data CmiSetHandler(msg, (int)CpvAccess(_reductionID)); int handlerID = CmiGetHandler(msg); DEBUGF(("%d/%d handlerID %d reductionID %d\n", CmiMyPe(), CmiNumPes(), handlerID, CpvAccess(_reductionID))); CmiSyncSendAndFree(0, msgSize, msg); }
// a rudimentary reduction to print out the performance results across the run CmiHandler StatTableReduction(char* msg) { DEBUGF(("StatTableReduction called\n", CmiMyPe(), CmiNumPes())); static double* reduce = NULL; static int numReduce = 0; int numCounters = CpvAccess(_numCounters); int size = 2*CpvAccess(_numCounters)+NUM_EXTRA_PERF; int i; if (reduce == NULL) { // allocate reduce = new double[size]; for (i=0; i<size; i++) { reduce[i] = 0.0; } DEBUGF((" allocated reduce numCounters %d size %d\n", numCounters, size)); } // see above for the feilds of this message double* msgResults = (double *)(msg+ALIGN8(CmiMsgHeaderSizeBytes)); for (i=0; i<size; i++) { reduce[i] += msgResults[i]; } char** counterNames = CpvAccess(_counterNames); numReduce++; DEBUGF((" numReduce %d numPes %d\n", numReduce, CmiNumPes())); int phase = reduce[2*numCounters]; if (numReduce >= CmiNumPes()) { // finished with reduction, print out results numReduce = 0; for (i=0; i<numCounters; i++) { if (reduce[2*i+1]>0.0) { // is time > 0? 
if (phase >= 0) { CmiPrintf("PHASE %d %s totalCount %f totalTime (us) %f\n" "PHASE %d %s count/proc %f avgTime (us)/phase %f\n", phase, counterNames[i], reduce[2*i], reduce[2*i+1]*1e6, phase, counterNames[i], reduce[2*i]/CmiNumPes(), reduce[2*i+1]*1e6/CmiNumPes()); } else { CmiPrintf("%s totalCount %f totalTime (us) %f\n" "%s count/proc %f avgTime (us)/phase %f\n", counterNames[i], reduce[2*i], reduce[2*i+1]*1e6, counterNames[i], reduce[2*i]/CmiNumPes(), reduce[2*i+1]*1e6/CmiNumPes()); } } } if (phase >= 0) { CmiPrintf("PHASE %d totalIdleTime (us) %f avgIdleTime (us)/phase %f\n", phase, reduce[2*numCounters+1]*1e6, reduce[2*numCounters+1]*1e6/CmiNumPes()); } else { CmiPrintf("totalIdleTime (us) %f avgIdleTime (us)/phase %f\n", reduce[2*numCounters+1]*1e6, reduce[2*numCounters+1]*1e6/CmiNumPes()); } if (reduce[2*numCounters+2] > 0.0) { // we have flops if (phase >= 0) { CmiPrintf("PHASE %d flops/s %f flops/s/PE %f\n", phase, reduce[2*numCounters+2], reduce[2*numCounters+2]/CmiNumPes()); } else { CmiPrintf("flops/s %f flops/s/PE %f\n", reduce[2*numCounters+2], reduce[2*numCounters+2]/CmiNumPes()); } } char* missRate = NULL; for (i=0; i<3; i++) { switch (i) { case 0: missRate = "l1 avg miss rate (%)"; break; case 1: missRate = "l2 avg miss rate (%)"; break; case 2: missRate = "tlb avg miss rate (%)"; break; } if (reduce[2*numCounters+3+i] >= 0.0) { if (phase >= 0) { CmiPrintf("PHASE %d %s %f\n", phase, missRate, reduce[2*numCounters+3+i]/CmiNumPes()*100); } else { CmiPrintf("%s %f\n", missRate, reduce[2*numCounters+3+i]/CmiNumPes()*100); } } } // clean up delete [] reduce; reduce = NULL; } CmiFree(msg); }
/*
 * Divides v by the quotient N / CmiNumPes() and returns the result
 * converted to CmiUInt4.
 * NOTE(review): presumably this maps a global index to the PE owning it
 * under an even block distribution of N entries -- confirm against callers.
 */
static CmiUInt4 base(CmiUInt8 v)
{
  CmiUInt4 quotient = v / (N / CmiNumPes());
  return quotient;
}
main(CkArgMsg *) { // print banner iout << iINFO << "NAMD " << NAMD_VERSION << " for " << NAMD_PLATFORM << "\n" #ifdef MEM_OPT_VERSION << iWARN << "\n" << iWARN << " *** EXPERIMENTAL MEMORY OPTIMIZED VERSION ***\n" << iWARN << "\n" #endif #if 0 << iWARN << "\n" << iWARN << " *** UNRELEASED EXPERIMENTAL VERSION ***\n" << iWARN << "\n" #endif #ifdef SPEC_DISABLED_VERSION << iINFO << "\n" << iINFO << "NAMD is a parallel, object-oriented molecular dynamics\n" << iINFO << "code designed for high-performance simulation of large\n" << iINFO << "biomolecular systems. NAMD is distributed free of\n" << iINFO << "charge and includes source code. For more information\n" << iINFO << "please visit http://www.ks.uiuc.edu/Research/namd/\n" << iINFO << "\n" << iINFO << "*********************************************************\n" << iINFO << "This version of NAMD may be distributed only as a part of\n" << iINFO << "the SPEC Workstation Benchmark and all other distribution\n" << iINFO << "is prohibited. Any use of this software is bound by\n" << iINFO << "the terms of the NAMD License, which is available at\n" << iINFO << "http://www.ks.uiuc.edu/Research/namd/license.html\n" << iINFO << "The NAMD development team will not provide support for\n" << iINFO << "any version of NAMD unless you have first registered\n" << iINFO << "and downloaded the latest version of NAMD available at\n" << iINFO << "http://www.ks.uiuc.edu/Research/namd/\n" << iINFO << "*********************************************************\n" #else << iINFO << "\n" << iINFO << "Please visit http://www.ks.uiuc.edu/Research/namd/\n" << iINFO << "for updates, documentation, and support information.\n" #endif << iINFO << "\n" << iINFO << "Please cite Phillips et al., J. Comp. Chem. 
26:1781-1802 (2005)\n" << iINFO << "in all publications reporting results obtained with NAMD.\n" << iINFO << "\n" << endi; char charm_version[64]; sprintf(charm_version,"%d",CHARM_VERSION); #if CHARM_VERSION < 60500 #error "Charm++ 6.5.1 or later is required to build NAMD" #endif iout << iINFO << "Based on Charm++/Converse " << charm_version << " for " << CMK_MACHINE_NAME << "\n" << endi; iout << iINFO << "Built " << namd_build_date << " by " << namd_build_user << " on " << namd_build_machine << "\n" << endi; #ifndef NO_SOCKET char numcpus[512]; sprintf(numcpus,"%d",CkNumPes()); tbsoft_sendusage("NAMD",NAMD_VERSION,NAMD_PLATFORM,numcpus,""); #endif #if CMK_BLUEGENE_CHARM iout << iINFO << "Running on BigSim using " << CmiNumPes() << " real processors.\n" << endi; #endif iout << iINFO << "Running on " << CkNumPes() << " processors, " << CmiNumNodes() << " nodes, " << CmiNumPhysicalNodes() << " physical nodes.\n" << endi; iout << iINFO << "CPU topology information " << (CmiCpuTopologyEnabled()?"available":"unavailable") << ".\n" << endi; iout << iINFO << "Charm++/Converse parallel runtime startup completed at " << CmiWallTimer() << " s\n"<< endi; const char* memsource; memusage(&memsource); iout << iINFO << memusage_MB() << " MB of memory in use" << " based on " << memsource << "\n"; }
/**
 * Acts on the migration decisions held in storedMigrateMsg for this PE.
 *
 * The whole body is compiled only when CMK_LBDB_ON is set.  It copies the
 * per-PE expected loads out of the stored message, walks the move list
 * (starting a migration for every object that leaves this PE and counting
 * the objects that are due to arrive here), records the next balancer PE,
 * calls MigrationDone(1) if nothing is outstanding, and finally deletes the
 * stored message.
 *
 * Builds with _FAULT_MLOG_ or _FAULT_CAUSAL_ add a stale-step check and,
 * while _restartFlag is set, per-destination counts of moves whose object
 * handle is not valid on this PE.
 *
 * @param msg  reduction message; it is deleted immediately and none of its
 *             contents are read here.
 */
void CentralLB::ProcessReceiveMigration(CkReductionMsg *msg)
{
#if CMK_LBDB_ON
  int i;
  LBMigrateMsg *m = storedMigrateMsg;
  CmiAssert(m!=NULL);
  delete msg;

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  // Only allocated (further down) when _restartFlag is set; every later use
  // is on a path that is also taken only when _restartFlag is non-zero.
  int *dummyCounts;

  DEBUGF(("[%d] Starting ReceiveMigration WITH step %d m->step %d\n",CkMyPe(),step(),m->step));
  // CmiPrintf("[%d] Starting ReceiveMigration step %d m->step %d\n",CkMyPe(),step(),m->step);
  if(step() > m->step){
    // NOTE(review): str and env are never used, and this path returns
    // without deleting m -- confirm both are intended.
    char str[100];
    envelope *env = UsrToEnv(m);
    return;
  }
  lbDecisionCount = m->lbDecisionCount;
#endif

  // At debug level above 1, only PEs whose number is a multiple of 1024 report.
  if (_lb_args.debug() > 1)
    if (CkMyPe()%1024==0) CmiPrintf("[%d] Starting ReceiveMigration step %d at %f\n",CkMyPe(),step(), CmiWallTimer());

  // Copy the expected load of every PE from the message into the database.
  for (i=0; i<CkNumPes(); i++) theLbdb->lastLBInfo.expectedLoad[i] = m->expectedLoad[i];
  CmiAssert(migrates_expected <= 0 || migrates_completed == migrates_expected);
  /*FAULT_EVAC*/
  // A PE that CmiNodeAlive reports as not alive drops the message and stops here.
  if(!CmiNodeAlive(CkMyPe())){
    delete m;
    return;
  }
  migrates_expected = 0;
  future_migrates_expected = 0;
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  // NOTE(review): sending and dummy are only ever incremented, never read.
  int sending=0;
  int dummy=0;
  LBDB *_myLBDB = theLbdb->getLBDB();
  if(_restartFlag){
    // One zeroed counter per PE for moves that turn out to be dummies.
    dummyCounts = new int[CmiNumPes()];
    bzero(dummyCounts,sizeof(int)*CmiNumPes());
  }
#endif
  for(i=0; i < m->n_moves; i++) {
    MigrateInfo& move = m->moves[i];
    const int me = CkMyPe();
    if (move.from_pe == me && move.to_pe != me) {
      // The object currently lives here and has to leave.
      DEBUGF(("[%d] migrating object to %d\n",move.from_pe,move.to_pe));
      // migrate object, in case it is already gone, inform toPe
#if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
      if (theLbdb->Migrate(move.obj,move.to_pe) == 0)
        thisProxy[move.to_pe].MissMigrate(!move.async_arrival);
#else
      if(_restartFlag == 0){
        DEBUG(CmiPrintf("[%d] need to move object from %d to %d \n",CkMyPe(),move.from_pe,move.to_pe));
        theLbdb->Migrate(move.obj,move.to_pe);
        sending++;
      }else{
        // While _restartFlag is set, only handles that validObjHandle accepts
        // are migrated; the rest are tallied per destination PE.
        if(_myLBDB->validObjHandle(move.obj)){
          DEBUG(CmiPrintf("[%d] need to move object from %d to %d \n",CkMyPe(),move.from_pe,move.to_pe));
          theLbdb->Migrate(move.obj,move.to_pe);
          sending++;
        }else{
          DEBUG(CmiPrintf("[%d] dummy move to pe %d detected after restart \n",CmiMyPe(),move.to_pe));
          dummyCounts[move.to_pe]++;
          dummy++;
        }
      }
#endif
    } else if (move.from_pe != me && move.to_pe == me) {
      // The object is headed here: count it as expected now, or as a future
      // arrival when the move is flagged async_arrival.
      DEBUGF(("[%d] expecting object from %d\n",move.to_pe,move.from_pe));
      if (!move.async_arrival) migrates_expected++;
      else future_migrates_expected++;
    } else {
      // Every remaining move; only acted on when global location updates are
      // compiled in.
#if CMK_GLOBAL_LOCATION_UPDATE
      UpdateLocation(move);
#endif
    }
  }
  DEBUGF(("[%d] in ReceiveMigration %d moves expected: %d future expected: %d\n",CkMyPe(),m->n_moves, migrates_expected, future_migrates_expected));
  // if (_lb_debug) CkPrintf("[%d] expecting %d objects migrating.\n", CkMyPe(), migrates_expected);

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  if(_restartFlag){
    // Hand the dummy counts over, clear the restart flag, release the array.
    sendDummyMigrationCounts(dummyCounts);
    _restartFlag =0;
    delete []dummyCounts;
  }
#endif

#if 0
  if (m->n_moves ==0) {
    theLbdb->SetLBPeriod(theLbdb->GetLBPeriod()*2);
  }
#endif
  cur_ld_balancer = m->next_lb;
  // The PE named as the next balancer installs the availability vector from
  // the message, unless that PE is PE 0.
  if((CkMyPe() == cur_ld_balancer) && (cur_ld_balancer != 0)){
    LBDatabaseObj()->set_avail_vector(m->avail_vector, -2);
  }

  // Nothing outstanding: either no arrivals are expected, or all of them
  // have already completed.
  if (migrates_expected == 0 || migrates_completed == migrates_expected)
    MigrationDone(1);
  // NOTE(review): storedMigrateMsg still holds the address of the message
  // deleted here -- confirm it is reassigned before its next use.
  delete m;

  // CkEvacuatedElement();
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  // migrates_expected = 0;
  // // ResumeClients(1);
#endif
#endif
}
/*
 * Builds a CpmSend destination for a PE number derived from CrnRand():
 * the draw is masked to its low 31 bits, its low 8 bits are shifted out,
 * and the remainder modulo CmiNumPes() is used as the PE.
 */
CpmDestination CpmLDB()
{
  int draw = (CrnRand() & 0x7FFFFFFF) >> 8;
  int target = draw % CmiNumPes();
  return CpmSend(target);
}
static void CmiStartThreads(char **argv) { pthread_t pid; size_t i; int ok, tocreate; pthread_attr_t attr; int start, end; MACHSTATE(4,"CmiStartThreads") CmiMemLock_lock=CmiCreateLock(); comm_mutex=CmiCreateLock(); _smp_mutex = CmiCreateLock(); #if defined(CMK_NO_ASM_AVAILABLE) && CMK_PCQUEUE_LOCK cmiMemoryLock = CmiCreateLock(); if (CmiMyNode()==0) printf("Charm++ warning> fences and atomic operations not available in native assembly\n"); #endif #if ! (CMK_HAS_TLS_VARIABLES && !CMK_NOT_USE_TLS_THREAD) pthread_key_create(&Cmi_state_key, 0); Cmi_state_vector = (CmiState)calloc(_Cmi_mynodesize+1, sizeof(struct CmiStateStruct)); for (i=0; i<_Cmi_mynodesize; i++) CmiStateInit(i+Cmi_nodestart, i, CmiGetStateN(i)); /*Create a fake state structure for the comm. thread*/ /* CmiStateInit(-1,_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize)); */ CmiStateInit(_Cmi_mynode+CmiNumPes(),_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize)); #else /* for main thread */ Cmi_state_vector = (CmiState *)calloc(_Cmi_mynodesize+1, sizeof(CmiState)); #if CMK_CONVERSE_MPI /* main thread is communication thread */ if(!CharmLibInterOperate) { CmiStateInit(_Cmi_mynode+CmiNumPes(), _Cmi_mynodesize, &Cmi_mystate); Cmi_state_vector[_Cmi_mynodesize] = &Cmi_mystate; } else #endif { /* main thread is of rank 0 */ CmiStateInit(Cmi_nodestart, 0, &Cmi_mystate); Cmi_state_vector[0] = &Cmi_mystate; } #endif #if CMK_MULTICORE || CMK_SMP_NO_COMMTHD if (!Cmi_commthread) tocreate = _Cmi_mynodesize-1; else #endif tocreate = _Cmi_mynodesize; #if CMK_CONVERSE_MPI if(!CharmLibInterOperate) { start = 0; end = tocreate - 1; /* skip comm thread */ } else #endif { start = 1; end = tocreate; /* skip rank 0 main thread */ } for (i=start; i<=end; i++) { pthread_attr_init(&attr); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); ok = pthread_create(&pid, &attr, call_startfn, (void *)i); if (ok<0) PerrorExit("pthread_create"); pthread_attr_destroy(&attr); } #if ! 
(CMK_HAS_TLS_VARIABLES && !CMK_NOT_USE_TLS_THREAD) #if CMK_CONVERSE_MPI if(!CharmLibInterOperate) pthread_setspecific(Cmi_state_key, Cmi_state_vector+_Cmi_mynodesize); else #endif pthread_setspecific(Cmi_state_key, Cmi_state_vector); #endif MACHSTATE(4,"CmiStartThreads done") }