int ScriptTcl::Tcl_replicaAtomSendrecv(ClientData clientData, Tcl_Interp *interp, int argc, char **argv) { ScriptTcl *script = (ScriptTcl *)clientData; script->initcheck(); if ( ! Node::Object()->simParameters->replicaUniformPatchGrids ) { Tcl_SetResult(interp,"replicaUniformPatchGrids is required for atom exchange",TCL_VOLATILE); return TCL_ERROR; } if ( argc < 2 || argc > 3 ) { Tcl_SetResult(interp,"bad arg count; args: dest ?source?",TCL_VOLATILE); return TCL_ERROR; } int dest = -1; if ( sscanf(argv[1], "%d", &dest) != 1 ) { Tcl_SetResult(interp,"bad dest; args: dest ?source?",TCL_VOLATILE); return TCL_ERROR; } int source = -1; if ( argc == 3 ) { if ( sscanf(argv[2], "%d", &source) != 1 ) { Tcl_SetResult(interp,"bad source; args: dest ?source?",TCL_VOLATILE); return TCL_ERROR; } } #if CMK_HAS_PARTITION if (dest != CmiMyPartition()) { DataMessage *recvMsg = NULL; replica_sendRecv((char*)&(script->state->lattice), sizeof(Lattice), dest, CkMyPe(), &recvMsg, source, CkMyPe()); CmiAssert(recvMsg != NULL); memcpy(&(script->state->lattice), recvMsg->data, recvMsg->size); CmiFree(recvMsg); } #endif char str[40]; sprintf(str, "%d", dest); script->setParameter("scriptArg1", str); sprintf(str, "%d", source); script->setParameter("scriptArg2", str); CkpvAccess(_qd)->create(2 * PatchMap::Object()->numPatches()); script->runController(SCRIPT_ATOMSENDRECV); #if CMK_HAS_PARTITION if (dest != CmiMyPartition()) { DataMessage *recvMsg = NULL; ControllerState *cstate = script->state->controller; replica_sendRecv((char*)cstate, sizeof(ControllerState), dest, CkMyPe(), &recvMsg, source, CkMyPe()); CmiAssert(recvMsg != NULL); memcpy(cstate, recvMsg->data, recvMsg->size); CmiFree(recvMsg); } #endif return TCL_OK; }
/** \function pidtonid * finds nids for pids 1 to CmiNumPes and stores them in an array * correspondingly also creates an array for nids to pids */ void pidtonid(int numpes) { CmiLock(cray_lock); if (pid2nid != NULL) { CmiUnlock(cray_lock); return; /* did once already */ } getDimension(&maxNID,&maxX,&maxY,&maxZ); int numCores = CmiNumCores(); pid2nid = (int *)malloc(sizeof(int) * numpes); #if XT4_TOPOLOGY || XT5_TOPOLOGY || XE6_TOPOLOGY int i, nid, ret; CmiAssert(rca_coords == NULL); rca_coords = (rca_mesh_coord_t *)malloc(sizeof(rca_mesh_coord_t)*(maxNID+1)); for (i=0; i<maxNID; i++) { rca_coords[i].mesh_x = rca_coords[i].mesh_y = rca_coords[i].mesh_z = -1; } for (i=0; i<numpes; i++) { PMI_Get_nid(CmiGetNodeGlobal(CmiNodeOf(i),CmiMyPartition()), &nid); pid2nid[i] = nid; CmiAssert(nid < maxNID); ret = rca_get_meshcoord(nid, &rca_coords[nid]); CmiAssert(ret != -1); } #endif CmiUnlock(cray_lock); }
int ScriptTcl::Tcl_replicaSendrecv(ClientData, Tcl_Interp *interp, int argc, char **argv) { if ( argc < 3 || argc > 4 ) { Tcl_SetResult(interp,"args: data dest ?source?",TCL_VOLATILE); return TCL_ERROR; } Tcl_DString recvstr; Tcl_DStringInit(&recvstr); int sendcount = strlen(argv[1]); int recvcount = 0; int dest = atoi(argv[2]); int source = -1; if ( argc > 3 ) source = atoi(argv[3]); #if CMK_HAS_PARTITION if (dest == CmiMyPartition()) { Tcl_DStringSetLength(&recvstr,sendcount); memcpy(Tcl_DStringValue(&recvstr),argv[1],sendcount); } else { DataMessage *recvMsg = NULL; replica_sendRecv(argv[1], sendcount, dest, CkMyPe(), &recvMsg, source, CkMyPe()); CmiAssert(recvMsg != NULL); Tcl_DStringAppend(&recvstr, recvMsg->data, recvMsg->size); CmiFree(recvMsg); } #endif Tcl_DStringResult(interp, &recvstr); Tcl_DStringFree(&recvstr); return TCL_OK; }
// needs to figure out what is local/remote void CmiDirect_get(CmiDirectUserHandle *userHandle) { gni_post_descriptor_t *pd; #if USE_LRTS_MEMPOOL if (userHandle->remoteNode== CmiMyNode()) { CmiMemcpy(userHandle->remoteBuf,userHandle->localBuf,userHandle->transSize); (*(userHandle->callbackFnPtr))(userHandle->callbackData); } else { gni_return_t status; RDMA_REQUEST *rdma_request_msg; MallocPostDesc(pd); if(userHandle->transSize <= LRTS_GNI_RDMA_THRESHOLD) pd->type = GNI_POST_FMA_GET; else pd->type = GNI_POST_RDMA_GET; pd->cq_mode = GNI_CQMODE_GLOBAL_EVENT; pd->dlvr_mode = GNI_DLVMODE_PERFORMANCE; pd->length = userHandle->transSize; pd->local_addr = (uint64_t) (userHandle->localBuf); pd->local_mem_hndl = userHandle->localMdh; pd->remote_addr = (uint64_t)(userHandle->remoteBuf); pd->remote_mem_hndl = userHandle->remoteMdh; pd->src_cq_hndl = 0; pd->rdma_mode = 0; pd->first_operand = (uint64_t) (userHandle->callbackFnPtr); pd->second_operand = (uint64_t) (userHandle->callbackData); pd->amo_cmd = 2; pd->cqwrite_value = DIRECT_SEQ; #if REMOTE_EVENT bufferRdmaMsg(sendRdmaBuf, CmiGetNodeGlobal(userHandle->remoteNode,CmiMyPartition()), pd, userHandle->ack_index); #else bufferRdmaMsg(sendRdmaBuf, CmiGetNodeGlobal(userHandle->remoteNode,CmiMyPartition()), pd, -1); #endif #if CMI_DIRECT_DEBUG CmiPrintf("[%d] RDMA get %d,%d bytes addr %p to remoteNode %d:%p \n\n",CmiMyPe(), userHandle->transSize, pd->length, (void*)(pd->local_addr), userHandle->remoteNode, (void*) (pd->remote_addr)); #endif } #else CmiPrintf("Normal Send in CmiDirect Get\n"); CmiAbort(""); #endif }
/**
 * Tcl command handler: return the value of CmiMyPartition() as an integer
 * result.  The command takes no arguments.
 *
 * @return TCL_OK, or TCL_ERROR when any argument is supplied
 */
int ScriptTcl::Tcl_myReplica(ClientData, Tcl_Interp *interp, int argc, char **) {
  if ( argc <= 1 ) {
    Tcl_SetObjResult(interp, Tcl_NewIntObj(CmiMyPartition()));
    return TCL_OK;
  }

  // Anything beyond the command name itself is a usage error.
  Tcl_SetResult(interp,"no arguments needed",TCL_VOLATILE);
  return TCL_ERROR;
}
int ScriptTcl::Tcl_checkpointReplica(ClientData clientData, Tcl_Interp *interp, int argc, char *argv[]) { ScriptTcl *script = (ScriptTcl *)clientData; script->initcheck(); if (argc < 2 || argc > 3) { Tcl_SetResult(interp,"args: <key> ?<replica> or global?",TCL_VOLATILE); return TCL_ERROR; } script->setParameter("scriptStringArg1", argv[1]); int replica = CmiMyPartition(); if ( argc == 3 ) { if ( ! strcmp(argv[2],"global") ) { replica = replica_hash(argv[1]); } else if ( sscanf(argv[2],"%d",&replica) != 1 ) { Tcl_SetResult(interp,"args: <key> ?<replica> or global?",TCL_VOLATILE); return TCL_ERROR; } } if ( replica != CmiMyPartition() ) { if ( ! Node::Object()->simParameters->replicaUniformPatchGrids ) { Tcl_SetResult(interp,"replicaUniformPatchGrids is required for checkpointing on other replicas",TCL_VOLATILE); return TCL_ERROR; } } CHECK_REPLICA(replica); char str[40]; sprintf(str, "%d", replica); script->setParameter("scriptIntArg1", str); CkpvAccess(_qd)->create(PatchMap::Object()->numPatches()); if ( replica != CmiMyPartition() ) CkpvAccess(_qd)->create(1); if ( ! strcmp(argv[0],"checkpointStore") ) script->runController(SCRIPT_CHECKPOINT_STORE); else if ( ! strcmp(argv[0],"checkpointLoad") ) script->runController(SCRIPT_CHECKPOINT_LOAD); else if ( ! strcmp(argv[0],"checkpointSwap") ) script->runController(SCRIPT_CHECKPOINT_SWAP); else if ( ! strcmp(argv[0],"checkpointFree") ) script->runController(SCRIPT_CHECKPOINT_FREE); else { Tcl_SetResult(interp,"checkpointStore/Load/Swap/Free called via unrecognized name",TCL_VOLATILE); return TCL_ERROR; } return TCL_OK; }
void CmiDirect_manytomany_initialize_send ( void * h, unsigned tag, unsigned idx, unsigned displ, unsigned bytes, unsigned rank ) { BGPCmiDirectM2mHandle *handle = (BGPCmiDirectM2mHandle *) h; assert ( tag < MAX_CONN ); handle->m2m_sndlens [tag][idx] = bytes; handle->m2m_sdispls [tag][idx] = displ; handle->m2m_ranks [tag][idx] = CmiGetNodeGlobal(CmiNodeOf(rank),CmiMyPartition()); handle->m2m_permutation[tag][idx] = (idx+1)%handle->m2m_nsndranks[tag]; }
/** This is the main charm setup routine. It's called on all processors after Converse initialization. This routine gets passed to Converse from "main.C". The main purpose of this routine is to set up the objects and Ckpv's used during a regular Charm run. See the comment at the top of the file for overall flow. */ void _initCharm(int unused_argc, char **argv) { int inCommThread = (CmiMyRank() == CmiMyNodeSize()); DEBUGF(("[%d,%.6lf ] _initCharm started\n",CmiMyPe(),CmiWallTimer())); CkpvInitialize(size_t *, _offsets); CkpvAccess(_offsets) = new size_t[32]; CkpvInitialize(PtrQ*,_buffQ); CkpvInitialize(PtrVec*,_bocInitVec); CkpvInitialize(void*, _currentChare); CkpvInitialize(int, _currentChareType); CkpvInitialize(CkGroupID, _currentGroup); CkpvInitialize(void *, _currentNodeGroupObj); CkpvInitialize(CkGroupID, _currentGroupRednMgr); CkpvInitialize(GroupTable*, _groupTable); CkpvInitialize(GroupIDTable*, _groupIDTable); CkpvInitialize(CmiImmediateLockType, _groupTableImmLock); CkpvInitialize(bool, _destroyingNodeGroup); CkpvAccess(_destroyingNodeGroup) = false; CkpvInitialize(UInt, _numGroups); CkpvInitialize(int, _numInitsRecd); CkpvInitialize(int, _initdone); CkpvInitialize(char**, Ck_argv); CkpvAccess(Ck_argv)=argv; CkpvInitialize(MsgPool*, _msgPool); CkpvInitialize(CkCoreState *, _coreState); /* Added for evacuation-sayantan */ #ifndef __BIGSIM__ CpvInitialize(char *,_validProcessors); #endif CkpvInitialize(char ,startedEvac); CpvInitialize(int,serializer); _initChareTables(); // for checkpointable plain chares CksvInitialize(UInt, _numNodeGroups); CksvInitialize(GroupTable*, _nodeGroupTable); CksvInitialize(GroupIDTable, _nodeGroupIDTable); CksvInitialize(CmiImmediateLockType, _nodeGroupTableImmLock); CksvInitialize(CmiNodeLock, _nodeLock); CksvInitialize(PtrVec*,_nodeBocInitVec); CksvInitialize(UInt,_numInitNodeMsgs); CkpvInitialize(int,_charmEpoch); CkpvAccess(_charmEpoch)=0; CksvInitialize(int, _triggersSent); CksvAccess(_triggersSent) = 0; 
CkpvInitialize(_CkOutStream*, _ckout); CkpvInitialize(_CkErrStream*, _ckerr); CkpvInitialize(Stats*, _myStats); CkpvAccess(_groupIDTable) = new GroupIDTable(0); CkpvAccess(_groupTable) = new GroupTable; CkpvAccess(_groupTable)->init(); CkpvAccess(_groupTableImmLock) = CmiCreateImmediateLock(); CkpvAccess(_numGroups) = 1; // make 0 an invalid group number CkpvAccess(_buffQ) = new PtrQ(); CkpvAccess(_bocInitVec) = new PtrVec(); CkpvAccess(_currentNodeGroupObj) = NULL; if(CkMyRank()==0) { CksvAccess(_numNodeGroups) = 1; //make 0 an invalid group number CksvAccess(_numInitNodeMsgs) = 0; CksvAccess(_nodeLock) = CmiCreateLock(); CksvAccess(_nodeGroupTable) = new GroupTable(); CksvAccess(_nodeGroupTable)->init(); CksvAccess(_nodeGroupTableImmLock) = CmiCreateImmediateLock(); CksvAccess(_nodeBocInitVec) = new PtrVec(); } CkCallbackInit(); CmiNodeAllBarrier(); #if ! CMK_BIGSIM_CHARM initQd(argv); // bigsim calls it in ConverseCommonInit #endif CkpvAccess(_coreState)=new CkCoreState(); CkpvAccess(_numInitsRecd) = 0; CkpvAccess(_initdone) = 0; CkpvAccess(_ckout) = new _CkOutStream(); CkpvAccess(_ckerr) = new _CkErrStream(); _charmHandlerIdx = CkRegisterHandler((CmiHandler)_bufferHandler); _initHandlerIdx = CkRegisterHandler((CmiHandler)_initHandler); CkNumberHandlerEx(_initHandlerIdx, (CmiHandlerEx)_initHandler, CkpvAccess(_coreState)); _roRestartHandlerIdx = CkRegisterHandler((CmiHandler)_roRestartHandler); _exitHandlerIdx = CkRegisterHandler((CmiHandler)_exitHandler); //added for interoperabilitY _libExitHandlerIdx = CkRegisterHandler((CmiHandler)_libExitHandler); _bocHandlerIdx = CkRegisterHandler((CmiHandler)_initHandler); CkNumberHandlerEx(_bocHandlerIdx, (CmiHandlerEx)_initHandler, CkpvAccess(_coreState)); #ifdef __BIGSIM__ if(BgNodeRank()==0) #endif _infoIdx = CldRegisterInfoFn((CldInfoFn)_infoFn); _triggerHandlerIdx = CkRegisterHandler((CmiHandler)_triggerHandler); _ckModuleInit(); CldRegisterEstimator((CldEstimator)_charmLoadEstimator); _futuresModuleInit(); // part 
of futures implementation is a converse module _loadbalancerInit(); _metabalancerInit(); #if CMK_MEM_CHECKPOINT init_memcheckpt(argv); #endif initCharmProjections(); #if CMK_TRACE_IN_CHARM // initialize trace module in ck traceCharmInit(argv); #endif CkpvInitialize(int, envelopeEventID); CkpvAccess(envelopeEventID) = 0; CkMessageWatcherInit(argv,CkpvAccess(_coreState)); /** The rank-0 processor of each node calls the translator-generated "_register" routines. _register routines call the charm.h "CkRegister*" routines, which record function pointers and class information for all Charm entities, like Chares, Arrays, and readonlies. There's one _register routine generated for each .ci file. _register routines *must* be called in the same order on every node, and *must not* be called by multiple threads simultaniously. */ #ifdef __BIGSIM__ if(BgNodeRank()==0) #else if(CkMyRank()==0) #endif { SDAG::registerPUPables(); CmiArgGroup("Charm++",NULL); _parseCommandLineOpts(argv); _registerInit(); CkRegisterMsg("System", 0, 0, CkFreeMsg, sizeof(int)); CkRegisterChareInCharm(CkRegisterChare("null", 0, TypeChare)); CkIndex_Chare::__idx=CkRegisterChare("Chare", sizeof(Chare), TypeChare); CkRegisterChareInCharm(CkIndex_Chare::__idx); CkIndex_Group::__idx=CkRegisterChare("Group", sizeof(Group), TypeGroup); CkRegisterChareInCharm(CkIndex_Group::__idx); CkRegisterEp("null", (CkCallFnPtr)_nullFn, 0, 0, 0+CK_EP_INTRINSIC); /** These _register calls are for the built-in Charm .ci files, like arrays and load balancing. If you add a .ci file to charm, you'll have to add a call to the _register routine here, or make your library into a "-module". */ _registerCkFutures(); _registerCkArray(); _registerLBDatabase(); _registerMetaBalancer(); _registerCkCallback(); _registertempo(); _registerwaitqd(); _registerCkCheckpoint(); #if CMK_MEM_CHECKPOINT _registerCkMemCheckpoint(); #endif /* Setup Control Point Automatic Tuning Framework. 
By default it is enabled as a part of charm, however it won't enable its tracing module unless a +CPEnableMeasurements command line argument is specified. See trace-common.C for more info Thus there should be no noticable overhead to always having the control point framework linked in. */ #if CMK_WITH_CONTROLPOINT _registerPathHistory(); _registerControlPoints(); _registerTraceControlPoints(); #endif /** CkRegisterMainModule is generated by the (unique) "mainmodule" .ci file. It will include calls to register all the .ci files. */ CkRegisterMainModule(); /** _registerExternalModules is actually generated by charmc at link time (as "moduleinit<pid>.C"). This generated routine calls the _register functions for the .ci files of libraries linked using "-module". This funny initialization is most useful for AMPI/FEM programs, which don't have a .ci file and hence have no other way to control the _register process. */ _registerExternalModules(argv); _registerDone(); } /* The following will happen on every virtual processor in BigEmulator, not just on once per real processor */ if (CkMyRank() == 0) { CpdBreakPointInit(); } CmiNodeAllBarrier(); // Execute the initcalls registered in modules _initCallTable.enumerateInitCalls(); #if CMK_CHARMDEBUG CpdFinishInitialization(); #endif //CmiNodeAllBarrier(); CkpvAccess(_myStats) = new Stats(); CkpvAccess(_msgPool) = new MsgPool(); CmiNodeAllBarrier(); #if !(__FAULT__) CmiBarrier(); CmiBarrier(); CmiBarrier(); #endif #if CMK_SMP_TRACE_COMMTHREAD _TRACE_BEGIN_COMPUTATION(); #else if (!inCommThread) { _TRACE_BEGIN_COMPUTATION(); } #endif #ifdef ADAPT_SCHED_MEM if(CkMyRank()==0){ memCriticalEntries = new int[numMemCriticalEntries]; int memcnt=0; for(int i=0; i<_entryTable.size(); i++){ if(_entryTable[i]->isMemCritical){ memCriticalEntries[memcnt++] = i; } } } #endif #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) _messageLoggingInit(); #endif #ifndef __BIGSIM__ /* FAULT_EVAC */ CpvAccess(_validProcessors) = new 
char[CkNumPes()]; for(int vProc=0;vProc<CkNumPes();vProc++){ CpvAccess(_validProcessors)[vProc]=1; } _ckEvacBcastIdx = CkRegisterHandler((CmiHandler)_ckEvacBcast); _ckAckEvacIdx = CkRegisterHandler((CmiHandler)_ckAckEvac); #endif CkpvAccess(startedEvac) = 0; CpvAccess(serializer) = 0; evacuate = 0; CcdCallOnCondition(CcdSIGUSR1,(CcdVoidFn)CkDecideEvacPe,0); #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) CcdCallOnCondition(CcdSIGUSR2,(CcdVoidFn)CkMlogRestart,0); #endif if(_raiseEvac){ processRaiseEvacFile(_raiseEvacFile); /* if(CkMyPe() == 2){ // CcdCallOnConditionKeep(CcdPERIODIC_10s,(CcdVoidFn)CkDecideEvacPe,0); CcdCallFnAfter((CcdVoidFn)CkDecideEvacPe, 0, 10000); } if(CkMyPe() == 3){ CcdCallFnAfter((CcdVoidFn)CkDecideEvacPe, 0, 10000); }*/ } if (CkMyRank() == 0) { TopoManager_init(); } CmiNodeAllBarrier(); if (!_replaySystem) { CkFtFn faultFunc_restart = CkRestartMain; if (faultFunc == NULL || faultFunc == faultFunc_restart) { // this is not restart from memory // these two are blocking calls for non-bigsim #if ! CMK_BIGSIM_CHARM CmiInitCPUAffinity(argv); CmiInitMemAffinity(argv); #endif } CmiInitCPUTopology(argv); #if CMK_SHARED_VARS_POSIX_THREADS_SMP if (CmiCpuTopologyEnabled()) { int *pelist; int num; CmiGetPesOnPhysicalNode(0, &pelist, &num); #if !CMK_MULTICORE && !CMK_SMP_NO_COMMTHD // Count communication threads, if present // XXX: Assuming uniformity of node size here num += num/CmiMyNodeSize(); #endif if (!_Cmi_forceSpinOnIdle && num > CmiNumCores()) { if (CmiMyPe() == 0) CmiPrintf("\nCharm++> Warning: the number of SMP threads (%d) is greater than the number of physical cores (%d), so threads will sleep while idling. Use +CmiSpinOnIdle or +CmiSleepOnIdle to control this directly.\n\n", num, CmiNumCores()); CmiLock(CksvAccess(_nodeLock)); if (! 
_Cmi_sleepOnIdle) _Cmi_sleepOnIdle = 1; CmiUnlock(CksvAccess(_nodeLock)); } } #endif } if(CmiMyPe() == 0) { char *topoFilename; if(CmiGetArgStringDesc(argv,"+printTopo",&topoFilename,"topo file name")) { std::stringstream sstm; sstm << topoFilename << "." << CmiMyPartition(); std::string result = sstm.str(); FILE *fp; fp = fopen(result.c_str(), "w"); if (fp == NULL) { CkPrintf("Error opening %s file, writing to stdout\n", topoFilename); fp = stdout; } TopoManager_printAllocation(fp); fclose(fp); } } #if CMK_USE_PXSHM && ( CMK_CRAYXE || CMK_CRAYXC ) && CMK_SMP // for SMP on Cray XE6 (hopper) it seems pxshm has to be initialized // again after cpuaffinity is done if (CkMyRank() == 0) { CmiInitPxshm(argv); } CmiNodeAllBarrier(); #endif //CldCallback(); #if CMK_BIGSIM_CHARM && CMK_CHARMDEBUG // Register the BG handler for CCS. Notice that this is put into a variable shared by // the whole real processor. This because converse needs to find it. We check that all // virtual processors register the same index for this handler. 
CpdBgInit(); #endif if (faultFunc) { #if CMK_WITH_STATS if (CkMyPe()==0) _allStats = new Stats*[CkNumPes()]; #endif if (!inCommThread) { CkArgMsg *msg = (CkArgMsg *)CkAllocMsg(0, sizeof(CkArgMsg), 0); msg->argc = CmiGetArgc(argv); msg->argv = argv; faultFunc(_restartDir, msg); CkFreeMsg(msg); } }else if(CkMyPe()==0){ #if CMK_WITH_STATS _allStats = new Stats*[CkNumPes()]; #endif register size_t i, nMains=_mainTable.size(); for(i=0;i<nMains;i++) /* Create all mainchares */ { register int size = _chareTable[_mainTable[i]->chareIdx]->size; register void *obj = malloc(size); _MEMCHECK(obj); _mainTable[i]->setObj(obj); CkpvAccess(_currentChare) = obj; CkpvAccess(_currentChareType) = _mainTable[i]->chareIdx; register CkArgMsg *msg = (CkArgMsg *)CkAllocMsg(0, sizeof(CkArgMsg), 0); msg->argc = CmiGetArgc(argv); msg->argv = argv; _entryTable[_mainTable[i]->entryIdx]->call(msg, obj); #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) CpvAccess(_currentObj) = (Chare *)obj; #endif } _mainDone = 1; _STATS_RECORD_CREATE_CHARE_N(nMains); _STATS_RECORD_PROCESS_CHARE_N(nMains); for(i=0;i<_readonlyMsgs.size();i++) /* Send out readonly messages */ { register void *roMsg = (void *) *((char **)(_readonlyMsgs[i]->pMsg)); if(roMsg==0) continue; //Pack the message and send it to all other processors register envelope *env = UsrToEnv(roMsg); env->setSrcPe(CkMyPe()); env->setMsgtype(ROMsgMsg); env->setRoIdx(i); CmiSetHandler(env, _initHandlerIdx); CkPackMessage(&env); CmiSyncBroadcast(env->getTotalsize(), (char *)env); CpvAccess(_qd)->create(CkNumPes()-1); //For processor 0, unpack and re-set the global CkUnpackMessage(&env); _processROMsgMsg(env); _numInitMsgs++; } //Determine the size of the RODataMessage PUP::sizer ps; for(i=0;i<_readonlyTable.size();i++) _readonlyTable[i]->pupData(ps); //Allocate and fill out the RODataMessage envelope *env = _allocEnv(RODataMsg, ps.size()); PUP::toMem pp((char *)EnvToUsr(env)); for(i=0;i<_readonlyTable.size();i++) _readonlyTable[i]->pupData(pp); 
env->setCount(++_numInitMsgs); env->setSrcPe(CkMyPe()); CmiSetHandler(env, _initHandlerIdx); DEBUGF(("[%d,%.6lf] RODataMsg being sent of size %d \n",CmiMyPe(),CmiWallTimer(),env->getTotalsize())); CmiSyncBroadcastAndFree(env->getTotalsize(), (char *)env); CpvAccess(_qd)->create(CkNumPes()-1); _initDone(); } DEBUGF(("[%d,%d%.6lf] inCommThread %d\n",CmiMyPe(),CmiMyRank(),CmiWallTimer(),inCommThread)); // when I am a communication thread, I don't participate initDone. if (inCommThread) { CkNumberHandlerEx(_bocHandlerIdx,(CmiHandlerEx)_processHandler, CkpvAccess(_coreState)); CkNumberHandlerEx(_charmHandlerIdx,(CmiHandlerEx)_processHandler , CkpvAccess(_coreState)); _processBufferedMsgs(); } #if CMK_CHARMDEBUG // Should not use CpdFreeze inside a thread (since this processor is really a user-level thread) if (CpvAccess(cpdSuspendStartup)) { //CmiPrintf("In Parallel Debugging mode .....\n"); CpdFreeze(); } #endif #if __FAULT__ if(killFlag){ readKillFile(); } #endif }
void CmiInitCPUAffinity(char **argv) { static skt_ip_t myip; int ret, i, exclude; hostnameMsg *msg; char *pemap = NULL; char *commap = NULL; char *pemapfile = NULL; int show_affinity_flag; int affinity_flag = CmiGetArgFlagDesc(argv,"+setcpuaffinity", "set cpu affinity"); while (CmiGetArgIntDesc(argv,"+excludecore", &exclude, "avoid core when setting cpuaffinity")) { if (CmiMyRank() == 0) add_exclude(exclude); affinity_flag = 1; } if (CmiGetArgStringDesc(argv, "+pemapfile", &pemapfile, "define pe to core mapping file")) { FILE *fp; char buf[128]; pemap = (char*)malloc(1024); fp = fopen(pemapfile, "r"); if (fp == NULL) CmiAbort("pemapfile does not exist"); while (!feof(fp)) { if (fgets(buf, 128, fp)) { if (buf[strlen(buf)-1] == '\n') buf[strlen(buf)-1] = 0; strcat(pemap, buf); } } fclose(fp); if (CmiMyPe()==0) CmiPrintf("Charm++> read from pemap file '%s': %s\n", pemapfile, pemap); } CmiGetArgStringDesc(argv, "+pemap", &pemap, "define pe to core mapping"); if (pemap!=NULL && excludecount>0) CmiAbort("Charm++> +pemap can not be used with +excludecore.\n"); CmiGetArgStringDesc(argv, "+commap", &commap, "define comm threads to core mapping"); if (pemap!=NULL || commap!=NULL) affinity_flag = 1; show_affinity_flag = CmiGetArgFlagDesc(argv,"+showcpuaffinity", "print cpu affinity"); cpuAffinityHandlerIdx = CmiRegisterHandler((CmiHandler)cpuAffinityHandler); cpuAffinityRecvHandlerIdx = CmiRegisterHandler((CmiHandler)cpuAffinityRecvHandler); if (CmiMyRank() ==0) { affLock = CmiCreateLock(); } #if CMK_BLUEGENEP || CMK_BLUEGENEQ if(affinity_flag){ affinity_flag = 0; if(CmiMyPe()==0) CmiPrintf("Charm++> cpu affinity setting is not needed on Blue Gene, thus ignored.\n"); } if(show_affinity_flag){ show_affinity_flag = 0; if(CmiMyPe()==0) CmiPrintf("Charm++> printing cpu affinity is not supported on Blue Gene.\n"); } #endif if (!affinity_flag) { if (show_affinity_flag) CmiPrintCPUAffinity(); return; } if (CmiMyPe() == 0) { CmiPrintf("Charm++> cpu affinity enabled. 
\n"); if (excludecount > 0) { CmiPrintf("Charm++> cpuaffinity excludes core: %d", excludecore[0]); for (i=1; i<excludecount; i++) CmiPrintf(" %d", excludecore[i]); CmiPrintf(".\n"); } if (pemap!=NULL) CmiPrintf("Charm++> cpuaffinity PE-core map : %s\n", pemap); } if (CmiMyPe() >= CmiNumPes()) { /* this is comm thread */ /* comm thread either can float around, or pin down to the last rank. however it seems to be reportedly slower if it is floating */ CmiNodeAllBarrier(); if (commap != NULL) { int mycore = search_pemap(commap, CmiMyPeGlobal()-CmiNumPesGlobal()); if(CmiMyPe()-CmiNumPes()==0) printf("Charm++> set comm %d on node %d to core #%d\n", CmiMyPe()-CmiNumPes(), CmiMyNode(), mycore); if (-1 == CmiSetCPUAffinity(mycore)) CmiAbort("set_cpu_affinity abort!"); CmiNodeAllBarrier(); if (show_affinity_flag) CmiPrintCPUAffinity(); return; /* comm thread return */ } else { /* if (CmiSetCPUAffinity(CmiNumCores()-1) == -1) CmiAbort("set_cpu_affinity abort!"); */ #if !CMK_CRAYXT && !CMK_CRAYXE && !CMK_CRAYXC && !CMK_BLUEGENEQ if (pemap == NULL) { #if CMK_MACHINE_PROGRESS_DEFINED while (affinity_doneflag < CmiMyNodeSize()) CmiNetworkProgress(); #else #if CMK_SMP #error "Machine progress call needs to be implemented for cpu affinity!" #endif #endif } #endif #if CMK_CRAYXT || CMK_CRAYXE || CMK_CRAYXC /* if both pemap and commmap are NULL, will compute one */ if (pemap != NULL) #endif { CmiNodeAllBarrier(); if (show_affinity_flag) CmiPrintCPUAffinity(); return; /* comm thread return */ } } } if (pemap != NULL && CmiMyPe()<CmiNumPes()) { /* work thread */ int mycore = search_pemap(pemap, CmiMyPeGlobal()); if(show_affinity_flag) CmiPrintf("Charm++> set PE %d on node %d to core #%d\n", CmiMyPe(), CmiMyNode(), mycore); if (mycore >= CmiNumCores()) { CmiPrintf("Error> Invalid core number %d, only have %d cores (0-%d) on the node. 
\n", mycore, CmiNumCores(), CmiNumCores()-1); CmiAbort("Invalid core number"); } if (CmiSetCPUAffinity(mycore) == -1) CmiAbort("set_cpu_affinity abort!"); CmiNodeAllBarrier(); CmiNodeAllBarrier(); /* if (show_affinity_flag) CmiPrintCPUAffinity(); */ return; } #if CMK_CRAYXT || CMK_CRAYXE || CMK_CRAYXC { int numCores = CmiNumCores(); int myid = getXTNodeID(CmiMyNodeGlobal(), CmiNumNodesGlobal()); int myrank; int pe, mype = CmiMyPeGlobal(); int node = CmiMyNodeGlobal(); int nnodes = 0; #if CMK_SMP if (CmiMyPe() >= CmiNumPes()) { /* this is comm thread */ int node = CmiMyPe() - CmiNumPes(); mype = CmiGetPeGlobal(CmiNodeFirst(node) + CmiMyNodeSize() - 1, CmiMyPartition()); /* last pe on SMP node */ node = CmiGetNodeGlobal(node, CmiMyPartition()); } #endif pe = mype - 1; while (pe >= 0) { int n = CmiNodeOf(pe); if (n != node) { nnodes++; node = n; } if (getXTNodeID(n, CmiNumNodesGlobal()) != myid) break; pe --; } CmiAssert(numCores > 0); myrank = (mype - pe - 1 + nnodes)%numCores; #if CMK_SMP if (CmiMyPe() >= CmiNumPes()) myrank = (myrank + 1)%numCores; #endif if (-1 != CmiSetCPUAffinity(myrank)) { DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank, mynode)); } else{ CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe()); CmiAbort("set cpu affinity abort!\n"); } } if (CmiMyPe() < CmiNumPes()) CmiNodeAllBarrier(); CmiNodeAllBarrier(); #else /* get my ip address */ if (CmiMyRank() == 0) { #if CMK_HAS_GETHOSTNAME myip = skt_my_ip(); /* not thread safe, so only calls on rank 0 */ #else CmiAbort("Can not get unique name for the compute nodes. \n"); #endif } CmiNodeAllBarrier(); /* prepare a msg to send */ msg = (hostnameMsg *)CmiAlloc(sizeof(hostnameMsg)); CmiSetHandler((char *)msg, cpuAffinityHandlerIdx); msg->pe = CmiMyPe(); msg->ip = myip; msg->ncores = CmiNumCores(); DEBUGP(("PE %d's node has %d number of cores. 
\n", CmiMyPe(), msg->ncores)); msg->rank = 0; CmiSyncSendAndFree(0, sizeof(hostnameMsg), (void *)msg); if (CmiMyPe() == 0) { int i; hostTable = CmmNew(); rankmsg = (rankMsg *)CmiAlloc(sizeof(rankMsg)+CmiNumPes()*sizeof(int)*2); CmiSetHandler((char *)rankmsg, cpuAffinityRecvHandlerIdx); rankmsg->ranks = (int *)((char*)rankmsg + sizeof(rankMsg)); rankmsg->nodes = (int *)((char*)rankmsg + sizeof(rankMsg) + CmiNumPes()*sizeof(int)); for (i=0; i<CmiNumPes(); i++) { rankmsg->ranks[i] = 0; rankmsg->nodes[i] = -1; } for (i=0; i<CmiNumPes(); i++) CmiDeliverSpecificMsg(cpuAffinityHandlerIdx); } /* receive broadcast from PE 0 */ CmiDeliverSpecificMsg(cpuAffinityRecvHandlerIdx); CmiLock(affLock); affinity_doneflag++; CmiUnlock(affLock); CmiNodeAllBarrier(); #endif if (show_affinity_flag) CmiPrintCPUAffinity(); }