Пример #1
0
/*
 * Per-thread start function of the pingpong benchmark.
 *
 * Sets up the per-processor (Cpv) variables, registers the message handlers
 * and the idle begin/end callbacks, reads the optional "twoway" switch from
 * argv[1], and finally kicks off the exchange on the sending processors.
 * Always returns 0.
 */
CmiStartFn mymain(int argc, char** argv)
{
    /* A rank equal to the node size takes no part in the benchmark
       (presumably the communication thread -- confirm). */
    if (CmiMyNodeSize() == CmiMyRank()) {
        return 0;
    }

    /* Message geometry and iteration bookkeeping. */
    CpvInitialize(int, msgSize);
    CpvInitialize(int, cycleNum);
    CpvInitialize(int, sizeNum);
    CpvAccess(sizeNum) = 1;
    CpvAccess(msgSize) = CmiMsgHeaderSizeBytes + 8;

    /* Handlers are registered in the same order as before. */
    CpvInitialize(int, exitHandler);
    CpvAccess(exitHandler) = CmiRegisterHandler((CmiHandler) exitHandlerFunc);
    CpvInitialize(int, node0Handler);
    CpvAccess(node0Handler) = CmiRegisterHandler((CmiHandler) node0HandlerFunc);
    CpvInitialize(int, node1Handler);
    CpvAccess(node1Handler) = CmiRegisterHandler((CmiHandler) node1HandlerFunc);
    CpvInitialize(int, ackHandler);
    CpvAccess(ackHandler) = CmiRegisterHandler((CmiHandler) ackHandlerFunc);

    /* Timing state. */
    CpvInitialize(double, startTime);
    CpvInitialize(double, endTime);
    CpvInitialize(double, IdleStartTime);
    CpvInitialize(double, IdleTime);

    CpvInitialize(int, ackCount);
    CpvAccess(ackCount) = 0;

    /* Traffic mode: 0 (one-way) unless argv[1] says otherwise. */
    CpvInitialize(int, twoway);
    CpvAccess(twoway) = (argc > 1) ? atoi(argv[1]) : 0;

    CcdCallOnConditionKeep(CcdPROCESSOR_BEGIN_IDLE, ApplIdleStart, NULL);
    CcdCallOnConditionKeep(CcdPROCESSOR_END_IDLE, ApplIdleEnd, NULL);

    if (CmiMyPe() == 0) {
        if (CpvAccess(twoway)) {
            CmiPrintf("Starting Pingpong with twoway traffic\n");
        } else {
            CmiPrintf("Starting Pingpong with oneway traffic \n");
        }
    }

    /* In two-way mode every PE starts; otherwise only the lower half does. */
    if (CpvAccess(twoway) || CmiMyPe() < CmiNumPes()/2) {
        startPingpong();
    }

    return 0;
}
Пример #2
0
/*
 * Handler that collects trial results.
 *
 * While isSingle is set, the incoming message carries the sequential result:
 * the sequential time and PI estimate are stored, the counters are reset and
 * the same message is re-broadcast to all PEs with the per-PE trial count.
 * Afterwards each message is one PE's report; once CmiNumPes() reports have
 * arrived the summary is printed and an ack is sent to PE 0.
 */
static void collectNumbers(ProcMsg *msg)
{
  if (!CpvAccess(isSingle)) {
    int estimatedPes;
    EmptyMsg ack;

    printf("else\n");
    CpvAccess(nreported) += 1;
    CpvAccess(success) += msg->success;
    if (CpvAccess(nreported) != CmiNumPes()) {
      return;                       /* still waiting for more reports */
    }

    CpvAccess(TimeN) = CmiWallTimer() - CpvAccess(TimeN);
    CpvAccess(parPI) = 4.0 * CpvAccess(success) / NTRIALS;
    /* Speed-up of the parallel run, rounded, is taken as the PE count. */
    estimatedPes = iround(CpvAccess(Time1)/CpvAccess(TimeN));
    CmiPrintf("[proc] Tseq = %le seconds, Tpar = %le seconds\n",
              CpvAccess(Time1), CpvAccess(TimeN));
    CmiPrintf("[proc] CmiNumPes() reported %d processors\n", CmiNumPes());
    CmiPrintf("[proc] But actual number of processors is %d\n", estimatedPes);
    CmiPrintf("[proc] FYI, appox PI (seq) = %lf\n",CpvAccess(seqPI));
    CmiPrintf("[proc] FYI, appox PI (par) = %lf\n",CpvAccess(parPI));
    CmiSetHandler(&ack, CpvAccess(ack_handler));
    CmiSyncSend(0, sizeof(EmptyMsg), &ack);
    printf("else if\n");
    return;
  }

  /* Sequential phase just finished: record it and start the parallel one. */
  CpvAccess(Time1) = CmiWallTimer() - CpvAccess(Time1);
  CpvAccess(seqPI) = 4.0 * msg->success / NTRIALS;
  CpvAccess(isSingle) = 0;
  CpvAccess(nreported) = 0;
  CpvAccess(success) = 0;
  msg->success = NTRIALS/CmiNumPes();
  CmiSetHandler(msg, CpvAccess(trial_handler));
  CmiSyncBroadcastAll(sizeof(ProcMsg), msg);
  CpvAccess(TimeN) = CmiWallTimer();
  printf("if\n");
}
Пример #3
0
/**
 * Serializes the strategy's tuning parameters through the PUP::er.
 *
 * When the PUP::er is packing or unpacking, the per-PE message queues and
 * counters are additionally (re)created, zeroed, and the flush is registered.
 * shortMsgPackingFlag and streaming_handler_id are not serialized (they were
 * already disabled in this routine).
 */
void StreamingStrategy::pup(PUP::er &p){
  Strategy::pup(p);

  p | PERIOD;
  p | bufferMax;
  p | msgSizeMax;
  p | bufSizeMax;
  p | idleFlush;

  // Any pass that is neither packing nor unpacking stops here.
  if (!p.isPacking() && !p.isUnpacking()) {
    return;
  }

  const int numPes = CmiNumPes();
  streamingMsgBuf = new CkQ<MessageHolder *>[numPes];
  streamingMsgCount = new int[numPes];
  bufSize = new int[numPes];
  for (int pe = 0; pe < numPes; ++pe) {
    streamingMsgCount[pe] = 0;
    bufSize[pe] = 0;
  }

  // Packing happens once on processor 0, unpacking once on every other
  // processor, so this registers the flush exactly once per processor.
  registerFlush();
}
Пример #4
0
void CldAverageHandler(struct loadmsg *msg)
{
  peinfo *pinf = &(CpvAccess(peinf));
  double load = CldEstimate();
  double average = (msg->load_total / CmiNumPes());
  int rebalance;
  if (load < (average+10) * 1.2) rebalance=0;
  else rebalance = (int)(load - average);
  if (DEBUGGING_OUTPUT)
    CmiPrintf("PE %d load=%6d average=%6d rebalance=%d\n", 
	      CmiMyPe(), CldEstimate(), (int)average, rebalance);
  pinf->rebalance = rebalance;
  CmiFree(msg);
  CcdCallFnAfter((CcdVoidFn)CldInitiateReduction, 0, CYCLE_MILLISECONDS);
}
Пример #5
0
/*
 * One hop of the simple ring test: verifies the ten payload words, then
 * either forwards the message to the next PE (decrementing the hop count)
 * or, when no hops remain, acknowledges to PE 0 and frees the message.
 */
void ringsimple_hop(ringmsg *msg)
{
  int self = CmiMyPe();
  int successor = (self+1) % CmiNumPes();
  int slot = 0;

  /* data[k] must still hold k for every slot. */
  while (slot < 10) {
    if (msg->data[slot] != slot) {
      ringsimple_fail();
    }
    slot++;
  }

  if (msg->hops == 0) {
    Cpm_megacon_ack(CpmSend(0));
    CmiFree(msg);
    return;
  }

  msg->hops--;
  CmiSyncSendAndFree(successor, sizeof(ringmsg), msg);
}
Пример #6
0
// Initialize the stat table internals.
//
// argc is the number of counters to track. The stats_ array is reallocated
// only when argc exceeds the current capacity (numStats_); the first argc
// entries then receive their name/description from the per-processor
// _counterNames / _counterDesc arrays, and clear() resets the table.
void StatTable::init(int argc)
{
  char** counterNames = CpvAccess(_counterNames);
  char** counterDesc = CpvAccess(_counterDesc);

  if (argc > numStats_) {
    delete [] stats_;
    stats_ = new Statistics[argc];  _MEMCHECK(stats_);
    numStats_ = argc;
  }
  for (int i=0; i<argc; i++) { 
    // The debug trace must use the local arrays: no 'name'/'desc' is
    // declared in this function, so the old arguments broke debug builds.
    DEBUGF(("%d/%d DEBUG:   %d name %s\n     desc %s\n", 
	    CmiMyPe(), CmiNumPes(), i, counterNames[i], counterDesc[i]));
    stats_[i].name = counterNames[i]; 
    stats_[i].desc = counterDesc[i];
  }
  clear();
}
Пример #7
0
/*
 * Pingpong handler for the initiating side: counts the completed cycle and
 * either finishes the run (after nCycles cycles) or bounces the message back
 * to the mirrored PE with node1Handler set. Always returns 0.
 */
CmiHandler node0HandlerFunc(char *msg)
{
    CpvAccess(cycleNum) += 1;

    if (CpvAccess(cycleNum) != nCycles) {
        int *payload = (int *)(msg + CmiMsgHeaderSizeBytes);
        int peer = CmiNumPes() - CmiMyPe() - 1;

        CmiSetHandler(msg, CpvAccess(node1Handler));
        /* First payload word carries the message size. */
        *payload = CpvAccess(msgSize);
        CmiSyncSendAndFree(peer, CpvAccess(msgSize), msg);
        return 0;
    }

    CpvAccess(endTime) = CmiWallTimer();
    pingpongFinished(msg);
    return 0;
}
Пример #8
0
/*
 * Thread start routine. vindex carries the thread's index (passed as a
 * pointer-sized integer). The routine binds the per-thread Converse state,
 * runs ConverseRunPE(0) and, when CharmLibInterOperate is set, keeps the
 * thread alive in the interop loop below. Always returns 0.
 */
static void *call_startfn(void *vindex)
{
  size_t index = (size_t)vindex;
#if CMK_HAS_TLS_VARIABLES && !CMK_NOT_USE_TLS_THREAD
  /* TLS build: initialize this thread's own Cmi_mystate and publish it in
     Cmi_state_vector. Indices below the node size get (index+Cmi_nodestart,
     index); any other index gets pe = _Cmi_mynode+CmiNumPes() and
     rank = _Cmi_mynodesize (the disabled block at the end labels that case
     as the communication thread). */
  if (index<_Cmi_mynodesize) 
    CmiStateInit(index+Cmi_nodestart, index, &Cmi_mystate);
  else
    CmiStateInit(_Cmi_mynode+CmiNumPes(),_Cmi_mynodesize,&Cmi_mystate);
  Cmi_state_vector[index] = &Cmi_mystate;
#else
  /* Non-TLS build: the state entry already lives in Cmi_state_vector; just
     attach it to this thread through the pthread key. */
  CmiState state = Cmi_state_vector + index;
  pthread_setspecific(Cmi_state_key, state);
#endif

  ConverseRunPE(0);

  /* Only reached once ConverseRunPE() returns. */
  if(CharmLibInterOperate) {
    while(1) {
      if(!_cleanUp) {
        /* Not cleaning up yet: run the interop scheduler, then meet the
           other threads of the node at the barrier and go around again. */
        StartInteropScheduler();
        CmiNodeAllBarrier();
      } else {
        /* Clean-up requested: the thread whose rank equals the node size
           serves communication forever (never reaching the break); every
           other thread runs the scheduler one last time. */
        if (CmiMyRank() == CmiMyNodeSize()) {
          while (1) { CommunicationServerThread(5); }
        } else { 
          CsdScheduler(-1);
        }
        break;
      }
    }
  }

/* Disabled legacy start-up path, kept for reference only. */
#if 0
  if (index<_Cmi_mynodesize) 
	  ConverseRunPE(0); /*Regular worker thread*/
  else 
  { /*Communication thread*/
	  CommunicationServerInit();
	  if (Cmi_charmrun_fd!=-1)
		  while (1) CommunicationServer(5,COM_SERVER_FROM_SMP);
  }
#endif  
  return 0;
}
Пример #9
0
/// Handles an acknowledgement that the table has been received.
///
/// On any PE other than 0 the table is marked ready and the message is sent
/// on to PE 0 with the comlib_ready handler. On PE 0 each message counts one
/// outstanding acknowledgement down; when the count reaches zero the table is
/// marked ready, the counter is re-armed for the second step and the message
/// is broadcast (and freed). Always returns NULL.
void *comlibTableReceivedHandler(void *msg) {
  if (CmiMyPe() != 0) {
    CkpvAccess(conv_com_object).tableReady();
    CmiSetHandler(msg, CkpvAccess(comlib_ready));
    CmiSyncSendAndFree(0, CmiReservedHeaderSize, (char*)msg);
    return NULL;
  }

  CkpvAccess(conv_com_object).acksReceived -= 1;
  if (CkpvAccess(conv_com_object).acksReceived != 0) {
    // More acknowledgements are still expected.
    CmiFree(msg);
    return NULL;
  }

  CkpvAccess(conv_com_object).tableReady();
  // Re-arm the counter for the second step: one ack per remaining PE.
  CkpvAccess(conv_com_object).acksReceived = CmiNumPes() - 1;
  CmiSyncBroadcastAndFree(CmiReservedHeaderSize, (char*)msg);
  return NULL;
}
Пример #10
0
/*
 * Runs on PE 0 for every broadcast reply. Tracks the latest (clock-skew
 * corrected) arrival time over all PEs; once every PE has replied the time
 * is added to the current size's total and either the next iteration, the
 * next message size, or the final report + ack is started.
 */
static void bcast_central(void *msg)
{
  EmptyMsg done;
  ptimemsg report = (ptimemsg)msg;
  double finish;
  int bytes;

  CmiAssert(CmiMyPe() == 0);

  /* Arrival time relative to the broadcast start, adjusted by the sender's
     recorded clock difference. */
  finish = report->time - CpvAccess(starttime) +
           CpvAccess(timediff)[report->srcpe];
  /* The first reply always sets lasttime; later ones only if they are later. */
  if (CpvAccess(currentPe) == 0 || finish > CpvAccess(lasttime)) {
    CpvAccess(lasttime) = finish;
  }
  CmiFree(msg);

  CpvAccess(currentPe)++;
  if (CpvAccess(currentPe) != CmiNumPes()) {
    return;                         /* replies still outstanding */
  }

  sizes[CpvAccess(nextidx)].time += CpvAccess(lasttime);
  CpvAccess(numiter)++;
  if (CpvAccess(numiter) >= sizes[CpvAccess(nextidx)].numiter) {
    /* This size is done; move on to the next table entry. */
    CpvAccess(numiter) = 0;
    CpvAccess(nextidx)++;
    if (sizes[CpvAccess(nextidx)].size == (-1)) {
      /* A size of -1 ends the table: report and acknowledge. */
      print_results("CmiSyncBroadcastAllAndFree");
      CmiSetHandler(&done, CpvAccess(ack_handler));
      CmiSyncSend(0, sizeof(EmptyMsg), &done);
      return;
    }
  }

  /* Launch the next timed broadcast for the current size. */
  bytes = CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size;
  msg = CmiAlloc(bytes);
  CpvAccess(currentPe) = 0;
  CmiSetHandler(msg, CpvAccess(bcast_reply));
  CpvAccess(starttime) = CmiWallTimer();
  CmiSyncBroadcastAllAndFree(bytes, msg);
}
Пример #11
0
/*
 * Runs on each processor when the rank table arrives: re-bases the ranks and
 * nodes pointers onto the arrays that follow the message header, looks up
 * this PE's entries, binds the PE to that core (aborting on failure) and
 * frees the message.
 */
static void cpuAffinityRecvHandler(void *msg)
{
  rankMsg *table = (rankMsg *)msg;
  char *payload = (char *)table + sizeof(rankMsg);
  int core, node;

  /* Layout: rankMsg header, CmiNumPes() ranks, then CmiNumPes() nodes. */
  table->ranks = (int *)payload;
  table->nodes = (int *)(payload + CmiNumPes()*sizeof(int));
  core = table->ranks[CmiMyPe()];
  node = table->nodes[CmiMyPe()];

  if (CmiSetCPUAffinity(core) == -1) {
    CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
    CmiAbort("set cpu affinity abort!\n");
  }
  else {
    DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), core, node));
  }
  CmiFree(table);
}
Пример #12
0
/*
 * Win32 thread start-up: creates the global locks and the TLS key, builds the
 * state vector (one entry per rank plus one extra), spawns the additional
 * threads running call_startfn and binds the calling thread to the first
 * state entry. Any failing system call ends in PerrorExit().
 * argv is not used in this routine.
 */
static void CmiStartThreads(char **argv)
{
  int     i,tocreate;
  DWORD   threadID;
  HANDLE  thr;

  CmiMemLock_lock=CmiCreateLock();
  comm_mutex = CmiCreateLock();
  barrier_mutex = CmiCreateLock();
#ifdef CMK_NO_ASM_AVAILABLE
  cmiMemoryLock = CmiCreateLock();
  if (CmiMyNode()==0) printf("Charm++ warning> fences and atomic operations not available in native assembly\n");
#endif

  Cmi_state_key = TlsAlloc();
  /* 0xFFFFFFFF is TlsAlloc's failure value (TLS_OUT_OF_INDEXES). */
  if(Cmi_state_key == 0xFFFFFFFF) PerrorExit("TlsAlloc main");
  
  /* _Cmi_mynodesize worker entries plus one trailing entry for the
     communication thread. */
  Cmi_state_vector =
    (CmiState)calloc(_Cmi_mynodesize+1, sizeof(struct CmiStateStruct));
  
  for (i=0; i<_Cmi_mynodesize; i++)
    CmiStateInit(i+Cmi_nodestart, i, CmiGetStateN(i));
  /*Create a fake state structure for the comm. thread*/
/*  CmiStateInit(-1,_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize)); */
  CmiStateInit(_Cmi_mynode+CmiNumPes(),_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize));
  
  /* Note the dangling else: when the #if is active and no comm thread is
     wanted, one thread fewer is created; in every other case the assignment
     after #endif applies. */
#if CMK_MULTICORE || CMK_SMP_NO_COMMTHD
  if (!Cmi_commthread)
    tocreate = _Cmi_mynodesize-1;
  else
#endif
  tocreate = _Cmi_mynodesize;
  /* Index 0 belongs to the calling thread (bound below), so creation starts
     at 1; the index is handed to the thread through the pointer argument. */
  for (i=1; i<=tocreate; i++) {
    if((thr = CreateThread(NULL, 0, call_startfn, (LPVOID)i, 0, &threadID)) 
       == NULL) PerrorExit("CreateThread");
    /* The handle is not kept. */
    CloseHandle(thr);
  }
  
  /* Bind the calling thread to the first state entry. */
  if(TlsSetValue(Cmi_state_key, (LPVOID)Cmi_state_vector) == 0) 
    PerrorExit("TlsSetValue");
}
Пример #13
0
/*
 * Allocates and initializes the per-destination message-ordering tables in
 * *info. Every table has one slot per PE (plus one per node when the comm
 * thread also accesses them): sequence numbers, out-of-order buffers and max
 * offsets start at zero, and each window size starts at INIT_WINDOW_SIZE.
 * Aborts if any allocation fails instead of writing through a NULL pointer.
 */
static void initMsgOrderInfo(MsgOrderInfo *info) {
    int i;
    int totalPEs = CmiNumPes();
#if CMK_SMP && CMK_OFFLOAD_BCAST_PROCESS
    /* the comm thread will also access such info */
    totalPEs += CmiNumNodes();
#endif
    /* calloc zero-fills, replacing the former malloc + memset pairs. */
    info->nextMsgSeqNo = calloc(totalPEs, sizeof(int));
    info->expectedMsgSeqNo = calloc(totalPEs, sizeof(int));
    info->oooMsgBuffer = calloc(totalPEs, sizeof(void **));
    info->oooMaxOffset = calloc(totalPEs, sizeof(unsigned char));
    info->CUR_WINDOW_SIZE = malloc(totalPEs*sizeof(unsigned char));

    if (info->nextMsgSeqNo == NULL || info->expectedMsgSeqNo == NULL ||
        info->oooMsgBuffer == NULL || info->oooMaxOffset == NULL ||
        info->CUR_WINDOW_SIZE == NULL) {
        CmiAbort("initMsgOrderInfo: out of memory\n");
    }

    for (i=0; i<totalPEs; i++) info->CUR_WINDOW_SIZE[i] = INIT_WINDOW_SIZE;
}
Пример #14
0
/*
 * Runs on PE 0 for each clock-sync reply. The clock difference of the PE
 * being probed is estimated as the midpoint of the round trip minus the
 * remote timestamp. Then either the next PE is probed or, after the last
 * one, the first timed broadcast is started.
 */
static void sync_starter(void *msg)
{
  ptimemsg reply = (ptimemsg)msg;
  double midpoint = (CmiWallTimer() + CpvAccess(lasttime))/2;

  CpvAccess(timediff)[CpvAccess(currentPe)] = midpoint - reply->time;
  CmiFree(msg);

  CpvAccess(currentPe) += 1;
  if (CpvAccess(currentPe) >= CmiNumPes()) {
    /* All PEs synchronized: kick off the broadcast benchmark. */
    int bytes = CmiMsgHeaderSizeBytes+sizes[0].size;

    msg = CmiAlloc(bytes);
    CmiSetHandler(msg, CpvAccess(bcast_reply));
    CpvAccess(currentPe) = 0;
    CpvAccess(starttime) = CmiWallTimer();
    CmiSyncBroadcastAllAndFree(bytes, msg);
  } else {
    /* Probe the next PE; lasttime marks the start of its round trip. */
    EmptyMsg probe;

    CmiSetHandler(&probe, CpvAccess(sync_reply));
    CpvAccess(lasttime) = CmiWallTimer();
    CmiSyncSend(CpvAccess(currentPe), sizeof(EmptyMsg), &probe);
  }
}
Пример #15
0
void CldHopHandler(char *msg)
{
  peinfo *pinf = &(CpvAccess(peinf));
  int len, queueing, priobits; unsigned int *prioptr;
  CldInfoFn ifn; CldPackFn pfn; int pe;

  if (pinf->rebalance) {
    /* do pe = ((lrand48()&0x7FFFFFFF)%CmiNumPes()); */
    do pe = ((CrnRand()&0x7FFFFFFF)%CmiNumPes());
    while (pe == pinf->mype);
    ifn = (CldInfoFn)CmiHandlerToFunction(CmiGetInfo(msg));
    ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr);
    if (pfn && CmiNodeOf(pe) != CmiMyNode()) {
      pfn(&msg);
      ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr);
    }
    CmiSyncSendAndFree(pe, len, msg);
    pinf->rebalance--;
  } else {
    CmiSetHandler(msg, CmiGetXHandler(msg));
    CmiHandleMessage(msg);
  }
}
Пример #16
0
/// Flushes the buffered messages of every PE, in ascending PE order.
void StreamingStrategy::periodicFlush() {
    int pe = 0;
    while (pe < CmiNumPes()) {
        flushPE(pe);
        ++pe;
    }
}
Пример #17
0
void CmiInitCPUAffinity(char **argv)
{
  static skt_ip_t myip;
  int ret, i, exclude;
  hostnameMsg  *msg;
  char *pemap = NULL;
  char *commap = NULL;
  char *pemapfile = NULL;
 
  int show_affinity_flag;
  int affinity_flag = CmiGetArgFlagDesc(argv,"+setcpuaffinity",
						"set cpu affinity");

  while (CmiGetArgIntDesc(argv,"+excludecore", &exclude, "avoid core when setting cpuaffinity"))  {
    if (CmiMyRank() == 0) add_exclude(exclude);
    affinity_flag = 1;
  }

  if (CmiGetArgStringDesc(argv, "+pemapfile", &pemapfile, "define pe to core mapping file")) {
    FILE *fp;
    char buf[128];
    pemap = (char*)malloc(1024);
    fp = fopen(pemapfile, "r");
    if (fp == NULL) CmiAbort("pemapfile does not exist");
    while (!feof(fp)) {
      if (fgets(buf, 128, fp)) {
        if (buf[strlen(buf)-1] == '\n') buf[strlen(buf)-1] = 0;
        strcat(pemap, buf);
      }
    }
    fclose(fp);
    if (CmiMyPe()==0) CmiPrintf("Charm++> read from pemap file '%s': %s\n", pemapfile, pemap);
  }

  CmiGetArgStringDesc(argv, "+pemap", &pemap, "define pe to core mapping");
  if (pemap!=NULL && excludecount>0)
    CmiAbort("Charm++> +pemap can not be used with +excludecore.\n");

  CmiGetArgStringDesc(argv, "+commap", &commap, "define comm threads to core mapping");

  if (pemap!=NULL || commap!=NULL) affinity_flag = 1;

  show_affinity_flag = CmiGetArgFlagDesc(argv,"+showcpuaffinity",
						"print cpu affinity");

  cpuAffinityHandlerIdx =
       CmiRegisterHandler((CmiHandler)cpuAffinityHandler);
  cpuAffinityRecvHandlerIdx =
       CmiRegisterHandler((CmiHandler)cpuAffinityRecvHandler);

  if (CmiMyRank() ==0) {
     affLock = CmiCreateLock();
  }

#if CMK_BLUEGENEP || CMK_BLUEGENEQ
  if(affinity_flag){
      affinity_flag = 0;
      if(CmiMyPe()==0) CmiPrintf("Charm++> cpu affinity setting is not needed on Blue Gene, thus ignored.\n");
  }
  if(show_affinity_flag){
      show_affinity_flag = 0;
      if(CmiMyPe()==0) CmiPrintf("Charm++> printing cpu affinity is not supported on Blue Gene.\n");
  }
#endif

  if (!affinity_flag) {
    if (show_affinity_flag) CmiPrintCPUAffinity();
    return;
  }

  if (CmiMyPe() == 0) {
     CmiPrintf("Charm++> cpu affinity enabled. \n");
     if (excludecount > 0) {
       CmiPrintf("Charm++> cpuaffinity excludes core: %d", excludecore[0]);
       for (i=1; i<excludecount; i++) CmiPrintf(" %d", excludecore[i]);
       CmiPrintf(".\n");
     }
     if (pemap!=NULL)
       CmiPrintf("Charm++> cpuaffinity PE-core map : %s\n", pemap);
  }

  if (CmiMyPe() >= CmiNumPes()) {         /* this is comm thread */
      /* comm thread either can float around, or pin down to the last rank.
         however it seems to be reportedly slower if it is floating */
    CmiNodeAllBarrier();
    if (commap != NULL) {
      int mycore = search_pemap(commap, CmiMyPeGlobal()-CmiNumPesGlobal());
      if(CmiMyPe()-CmiNumPes()==0) printf("Charm++> set comm %d on node %d to core #%d\n", CmiMyPe()-CmiNumPes(), CmiMyNode(), mycore); 
      if (-1 == CmiSetCPUAffinity(mycore))
        CmiAbort("set_cpu_affinity abort!");
      CmiNodeAllBarrier();
      if (show_affinity_flag) CmiPrintCPUAffinity();
      return;    /* comm thread return */
    }
    else {
    /* if (CmiSetCPUAffinity(CmiNumCores()-1) == -1) CmiAbort("set_cpu_affinity abort!"); */
#if !CMK_CRAYXT && !CMK_CRAYXE && !CMK_CRAYXC && !CMK_BLUEGENEQ
      if (pemap == NULL) {
#if CMK_MACHINE_PROGRESS_DEFINED
        while (affinity_doneflag < CmiMyNodeSize())  CmiNetworkProgress();
#else
#if CMK_SMP
       #error "Machine progress call needs to be implemented for cpu affinity!"
#endif
#endif
      }
#endif
#if CMK_CRAYXT || CMK_CRAYXE || CMK_CRAYXC
      /* if both pemap and commmap are NULL, will compute one */
      if (pemap != NULL)      
#endif
      {
      CmiNodeAllBarrier();
      if (show_affinity_flag) CmiPrintCPUAffinity();
      return;    /* comm thread return */
      }
    }
  }

  if (pemap != NULL && CmiMyPe()<CmiNumPes()) {    /* work thread */
    int mycore = search_pemap(pemap, CmiMyPeGlobal());
    if(show_affinity_flag) CmiPrintf("Charm++> set PE %d on node %d to core #%d\n", CmiMyPe(), CmiMyNode(), mycore); 
    if (mycore >= CmiNumCores()) {
      CmiPrintf("Error> Invalid core number %d, only have %d cores (0-%d) on the node. \n", mycore, CmiNumCores(), CmiNumCores()-1);
      CmiAbort("Invalid core number");
    }
    if (CmiSetCPUAffinity(mycore) == -1) CmiAbort("set_cpu_affinity abort!");
    CmiNodeAllBarrier();
    CmiNodeAllBarrier();
    /* if (show_affinity_flag) CmiPrintCPUAffinity(); */
    return;
  }

#if CMK_CRAYXT || CMK_CRAYXE || CMK_CRAYXC
  {
    int numCores = CmiNumCores();

    int myid = getXTNodeID(CmiMyNodeGlobal(), CmiNumNodesGlobal());
    int myrank;
    int pe, mype = CmiMyPeGlobal();
    int node = CmiMyNodeGlobal();
    int nnodes = 0;
#if CMK_SMP
    if (CmiMyPe() >= CmiNumPes()) {         /* this is comm thread */
      int node = CmiMyPe() - CmiNumPes();
      mype = CmiGetPeGlobal(CmiNodeFirst(node) + CmiMyNodeSize() - 1, CmiMyPartition()); /* last pe on SMP node */
      node = CmiGetNodeGlobal(node, CmiMyPartition());
    }
#endif
    pe = mype - 1;
    while (pe >= 0) {
      int n = CmiNodeOf(pe);
      if (n != node) { nnodes++; node = n; }
      if (getXTNodeID(n, CmiNumNodesGlobal()) != myid) break;
      pe --;
    }
    CmiAssert(numCores > 0);
    myrank = (mype - pe - 1 + nnodes)%numCores;
#if CMK_SMP
    if (CmiMyPe() >= CmiNumPes()) 
        myrank = (myrank + 1)%numCores;
#endif

    if (-1 != CmiSetCPUAffinity(myrank)) {
      DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank, mynode));
    }
    else{
      CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
      CmiAbort("set cpu affinity abort!\n");
    }
  }
  if (CmiMyPe() < CmiNumPes()) 
  CmiNodeAllBarrier();
  CmiNodeAllBarrier();
#else
    /* get my ip address */
  if (CmiMyRank() == 0)
  {
#if CMK_HAS_GETHOSTNAME
    myip = skt_my_ip();        /* not thread safe, so only calls on rank 0 */
#else
    CmiAbort("Can not get unique name for the compute nodes. \n");
#endif
  }
  CmiNodeAllBarrier();

    /* prepare a msg to send */
  msg = (hostnameMsg *)CmiAlloc(sizeof(hostnameMsg));
  CmiSetHandler((char *)msg, cpuAffinityHandlerIdx);
  msg->pe = CmiMyPe();
  msg->ip = myip;
  msg->ncores = CmiNumCores();
  DEBUGP(("PE %d's node has %d number of cores. \n", CmiMyPe(), msg->ncores));
  msg->rank = 0;
  CmiSyncSendAndFree(0, sizeof(hostnameMsg), (void *)msg);

  if (CmiMyPe() == 0) {
    int i;
    hostTable = CmmNew();
    rankmsg = (rankMsg *)CmiAlloc(sizeof(rankMsg)+CmiNumPes()*sizeof(int)*2);
    CmiSetHandler((char *)rankmsg, cpuAffinityRecvHandlerIdx);
    rankmsg->ranks = (int *)((char*)rankmsg + sizeof(rankMsg));
    rankmsg->nodes = (int *)((char*)rankmsg + sizeof(rankMsg) + CmiNumPes()*sizeof(int));
    for (i=0; i<CmiNumPes(); i++) {
      rankmsg->ranks[i] = 0;
      rankmsg->nodes[i] = -1;
    }

    for (i=0; i<CmiNumPes(); i++) CmiDeliverSpecificMsg(cpuAffinityHandlerIdx);
  }

    /* receive broadcast from PE 0 */
  CmiDeliverSpecificMsg(cpuAffinityRecvHandlerIdx);
  CmiLock(affLock);
  affinity_doneflag++;
  CmiUnlock(affLock);
  CmiNodeAllBarrier();
#endif

  if (show_affinity_flag) CmiPrintCPUAffinity();
}
Пример #18
0
//! process command line arguments!
void TraceCounter::traceInit(char **argv)
{
  CpvInitialize(CountLogPool*, _logPool);
  CpvInitialize(char*, _logName);
  CpvInitialize(double, version);
  CpvInitialize(char**, _counterNames);
  CpvInitialize(char**, _counterDesc);
  CpvInitialize(int,    _numCounters);
  CpvInitialize(int, _reductionID);

  CpvAccess(_logName) = (char *) malloc(strlen(argv[0])+1);
  _MEMCHECK(CpvAccess(_logName));
  strcpy(CpvAccess(_logName), argv[0]);
  CpvAccess(version) = VER;

  int i;
  // parse command line args
  char* counters = NULL;
  commandLine_ = NULL;
  bool badArg = false;
  int numCounters = 0;
  if (CmiGetArgStringDesc(argv, "+counters", &counters, "Measure these performance counters")) {
    if (CmiMyPe()==0) { CmiPrintf("Counters: %s\n", counters); }
    int offset = 0;
    int limit = strlen(counters);
    char* ptr = counters;
    while (offset < limit && 
	   (ptr = strtok(&counters[offset], ",")) != NULL) 
    { 
      offset += strlen(ptr)+1;
      ptr = &ptr[strlen(ptr)+1];
      numCounters++; 
    }
    if (CmiMyPe()==0) { 
      CmiPrintf("There are %d counters\n", numCounters); 
    }
    commandLine_ = new CounterArg[numCounters];
    ptr = counters;
    for (i=0; i<numCounters; i++) {
      commandLine_[i].arg = ptr;
      if (!matchArg(&commandLine_[i])) { 
	if (CmiMyPe()==0) { CmiPrintf("Bad arg: [%s]\n", ptr); }
	badArg = true; 
      }
      ptr = &ptr[strlen(ptr)+1];
    }
  }
  commandLineSz_ = numCounters;

  // check to see if args are valid, output if not
  if (badArg || CmiGetArgFlagDesc(argv, "+count-help", "List available performance counters")) {
    if (CmiMyPe() == 0) { printHelp(); }
    ConverseExit();  return;
  }
  else if (counters == NULL) {
    if (CmiMyPe() == 0) { usage(); }
    ConverseExit();  return;
  }

  // get optional command line args
  overview_      = CmiGetArgFlag(argv, "+count-overview");  
  switchRandom_  = CmiGetArgFlag(argv, "+count-switchrandom");  
  switchByPhase_ = CmiGetArgFlag(argv, "+count-switchbyphase");
  noLog_         = CmiGetArgFlag(argv, "+count-nolog");
  writeByPhase_  = CmiGetArgFlag(argv, "+count-writebyphase");
  char* logName  = NULL;
  if (CmiGetArgString(argv, "+count-logname", &logName)) {
    CpvAccess(_logName) = logName;
    if (noLog_) {
      if (CkMyPe()==0) {
	CmiPrintf("+count-logname and +count-nolog are MUTUALLY EXCLUSIVE\n");
	usage();
	CmiAbort("");
      }
    }
  }
  if (switchByPhase_ && overview_) {
    if (CkMyPe()==0) {
      CmiPrintf(
	"+count-switchbyphase and +count-overview are MUTUALLY EXCLUSIVE\n"
	"+count-overview automatically switches by phase.\n");
      usage();
      CmiAbort("");
    }
  }
  if (writeByPhase_ && noLog_) {
    if (CkMyPe()==0) {
      CmiPrintf("+count-writebyphase and +count-nolog are MUTUALLY EXCLUSIVE\n");
      usage();
      CmiAbort("");
    }
  }

  // parse through commandLine_, figure out which belongs on which list (1 vs 2)
  CounterArg* last1 = NULL;
  CounterArg* last2 = NULL;
  CounterArg* tmp = NULL;
  counter1Sz_ = counter2Sz_ = 0;
  for (i=0; i<commandLineSz_; i++) {
    tmp = &commandLine_[i];
    if (tmp->code < NUM_COUNTER_ARGS/2) {
      if (counter1_ == NULL) { counter1_ = tmp;  last1 = counter1_; }
      else { last1->next = tmp;  last1 = tmp; }
      counter1Sz_++;
    }
    else {
      if (counter2_ == NULL) { counter2_ = tmp;  last2 = counter2_; }
      else { last2->next = tmp;  last2 = tmp; }
      counter2Sz_++;
    }
  }
  if (counter1_ == NULL) {
    printHelp();
    if (CmiMyPe()==0) {
      CmiPrintf("\nMust specify some counters with code < %d\n", 
		NUM_COUNTER_ARGS/2);
    }
    ConverseExit();
  }
  if (counter2_ == NULL) {
    printHelp();
    if (CmiMyPe()==0) {
      CmiPrintf("\nMust specify some counters with code >= %d\n", 
		NUM_COUNTER_ARGS/2);
    }
    ConverseExit();
  }
  last1->next = counter1_;
  last2->next = counter2_;

  // all args valid, now set up logging
  if (CmiMyPe() == 0) {
    CmiPrintf("Running with tracemode=counter and args:\n");
    // print out counter1 set
    tmp = counter1_;
    i = 0;
    do {
      CmiPrintf("  <counter1-%d>=%d %s %s\n", i, tmp->code, tmp->arg, tmp->desc);
      tmp = tmp->next;
      i++;
    } while (tmp != counter1_);
    // print out counter2 set
    tmp = counter2_;
    i = 0;
    do {
      CmiPrintf("  <counter2-%d>=%d %s %s\n", i, tmp->code, tmp->arg, tmp->desc);
      tmp = tmp->next;
      i++;
    } while (tmp != counter2_);

    CmiPrintf(
      "+count-overview %d\n+count-switchrandom %d\n"
      "+count-switchbyphase %d\n+count-nolog %d\n"
      "+count-logname %s\n+count-writebyphase %d\n",
      overview_, switchRandom_, switchByPhase_, noLog_, 
      logName, writeByPhase_);
  }

  // DEBUGF(("    DEBUG: Counter1=%d Counter2=%d\n", counter1_, counter2_));
  CpvAccess(_logPool) = new CountLogPool();

  // allocate names so can do reduction/analysis on the fly
  char** counterNames = new char*[counter1Sz_+counter2Sz_];
  char** counterDesc = new char*[counter1Sz_+counter2Sz_];
  tmp = counter1_;
  for (i=0; i<counter1Sz_; i++) {
    tmp->index = i;
    counterNames[i] = tmp->arg; 
    counterDesc[i] = tmp->desc;
    tmp = tmp->next;
  }
  tmp = counter2_;
  for (i=0; i<counter2Sz_; i++) {
    tmp->index = counter1Sz_+i;
    counterNames[counter1Sz_+i] = tmp->arg; 
    counterDesc[counter1Sz_+i] = tmp->desc;
    tmp = tmp->next;
  }
  CpvAccess(_counterNames) = counterNames;
  CpvAccess(_counterDesc) = counterDesc;
  CpvAccess(_numCounters) = numCounters;
  // don't erase counterNames or counterDesc, 
  // the reduction client will do it on the final reduction

  _MEMCHECK(CpvAccess(_logPool));
  CpvAccess(_logPool)->init(numCounters);
  DEBUGF(("%d/%d DEBUG: Created _logPool at %08x\n", 
          CmiMyPe(), CmiNumPes(), CpvAccess(_logPool)));
}
Пример #19
0
/*
 * Sets the memory (NUMA) affinity of the calling thread.
 *
 * Options read from argv: +maffinity (enables the feature), +mempol
 * {bind, preferred, interleave} (mandatory once enabled) and the optional
 * +memnodemap, a comma-separated list of NUMA node ids. With a node map the
 * thread's node is nodemap[core % size]; without one it is derived from the
 * core the thread was pinned to. For the interleave policy all listed (or
 * all existing) nodes are used. Every path that goes past the +maffinity
 * check ends with a node barrier.
 */
void CmiInitMemAffinity(char **argv) {

    int i;
    int policy=-1;
    int retval = -1;
    /*step1: parsing args maffinity, mempol and nodemap (nodemap is optional)*/
    int maffinity_flag = CmiGetArgFlagDesc(argv, "+maffinity", "memory affinity");
    /*the node here refers to the nodes that are seen by libnuma on a phy node*/
    /*nodemap is a string of ints separated by ","*/
    char *nodemap = NULL;

    char *mpol = NULL;
    CmiGetArgStringDesc(argv, "+memnodemap", &nodemap, "define memory node mapping");
    CmiGetArgStringDesc(argv, "+mempol", &mpol, "define memory policy {bind, preferred or interleave} ");


    if (!maffinity_flag) return;

    /*Currently skip the communication thread*/
    /**
      * Note: the cpu affinity of comm thread may not be set
      * if "commap" is not specified. This is why the following
      * code regarding the comm thd needs to be put before
      * the codes that checks whether cpu affinity is set
      * or not
      */
    if (CmiMyPe() >= CmiNumPes()) {
        CmiNodeAllBarrier();
        return;
    }

    /*step2: checking whether the required cpu affinity has been set*/
    if (CpvInitialized(myCPUAffToCore) && CpvAccess(myCPUAffToCore)==-1) {
        if (CmiMyPe()==0)
            CmiPrintf("Charm++> memory affinity disabled because cpu affinity is not enabled!\n");
        CmiNodeAllBarrier();
        return;
    }

    if (CmiMyPe()==0) {
        CmiPrintf("Charm++> memory affinity enabled! \n");
    }

    /*Select memory policy*/
    if (mpol==NULL) {
        CmiAbort("Memory policy must be specified!\n");
    }
    if (strcmp(mpol, "interleave")==0) policy = MPOL_INTERLEAVE;
    else if (strcmp(mpol, "preferred")==0) policy = MPOL_PREFERRED;
    else if (strcmp(mpol, "bind")==0) policy = MPOL_BIND;
    else {
        CmiPrintf("Error> Invalid memory policy :%s\n", mpol);
        CmiAbort("Invalid memory policy!");
    }

    /**
     * step3: check whether nodemap is NULL or not
     * step 3a): nodemap is not NULL
     * step 3b): nodemap is NULL, set memory policy according to the result
     * of cpu affinity settings.
     */
    if (nodemap!=NULL) {
        int *nodemapArr = NULL;
        int nodemapArrSize = 1;
        int prevIntStart = 0, j = 0;
        int maplen = (int)strlen(nodemap);   /* measured once, not per iteration */
        int numaNodes = CmiNumNUMANodes();
        int myPhyRank, myMemNid;

        /*one entry per comma, plus the one after the last comma*/
        for (i=0; i<maplen; i++) {
            if (nodemap[i]==',') nodemapArrSize++;
        }
        nodemapArr = (int *)malloc(nodemapArrSize*sizeof(int));
        if (nodemapArr == NULL) {
            CmiAbort("Out of memory while parsing memory node map!");
        }

        /*the terminating NUL is treated like a comma so that the last
          entry goes through the same validation as the others*/
        for (i=0; i<=maplen; i++) {
            int curnid;
            if (nodemap[i]!=',' && nodemap[i]!='\0') continue;
            curnid = atoi(nodemap+prevIntStart);
            /*negative ids are rejected too: valid ids are 0..numaNodes-1*/
            if (curnid < 0 || curnid >= numaNodes) {
                CmiPrintf("Error> Invalid node number %d, only have %d nodes (0-%d) on the machine. \n", curnid, numaNodes, numaNodes-1);
                CmiAbort("Invalid node number!");
            }
            nodemapArr[j++] = curnid;
            prevIntStart=i+1;
        }

        myPhyRank = CpvAccess(myCPUAffToCore);
        myMemNid = nodemapArr[myPhyRank%nodemapArrSize];
        if (policy==MPOL_INTERLEAVE) {
            retval = CmiSetMemAffinity(policy, nodemapArr, nodemapArrSize);
        } else {
            retval = CmiSetMemAffinity(policy, &myMemNid, 1);
        }
        /*released right after the call, as the no-nodemap branch does
          with its own array*/
        free(nodemapArr);
        if (retval<0) {
            CmiAbort("set_mempolicy error w/ mem nodemap");
        }
    } else {
        /*use the affinity map set by the cpu affinity*/
        int myPhyRank = CpvAccess(myCPUAffToCore);
        /*get the NUMA node id from myPhyRank (a core id)*/
        int myMemNid = getNUMANidByRank(myPhyRank);

        if (policy==MPOL_INTERLEAVE) {
            int totalNUMANodes = CmiNumNUMANodes();
            int *nids = (int *)malloc(totalNUMANodes*sizeof(int));
            if (nids == NULL) {
                CmiAbort("Out of memory while building NUMA node list!");
            }
            for (i=0; i<totalNUMANodes; i++) nids[i] = i;
            retval = CmiSetMemAffinity(policy, nids, totalNUMANodes);
            free(nids);
        } else {
            retval = CmiSetMemAffinity(policy, &myMemNid, 1);
        }
        if (retval<0) {
            CmiAbort("set_mempolicy error w/o mem nodemap");
        }
    }

    /*print_mem_affinity();*/
    CmiNodeAllBarrier();
}
Пример #20
0
//! Do a reduction across processors to calculate the total count for
//! each counter; when the flop/load/store/miss counters are present, also
//! calculate the derived flops/s and miss-rate values.
//!
//! Builds one message whose payload (placed after the ALIGN8-rounded
//! Converse header) is an array of 2*numStats_+NUM_EXTRA_PERF doubles:
//!   [2*i]              sum over entries of numCalled*avgCount for counter i
//!   [2*i+1]            sum over entries of totTime for counter i
//!   [2*numStats_]      phase (written by PE 0 only, 0.0 on other PEs)
//!   [2*numStats_+1]    idleTime
//!   [2*numStats_+2]    flops rate, or -1.0 if not available
//!   [2*numStats_+3..5] l1 / l2 / tlb rate divided by the load+store rate,
//!                      or -1.0 if not available
//! The message is then sent to PE 0 with the _reductionID handler set.
//!
//! \param phase     phase number stored in the message (by PE 0 only)
//! \param idleTime  idle time stored in the message
void StatTable::doReduction(int phase, double idleTime) {
  DEBUGF(("%d/%d DEBUG: StatTable::doReduction()\n",
	  CmiMyPe(), CmiNumPes()));
  // see above (NUM_EXTRA_PERF) for the fields in the message
  int msgSize = 
    ALIGN8(CmiMsgHeaderSizeBytes)+
    sizeof(double)*(2*numStats_+NUM_EXTRA_PERF);
  char *msg = (char *)CmiAlloc(msgSize);
  double* reduction = (double*)(msg+ALIGN8(CmiMsgHeaderSizeBytes));
  // indices of the counters needed for the derived rates (-1 == not found)
  // and the rates themselves (-1.0 == not computed)
  char** counterNames = CpvAccess(_counterNames);
  int GR_FLOPS = -1;  double flopsRate = -1.0;  
  int LOAD     = -1;  double loadRate = -1.0;
  int STORE    = -1;  double storeRate = -1.0;
  int L1_DMISS = -1;  double l1Rate = -1.0;
  int L2_DMISS = -1;  double l2Rate = -1.0;
  int TLB_MISS = -1;  double tlbRate = -1.0;
  int i;
  for (i=0; i<2*numStats_+NUM_EXTRA_PERF; i++) { reduction[i] = 0.0; }
  // accumulate the per-counter totals and remember where the special
  // counters live
  for (i=0; i<numStats_; i++) {
    for (int j=0; j<MAX_ENTRIES; j++) { 
      reduction[2*i] += stats_[i].numCalled[j]*stats_[i].avgCount[j]; 
      reduction[2*i+1] += stats_[i].totTime[j];
    }
    if (strcmp(counterNames[i], "GR_FLOPS")==0) { GR_FLOPS = i; }
    else if (strcmp(counterNames[i], "LOAD")==0) { LOAD = i; }
    else if (strcmp(counterNames[i], "STORE")==0) { STORE = i; }
    else if (strcmp(counterNames[i], "L1_DMISS")==0) { L1_DMISS = i; }
    else if (strcmp(counterNames[i], "L2_DMISS")==0) { L2_DMISS = i; }
    else if (strcmp(counterNames[i], "TLB_MISS")==0) { TLB_MISS = i; }
  }
  if (CmiMyPe()==0) { reduction[2*numStats_] = phase; }
  reduction[2*numStats_+1] = idleTime;  
  // -1 for the rest of the calc values
  reduction[2*numStats_+2] = -1.0;
  reduction[2*numStats_+3] = -1.0;
  reduction[2*numStats_+4] = -1.0;
  reduction[2*numStats_+5] = -1.0;
  // calculate the count/time rate of each special counter if it's there
  for (i=0; i<6; i++) {
    double* rate = NULL;
    int index = -1;
    switch (i) {
    case 0: rate = &flopsRate; index = GR_FLOPS;   break;
    case 1: rate = &loadRate;  index = LOAD;       break;
    case 2: rate = &storeRate; index = STORE;      break;
    case 3: rate = &l1Rate;    index = L1_DMISS;   break;
    case 4: rate = &l2Rate;    index = L2_DMISS;   break; 
    case 5: rate = &tlbRate;   index = TLB_MISS;   break;
    default: break;  // unreachable; keeps index at -1 so nothing is written
    }
    if (index >= 0 && reduction[2*index+1] > 0.0) { 
      // if we have the counter AND its time was non-zero
      *rate = reduction[2*index]/reduction[2*index+1]; 
    }
  }
  // store rates if there
  if (GR_FLOPS >= 0) { reduction[2*numStats_+2] = flopsRate; }
  if (LOAD >= 0 && STORE >= 0) {
    double memRate = loadRate + storeRate;
    // miss rates are only meaningful relative to a positive memory rate
    if (L1_DMISS >= 0 && memRate > 0) { 
      reduction[2*numStats_+3] = l1Rate / memRate; 
    }
    if (L2_DMISS >= 0 && memRate > 0) { 
      reduction[2*numStats_+4] = l2Rate / memRate; 
    }
    if (TLB_MISS >= 0 && memRate > 0) { 
      reduction[2*numStats_+5] = tlbRate / memRate; 
    }
  }

  // send the data to PE 0; msg is handed over to CmiSyncSendAndFree and
  // must not be touched afterwards
  CmiSetHandler(msg, (int)CpvAccess(_reductionID));
  int handlerID = CmiGetHandler(msg);
  DEBUGF(("%d/%d handlerID %d reductionID %d\n", 
	  CmiMyPe(), CmiNumPes(), handlerID, CpvAccess(_reductionID)));
  CmiSyncSendAndFree(0, msgSize, msg);
}
Пример #21
0
// A rudimentary reduction to print out the performance results across the
// run.
//
// Each call adds the doubles carried by one message (located after the
// ALIGN8-rounded Converse header) into a function-local static accumulator
// of 2*_numCounters+NUM_EXTRA_PERF entries.  Once CmiNumPes() messages have
// been accumulated the totals and per-PE averages are printed with
// CmiPrintf and the accumulator is released so the next reduction starts
// from scratch.  The message is always freed with CmiFree before returning.
//
// msg: Converse message to accumulate; freed by this function.
// Returns 0; the function is declared to return CmiHandler, so a value has
// to be returned (flowing off the end of a non-void function is undefined
// behavior in C++).
// NOTE(review): the accumulator is plain static state with no locking in
// this function -- presumably it only ever runs on one PE; confirm.
CmiHandler StatTableReduction(char* msg)
{
  DEBUGF(("StatTableReduction called\n"));
  static double* reduce = NULL;
  static int numReduce = 0;
  int numCounters = CpvAccess(_numCounters);
  int size = 2*CpvAccess(_numCounters)+NUM_EXTRA_PERF;
  int i;
  if (reduce == NULL) {
    // first message of a reduction: allocate and zero the accumulator
    reduce = new double[size];
    for (i=0; i<size; i++) { reduce[i] = 0.0; }
    DEBUGF(("  allocated reduce numCounters %d size %d\n", 
	    numCounters, size));
  }

  // see above for the fields of this message
  double* msgResults = (double *)(msg+ALIGN8(CmiMsgHeaderSizeBytes));
  for (i=0; i<size; i++) { 
    reduce[i] += msgResults[i]; 
  }
  
  char** counterNames = CpvAccess(_counterNames);
  numReduce++;
  DEBUGF(("      numReduce %d numPes %d\n", numReduce, CmiNumPes()));
  // summed phase field; negative selects the output without a PHASE prefix
  int phase = reduce[2*numCounters];
  if (numReduce >= CmiNumPes()) {
    // finished with reduction, print out results
    numReduce = 0;
    for (i=0; i<numCounters; i++) {
      if (reduce[2*i+1]>0.0) { // is time > 0?
	if (phase >= 0) {
	  CmiPrintf("PHASE %d %s totalCount %f totalTime (us) %f\n" 
		    "PHASE %d %s count/proc %f avgTime (us)/phase %f\n",
		    phase, counterNames[i], reduce[2*i], reduce[2*i+1]*1e6,
		    phase, counterNames[i], reduce[2*i]/CmiNumPes(), 
		    reduce[2*i+1]*1e6/CmiNumPes());
	}
	else {
	  CmiPrintf("%s totalCount %f totalTime (us) %f\n" 
		    "%s count/proc %f avgTime (us)/phase %f\n",
		    counterNames[i], reduce[2*i], reduce[2*i+1]*1e6,
		    counterNames[i], reduce[2*i]/CmiNumPes(), 
		    reduce[2*i+1]*1e6/CmiNumPes());
	}
      }
    }
    if (phase >= 0) {
      CmiPrintf("PHASE %d totalIdleTime (us) %f avgIdleTime (us)/phase %f\n",
		phase, reduce[2*numCounters+1]*1e6, 
		reduce[2*numCounters+1]*1e6/CmiNumPes());
    }
    else {
      CmiPrintf("totalIdleTime (us) %f avgIdleTime (us)/phase %f\n",
		reduce[2*numCounters+1]*1e6, 
		reduce[2*numCounters+1]*1e6/CmiNumPes());
    }
    if (reduce[2*numCounters+2] > 0.0) {
      // we have flops
      if (phase >= 0) {
	CmiPrintf("PHASE %d flops/s %f flops/s/PE %f\n",
		  phase, reduce[2*numCounters+2], 
		  reduce[2*numCounters+2]/CmiNumPes());
      }
      else {
	CmiPrintf("flops/s %f flops/s/PE %f\n",
		  reduce[2*numCounters+2], 
		  reduce[2*numCounters+2]/CmiNumPes());
      }
    }
    // the three miss-rate slots follow the flops slot; print each one whose
    // sum is non-negative
    const char* missRate = NULL;
    for (i=0; i<3; i++) {
      switch (i) {
      case 0: missRate = "l1 avg miss rate (%)";  break;
      case 1: missRate = "l2 avg miss rate (%)";  break;
      case 2: missRate = "tlb avg miss rate (%)";  break;
      }
      if (reduce[2*numCounters+3+i] >= 0.0) {
	if (phase >= 0) {
	  CmiPrintf("PHASE %d %s %f\n", 
		    phase, missRate, reduce[2*numCounters+3+i]/CmiNumPes()*100);
	}
	else {
	  CmiPrintf("%s %f\n", 
		    missRate, reduce[2*numCounters+3+i]/CmiNumPes()*100);
	}
      }
    }

    // clean up so that the next reduction reallocates a zeroed accumulator
    delete [] reduce;
    reduce = NULL;
  }
  CmiFree(msg);
  return 0;
}
Пример #22
0
	// Divides v by the quotient N / CmiNumPes() and returns the result as a
	// CmiUInt4.
	// NOTE(review): presumably N is the global element count, so the result
	// is the PE owning element v -- N is declared outside this view, confirm.
	static CmiUInt4 base(CmiUInt8 v) { return v / (N / CmiNumPes()); }
Пример #23
0
  // Constructor (presumably of the Charm++ main chare -- the class header is
  // outside this view): prints the NAMD startup banner followed by build,
  // runtime and memory information through iout.
  // The CkArgMsg argument is unnamed and is not used in this body.
  main(CkArgMsg *)
  {

    // Print the banner as one chained iout expression; the preprocessor
    // blocks below only select which lines take part in that expression.
    iout << iINFO << "NAMD " << NAMD_VERSION << " for " << NAMD_PLATFORM
         << "\n"
#ifdef MEM_OPT_VERSION
         << iWARN << "\n"
         << iWARN << "       ***  EXPERIMENTAL MEMORY OPTIMIZED VERSION  ***\n"
         << iWARN << "\n"
#endif
#if 0
         << iWARN << "\n"
         << iWARN << "          ***  UNRELEASED EXPERIMENTAL VERSION  ***\n"
         << iWARN << "\n"
#endif
#ifdef SPEC_DISABLED_VERSION

         << iINFO << "\n"
         << iINFO << "NAMD is a parallel, object-oriented molecular dynamics\n"
         << iINFO << "code designed for high-performance simulation of large\n"
         << iINFO << "biomolecular systems.  NAMD is distributed free of\n"
         << iINFO << "charge and includes source code.  For more information\n" 
         << iINFO << "please visit http://www.ks.uiuc.edu/Research/namd/\n"
         << iINFO << "\n"
         << iINFO << "*********************************************************\n"
         << iINFO << "This version of NAMD may be distributed only as a part of\n"
         << iINFO << "the SPEC Workstation Benchmark and all other distribution\n"
         << iINFO << "is prohibited.  Any use of this software is bound by\n"
         << iINFO << "the terms of the NAMD License, which is available at\n"
         << iINFO << "http://www.ks.uiuc.edu/Research/namd/license.html\n"
         << iINFO << "The NAMD development team will not provide support for\n"
         << iINFO << "any version of NAMD unless you have first registered\n"
         << iINFO << "and downloaded the latest version of NAMD available at\n"
         << iINFO << "http://www.ks.uiuc.edu/Research/namd/\n"
         << iINFO << "*********************************************************\n"
#else
         << iINFO << "\n"
         << iINFO << "Please visit http://www.ks.uiuc.edu/Research/namd/\n"
         << iINFO << "for updates, documentation, and support information.\n"
#endif
<< iINFO << "\n"
<< iINFO << "Please cite Phillips et al., J. Comp. Chem. 26:1781-1802 (2005)\n"
<< iINFO << "in all publications reporting results obtained with NAMD.\n"
<< iINFO << "\n"
         << endi;

    // CHARM_VERSION is formatted as a decimal integer for the log line below.
    char charm_version[64];
    sprintf(charm_version,"%d",CHARM_VERSION);

    // Refuse to build against a Charm++ older than version code 60500.
    // NOTE(review): the message says 6.5.1 while the check is < 60500 --
    // confirm which minimum version is intended.
#if CHARM_VERSION < 60500
#error "Charm++ 6.5.1 or later is required to build NAMD"
#endif

    iout << iINFO << "Based on Charm++/Converse " << charm_version
         << " for " << CMK_MACHINE_NAME << "\n" << endi;

    iout << iINFO << "Built " << namd_build_date << " by "
         << namd_build_user << " on " << namd_build_machine << "\n"
         << endi;
    // Unless built with NO_SOCKET, pass the program name, version, platform
    // and PE count (as a string) to tbsoft_sendusage.
#ifndef NO_SOCKET
    char numcpus[512];
    sprintf(numcpus,"%d",CkNumPes());
    tbsoft_sendusage("NAMD",NAMD_VERSION,NAMD_PLATFORM,numcpus,"");
#endif

#if CMK_BLUEGENE_CHARM
    iout << iINFO << "Running on BigSim using " << CmiNumPes() << " real processors.\n" << endi;
#endif
    // Report the size and layout of the parallel run.
    iout << iINFO << "Running on " << CkNumPes() << " processors, "
         << CmiNumNodes() << " nodes, "
         << CmiNumPhysicalNodes() << " physical nodes.\n" << endi;
    iout << iINFO << "CPU topology information " << (CmiCpuTopologyEnabled()?"available":"unavailable") << ".\n" << endi;
    iout << iINFO << "Charm++/Converse parallel runtime startup completed at "
	 << CmiWallTimer() << " s\n"<< endi;
    // memusage() is called only to obtain the name of the memory-usage
    // source through its out-parameter; the amount printed comes from
    // memusage_MB().
    // NOTE(review): unlike the statements above this one does not end with
    // endi -- confirm whether that is intentional.
    const char* memsource;
    memusage(&memsource);
    iout << iINFO << memusage_MB() << " MB of memory in use"
	 << " based on " << memsource << "\n";
  }
Пример #24
0
// Applies the migration decisions held in storedMigrateMsg on this PE.
//
// The whole body is compiled only when CMK_LBDB_ON is set.  The incoming
// reduction message is deleted immediately; the work is driven by the
// LBMigrateMsg previously stored in storedMigrateMsg (asserted non-NULL):
//   * the expected per-PE loads are copied into theLbdb->lastLBInfo;
//   * if this PE is reported as not alive, the stored message is deleted
//     and nothing else happens;
//   * for every move in the message, objects leaving this PE are migrated,
//     objects arriving here are counted in migrates_expected or
//     future_migrates_expected (depending on async_arrival), and moves that
//     do not involve this PE are passed to UpdateLocation when
//     CMK_GLOBAL_LOCATION_UPDATE is set;
//   * cur_ld_balancer is updated from the message, and MigrationDone(1) is
//     called right away when there is nothing (left) to wait for.
// The stored message is deleted at the end.
//
// With _FAULT_MLOG_ or _FAULT_CAUSAL_ defined, a message whose step is
// older than step() is ignored, and after a restart moves of objects with
// an invalid handle are only counted per destination PE and reported via
// sendDummyMigrationCounts.
//
// msg: reduction message that triggered this call; deleted here.
void CentralLB::ProcessReceiveMigration(CkReductionMsg  *msg)
{
#if CMK_LBDB_ON
  int i;
  LBMigrateMsg *m = storedMigrateMsg;
  CmiAssert(m!=NULL);
  delete msg;

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  // Allocated and used only while _restartFlag is set; start from NULL so
  // the pointer never holds an indeterminate value.
  int *dummyCounts = NULL;

  DEBUGF(("[%d] Starting ReceiveMigration WITH step %d m->step %d\n",CkMyPe(),step(),m->step));
  // CmiPrintf("[%d] Starting ReceiveMigration step %d m->step %d\n",CkMyPe(),step(),m->step);
  // The stored message belongs to a step older than the current one.
  if(step() > m->step){
    return;
  }
  lbDecisionCount = m->lbDecisionCount;
#endif

  if (_lb_args.debug() > 1) {
    if (CkMyPe()%1024==0) CmiPrintf("[%d] Starting ReceiveMigration step %d at %f\n",CkMyPe(),step(), CmiWallTimer());
  }

  for (i=0; i<CkNumPes(); i++) theLbdb->lastLBInfo.expectedLoad[i] = m->expectedLoad[i];
  CmiAssert(migrates_expected <= 0 || migrates_completed == migrates_expected);
/*FAULT_EVAC*/
  if(!CmiNodeAlive(CkMyPe())){
    delete m;
    return;
  }
  migrates_expected = 0;
  future_migrates_expected = 0;
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  int sending=0;
  int dummy=0;
  LBDB *_myLBDB = theLbdb->getLBDB();
  if(_restartFlag){
    // one zeroed counter per PE for moves that cannot be carried out
    dummyCounts = new int[CmiNumPes()];
    bzero(dummyCounts,sizeof(int)*CmiNumPes());
  }
#endif
  for(i=0; i < m->n_moves; i++) {
    MigrateInfo& move = m->moves[i];
    const int me = CkMyPe();
    if (move.from_pe == me && move.to_pe != me) {
      DEBUGF(("[%d] migrating object to %d\n",move.from_pe,move.to_pe));
      // migrate object, in case it is already gone, inform toPe
#if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
      if (theLbdb->Migrate(move.obj,move.to_pe) == 0) 
         thisProxy[move.to_pe].MissMigrate(!move.async_arrival);
#else
      if(_restartFlag == 0){
        DEBUG(CmiPrintf("[%d] need to move object from %d to %d \n",CkMyPe(),move.from_pe,move.to_pe));
        theLbdb->Migrate(move.obj,move.to_pe);
        sending++;
      }else{
        if(_myLBDB->validObjHandle(move.obj)){
          DEBUG(CmiPrintf("[%d] need to move object from %d to %d \n",CkMyPe(),move.from_pe,move.to_pe));
          theLbdb->Migrate(move.obj,move.to_pe);
          sending++;
        }else{
          // after a restart the handle is not valid here: only count the
          // move for its destination PE
          DEBUG(CmiPrintf("[%d] dummy move to pe %d detected after restart \n",CmiMyPe(),move.to_pe));
          dummyCounts[move.to_pe]++;
          dummy++;
        }
      }
#endif
    } else if (move.from_pe != me && move.to_pe == me) {
      DEBUGF(("[%d] expecting object from %d\n",move.to_pe,move.from_pe));
      if (!move.async_arrival) migrates_expected++;
      else future_migrates_expected++;
    }
    else {
#if CMK_GLOBAL_LOCATION_UPDATE      
      UpdateLocation(move); 
#endif
    }

  }
  DEBUGF(("[%d] in ReceiveMigration %d moves expected: %d future expected: %d\n",CkMyPe(),m->n_moves, migrates_expected, future_migrates_expected));
  // if (_lb_debug) CkPrintf("[%d] expecting %d objects migrating.\n", CkMyPe(), migrates_expected);

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  if(_restartFlag){
    sendDummyMigrationCounts(dummyCounts);
    _restartFlag  =0;
    delete []dummyCounts;
  }
#endif


#if 0
  if (m->n_moves ==0) {
    theLbdb->SetLBPeriod(theLbdb->GetLBPeriod()*2);
  }
#endif
  cur_ld_balancer = m->next_lb;
  if((CkMyPe() == cur_ld_balancer) && (cur_ld_balancer != 0)){
      LBDatabaseObj()->set_avail_vector(m->avail_vector, -2);
  }

  // nothing to wait for (or everything already arrived): finish now
  if (migrates_expected == 0 || migrates_completed == migrates_expected)
    MigrationDone(1);
  delete m;

//	CkEvacuatedElement();
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
//  migrates_expected = 0;
//  //  ResumeClients(1);
#endif
#endif
}
Пример #25
0
/* Returns the CpmSend destination for a PE number derived from CrnRand(). */
CpmDestination CpmLDB()
{
  /* Clear the sign bit so the value is non-negative, drop the low 8 bits,
     then reduce the remainder into the range [0, CmiNumPes()). */
  int nonNegative = CrnRand() & 0x7FFFFFFF;
  int target = (nonNegative >> 8) % CmiNumPes();

  return CpmSend(target);
}
Пример #26
0
/*
 * Creates the locks and the per-thread Converse state for this node, then
 * spawns the node's worker pthreads, each running call_startfn with its
 * index as argument.
 *
 * Which index range is spawned depends on the build: with CMK_CONVERSE_MPI
 * and !CharmLibInterOperate the calling thread takes the extra
 * (_Cmi_mynodesize-th) state slot and indices start..end = 0..tocreate-1 are
 * created; otherwise the calling thread is rank 0 and indices 1..tocreate
 * are created.  When thread-local storage is not used, the calling thread's
 * state pointer is finally published through Cmi_state_key.
 *
 * argv: not used by this function.
 *
 * Exits through PerrorExit if a thread cannot be created.
 */
static void CmiStartThreads(char **argv)
{
  pthread_t pid;
  size_t i;
  int ok, tocreate;
  pthread_attr_t attr;
  int start, end;

  MACHSTATE(4,"CmiStartThreads")
  CmiMemLock_lock=CmiCreateLock();
  comm_mutex=CmiCreateLock();
  _smp_mutex = CmiCreateLock();
#if defined(CMK_NO_ASM_AVAILABLE) && CMK_PCQUEUE_LOCK
  cmiMemoryLock = CmiCreateLock();
  if (CmiMyNode()==0) printf("Charm++ warning> fences and atomic operations not available in native assembly\n");
#endif

#if ! (CMK_HAS_TLS_VARIABLES && !CMK_NOT_USE_TLS_THREAD)
  /* No TLS: keep one state struct per rank plus one extra slot, reachable
     through a pthread key. */
  pthread_key_create(&Cmi_state_key, 0);
  Cmi_state_vector =
    (CmiState)calloc(_Cmi_mynodesize+1, sizeof(struct CmiStateStruct));
  for (i=0; i<_Cmi_mynodesize; i++)
    CmiStateInit(i+Cmi_nodestart, i, CmiGetStateN(i));
  /*Create a fake state structure for the comm. thread*/
/*  CmiStateInit(-1,_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize)); */
  CmiStateInit(_Cmi_mynode+CmiNumPes(),_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize));
#else
    /* for main thread */
  Cmi_state_vector = (CmiState *)calloc(_Cmi_mynodesize+1, sizeof(CmiState));
#if CMK_CONVERSE_MPI
      /* main thread is communication thread */
  if(!CharmLibInterOperate) {
    CmiStateInit(_Cmi_mynode+CmiNumPes(), _Cmi_mynodesize, &Cmi_mystate);
    Cmi_state_vector[_Cmi_mynodesize] = &Cmi_mystate;
  } else 
#endif
  {
    /* main thread is of rank 0 */
    CmiStateInit(Cmi_nodestart, 0, &Cmi_mystate);
    Cmi_state_vector[0] = &Cmi_mystate;
  }
#endif

#if CMK_MULTICORE || CMK_SMP_NO_COMMTHD
  if (!Cmi_commthread)
    tocreate = _Cmi_mynodesize-1;
  else
#endif
  tocreate = _Cmi_mynodesize;
#if CMK_CONVERSE_MPI
  if(!CharmLibInterOperate) {
    start = 0;
    end = tocreate - 1;                    /* skip comm thread */
  } else 
#endif
  {
    start = 1;
    end = tocreate;                       /* skip rank 0 main thread */
  }
  /* `i` is unsigned, so a negative `end` (tocreate == 0 with start == 0)
     would be converted to a huge value in `i <= end` and the loop would
     never stop.  Check the range in signed arithmetic first. */
  if (end >= start) {
    for (i=(size_t)start; i<=(size_t)end; i++) {
      pthread_attr_init(&attr);
      pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
      /* the thread index is passed by value, packed into the void* */
      ok = pthread_create(&pid, &attr, call_startfn, (void *)i);
      /* pthread_create returns 0 on success and a positive error number on
         failure; it never returns a negative value. */
      if (ok != 0) PerrorExit("pthread_create");
      pthread_attr_destroy(&attr);
    }
  }
#if ! (CMK_HAS_TLS_VARIABLES && !CMK_NOT_USE_TLS_THREAD)
#if CMK_CONVERSE_MPI
  if(!CharmLibInterOperate)
    pthread_setspecific(Cmi_state_key, Cmi_state_vector+_Cmi_mynodesize);
  else 
#endif
    pthread_setspecific(Cmi_state_key, Cmi_state_vector);
#endif

  MACHSTATE(4,"CmiStartThreads done")
}