Beispiel #1
0
/*\ Map a process rank that is local to a domain onto its global process ID.
 *  id          - domain index, expected in [0, armci_nclus)
 *  loc_proc_id - rank inside that domain, expected in
 *                [0, armci_clus_info[id].nslave)
 *  Returns armci_clus_info[id].master + loc_proc_id.
 *  Out-of-range arguments are reported through armci_die2().
 *  The "domain" argument is not consulted by this implementation.
\*/
int armci_domain_glob_proc_id(armci_domain_t domain, int id, int loc_proc_id)
{
    if (!(id >= 0 && id < armci_nclus))
        armci_die2("armci domain error", id, armci_nclus);

    if (!(loc_proc_id >= 0 && loc_proc_id < armci_clus_info[id].nslave))
        armci_die2("armci domain proc error", loc_proc_id,
                   armci_clus_info[id].nslave);

    /* ranks of a domain are numbered consecutively starting at its master */
    return loc_proc_id + armci_clus_info[id].master;
}
Beispiel #2
0
/*\ Server receives a request from the client whose index is *(int*)mesg.
 *  The fixed-size request header is read from CLN_sock[p] into
 *  MessageRcvBuffer, followed (when msginfo->bytes is non-zero) by the
 *  descriptor and, for non-GET operations, the data.
 *
 *  mesg   - points to an int holding the client index p
 *  phdr   - out: receives the address of the header (start of the buffer)
 *  pdescr - out: receives the address of the descriptor, or NULL when the
 *           request carries no payload
 *  pdata  - out: receives the address where data starts
 *  buflen - out: space left in the buffer after header/descriptor/data
 *
 *  With USE_SOCKET_VECTOR_API, a PUT with datalen==0 is handed to the
 *  strided/vector readers and the function returns without setting
 *  *pdata or *buflen.
\*/
void armci_rcv_req(void *mesg, void *phdr, void *pdescr,void *pdata,int *buflen)
{
request_header_t *msginfo = (request_header_t*)MessageRcvBuffer;
int hdrlen = sizeof(request_header_t);
int stat, p = *(int*)mesg;
int bytes;

    /* the header always comes first and has a fixed size */
    stat =armci_ReadFromSocket(CLN_sock[p],MessageRcvBuffer,hdrlen);
    if(stat<0) armci_die("armci_rcv_req: failed to receive header ",p);
    *(void**)phdr = msginfo;

#if defined(USE_SOCKET_VECTOR_API)
    if(msginfo->operation == PUT && msginfo->datalen==0){
        if(msginfo->format==STRIDED)
            armci_tcp_read_strided_data(msginfo,pdescr,p);
        if(msginfo->format==VECTOR){
            armci_tcp_read_vector_data(msginfo,pdescr,p);
        }
        return;
    }
#endif

    /* space assumed to remain in the receive buffer behind the header */
    *buflen = MSG_BUFLEN - hdrlen;

    /* a GET request only ships its descriptor; everything else ships
     * msginfo->bytes (descriptor + data) */
    if (msginfo->operation == GET)
      bytes = msginfo->dscrlen;
    else
      bytes = msginfo->bytes;

    /* The payload is read straight behind the header, so its length must be
     * checked for every operation (GET included) before the read below. */
    if(bytes >*buflen)armci_die2("armci_rcv_req: message overflowing rcv buf",
                                  bytes,*buflen);

    if(msginfo->bytes){
       stat = armci_ReadFromSocket(CLN_sock[p],msginfo+1,bytes);
       if(stat<0)armci_die("armci_rcv_req: read of data failed",stat);
       *(void**)pdescr = msginfo+1;
       *(void**)pdata  = msginfo->dscrlen + (char*)(msginfo+1);
       *buflen -= msginfo->dscrlen;

       if (msginfo->operation != GET)
           if(msginfo->datalen)*buflen -= msginfo->datalen;

    }else {

       *(void**)pdata  = msginfo+1;
       *(void**)pdescr = NULL;
    }

    if(msginfo->datalen>0 && msginfo->operation != GET){

       if(msginfo->datalen > MSG_BUFLEN -hdrlen -msginfo->dscrlen)
          armci_die2("armci_rcv_req:data overflowing buffer",
                      msginfo->dscrlen,msginfo->datalen);
       /* NOTE(review): for a non-GET request with bytes != 0 the data length
        * was already subtracted above, so it is subtracted twice here;
        * kept as-is -- confirm whether callers rely on it. */
       *buflen -= msginfo->datalen;
    }
}
Beispiel #3
0
   /* convert bufid into req id returned to user */
   id = dsc_id;
   id <<= 8;  /* buffer id is in second byte */
   cur_id = (cur_id+1)%255 +1; /* counter in LSB */
   id += cur_id;
   armci_pending_dscr[dsc_id].reqid = id;

   if(DEBUG_){
      printf("%d: init strided get: ptr=%p reqid=%d bufid=%d cid=%d levels=%d count[0]=%d\n",
             armci_me,ptr,id,dsc_id, cur_id, levels,count[0]); fflush(stdout);
   }

   return id;
}    


/* Finish a pending strided get whose reply message is held in buf.
 *   dsc_id - index into armci_pending_dscr of the descriptor saved for
 *            this request
 *   buf    - buffer that starts with the request header of the reply
 * The request id stored for the descriptor is cleared and the saved
 * strided description (ptr, stride_levels, stride_arr, count) is passed to
 * armci_rcv_strided_data() together with the header.
 */
void _armci_asyn_complete_strided_get(int dsc_id, void *buf)
{
    request_header_t *hdr = (request_header_t *)buf;
    strided_dscr_t *sd = &armci_pending_dscr[dsc_id].dscr.strided;

    /* clear the request id recorded for this descriptor */
    armci_pending_dscr[dsc_id].reqid = 0;

    if (DEBUG_) {
        printf("%d:complete_strided_get: ptr=%p bufid=%d levels=%d count[0]=%d\n",
               armci_me, sd->ptr, dsc_id, sd->stride_levels, sd->count[0]);
        fflush(stdout);
    }

    armci_rcv_strided_data(hdr->to, hdr, hdr->datalen, sd->ptr,
                           sd->stride_levels, sd->stride_arr, sd->count);
}

#if 0 /*this function has been added in armci.c*/
/* Wait for the nonblocking request identified by req_id.
 * The descriptor/buffer index is derived with REQ_TO_DSC_ID(req_id).
 * Returns 0 when the request is complete (either it already was, or it has
 * been completed here and its send buffer released), and 1 when the id
 * stored for the descriptor is smaller than req_id.
 */
int PARMCI_Wait(int req_id)
{
int dsc_id = REQ_TO_DSC_ID(req_id);
void *buf;

   if(DEBUG_){
       printf("%d: WAIT for req id=%d bufid=%d\n",armci_me,req_id,dsc_id); 
       fflush(stdout);
   }

   /* validate the index before it is used for any lookup
    * NOTE(review): ">" admits dsc_id == MAX_PENDING_REQ -- confirm that the
    * descriptor table really has MAX_PENDING_REQ+1 entries */
   if(dsc_id >MAX_PENDING_REQ) 
       armci_die2("PARMCI_Wait: bad id",dsc_id,MAX_PENDING_REQ);

   buf = _armci_buf_ptr_from_id(dsc_id);

   /* when 0 it means the request was completed to get the buffer */
   if(armci_pending_dscr[dsc_id].reqid == 0) return 0;

   /* return 1 if request id looks bad */ 
   if(armci_pending_dscr[dsc_id].reqid < req_id) return 1;

   _armci_asyn_complete_strided_get(dsc_id,buf);

   FREE_SEND_BUFFER(buf);

   return 0;
}
Beispiel #4
0
/*\ Release mutex number "mutex" that lives on process "proc".
 *  Dies if no mutexes have been created or the mutex id is out of range.
 *  With a single process (armci_nproc == 1) nothing needs releasing and the
 *  function returns immediately.
 *  When built with SERVER_LOCK and more than one cluster node is in use, a
 *  remote mutex is released through armci_rem_unlock(); a local one is
 *  released through the server path and, if a process is waiting, that
 *  process is handed the next ticket. Otherwise armci_generic_unlock() is
 *  used.
\*/
void PARMCI_Unlock(int mutex, int proc)
{
    if(DEBUG)fprintf(stderr,"%d enter unlock\n",armci_me);

    if(!num_mutexes) armci_die("armci_unlock: create mutexes first",0);

    /* NOTE(review): the upper bound accepts mutex == count; confirm whether
     * count is the number of mutexes (then ">=" is needed) or the last id */
    if(mutex < 0 || mutex > glob_mutex[proc].count)
        armci_die2("armci_unlock: mutex not allocated", mutex,
                   glob_mutex[proc].count);

    if(armci_nproc == 1) return;

#       if defined(SERVER_LOCK)
    if(armci_nclus >1) {
        if(proc != armci_me)
            armci_rem_unlock(mutex, proc, glob_mutex[proc].tickets[mutex]);
        else {
            int ticket = glob_mutex[proc].tickets[mutex];
            msg_tag_t tag;
            int waiting;

            waiting = armci_server_unlock_mutex(mutex, proc, ticket, &tag);
            /* a non-negative result names a waiting process; it receives
             * the ticket following ours */
            if(waiting >-1)
                armci_unlock_waiting_process(tag, waiting, ++ticket);
        }
    }
    else
#       endif
        armci_generic_unlock(mutex, proc);

    if(DEBUG)fprintf(stderr,"%d leave unlock\n",armci_me);
}
Beispiel #5
0
/*\ Acquire mutex number "mutex" that lives on process "proc".
 *  Dies if no mutexes have been created or the mutex id is out of range.
 *  With a single process (armci_nproc == 1) there is no contention and the
 *  function returns immediately.
 *  When built with SERVER_LOCK and proc is not on the caller's cluster node
 *  (per SAMECLUSNODE), the lock is requested through armci_rem_lock(), which
 *  is given the address of the ticket slot for this mutex; otherwise
 *  armci_generic_lock() is used.
\*/
void PARMCI_Lock(int mutex, int proc)
{
#if defined(SERVER_LOCK)
    int direct;
#endif

    if(DEBUG)fprintf(stderr,"%d enter lock\n",armci_me);

    if(!num_mutexes) armci_die("armci_lock: create mutexes first",0);

    /* a negative id would index in front of the per-process mutex arrays
     * NOTE(review): the upper bound accepts mutex == count; confirm whether
     * count is the number of mutexes (then ">=" is needed) or the last id */
    if(mutex < 0 || mutex > glob_mutex[proc].count)
        armci_die2("armci_lock: mutex not allocated", mutex,
                   glob_mutex[proc].count);

    if(armci_nproc == 1) return;

#       if defined(SERVER_LOCK)
    direct=SAMECLUSNODE(proc);
    if(!direct)
        armci_rem_lock(mutex,proc, glob_mutex[proc].tickets + mutex );
    else
#       endif
        armci_generic_lock(mutex,proc);

    if(DEBUG)fprintf(stderr,"%d leave lock\n",armci_me);
}
Beispiel #6
0
/* Acquire mutex "mutex" on process "proc" by polling.
 * A ticket is drawn with register_in_mutex_queue(); the "turn" counter of
 * the mutex is then fetched with PARMCI_Get() until it equals that ticket.
 * Between fetches the routine burns SPINMAX * (attempt number) iterations,
 * i.e. the delay grows linearly. The ticket is stored in
 * glob_mutex[proc].tickets[mutex] once the lock is held.
 * Dies if the turn counter is ever found ahead of our ticket.
 */
static void armci_generic_lock(int mutex, int proc)
{
    int nbytes = sizeof(int);
    int *turn_addr = glob_mutex[proc].turn + mutex;
    int my_ticket = register_in_mutex_queue(mutex, proc);
    int now_serving;
    int attempt, spin;

    /* (placeholder kept from the original: code to reduce the cost of
     *  unlocking a mutex on the same SMP node would go here) */

    _dummy_work_ = 0.; /* must be global to fool the compiler */

    for (attempt = 0; ; attempt++) {
        PARMCI_Get(turn_addr, &now_serving, nbytes, proc);
        if (now_serving > my_ticket)
            armci_die2("armci: problem with tickets", my_ticket, now_serving);

        /* linear backoff; zero iterations on the first attempt */
        for (spin = 0; spin < SPINMAX * attempt; spin++)
            _dummy_work_ += 1.;

        if (now_serving == my_ticket)
            break;
    }

    /* remember the ticket value for the matching unlock */
    glob_mutex[proc].tickets[mutex] = my_ticket;
}
Beispiel #7
0
/*\ Blocking vector get: fetch "count" segments of "bytes" bytes each from
 *  process "proc", src[k] -> dst[k].
 *  Requests are issued through the circular table get_dscr (max_pending
 *  slots, current slot cur_get); a slot that still holds an event is waited
 *  on before it is reused. Before returning, every event left in the table
 *  is waited on and cleared.
\*/
void armcill_getv(int proc, int bytes, int count, void* src[], void* dst[])
{
    int covered = 0;   /* requests accounted for by the batches so far */
    int issued = 0;    /* requests actually handed to elan_get() */
    int chunk, k;

    while (covered < count) {
        /* how big a batch of requests can we issue */
        chunk = count - covered;
        if (chunk >= max_pending)
            chunk = max_pending;
        covered += chunk;

        for (k = 0; k < chunk; k++) {
            if (get_dscr[cur_get])
                elan_wait(get_dscr[cur_get], 100);
            else
                pending_get++;

            get_dscr[cur_get] = elan_get(elan_base->state, src[issued],
                                         dst[issued], (size_t)bytes, proc);
            issued++;

            /* advance to the next slot, wrapping around */
            cur_get++;
            if (cur_get >= max_pending)
                cur_get = 0;
        }
    }

    if (issued != count)
        armci_die2("armci-elan getv:mismatch %d %d \n", count, issued);

    /* blocking semantics: drain everything that is still outstanding */
    for (k = 0; k < max_pending; k++) {
        if (get_dscr[k]) {
            elan_wait(get_dscr[k], 100);
            get_dscr[k] = (ELAN_EVENT*)0;
        }
    }
}
Beispiel #8
0
void armcill_unlock(int m, int proc)
{
ELAN_LOCK *rem_locks = (ELAN_LOCK*)(all_locks + proc*num_locks);

   if(m<0 || m>= num_locks) armci_die2("armcill_unlock:bad lockid",m,num_locks);
   if(proc<0 || proc>=armci_nproc)armci_die("armcill_unlock: bad proc id",proc);

   elan_lockUnLock(elan_base->state, rem_locks + m);
}
Beispiel #9
0
/* Return the domain ID of global process glob_proc_id.
 * The ID is whatever armci_clus_id() reports for that process; a process
 * number outside [0, armci_nproc) is reported through armci_die2().
 * The "domain" argument is not consulted by this implementation.
 */
int armci_domain_id(armci_domain_t domain, int glob_proc_id)
{
    if (!(glob_proc_id >= 0 && glob_proc_id < armci_nproc))
        armci_die2("armci domain error", glob_proc_id, armci_nproc);

    return armci_clus_id(glob_proc_id);
}
Beispiel #10
0
/*\ Return the number of processes (nslave) in the domain with index id.
 *  id >= armci_nclus is reported through armci_die2().
 *  A negative id is treated as invalid: a warning is written to stderr and
 *  the caller's own node (armci_clus_me) is used instead.
 *  The "domain" argument is not consulted by this implementation.
\*/
int armci_domain_nprocs(armci_domain_t domain, int id)
{
    int node = id;

    if (!(node < armci_nclus))
        armci_die2("armci domain error", node, armci_nclus);

    if (node < 0) {
        /* flagged as an error, but recovered by falling back to my node */
        fprintf(stderr,"[%d] Returned domain is invalid\n", armci_me);
        node = armci_clus_me;
    }

    return armci_clus_info[node].nslave;
}
Beispiel #11
0
/* Draw a ticket for mutex "id" on process "proc".
 * Performs an ARMCI_FETCH_AND_ADD of 1 on the token counter of the mutex
 * (glob_mutex[proc].token + id) via PARMCI_Rmw() and returns the fetched
 * value as the caller's ticket.
 * Dies when id exceeds glob_mutex[proc].count.
 * NOTE(review): id == count passes the check -- confirm the intended bound.
 */
static int register_in_mutex_queue(int id, int proc)
{
    int my_ticket;
    int *token_addr;

    if (id > glob_mutex[proc].count)
        armci_die2("armci:invalid mutex id", id, glob_mutex[proc].count);

    token_addr = glob_mutex[proc].token + id;
    PARMCI_Rmw(ARMCI_FETCH_AND_ADD, &my_ticket, token_addr, 1, proc);

    return my_ticket;
}
Beispiel #12
0
/*\ Allocate and initialize "num" locks on each processor (collective call).
 *  A table with room for armci_nproc*num ELAN_LOCK entries is malloc'ed
 *  with ELAN_LOCK_ALIGN bytes of slack, aligned by hand and zeroed. Each
 *  process initializes its own "num" entries; the tables are then combined
 *  with a "+" reduction over longs so that every process ends up with the
 *  lock info of everybody, followed by a barrier.
 *  Dies when num exceeds MAX_LOCKS, when malloc fails, when the alignment
 *  did not work out, or when num*sizeof(ELAN_LOCK) is not a multiple of
 *  sizeof(long).
 *  The malloc'ed block is not released by this routine.
\*/
void armcill_allocate_locks(int num)
{
   char *raw;
   long misalign;
   int k, nlongs;

   if(num > MAX_LOCKS)armci_die2("too many locks",MAX_LOCKS,num);
   num_locks = num;

   /* room for the lock info of all processors, plus alignment slack */
   raw = malloc(armci_nproc*num *sizeof(ELAN_LOCK) + ELAN_LOCK_ALIGN);
   if(!raw) armci_die("armcill_init_locks: malloc failed",0);

   /* step forward to the next ELAN_LOCK_ALIGN boundary (a full
    * ELAN_LOCK_ALIGN when raw is already aligned) */
   misalign = ((long)raw) %ELAN_LOCK_ALIGN;
   all_locks = (ELAN_LOCK*)(raw + ELAN_LOCK_ALIGN - misalign);
   if(((long)all_locks) %ELAN_LOCK_ALIGN)
        armci_die2("lock alligment failed",misalign,ELAN_LOCK_ALIGN);
   bzero(all_locks,armci_nproc*num *sizeof(ELAN_LOCK));

   /* only my own slice of the table gets initialized locally */
   my_locks = all_locks + armci_me * num;
   for(k=0; k<num; k++)
       elan_lockInit(elan_base->state, my_locks+k, ELAN_LOCK_NORMAL);

   /* the table is exchanged as an array of longs, so the per-process slice
    * has to be a whole number of longs */
   if((num*sizeof(ELAN_LOCK))%sizeof(long))
       armci_die("armcill_init_locks: size mismatch",sizeof(ELAN_LOCK));
   nlongs = (num*armci_nproc*sizeof(ELAN_LOCK))/sizeof(long);

   /* everybody else's slice is zero here, so "+" merges the slices */
   armci_msg_lgop((long*)all_locks,nlongs,"+");
#if 0
   if(armci_me == 0){
     for(k=0; k<num*armci_nproc; k++) printf("%d:(%d) master=%d type=%d\n",k,nlongs,(all_locks+k)->lp_master, (all_locks+k)->lp_type);
   }
#endif
   armci_msg_barrier();
}
Beispiel #13
0
/*\ Acquire a mutex on behalf of process "proc".
 *   - must be executed in the hrecv/AM handler thread
 *   - the application thread must use the generic_lock routine instead
 *  The mutex is one owned by the calling process (armci_me). A ticket is
 *  drawn and compared with the current turn of the mutex:
 *   - equal:     the ticket value is returned to the caller (this routine
 *                does not send it to the requester itself)
 *   - otherwise: the request (mutex, ticket, tag) is recorded in
 *                blocked[proc] and -1 is returned
 *  Dies if the turn counter is found ahead of the ticket just drawn.
\*/
int armci_server_lock_mutex(int mutex, int proc, msg_tag_t tag)
{
    int owner = armci_me;
    int nbytes = sizeof(int);
    int *turn_addr;
    int ticket, now_serving;

    if(DEBUG)fprintf(stderr,"SLOCK=%d owner=%d p=%d m=%d\n",
                         armci_me,owner, proc,mutex);

    turn_addr = glob_mutex[owner].turn + mutex;
    ticket = register_in_mutex_queue(mutex, owner);

    /* the mutex is local to the server, so a plain copy reads the turn */
    armci_copy(turn_addr, &now_serving, nbytes);

    if(now_serving > ticket)
        armci_die2("armci-s: problem with tickets",ticket,now_serving);

    if(now_serving == ticket) {
        if(DEBUG) fprintf(stderr,"SLOCK=%d proc=%d sending ticket (%d)\n",
                              armci_me, proc, ticket);
        return ticket;
    }

    /* somebody else holds the mutex: park the request until its turn */
    if(!blocked)armci_serv_mutex_create();
    blocked[proc].mutex = mutex;
    blocked[proc].turn = ticket;
    blocked[proc].tag  = tag;
    if(DEBUG) fprintf(stderr,"SLOCK=%d proc=%d blocked (%d,%d)\n",
                          armci_me, proc, now_serving,ticket);
    return -1;
}
Beispiel #14
0
/*\ Strided (2D) get, nonblocking: fetch "count" rows of "bytes" bytes each
 *  from process "proc". Row k is read from src_ptr + k*src_stride and
 *  written to dst_ptr + k*dst_stride.
 *  In the active configuration requests go through the circular table
 *  get_dscr (max_pending slots, current slot cur_get); a slot that still
 *  holds an event is waited on before reuse. Events issued here are left in
 *  the table on return. The #else branches are alternative variants that
 *  wait for each elan_get() individually.
\*/
void armcill_get2D(int proc, int bytes, int count, void* src_ptr,int src_stride,
                                                   void* dst_ptr,int dst_stride)
{
int covered, k, chunk, issued=0;
char *from=src_ptr, *to=dst_ptr;

#if 1
    for (covered = 0;  covered < count;  ){
      /* how big a batch of requests can we issue */
      chunk = count - covered;
      if(chunk >= max_pending) chunk = max_pending;
      covered += chunk;

      for(k=0; k< chunk; k++){
#if 1
        if(get_dscr[cur_get])
            elan_wait(get_dscr[cur_get],100);
        else
            pending_get++;
        get_dscr[cur_get]=elan_get(elan_base->state,from,to, (size_t)bytes, proc);
#else
        elan_wait(elan_get(elan_base->state, from, to, (size_t)bytes, proc),elan_base->waitType);
#endif
        issued++;

        /* step to the next row on both sides */
        from += src_stride;
        to   += dst_stride;

        /* advance to the next slot, wrapping around */
        cur_get++;
        if(cur_get>=max_pending)cur_get=0;
      }
    }

    if(issued != count)
       armci_die2("armci-elan get:mismatch %d %d \n", count,issued);
#else
      for (covered = 0;  covered < count;  covered++){
        elan_wait(elan_get(elan_base->state, from, to, (size_t)bytes, proc),elan_base->waitType);
        from += src_stride;
        to   += dst_stride;
      }
#endif
}
Beispiel #15
0
/* Run the GPC callback registered under handle h on the local node.
 * The handle is converted to a table slot as -h + GPC_OFFSET; the callback
 * stored in _table[slot] is invoked with the header/data/reply arguments
 * and its return value is passed back to the caller.
 * Dies when the slot is outside [0, GPC_SLOTS), when no callback is stored
 * in it, or when "to" is not on this cluster node (per SAMECLUSNODE).
 * rhsize and rdsize are handed to the callback by address and are not read
 * here afterwards (presumably output sizes -- confirm against callbacks).
 */
int armci_gpc_local_exec(int h, int to, int from, void *hdr,   int hlen,
		     void *data,  int dlen,
		     void *rhdr,  int rhlen, 
		     void *rdata, int rdlen, int rtype) {
  int reply_hdr_size, reply_data_size;
  int (*callback)();
  int slot = -h + GPC_OFFSET;

  if(!(slot >= 0 && slot < GPC_SLOTS))
    armci_die2("armci_gpc_local_exec: bad callback handle",slot,GPC_SLOTS);
  if(!_table[slot])
    armci_die("armci_gpc_local_exec: NULL function",slot);

  callback = _table[slot];

  if(!SAMECLUSNODE(to))
    armci_die("armci_gpc_local_exec: GPC call to a different node received!", 
                    armci_me);

  return callback(to, from, hdr, hlen, data, dlen, rhdr, rhlen,
                  &reply_hdr_size, rdata, rdlen, &reply_data_size, rtype);
}
Beispiel #16
0
/* Exercise the collective operations for int, long and double.
 * For message lengths 1, 2, 4, ... (below MAXLENG elements):
 *   - broadcast an index ramp from rotating root processes and verify it
 *     on every process,
 *   - fill the arrays with i*me, sum them across processes with the
 *     igop/lgop/dgop "+" reductions and verify against
 *     i*nproc*(nproc-1)/2.
 * Afterwards the timing routines time_gop() and time_reduce() are run on
 * the double buffer. Progress is printed by process 0.
 */
void TestGlobals()
{
#define MAXLENG 256*1024
  double *dbuf;
  int *ibuf;
  long *lbuf;
  int n;
  int iroot=nproc-1,lroot=1,droot=1;

  if (me == 0) {
    printf("Global test ... broadcast and reduction for int, long, double\n----------\n");
    fflush(stdout);
  }

  dbuf = (double *) malloc((unsigned) (MAXLENG*sizeof(double)));
  if (!dbuf)
    ARMCI_Error("TestGlobals: failed to allocated dtest", MAXLENG);
  lbuf = (long *) malloc((unsigned) (MAXLENG*sizeof(long)));
  if (!lbuf)
    ARMCI_Error("TestGlobals: failed to allocated ltest", MAXLENG);
  ibuf = (int *) malloc((unsigned) (MAXLENG*sizeof(int)));
  if (!ibuf)
    ARMCI_Error("TestGlobals: failed to allocated itest", MAXLENG);

  for (n=1; n<MAXLENG; n*=2) {
    int ibytes = n*sizeof(int);
    int dbytes = n*sizeof(double);
    int lbytes = n*sizeof(long);
    int k;

    /* rotate the broadcast roots; the double root follows the long root */
    iroot = (iroot+1)%nproc;
    lroot = (lroot+1)%nproc;
    droot = (lroot+1)%nproc;

#if 0
    printf("%d:ifrom=%d lfrom=%d dfrom=%d\n",me,iroot,lroot,droot);fflush(stdout);
#endif

    if (me == 0) {
      printf("Test length = %d ... ", n);
      fflush(stdout);
    }

    /* only the root of each broadcast starts out with the ramp 0..n-1 */
    for (k=0; k<n; k++) {
      ibuf[k] = (me == iroot) ? k : 0;
      lbuf[k] = (me == lroot) ? (long)k : 0L;
      dbuf[k] = (me == droot) ? (double)k : 0.0;
    }

    /* Test broadcast */
    armci_msg_brdcst(ibuf, ibytes, iroot);
    armci_msg_brdcst(lbuf, lbytes, lroot);
    armci_msg_brdcst(dbuf, dbytes, droot);

    for (k=0; k<n; k++) {
      if (ibuf[k] != k)
        armci_die2("int broadcast failed", k,ibuf[k]);
      if (lbuf[k] != (long)k)
        armci_die2("long broadcast failed", k,(int)lbuf[k]);
      if (dbuf[k] != (double)k)
        armci_die2("double broadcast failed", k,(int)dbuf[k]);
    }

    if (me == 0) {
      printf("broadcast OK ...");
      fflush(stdout);
    }

    /* Test global sum: every process contributes k*me */
    for (k=0; k<n; k++) {
      ibuf[k] = k*me;
      lbuf[k] = (long) ibuf[k];
      dbuf[k] = (double) ibuf[k];
    }

    armci_msg_igop(ibuf, n, "+");
    armci_msg_lgop(lbuf, n, "+");
    armci_msg_dgop(dbuf, n, "+");

    for (k=0; k<n; k++) {
      /* sum over me = 0..nproc-1 of k*me */
      int expected = k*nproc*(nproc-1)/2;
      if (ibuf[k] != expected || lbuf[k] != (long)expected ||
          dbuf[k] != (double) expected)
        ARMCI_Error("TestGlobals: global sum failed", (int) k);
    }

    if (me == 0) {
      printf("global sums OK\n");
      fflush(stdout);
    }
  }

  /* now we get timing data */
  time_gop(dbuf,MAXLENG);
  time_reduce(dbuf,MAXLENG);

  free((char *) ibuf);
  free((char *) lbuf);
  free((char *) dbuf);
}
Beispiel #17
0
/*\ Return the number of processes (nslave) in the domain with index id.
 *  A negative id selects the caller's own node (armci_clus_me);
 *  id >= armci_nclus is reported through armci_die2().
 *  The "domain" argument is not consulted by this implementation.
\*/
int armci_domain_nprocs(armci_domain_t domain, int id)
{
    int node = id;

    if (!(node < armci_nclus))
        armci_die2("armci domain error", node, armci_nclus);

    if (node < 0)
        node = armci_clus_me;

    return armci_clus_info[node].nslave;
}
/* Server receives a request from the client whose rank is *(int*)mesg.
 *
 * Without MULTIPLE_BUFS the whole message (up to MSG_BUFLEN bytes) is
 * received from that client with MPI_Recv into MessageRcvBuffer. With
 * MULTIPLE_BUFS the message is taken from _mpi2_rcv_buf[_reqid_ready] and
 * its "from" field must match the client rank.
 *
 *   mesg   - points to an int holding the client rank p
 *   phdr   - out: receives the address of the request header
 *   pdescr - out: receives the address of the descriptor, or NULL when
 *            msginfo->bytes is 0
 *   pdata  - out: receives the address where data starts
 *   buflen - out: space left in the buffer after header/descriptor/data
 *
 * With MPI_SPAWN_ZEROCOPY, a PUT with datalen==0 is handed to the
 * strided/vector receive routines and the function returns without
 * setting *pdata or *buflen.
 */
void armci_rcv_req (void *mesg, void *phdr, void *pdescr,
                    void *pdata, int *buflen)
{
    request_header_t *msginfo = NULL;
    int hdrlen = sizeof(request_header_t);
    int p = * (int *) mesg;
    int bytes;
#if !defined(MULTIPLE_BUFS)
    MPI_Status status;
#else
    int reqid = _reqid_ready; /* id of the request that is ready to be processed */
#endif

    /* reject a bad client rank before it is used as the MPI source rank
     * or compared against the received header */
    if( !(p >= 0 && p < armci_nproc) )
        armci_die("armci_rcv_req: request from invalid client", p);

#if !defined(MULTIPLE_BUFS)
    msginfo = (request_header_t*) MessageRcvBuffer;

    MPI_Check(
        MPI_Recv(MessageRcvBuffer, MSG_BUFLEN, MPI_BYTE, p, ARMCI_MPI_SPAWN_TAG,
                 MPI_COMM_SERVER2CLIENT, &status)
    );
#else
    msginfo = (request_header_t*) _mpi2_rcv_buf[reqid];
    if(p != msginfo->from)
        armci_die("armci_rcv_req: invalid client", p);
#endif

    * (void **) phdr = msginfo;

    armci_mpi2_server_debug(armci_server_me,
                            "armci_rcv_req: op=%d mesg=%p, phdr=%p "
                            "pdata=%p, buflen=%p, p=%d\n", msginfo->operation,
                            mesg, phdr, pdata, buflen, p);

#ifdef MPI_SPAWN_ZEROCOPY
    if(msginfo->operation==PUT && msginfo->datalen==0)
    {
        if(msginfo->format==STRIDED)
        {
            armci_mpi_rcv_strided_data(msginfo, pdescr, p);
        }
        if(msginfo->format==VECTOR)
        {
            armci_mpi_rcv_vector_data(msginfo, pdescr, p);
        }
        return;
    }
#endif

    /* space that remains in the buffer behind the header */
    *buflen = MSG_BUFLEN - hdrlen;

    /* a GET request only ships its descriptor; everything else ships
     * msginfo->bytes (descriptor + data) */
    if (msginfo->operation == GET)
    {
        bytes = msginfo->dscrlen;
    }
    else
    {
        bytes = msginfo->bytes;
        if (bytes > *buflen)
            armci_die2("armci_rcv_req: message overflowing rcv buf",
                       msginfo->bytes, *buflen);
    }

#if MPI_SPAWN_DEBUG && !defined(MPI_SPAWN_ZEROCOPY) && 0
    {
        int count;
        MPI_Get_count(&status, MPI_BYTE, &count);
        if (count != (bytes + hdrlen))
        {
            armci_mpi2_server_debug(armci_server_me, "armci_rcv_req: "
                                    "got %d bytes, expected %d bytes\n",
                                    count, bytes + hdrlen);
            printf("%d: armci_rcv_req: got %d bytes, expected %d bytes (%d)\n",
                   armci_me, count, bytes + hdrlen,  msginfo->datalen);
            armci_die("armci_rcv_req: count check failed.\n", 0);
        }
    }
#endif

    if (msginfo->bytes)
    {
        /* descriptor sits right behind the header, data behind it */
        * (void **) pdescr = msginfo + 1;
        * (void **) pdata  = msginfo->dscrlen + (char *) (msginfo+1);
        *buflen -= msginfo->dscrlen;

        if (msginfo->operation != GET && msginfo->datalen)
        {
            *buflen -= msginfo->datalen;
        }
    }
    else
    {
        * (void**) pdata  = msginfo + 1;
        * (void**) pdescr = NULL;
    }

    if (msginfo->datalen > 0 && msginfo->operation != GET)
    {
        if (msginfo->datalen > (MSG_BUFLEN - hdrlen - msginfo->dscrlen)) {
            armci_die2("armci_rcv_req:data overflowing buffer",
                       msginfo->dscrlen, msginfo->datalen);
        }
        /* NOTE(review): for a non-GET request with bytes != 0 the data
         * length was already subtracted above, so it is subtracted twice
         * here; kept as-is -- confirm whether callers rely on it. */
        *buflen -= msginfo->datalen;
    }
}