/*
 * Translate a domain-local process index into a global process ID.
 *
 *   domain       domain type; not consulted by this routine
 *   id           index into armci_clus_info, 0 <= id < armci_nclus
 *   loc_proc_id  index of the process inside that entry,
 *                0 <= loc_proc_id < armci_clus_info[id].nslave
 *
 * Returns armci_clus_info[id].master + loc_proc_id.
 * An out-of-range id or loc_proc_id is reported through armci_die2().
 */
int armci_domain_glob_proc_id(armci_domain_t domain, int id, int loc_proc_id)
{
    int global_id;

    if (!(id >= 0 && id < armci_nclus))
        armci_die2("armci domain error", id, armci_nclus);

    if (!(loc_proc_id >= 0 && loc_proc_id < armci_clus_info[id].nslave))
        armci_die2("armci domain proc error", loc_proc_id,
                   armci_clus_info[id].nslave);

    global_id = armci_clus_info[id].master + loc_proc_id;
    return global_id;
}
/*\ server receives request \*/ void armci_rcv_req(void *mesg, void *phdr, void *pdescr,void *pdata,int *buflen) { request_header_t *msginfo = (request_header_t*)MessageRcvBuffer; int hdrlen = sizeof(request_header_t); int stat, p = *(int*)mesg; int bytes; stat =armci_ReadFromSocket(CLN_sock[p],MessageRcvBuffer,hdrlen); if(stat<0) armci_die("armci_rcv_req: failed to receive header ",p); *(void**)phdr = msginfo; #if defined(USE_SOCKET_VECTOR_API) if(msginfo->operation == PUT && msginfo->datalen==0){ if(msginfo->format==STRIDED) armci_tcp_read_strided_data(msginfo,pdescr,p); if(msginfo->format==VECTOR){ armci_tcp_read_vector_data(msginfo,pdescr,p); } return; } #endif *buflen = MSG_BUFLEN - hdrlen; if (msginfo->operation == GET) bytes = msginfo->dscrlen; else{ bytes = msginfo->bytes; if(bytes >*buflen)armci_die2("armci_rcv_req: message overflowing rcv buf", msginfo->bytes,*buflen); } if(msginfo->bytes){ stat = armci_ReadFromSocket(CLN_sock[p],msginfo+1,bytes); if(stat<0)armci_die("armci_rcv_req: read of data failed",stat); *(void**)pdescr = msginfo+1; *(void**)pdata = msginfo->dscrlen + (char*)(msginfo+1); *buflen -= msginfo->dscrlen; if (msginfo->operation != GET) if(msginfo->datalen)*buflen -= msginfo->datalen; }else { *(void**)pdata = msginfo+1; *(void**)pdescr = NULL; } if(msginfo->datalen>0 && msginfo->operation != GET){ if(msginfo->datalen > MSG_BUFLEN -hdrlen -msginfo->dscrlen) armci_die2("armci_rcv_req:data overflowing buffer", msginfo->dscrlen,msginfo->datalen); *buflen -= msginfo->datalen; } }
/* NOTE: the statements up to the first closing brace are the tail of a
 * routine whose opening lines are outside this excerpt. */

    /* convert bufid into req id returned to user:
     * request id = (dsc_id << 8) + cur_id */
    id = dsc_id;
    id <<= 8; /* buffer id is in second byte */
    cur_id = (cur_id+1)%255 +1; /* counter in LSB; stays within 1..255 for a non-negative cur_id */
    id += cur_id;

    /* remember the id in the pending-descriptor slot; a stored value of 0 is
     * what the completion routine below writes */
    armci_pending_dscr[dsc_id].reqid = id;

    if(DEBUG_){
        printf("%d: init strided get: ptr=%p reqid=%d bufid=%d cid=%d levels=%d count[0]=%d\n",
               armci_me,ptr,id,dsc_id, cur_id, levels,count[0]);
        fflush(stdout);
    }

    return id;
}

/*
 * Complete a pending strided get.
 *
 *   dsc_id  index into armci_pending_dscr selecting the saved strided
 *           descriptor
 *   buf     buffer that starts with the request header (request_header_t)
 *
 * Resets the slot's reqid to 0 and then calls armci_rcv_strided_data() with
 * the header's `to` and `datalen` fields plus the pointer, stride levels,
 * stride array and counts saved in the descriptor.
 */
void _armci_asyn_complete_strided_get(int dsc_id, void *buf)
{
    request_header_t *msginfo = (request_header_t*) buf;
    strided_dscr_t *dscr;

    dscr = &armci_pending_dscr[dsc_id].dscr.strided;
    armci_pending_dscr[dsc_id].reqid = 0; /* slot no longer refers to a live request id */

    if(DEBUG_){
        printf("%d:complete_strided_get: ptr=%p bufid=%d levels=%d count[0]=%d\n",
               armci_me,dscr->ptr,dsc_id,dscr->stride_levels,dscr->count[0]);
        fflush(stdout);
    }

    armci_rcv_strided_data(msginfo->to, msginfo, msginfo->datalen, dscr->ptr,
                           dscr->stride_levels, dscr->stride_arr,dscr->count);
}

/* Disabled copy of PARMCI_Wait.  The matching #endif is not visible in this
 * excerpt. */
#if 0 /*this function has been added in armci.c*/
/*
 * Wait for the request identified by req_id.
 * Returns 0 when the request has already completed or was completed here,
 * 1 when the stored request id is smaller than req_id.
 */
int PARMCI_Wait(int req_id)
{
    int dsc_id = REQ_TO_DSC_ID(req_id);
    void *buf;

    if(DEBUG_){
        printf("%d: WAIT for req id=%d bufid=%d\n",armci_me,req_id,dsc_id);
        fflush(stdout);
    }

    /* NOTE(review): the buffer lookup runs before dsc_id is range-checked,
     * and the check below still admits dsc_id == MAX_PENDING_REQ - confirm. */
    buf = _armci_buf_ptr_from_id(dsc_id);

    if(dsc_id >MAX_PENDING_REQ) armci_die2("PARMCI_Wait: bad id",dsc_id,MAX_PENDING_REQ);

    /* when 0 it means the request was completed to get the buffer */
    if(armci_pending_dscr[dsc_id].reqid == 0) return 0;

    /* return 1 if request id looks bad */
    if(armci_pending_dscr[dsc_id].reqid < req_id) return 1;

    _armci_asyn_complete_strided_get(dsc_id,buf);
    FREE_SEND_BUFFER(buf);

    return 0;
}
/*
 * Release mutex number `mutex` that belongs to process `proc`.
 *
 * Dies through armci_die()/armci_die2() when no mutexes have been created
 * (num_mutexes is zero) or when `mutex` exceeds glob_mutex[proc].count.
 * With a single process the call returns right after those checks.
 *
 * When SERVER_LOCK is defined and more than one cluster node exists:
 *   - a mutex of another process is released with armci_rem_unlock(), passing
 *     the ticket saved in glob_mutex[proc].tickets[mutex];
 *   - a mutex of the calling process is released with
 *     armci_server_unlock_mutex(); if that reports a waiter (value > -1) the
 *     waiter is woken with the next ticket number.
 * Otherwise armci_generic_unlock() is used.
 *
 * NOTE(review): the range test accepts mutex == count and negative values;
 * if ids are zero-based with `count` entries this is off by one - confirm
 * against the code that fills glob_mutex before tightening it.
 */
void PARMCI_Unlock(int mutex, int proc)
{
    if (DEBUG)
        fprintf(stderr,"%d enter unlock\n",armci_me);

    if (num_mutexes == 0)
        armci_die("armci_lock: create mutexes first", 0);

    if (mutex > glob_mutex[proc].count)
        armci_die2("armci_lock: mutex not allocated", mutex,
                   glob_mutex[proc].count);

    if (armci_nproc == 1)
        return;

#if defined(SERVER_LOCK)
    if (armci_nclus > 1) {
        if (proc == armci_me) {
            int held = glob_mutex[proc].tickets[mutex];
            msg_tag_t tag;
            int next_proc;

            next_proc = armci_server_unlock_mutex(mutex, proc, held, &tag);
            if (next_proc > -1)
                armci_unlock_waiting_process(tag, next_proc, held + 1);
        } else {
            armci_rem_unlock(mutex, proc, glob_mutex[proc].tickets[mutex]);
        }
    } else
#endif
    {
        armci_generic_unlock(mutex, proc);
    }

    if (DEBUG)
        fprintf(stderr,"%d leave unlock\n",armci_me);
}
void PARMCI_Lock(int mutex, int proc) { #if defined(SERVER_LOCK) int direct; #endif if(DEBUG)fprintf(stderr,"%d enter lock\n",armci_me); if(!num_mutexes) armci_die("armci_lock: create mutexes first",0); if(mutex > glob_mutex[proc].count) armci_die2("armci_lock: mutex not allocated", mutex, glob_mutex[proc].count); if(armci_nproc == 1) return; # if defined(SERVER_LOCK) direct=SAMECLUSNODE(proc); if(!direct) armci_rem_lock(mutex,proc, glob_mutex[proc].tickets + mutex ); else # endif armci_generic_lock(mutex,proc); if(DEBUG)fprintf(stderr,"%d leave lock\n",armci_me); }
/*
 * Ticket-based lock acquisition that polls the owner's "turn" counter.
 *
 *   mutex  mutex number on the owning process
 *   proc   process that owns the mutex
 *
 * A ticket is drawn with register_in_mutex_queue(); the turn counter at
 * glob_mutex[proc].turn + mutex is then fetched with PARMCI_Get() until it
 * equals the ticket.  After every fetch the routine spins for
 * SPINMAX * <number of previous fetches> iterations, so the delay grows
 * linearly.  A turn value larger than the ticket is reported through
 * armci_die2().  On success the ticket is stored in
 * glob_mutex[proc].tickets[mutex].
 */
static void armci_generic_lock(int mutex, int proc)
{
    int len = sizeof(int);
    int backoff = 0;
    int spin, ticket, now_serving;
    int *turn_addr = glob_mutex[proc].turn + mutex;

    ticket = register_in_mutex_queue(mutex, proc);

    /* code to reduce cost of unlocking mutex on the same SMP node goes here
     * lockinfo_node[me].ticket = mutex_ticket;
     * lockinfo_node[me].mutex  = mutex;
     */

    _dummy_work_ = 0.; /* must be global to fool the compiler */

    for (;;) {
        PARMCI_Get(turn_addr, &now_serving, len, proc);
        if (now_serving > ticket)
            armci_die2("armci: problem with tickets", ticket, now_serving);

        /* linear backoff: no delay after the first fetch, then growing */
        for (spin = 0; spin < SPINMAX * backoff; spin++)
            _dummy_work_ += 1.;
        backoff += 1;

        if (ticket == now_serving)
            break;
    }

    glob_mutex[proc].tickets[mutex] = ticket; /* save ticket value */
}
/*\ blocking vector get \*/ void armcill_getv(int proc, int bytes, int count, void* src[], void* dst[]) { int _j, i, batch, issued=0; void *ps, *pd; #if 0 printf("%d: getv %d\n", armci_me, count); fflush(stdout); #endif for (_j = 0; _j < count; ){ /* how big a batch of requests can we issue */ batch = (count - _j )<max_pending ? count - _j : max_pending; _j += batch; for(i=0; i< batch; i++){ if(get_dscr[cur_get])elan_wait(get_dscr[cur_get],100); else pending_get++; ps = src[issued]; pd = dst[issued]; get_dscr[cur_get]= elan_get(elan_base->state,ps, pd,(size_t)bytes,proc); issued++; cur_get++; if(cur_get>=max_pending)cur_get=0; } } if(issued != count) armci_die2("armci-elan getv:mismatch %d %d \n", count,issued); for(i=0; i<max_pending; i++) if(get_dscr[i]){ elan_wait(get_dscr[i],100); get_dscr[i]=(ELAN_EVENT*)0; } }
/*
 * Release lock number `m` in the lock table of process `proc`.
 *
 *   m     lock index, 0 <= m < num_locks
 *   proc  process whose locks are addressed, 0 <= proc < armci_nproc
 *
 * The locks of process `proc` start at all_locks + proc*num_locks.  Both
 * arguments are validated BEFORE that address is formed, so an out-of-range
 * proc never produces an out-of-bounds pointer.  Invalid arguments are
 * reported through armci_die2()/armci_die().
 */
void armcill_unlock(int m, int proc)
{
    ELAN_LOCK *rem_locks;

    if (m < 0 || m >= num_locks)
        armci_die2("armcill_unlock:bad lockid", m, num_locks);
    if (proc < 0 || proc >= armci_nproc)
        armci_die("armcill_unlock: bad proc id", proc);

    rem_locks = (ELAN_LOCK*)(all_locks + proc*num_locks);
    elan_lockUnLock(elan_base->state, rem_locks + m);
}
/*
 * Return the domain ID of a process.
 *
 *   domain        domain type; not consulted by this routine
 *   glob_proc_id  global process ID, 0 <= glob_proc_id < armci_nproc
 *
 * The result is whatever armci_clus_id() yields for the process.  An ID
 * outside the valid range is reported through armci_die2().
 */
int armci_domain_id(armci_domain_t domain, int glob_proc_id)
{
    if (!(glob_proc_id >= 0 && glob_proc_id < armci_nproc))
        armci_die2("armci domain error", glob_proc_id, armci_nproc);

    return armci_clus_id(glob_proc_id);
}
/*
 * Return the number of processes in the domain entry selected by `id`.
 *
 *   domain  domain type; not consulted by this routine
 *   id      index into armci_clus_info
 *
 * id >= armci_nclus is reported through armci_die2().  A negative id is
 * treated as an error condition: a diagnostic is written to stderr and the
 * caller's own entry (armci_clus_me) is used instead.
 */
int armci_domain_nprocs(armci_domain_t domain, int id)
{
    int node = id;

    if (node >= armci_nclus)
        armci_die2("armci domain error", node, armci_nclus);

    if (node < 0) {
        /* This is an error condition */
        fprintf(stderr,"[%d] Returned domain is invalid\n", armci_me);
        node = armci_clus_me;
    }

    return armci_clus_info[node].nslave;
}
/*
 * Draw the next ticket for mutex `id` owned by process `proc`.
 *
 *   id    mutex number, 0 <= id < glob_mutex[proc].count
 *   proc  process that owns the mutex
 *
 * The ticket is obtained with a fetch-and-add of 1 on the token counter at
 * glob_mutex[proc].token + id and returned to the caller.
 *
 * The id is used as an offset into the token array, so it must be a valid
 * zero-based index: negative values and id == count are rejected through
 * armci_die2() (the previous test let both through).
 */
static int register_in_mutex_queue(int id, int proc)
{
    int *mutex_entry, ticket;

    if (id < 0 || id >= glob_mutex[proc].count)
        armci_die2("armci:invalid mutex id", id, glob_mutex[proc].count);

    mutex_entry = glob_mutex[proc].token + id;
    PARMCI_Rmw(ARMCI_FETCH_AND_ADD, &ticket, mutex_entry, 1, proc);

    return ticket;
}
/*\ allocate and initialize num locks on each processor (collective call) \*/ void armcill_allocate_locks(int num) { char *buf; int i,elems; long mod; if(MAX_LOCKS<num)armci_die2("too many locks",MAX_LOCKS,num); num_locks = num; /* allocate memory to hold lock info for all the processors */ buf = malloc(armci_nproc*num *sizeof(ELAN_LOCK) + ELAN_LOCK_ALIGN); if(!buf) armci_die("armcill_init_locks: malloc failed",0); mod = ((long)buf) %ELAN_LOCK_ALIGN; all_locks = (ELAN_LOCK*)(buf +ELAN_LOCK_ALIGN-mod); if(((long)all_locks) %ELAN_LOCK_ALIGN) armci_die2("lock alligment failed",mod,ELAN_LOCK_ALIGN); bzero(all_locks,armci_nproc*num *sizeof(ELAN_LOCK)); /* initialize local locks */ my_locks = all_locks + armci_me * num; for(i=0; i<num; i++) elan_lockInit(elan_base->state, my_locks+i, ELAN_LOCK_NORMAL); /* now we use all-reduce to exchange locks info among everybody */ elems = (num*armci_nproc*sizeof(ELAN_LOCK))/sizeof(long); if((num*sizeof(ELAN_LOCK))%sizeof(long)) armci_die("armcill_init_locks: size mismatch",sizeof(ELAN_LOCK)); armci_msg_lgop((long*)all_locks,elems,"+"); #if 0 if(armci_me == 0){ for(i=0; i<num*armci_nproc; i++) printf("%d:(%d) master=%d type=%d\n",i,elems,(all_locks+i)->lp_master, (all_locks+i)->lp_type); } #endif armci_msg_barrier(); }
/*
 * Acquire a mutex owned by the calling process (armci_me) on behalf of
 * process `proc`.
 *   - must be executed in hrecv/AM handler thread
 *   - application thread must use generic_lock routine
 *
 *   mutex  mutex number on this process
 *   proc   requesting process
 *   tag    message tag stored with the request when it has to wait
 *
 * A ticket is drawn with register_in_mutex_queue() and compared with the
 * current turn counter.  When they match, the ticket is returned.  When they
 * do not, the request (mutex, ticket, tag) is recorded in blocked[proc] -
 * creating that table first if it does not exist - and -1 is returned.
 * A turn counter ahead of the ticket is reported through armci_die2().
 */
int armci_server_lock_mutex(int mutex, int proc, msg_tag_t tag)
{
    int owner = armci_me;
    int len = sizeof(int);
    int *turn_addr;
    int ticket, now_serving;

    if (DEBUG)
        fprintf(stderr,"SLOCK=%d owner=%d p=%d m=%d\n",
                armci_me, owner, proc, mutex);

    turn_addr = glob_mutex[owner].turn + mutex;
    ticket = register_in_mutex_queue(mutex, owner);

    armci_copy(turn_addr, &now_serving, len);
    if (now_serving > ticket)
        armci_die2("armci-s: problem with tickets", ticket, now_serving);

    if (now_serving == ticket) {
        if (DEBUG)
            fprintf(stderr,"SLOCK=%d proc=%d sending ticket (%d)\n",
                    armci_me, proc, ticket);

        /* send ticket to requesting node */
        /* GA_SEND_REPLY(tag, &myturn, sizeof(int), proc); */
        return ticket;
    }

    /* not our turn yet: park the request until the mutex is released */
    if (!blocked)
        armci_serv_mutex_create();
    blocked[proc].mutex = mutex;
    blocked[proc].turn  = ticket;
    blocked[proc].tag   = tag;

    if (DEBUG)
        fprintf(stderr,"SLOCK=%d proc=%d blocked (%d,%d)\n",
                armci_me, proc, now_serving, ticket);

    return -1;
}
/*\ strided get, nonblocking \*/ void armcill_get2D(int proc, int bytes, int count, void* src_ptr,int src_stride, void* dst_ptr,int dst_stride) { int _j, i, batch, issued=0; char *ps=src_ptr, *pd=dst_ptr; #if 1 for (_j = 0; _j < count; ){ /* how big a batch of requests can we issue */ batch = (count - _j )<max_pending ? count - _j : max_pending; _j += batch; for(i=0; i< batch; i++){ #if 1 if(get_dscr[cur_get])elan_wait(get_dscr[cur_get],100); else pending_get++; get_dscr[cur_get]=elan_get(elan_base->state,ps,pd, (size_t)bytes, proc); #else elan_wait(elan_get(elan_base->state, ps, pd, (size_t)bytes, proc),elan_base->waitType); #endif issued++; ps += src_stride; pd += dst_stride; cur_get++; if(cur_get>=max_pending)cur_get=0; } } if(issued != count) armci_die2("armci-elan get:mismatch %d %d \n", count,issued); #else for (_j = 0; _j < count; _j++){ elan_wait(elan_get(elan_base->state, ps, pd, (size_t)bytes, proc),elan_base->waitType); ps += src_stride; pd += dst_stride; } #endif }
/*
 * Run a registered GPC callback on the local node.
 *
 *   h                   callback handle; the table slot is -h + GPC_OFFSET
 *   to, from            passed through to the callback
 *   hdr, hlen           request header and its length
 *   data, dlen          request data and its length
 *   rhdr, rhlen         reply header buffer and its capacity
 *   rdata, rdlen        reply data buffer and its capacity
 *   rtype               passed through to the callback as the last argument
 *
 * Returns the callback's return value.  The callback also receives the
 * addresses of two local ints (reply header size, reply data size); their
 * values are not used after the call.
 *
 * Reported through armci_die()/armci_die2(): a slot outside 0..GPC_SLOTS-1,
 * an empty table entry, or a target `to` for which SAMECLUSNODE() is false.
 */
int armci_gpc_local_exec(int h, int to, int from, void *hdr, int hlen,
                         void *data, int dlen,
                         void *rhdr, int rhlen, void *rdata, int rdlen,
                         int rtype)
{
    int (*callback)();
    int rhsize, rdsize;
    int slot = -h + GPC_OFFSET;

    if (!(slot >= 0 && slot < GPC_SLOTS))
        armci_die2("armci_gpc_local_exec: bad callback handle", slot, GPC_SLOTS);
    if (!_table[slot])
        armci_die("armci_gpc_local_exec: NULL function", slot);

    callback = _table[slot];

    if (!SAMECLUSNODE(to))
        armci_die("armci_gpc_local_exec: GPC call to a different node received!",
                  armci_me);

    return callback(to, from, hdr, hlen, data, dlen,
                    rhdr, rhlen, &rhsize,
                    rdata, rdlen, &rdsize,
                    rtype);
}
void TestGlobals() { #define MAXLENG 256*1024 double *dtest; int *itest; long *ltest; int len; int ifrom=nproc-1,lfrom=1,dfrom=1; if (me == 0) { printf("Global test ... broadcast and reduction for int, long, double\n----------\n"); fflush(stdout); } if (!(dtest = (double *) malloc((unsigned) (MAXLENG*sizeof(double))))) ARMCI_Error("TestGlobals: failed to allocated dtest", MAXLENG); if (!(ltest = (long *) malloc((unsigned) (MAXLENG*sizeof(long))))) ARMCI_Error("TestGlobals: failed to allocated ltest", MAXLENG); if (!(itest = (int *) malloc((unsigned) (MAXLENG*sizeof(int))))) ARMCI_Error("TestGlobals: failed to allocated itest", MAXLENG); for (len=1; len<MAXLENG; len*=2) { int ilen = len*sizeof(int); int dlen = len*sizeof(double); int llen = len*sizeof(long); int i; ifrom = (ifrom+1)%nproc; lfrom = (lfrom+1)%nproc; dfrom = (lfrom+1)%nproc; #if 0 printf("%d:ifrom=%d lfrom=%d dfrom=%d\n",me,ifrom,lfrom,dfrom);fflush(stdout); #endif if (me == 0) { printf("Test length = %d ... ", len); fflush(stdout); } if(me == ifrom)for (i=0; i<len; i++)itest[i]=i; else for (i=0; i<len; i++)itest[i]=0; if(me == lfrom)for (i=0; i<len; i++)ltest[i]=(long)i; else for (i=0; i<len; i++)ltest[i]=0L; if(me == dfrom)for (i=0; i<len; i++)dtest[i]=(double)i; else for (i=0; i<len; i++)dtest[i]=0.0; /* Test broadcast */ armci_msg_brdcst(itest, ilen, ifrom); armci_msg_brdcst(ltest, llen, lfrom); armci_msg_brdcst(dtest, dlen, dfrom); for (i=0; i<len; i++){ if (itest[i] != i) armci_die2("int broadcast failed", i,itest[i]); if (ltest[i] != (long)i) armci_die2("long broadcast failed", i,(int)ltest[i]); if (dtest[i] != (double)i) armci_die2("double broadcast failed", i,(int)dtest[i]); } if (me == 0) { printf("broadcast OK ..."); fflush(stdout); } /* Test global sum */ for (i=0; i<len; i++) { itest[i] = i*me; ltest[i] = (long) itest[i]; dtest[i] = (double) itest[i]; } armci_msg_igop(itest, len, "+"); armci_msg_lgop(ltest, len, "+"); armci_msg_dgop(dtest, len, "+"); for (i=0; i<len; i++) { int iresult = 
i*nproc*(nproc-1)/2; if (itest[i] != iresult || ltest[i] != (long)iresult || dtest[i] != (double) iresult) ARMCI_Error("TestGlobals: global sum failed", (int) i); } if (me == 0) { printf("global sums OK\n"); fflush(stdout); } } /* now we get timing data */ time_gop(dtest,MAXLENG); time_reduce(dtest,MAXLENG); free((char *) itest); free((char *) ltest); free((char *) dtest); }
/*
 * Return the number of processes in the domain entry selected by `id`.
 *
 *   domain  domain type; not consulted by this routine
 *   id      index into armci_clus_info; a negative value selects the
 *           caller's own entry (armci_clus_me)
 *
 * id >= armci_nclus is reported through armci_die2().
 */
int armci_domain_nprocs(armci_domain_t domain, int id)
{
    if (id >= armci_nclus)
        armci_die2("armci domain error", id, armci_nclus);

    return armci_clus_info[(id < 0) ? armci_clus_me : id].nslave;
}
/* server receives request */ void armci_rcv_req (void *mesg, void *phdr, void *pdescr, void *pdata, int *buflen) { request_header_t *msginfo = NULL; int hdrlen = sizeof(request_header_t); int p=-1; int bytes; #if !defined(MULTIPLE_BUFS) MPI_Status status; msginfo = (request_header_t*) MessageRcvBuffer; p = * (int *) mesg; MPI_Check( MPI_Recv(MessageRcvBuffer, MSG_BUFLEN, MPI_BYTE, p, ARMCI_MPI_SPAWN_TAG, MPI_COMM_SERVER2CLIENT, &status) ); #else int reqid = _reqid_ready;;/*get request id that is ready to be processed */ msginfo = (request_header_t*) _mpi2_rcv_buf[reqid]; p = * (int *) mesg; if(p != msginfo->from) armci_die("armci_rcv_req: invalid client", p); #endif * (void **) phdr = msginfo; if( !(p >= 0 && p < armci_nproc) ) armci_die("armci_rcv_req: request from invalid client", p); armci_mpi2_server_debug(armci_server_me, "armci_rcv_req: op=%d mesg=%p, phdr=%p " "pdata=%p, buflen=%p, p=%d\n", msginfo->operation, mesg, phdr, pdata, buflen, p, MSG_BUFLEN); #ifdef MPI_SPAWN_ZEROCOPY if(msginfo->operation==PUT && msginfo->datalen==0) { if(msginfo->format==STRIDED) { armci_mpi_rcv_strided_data(msginfo, pdescr, p); } if(msginfo->format==VECTOR) { armci_mpi_rcv_vector_data(msginfo, pdescr, p); } return; } #endif *buflen = MSG_BUFLEN - hdrlen; if (msginfo->operation == GET) { bytes = msginfo->dscrlen; } else { bytes = msginfo->bytes; if (bytes > *buflen) armci_die2("armci_rcv_req: message overflowing rcv buf", msginfo->bytes, *buflen); } #if MPI_SPAWN_DEBUG && !defined(MPI_SPAWN_ZEROCOPY) && 0 { int count; MPI_Get_count(&status, MPI_BYTE, &count); if (count != (bytes + hdrlen)) { armci_mpi2_server_debug(armci_server_me, "armci_rcv_req: " "got %d bytes, expected %d bytes\n", count, bytes + hdrlen); printf("%d: armci_rcv_req: got %d bytes, expected %d bytes (%d)\n", armci_me, count, bytes + hdrlen, msginfo->datalen); armci_die("armci_rcv_req: count check failed.\n", 0); } } #endif if (msginfo->bytes) { * (void **) pdescr = msginfo + 1; * (void **) pdata = 
msginfo->dscrlen + (char *) (msginfo+1); *buflen -= msginfo->dscrlen; if (msginfo->operation != GET && msginfo->datalen) { *buflen -= msginfo->datalen; } } else { * (void**) pdata = msginfo + 1; * (void**) pdescr = NULL; } if (msginfo->datalen > 0 && msginfo->operation != GET) { if (msginfo->datalen > (MSG_BUFLEN - hdrlen - msginfo->dscrlen)) { armci_die2("armci_rcv_req:data overflowing buffer", msginfo->dscrlen, msginfo->datalen); } *buflen -= msginfo->datalen; } }