/** Decide whether the message held in an immediate buffer may be progressed
 * now, using the same family of checks that
 * _armci_serv_pendbuf_can_progress applies for its split PUT/ACC rule.
 * NOTE(review): unlike that dispatcher branch, this helper tests the
 * "immediate ACC" shortcut before the wait-list-head test - confirm which
 * order is intended before wiring this helper into the dispatcher.
 *
 * The request header and the per-process wait-list record are derived from
 * @p vbuf, mirroring _can_progress_accnoorder.
 *
 * @param vbuf IN Message in immediate buffer being checked
 * @return 1 if the message can be progressed, 0 otherwise
 */
static int _can_progress_putaccsplitorder(immbuf_t *vbuf) {
  const request_header_t *msginfo = (request_header_t *)vbuf->buf;
  const proc_waitlist_t *info = &pbuf_proc_list_info[msginfo->from];

  if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
    return 0; /*This buffer needs a free pending buffer*/
  }
  if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
    return 1; /*immediate ACC: allowed regardless of queue position*/
  }
  if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
    return 0; /*another immediate buffer from this process is ahead of us*/
  }
  if(msginfo->operation!=PUT && !ARMCI_ACC(msginfo->operation)) {
    /*neither PUT nor ACC: wait until nothing from this process is pending*/
    if(info->order_head) return 0;
    return 1;
  }
  /*PUT or non-immediate ACC: only an immediate message has to wait for
    the pending list of this process to drain*/
  if(IS_IMM_MSG(*msginfo) && info->order_head) return 0;
  return 1;
}
/** Ordering rule used when _pbufOrder == ACC_NO_ORDER (asserted on entry).
 *
 * Messages are processed in-place in immediate buffers or issued into
 * pending buffers in order, except that ACCs are relaxed: an immediate ACC
 * is always allowed, and any other ACC is allowed as long as every request
 * currently on the sender's order list is itself an ACC.
 *
 * @param vbuf IN Message in immediate buffer being checked
 * @return 1 if the message can be progressed, 0 otherwise
 */
static int _can_progress_accnoorder(immbuf_t *vbuf) {
  const request_header_t *msginfo=(request_header_t*)vbuf->buf;
  const int proc = msginfo->from;
  const proc_waitlist_t *info = &pbuf_proc_list_info[proc];
  pendbuf_t *ptr;

  assert(_pbufOrder == ACC_NO_ORDER);

  if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
    return 0; /*This buffer needs a free pending buffer*/
  }
  if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
    return 1;
  }
  if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
    return 0; /*in order issue*/
  }
  if(!ARMCI_ACC(msginfo->operation)) {
    /*non-ACC: must wait for everything pending from this process*/
    if(info->order_head) return 0;
    return 1;
  }

  assert(ARMCI_ACC(msginfo->operation));
  /*An ACC may proceed only if no non-ACC request is on the order list*/
  for(ptr=info->order_head; ptr!=NULL; ptr=ptr->order_next) {
    request_header_t *m = (request_header_t *)ptr->buf;
    assert(m->from == msginfo->from);
    if(!ARMCI_ACC(m->operation)) return 0;
  }
  return 1;
}
/** Make progress on processing a pending buffer. This function also
 * ensures any other waiting messages get processed if they can
 * be. Thus, progress and eventual termination is guaranteed by this
 * function.
 *
 * Outer loop: handle the current message, then ask
 * _armci_serv_pendbuf_promote() for the next immediate buffer that may be
 * progressed; stop when it returns NULL. A promoted non-immediate message
 * is first assigned a pending buffer.
 *
 * Inner loop (non-immediate messages): after progressing a pending buffer,
 * keep going with the head of the sender's order list for as long as that
 * head is flagged commit_me.
 *
 * @param _pbuf IN Pending buffer to make progress on
 * @return none
 */
static void _armci_serv_pendbuf_progress(pendbuf_t *_pbuf){
  request_header_t *msginfo = (request_header_t *)_pbuf->buf;
  immbuf_t *vbuf = _pbuf->vbuf;
  pendbuf_t *pbuf = _pbuf;

  assert(pbuf->vbuf!=NULL);
  do {
    if(vbuf && !IS_IMM_MSG(*msginfo)) {
      assert(pbuf->vbuf == vbuf);
    }

    if(IS_IMM_MSG(*msginfo)) {
      armci_complete_immbuf(vbuf);
    }
    else { /*non-immediate message*/
      proc_waitlist_t* info = &pbuf_proc_list_info[msginfo->from];
      do {
        assert(pbuf->vbuf == vbuf);
        if(msginfo->operation == PUT || ARMCI_ACC(msginfo->operation)) {
          _armci_serv_pendbuf_progress_putacc(pbuf);
        }
        else if (msginfo->operation == GET) {
          _armci_serv_pendbuf_progress_get(pbuf);
        }
        else {
          armci_die("pending buffer processing for this op not yet implemented", msginfo->operation);
        }
        pbuf = info->order_head;
        vbuf = pbuf ? pbuf->vbuf : NULL;
        /*Keep the header in step with pbuf: the next iteration dispatches
          on msginfo->operation, which must describe the new list head and
          not the request that was just progressed*/
        if(pbuf) {
          msginfo = (request_header_t *)pbuf->buf;
        }
      } while(info->order_head && info->order_head->commit_me);
    }

    vbuf = _armci_serv_pendbuf_promote();
    if(vbuf) {
      msginfo = (request_header_t *)vbuf->buf;
      if(!msginfo->tag.imm_msg) {
        pbuf = _armci_serv_pendbuf_assignbuf(vbuf);
        assert(pbuf != NULL);
      }
    }
  } while(vbuf != NULL);
}
/** Start the get that pulls the PUT/ACC payload into the pending buffer,
 * right behind the header and descriptor, and mark the buffer as waiting
 * for that data.
 * @param pbuf IN Pending buffer holding the request header and descriptor
 */
static void _putacc_start_data_get(pendbuf_t *pbuf) {
  request_header_t *hdr = (request_header_t *)pbuf->buf;
  void *payload = ((char *)(hdr+1))+hdr->dscrlen;
  const int slot = (pbuf - serv_pendbuf_arr);

  assert(sizeof(request_header_t)+hdr->dscrlen+hdr->tag.data_len < PENDING_BUF_LEN);
  armci_pbuf_start_get(hdr,hdr->tag.data_ptr,payload,hdr->tag.data_len, hdr->from, slot);
  pbuf->status = RECV_DATA_PENDING;
}

/** Progress PUT/ACC requests through the pending-buffer state machine.
 *
 * INIT            -> fetch the data if header+descriptor fit in an immediate
 *                    buffer, otherwise fetch the rest of the descriptor.
 * RECV_DSCR_DONE  -> fetch the data.
 * RECV_DATA_DONE  -> complete and free the buffer, unless this is a PUT
 *                    with a predecessor on the order list, in which case it
 *                    is only flagged commit_me.
 * The *_PENDING receive states and all send states are fatal here.
 *
 * @param pbuf IN Pending buffer containing the PUT/ACC request
 * @return none
 */
static void _armci_serv_pendbuf_progress_putacc(pendbuf_t *pbuf) {
  request_header_t *hdr = (request_header_t *)pbuf->buf;
  const int slot = (pbuf - serv_pendbuf_arr);

  assert(hdr->operation==PUT || ARMCI_ACC(hdr->operation));
  assert(sizeof(request_header_t)+hdr->dscrlen+hdr->datalen<PENDING_BUF_LEN);

  switch(pbuf->status) {
  case INIT:
    if(sizeof(request_header_t)+hdr->dscrlen > IMM_BUF_LEN) {
      /*Only the first IMM_BUF_LEN bytes are here; get the remainder of
        the descriptor*/
      const int bytes = sizeof(request_header_t)+hdr->dscrlen-IMM_BUF_LEN;
#warning "PEND_BUFS: Abusing msginfo->tag.ack_ptr for GETS with large descriptors!"
      /*sanity check. Should point to tag.ack on the client side*/
      assert(hdr->tag.ack_ptr != NULL);
      void *lptr = ((char *)hdr)+IMM_BUF_LEN;
      /*step back from the tag.ack field to the start of the header, then
        forward past the part already received*/
      void *rptr = ((char *)hdr->tag.ack_ptr) - (int)(&((request_header_t *)0)->tag.ack) + IMM_BUF_LEN;
      assert(IMM_BUF_LEN+bytes < PENDING_BUF_LEN);
      armci_pbuf_start_get(hdr,rptr,lptr,bytes,hdr->from,slot);
      pbuf->status = RECV_DSCR_PENDING;
    }
    else {
      /*Have the header and descriptor; go process*/
      _putacc_start_data_get(pbuf);
    }
    break;

  case RECV_DSCR_PENDING:
    armci_die("call_data_server should set status to RECV_DSCR_DONE before calling progress",pbuf->status);
    break;

  case RECV_DATA_PENDING:
    armci_die("call_data_server should set status to RECV_DONE before calling progress",pbuf->status);
    break;

  case RECV_DSCR_DONE:
    _putacc_start_data_get(pbuf);
    break;

  case RECV_DATA_DONE:
    if(hdr->operation == PUT && pbuf->order_prev!=NULL) {
      /*A PUT with a predecessor is not completed yet; it is only flagged
        (the flag must not already be set when we get here)*/
      assert(pbuf->commit_me == 0);
      pbuf->commit_me = 1;
    }
    else {
      pbuf->commit_me = 0;
      armci_complete_pendbuf(pbuf);
      _armci_serv_pendbuf_freebuf(pbuf);
    }
    break;

  case SEND_DATA_PENDING:
  case SEND_DATA_DONE:
  default:
    armci_die("pendbuf_progress_putacc: invalid status", pbuf->status);
  }
}
/** Implement ordering between messages. This function needs to be
 * implemented in conjunction with @_armci_serv_pendbuf_promote to
 * ensure ordered processing of messages.
 *
 * The rule applied is selected by _pbufOrder; an unknown value is fatal.
 * "order list" below means the list reached through info->order_head /
 * info->order_tail for the sending process, and "wait-list head" means
 * info->immbuf_wlist_head.
 *
 * @param vbuf IN Message in immediate buffer being checked
 * @return 1 if the message can be progressed (either in-place or
 * after copying to a pending buffer). 0 otherwise.
 */
static int _armci_serv_pendbuf_can_progress(immbuf_t *vbuf) {
  const request_header_t *msginfo=(request_header_t*)vbuf->buf;
  const int proc = msginfo->from;
  const proc_waitlist_t *info = &pbuf_proc_list_info[proc];

  if(_pbufOrder == ONE_PBUF_MESG) {
    /*Only one pending buffer used at any time*/
    if(_nPendBufsUsed>0) return 0;
    return 1;
  }

  if(_pbufOrder == ONE_PBUF_MESG_PER_PROC) {
    /*Only one non-immediate mesg can be assigned to the pending
      buffers at any time*/
    if(info->order_head || (info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf)) {
      return 0; /*other requests from this process remain*/
    }
    if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
      return 0; /*This buffer needs a free pending buffer*/
    }
    assert(info->n_pending == 0 || info->immbuf_wlist_head==vbuf);
    return 1;
  }

  if(_pbufOrder == ACC_NO_ORDER) {
    /*Messages are processed in-place in immediate buffers or issued
      into pending buffers for progress in order. This rule relaxes
      that by allowing a sequence of ACCs to be processed
      in-place/issued without waiting for the prior ones to complete*/
    pendbuf_t *ptr;

    if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
      return 0; /*This buffer needs a free pending buffer*/
    }
#if 1 /*enabled: immediate ACCs bypass the ordering checks below; with 0
        they would go through the wait-list-head and order-list checks*/
    if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
      return 1;
    }
#endif
    if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
      return 0; /*in order issue*/
    }
    if(!ARMCI_ACC(msginfo->operation)) {
      if(info->order_head) return 0;
      return 1;
    }
    assert(ARMCI_ACC(msginfo->operation));
    /*An ACC may go ahead only if everything on the order list is an ACC*/
    for(ptr=info->order_head; ptr!=NULL; ptr=ptr->order_next) {
      request_header_t *m = (request_header_t *)ptr->buf;
      assert(m->from == msginfo->from);
      if(!ARMCI_ACC(m->operation)) break;
    }
    if(ptr != NULL) return 0;
    return 1;
  }

  if(_pbufOrder == PUTACC_SPLIT_ORDER) {
    if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
      return 0; /*This buffer needs a free pending buffer*/
    }
    if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
      return 0; /*not the wait-list head for this process*/
    }
    if(msginfo->operation!=PUT && !ARMCI_ACC(msginfo->operation)) {
      /*neither PUT nor ACC: wait for the order list to drain*/
      if(info->order_head) return 0;
      return 1;
    }
    if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
      return 1;
    }
    /*immediate PUT waits for the order list; non-immediate PUT/ACC may
      be issued right away*/
    if(IS_IMM_MSG(*msginfo) && info->order_head) return 0;
    return 1;
  }

  if(_pbufOrder == GET_GET_REORDER) {
    if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
      return 0; /*This buffer needs a free pending buffer*/
    }
    if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
      return 1;
    }
    if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
      return 0; /*not the wait-list head for this process*/
    }
    if(msginfo->operation!=PUT && !ARMCI_ACC(msginfo->operation)) {
      if(info->order_tail) {
        /*Something is on the order list: only a GET whose order-list
          tail is also a GET may proceed*/
        request_header_t *m=(request_header_t*)info->order_tail->buf;
        if(msginfo->operation==GET && m->operation == GET) {
          return 1;
        }
        return 0;
      }
      return 1;
    }
    if(IS_IMM_MSG(*msginfo) && info->order_head) return 0;
    return 1;
  }

  armci_die("Unknown pbuf ordering rule",_pbufOrder);
  return 0;
}
/* Send the request described by msginfo to process msginfo->to with
 * LAPI_Amsend, using armci_header_handler as the remote handler.
 *
 * The active-message header is the request header followed by as much of
 * the descriptor (and, for store-type requests, data) as
 * lapi_max_uhdr_data_sz allows; lengths that do not fit are negated in the
 * header and the origin counter is withheld from the send.
 *
 * proc    - only printed in the debug trace; the destination used for the
 *           send is msginfo->to
 * msginfo - request header, updated in place (tag.cntr, tag.buf and
 *           possibly the sign of dscrlen/datalen)
 * len     - not used; the message length is recomputed here
 *
 * A non-zero return from LAPI_Amsend is fatal (armci_die).
 */
void armci_send_req(int proc, request_header_t* msginfo, int len)
{
    lapi_cntr_t *org_cntr = &(BUF_TO_EVBUF(msginfo)->cntr);
    lapi_cntr_t *cmpl_cntr = NULL; /* only store-type requests trace completion */
    int nbytes = sizeof(request_header_t);
    int status;

    msginfo->tag.cntr= org_cntr;

    /* default reply buffer starts right after the header ... */
    msginfo->tag.buf = msginfo+1;
#if ARMCI_ENABLE_GPC_CALLS
    /* ... except for a vector GET carrying an extended header, where it
       starts after the descriptor */
    if(msginfo->operation==GET && msginfo->format==VECTOR && msginfo->ehlen)
        msginfo->tag.buf = (char *)(msginfo+1)+msginfo->dscrlen;
#endif

    if(msginfo->operation==GET || msginfo->operation==LOCK){

        SET_COUNTER(*(lapi_cmpl_t*)org_cntr,1);/*dataarrive in same buf*/

        /*The GPC case. Note that we don't use the parameter len*/
        if(msginfo->format==VECTOR && msginfo->ehlen > 0)
            nbytes += msginfo->datalen;

        if(lapi_max_uhdr_data_sz < msginfo->dscrlen){
            /* no room for descriptor */
            msginfo->dscrlen = -msginfo->dscrlen;
            org_cntr = NULL; /* GET(descr) from CH will increment buf cntr */
        }else
            nbytes += msginfo->dscrlen;

        /* The mutex must be sent too. For LOCK the caller's len already
           includes sizeof(int), but since the length is rebuilt here it
           has to be added again. */
        if(msginfo->operation==LOCK) nbytes += sizeof(int);

        /* completion status is not traced for load ops: cmpl_cntr stays NULL */

    }else if (msginfo->operation==UNLOCK){

        nbytes += msginfo->dscrlen;
        /* completion status is not traced for unlock: cmpl_cntr stays NULL */

    }else{

        if(lapi_max_uhdr_data_sz < (msginfo->datalen + msginfo->dscrlen)){
            msginfo->datalen = -msginfo->datalen;
            msginfo->dscrlen = -msginfo->dscrlen;
            org_cntr = NULL; /* GET/LOCK from CH will increment buf cntr */
        }else
            nbytes += msginfo->dscrlen+msginfo->datalen;

        /* trace completion of store ops */
        cmpl_cntr = &cmpl_arr[msginfo->to].cntr;
    }

    if(msginfo->operation==PUT || ARMCI_ACC(msginfo->operation))
        UPDATE_FENCE_STATE(msginfo->to, msginfo->operation, 1);

    status = LAPI_Amsend(lapi_handle,(uint)msginfo->to,
                         (void*)armci_header_handler, msginfo, nbytes,
                         NULL, 0, NULL, org_cntr, cmpl_cntr);
    if(status) armci_die("AM failed",status);

    if(DEBUG_) fprintf(stderr,"%d sending req=%d to %d\n", armci_me, msginfo->operation, proc);
}
/*\ The function decomposes a multi-dimensional patch so that it fits in the
 *  internal ARMCI buffer.
 *  It works by recursively reducing patch dimension until some portion of the
 *  subpatch fits in the buffer.
 *  The recursive process is controlled by "fit_level" and "nb" arguments,
 *  which have to be set to -1 at the top-level of the recursion tree.
 *
 *  Argument last and variable looplast are used to indicate to sending/packing
 *  routine that we are dealing with the last portion of the request.
 *  Due to the recursive nature of packing code, the algorithm is following:
 *      if last=1 then internal for loop passes 1 for the last chunk
 *      else it passes 0
 *
 *  Arguments:
 *    op, scale, proc        - passed unchanged to OP_STRIDED
 *    src_ptr, src_stride_arr,
 *    dst_ptr, dst_stride_arr - patch base addresses and strides
 *    count                  - extents per level; count[fit_level] is modified
 *                             while chunks are issued and restored afterwards
 *    stride_levels          - number of stride levels of the (sub)patch
 *    h                      - optional header; when non-NULL its "last" field
 *                             is set before each OP_STRIDED call
 *    fit_level, nb          - decomposition; computed by armci_fit_buffer
 *                             when fit_level < 0 (which also forces last=1)
 *    last                   - see above
 *    nb_handle              - optional handle; its bufid is set to NB_MULTI
 *                             once the static call counter is non-zero
 *
 *  Returns the result of OP_STRIDED; the first non-zero result stops the
 *  decomposition and is returned.
 *
 *  NOTE(review): call_count is function-static and is never reset in this
 *  function, so it accumulates across calls - confirm this is intended.
\*/
int armci_pack_strided(int op, void* scale, int proc,
                       void *src_ptr, int src_stride_arr[],
                       void* dst_ptr, int dst_stride_arr[],
                       int count[], int stride_levels, ext_header_t *h,
                       int fit_level, int nb, int last,armci_ihdl_t nb_handle)
{
    int rc=0, bufsize=BUFSIZE,noswap=0;
    long sn;
    void *src, *dst;
    /* flag is forwarded to OP_STRIDED: 0 when REMOTE_OP is defined, 1
       otherwise (its meaning is defined by OP_STRIDED, not visible here) */
#ifdef REMOTE_OP
    int flag=0;
#else
    int flag=1;
#endif
    int b;
    static int call_count;

    /* platform-specific overrides of the default buffer size */
#ifdef STRIDED_GET_BUFLEN
    if(op==GET)bufsize=STRIDED_GET_BUFLEN;
#  ifdef HITACHI
    else if(stride_levels || ARMCI_ACC(op))bufsize=MSG_BUFLEN_SMALL-PAGE_SIZE;
#  endif
#endif

#if (defined(GM_) || defined(VIA_) || defined(VAPI_))
    /*we cant assume that the entire available buffer will be used for data,
      fact that the header and descriptor also go in the same buffer should be
      considered while packing.
    */
    bufsize-=(sizeof(request_header_t)+(MAX_STRIDE_LEVEL+4)*sizeof(int)+2*sizeof(void *));
#  if defined(PIPE_BUFSIZE) && defined(MAX_PIPELINE_CHUNKS)
    bufsize-=8*MAX_PIPELINE_CHUNKS;
#  endif
#endif

#ifdef BALANCE_FACTOR
    /* Added the following for balancing buffers */
    if(op==PUT){
        int bytes=1, i;
        /* total patch size: product of count[0..stride_levels] */
        for(i=0; i<= stride_levels; i++)
            bytes *= count[i];
        if(bytes > bufsize && bytes/bufsize < 3 && bytes%bufsize < BALANCE_BUFSIZE){
            /* bytes div bufsize - 1 is to increase the balance factor for 3
               buffer case */
            bufsize = bytes/ (bytes/bufsize - 1 + BALANCE_FACTOR);
            noswap = 1; /*** yuck: if set to 1, error in buffers.c ***/
        }
        /* round bufsize down to a multiple of 8 */
        bytes = bufsize%8;
        bufsize -= bytes;
    }
#endif

    /* determine decomposition of the patch to fit in the buffer */
    if(fit_level<0){
        armci_fit_buffer(count, stride_levels, &fit_level, &nb, bufsize);
        last = 1;
    }

    if(fit_level == stride_levels){

        /* we can fit subpatch into the buffer */
        int chunk = count[fit_level];
        int dst_stride, src_stride;

        if(nb == chunk){ /* take shortcut when whole patch fits in the buffer */
            if(h) h->last = last?1:0;
            if(nb_handle && call_count ){
                nb_handle->bufid=NB_MULTI;
                call_count++;
            }
            return(OP_STRIDED(op, scale, proc, src_ptr, src_stride_arr,
                              dst_ptr,dst_stride_arr,count,stride_levels,h,flag,nb_handle));
        }

        /* byte stride between consecutive elements of the level being cut */
        if(fit_level){
            dst_stride = dst_stride_arr[fit_level -1];
            src_stride = src_stride_arr[fit_level -1];
        }else{
            dst_stride = src_stride = 1;
        }

        /* size of the first piece: nb for GET (or when noswap is set),
           otherwise the remainder chunk%nb (nb when it divides evenly) */
        if(op == GET || noswap == 1) b =nb;
        else{ b = chunk%nb; if(b==0)b=nb; } /* put smallest piece first */

        /* issue the chunk piece by piece; every piece after the first is nb */
        for(sn = 0; sn < chunk; ){
            src = (char*)src_ptr + src_stride* sn;
            dst = (char*)dst_ptr + dst_stride* sn;
            count[fit_level] = ARMCI_MIN(b, chunk-sn); /*modify count for this level*/

            if(h) h->last = (last && ((sn+b)>=chunk))? 1: 0 ;
            if(nb_handle)call_count++;
            rc = OP_STRIDED( op, scale, proc, src, src_stride_arr,
                             dst,dst_stride_arr,count,fit_level,h,flag,nb_handle);
            if(rc) break;

            sn += b;
            b = nb;
        }
        count[fit_level] = chunk; /* restore original count */

    } else {
        /* patch too large at this level: recurse over the outermost
           dimension with one stride level less */
        for(sn = 0; sn < count[stride_levels]; sn++){
            int looplast =0;
            src = (char*)src_ptr + src_stride_arr[stride_levels -1]* sn;
            dst = (char*)dst_ptr + dst_stride_arr[stride_levels -1]* sn;

            /* only the final slice of a "last" request is itself "last" */
            if(last && (sn == count[stride_levels]-1)) looplast =1;
            rc = armci_pack_strided(op, scale, proc, src, src_stride_arr,
                                    dst, dst_stride_arr, count, stride_levels -1,
                                    h,fit_level, nb, looplast,nb_handle);
            if(rc) return rc;
        }
    }
    if(nb_handle && call_count )
        nb_handle->bufid=NB_MULTI;
    return rc;
}
/* Process a generalized I/O vector request in as many rounds as needed.
 *
 * Each round armci_split_dscr_array selects how many leading sets (nsets)
 * of the remaining array are handled now. The round is then executed and
 * the array is advanced:
 *   - if the last selected set was split (remainder.src_ptr_array non-NULL),
 *     the remainder replaces it and that set is processed again next round;
 *   - otherwise a previously saved original first set is put back, and a
 *     round that handled nothing is fatal.
 *
 * op, scale, proc - forwarded to the per-round operation
 * darr, len       - descriptor array and its length; darr is modified
 * nb_handle       - optional handle; bufid is set to NB_MULTI when a set
 *                   had to be split
 *
 * Returns 0, or the first non-zero code returned by a round.
 */
int armci_pack_vector(int op, void *scale, armci_giov_t darr[],int len,
                      int proc,armci_ihdl_t nb_handle)
{
    armci_giov_t remainder; /* part of a split set still to be processed */
    armci_giov_t original;  /* original value of a set processed in chunks */
    armci_giov_t *cur = darr; /* first array element to be processed now */
    int rc=0, nsets, count=0;

    armcip_init_giov_t(&remainder);
    armcip_init_giov_t(&original);
    original.src_ptr_array=NULL; /* indicates that the save slot is empty */

    while(len){

        armci_split_dscr_array(cur, len, &remainder, &nsets, &original);

#if defined(REMOTE_OP)
        /* A problem will occur if len is 1 and nsets is 0. This corresponds
         * to a situation where the size of an individual element is found to
         * exceed BUFSIZE1. Treat this as a single transfer of contiguous data
         * using the standard PARMCI_Get/Put/Acc call */
        if (len == 1 && nsets == 0) {
            if(ARMCI_ACC(op))
                rc=PARMCI_Acc(op, scale, cur[0].src_ptr_array[0],
                              cur[0].dst_ptr_array[0],cur[0].bytes, proc);
            else if(op == GET)
                rc=PARMCI_Get(cur[0].src_ptr_array[0],
                              cur[0].dst_ptr_array[0],cur[0].bytes, proc);
            else if(op == PUT)
                rc=PARMCI_Put(cur[0].src_ptr_array[0],
                              cur[0].dst_ptr_array[0],cur[0].bytes, proc);
            else
                armci_die("Unknown op in armci_pack_vector",op);
            nsets = 1;
        } else {
            rc = armci_rem_vector(op, scale, cur,nsets,proc,0,nb_handle);
        }
#else
        if(ARMCI_ACC(op))
            rc=armci_acc_vector(op,scale,cur,nsets,proc);
        else
            rc = armci_copy_vector(op,cur,nsets,proc);
#endif
        if(rc) return rc;

        if(remainder.src_ptr_array == NULL){
            /* nothing was split this round */
            if(original.src_ptr_array){
                cur[0]=original;
                original.src_ptr_array=NULL; /* save slot is empty again */
            }
            if(nsets == 0)
                armci_die("vector packetization problem:buffer too small",BUFSIZE1);
        }else{
            /* the last set was split */
            if(nb_handle) {
                /* can be set multiple times here; but not reset here */
                nb_handle->bufid = NB_MULTI;
            }
            cur[nsets-1]=remainder; /* continue with the rest of that set */
            nsets--; /* not done in full, so it is processed again */
        }

        len -=nsets;
        cur +=nsets;
        count ++;
    }

    return rc;
}