示例#1
0
文件: pendbufs.c 项目: jeffhammond/ga
/** Decide whether the request held in an immediate buffer may be
 * progressed now (either in place or after being moved to a pending
 * buffer).
 *
 * Checks, in order:
 *  - a non-immediate message needs a pending buffer; refuse if all
 *    PENDING_BUF_NUM of them are in use;
 *  - an immediate accumulate is always allowed;
 *  - if the sender's immediate-buffer wait list has a head and it is not
 *    this buffer, refuse;
 *  - an operation that is neither PUT nor accumulate is allowed only when
 *    the sender's order list is empty;
 *  - a remaining immediate message (a PUT, since immediate accumulates
 *    returned earlier) is refused while the sender's order list is
 *    non-empty; everything else is allowed.
 *
 * @param vbuf IN Immediate buffer whose contents start with the request header
 * @return 1 if the message can be progressed, 0 otherwise
 */
static int _can_progress_putaccsplitorder(immbuf_t*vbuf) {
  /* The request header sits at the start of the immediate buffer; the
     per-sender wait-list record is indexed by the sending process. */
  const request_header_t *msginfo=(request_header_t*)vbuf->buf;
  const int proc = msginfo->from;
  const proc_waitlist_t *info = &pbuf_proc_list_info[proc];

  if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
    return 0; /*This buffer needs a free pending buffer*/
  }
  if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
    return 1; /*immediate accumulates skip the ordering checks below*/
  }
  if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
    return 0; /*another immediate buffer from this sender is queued first*/
  }
  if(msginfo->operation!=PUT && !ARMCI_ACC(msginfo->operation)) {
    /*neither PUT nor ACC: wait until nothing from this sender is in flight*/
    if(info->order_head)
      return 0;
    return 1;
  }
  if(IS_IMM_MSG(*msginfo) && info->order_head)
    return 0;
  return 1;
}
示例#2
0
文件: pendbufs.c 项目: jeffhammond/ga
/** Ordering predicate for the ACC_NO_ORDER rule.
 *
 * Messages are processed in-place in immediate buffers or issued into
 * pending buffers for progress in order (like ONE_PBUF_PER_MESG). This
 * rule relaxes ONE_PBUF_PER_MESG by allowing ACCs to be processed
 * in-place/issued without waiting for the prior reqs to complete.
 *
 * @param vbuf IN Immediate buffer whose contents start with the request header
 * @return 1 if the message can be progressed, 0 otherwise
 */
static int _can_progress_accnoorder(immbuf_t *vbuf) {
  const request_header_t *msginfo=(request_header_t*)vbuf->buf;
  const int proc = msginfo->from;
  const proc_waitlist_t *info = &pbuf_proc_list_info[proc];
  pendbuf_t *ptr;

  assert(_pbufOrder == ACC_NO_ORDER);
  if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
    return 0; /*This buffer needs a free pending buffer*/
  }
  if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
    return 1; /*immediate accumulates are never held back*/
  }
  if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
    return 0; /*in order issue*/
  }

  if(!ARMCI_ACC(msginfo->operation)) {
    /*non-accumulate: wait until the sender's order list is empty*/
    if(info->order_head)
      return 0;
    return 1;
  }

  /*Non-immediate accumulate: it may be issued only if every request
    already on this sender's order list is itself an accumulate. Stop at
    the first entry that is not.*/
  assert(ARMCI_ACC(msginfo->operation));
  for(ptr=info->order_head; ptr!=NULL; ptr=ptr->order_next) {
    request_header_t *m = (request_header_t *)ptr->buf;
    assert(m->from == msginfo->from);
    if(!ARMCI_ACC(m->operation))
      break;
  }
  if(ptr != NULL)
    return 0; /*a non-accumulate request is still ahead of us*/
  return 1;
}
示例#3
0
文件: pendbufs.c 项目: jeffhammond/ga
/** Make progress on processing a pending buffer. This function also
 * ensures any other waiting messages get processed if they can
 * be. Thus, progress and eventual termination is guaranteed by this
 * function.
 *
 * Outer loop: handle the current message, then ask
 * _armci_serv_pendbuf_promote() for another immediate buffer that has
 * become eligible; repeat until it returns NULL.
 * Inner loop (non-immediate messages only): after progressing a pending
 * buffer, keep progressing the head of the sender's order list for as
 * long as that head has its commit_me flag set.
 *
 * @param _pbuf IN Pending buffer to make progress on
 * @return none
 */
static void _armci_serv_pendbuf_progress(pendbuf_t *_pbuf){
  request_header_t *msginfo = (request_header_t *)_pbuf->buf;
  immbuf_t *vbuf = _pbuf->vbuf;
  pendbuf_t *pbuf = _pbuf;

  assert(pbuf->vbuf!=NULL);
  do {
    if(vbuf && !IS_IMM_MSG(*msginfo)) { assert(pbuf->vbuf == vbuf); }
    if(IS_IMM_MSG(*msginfo)) {
      armci_complete_immbuf(vbuf);
    }
    else { /*non-immediate message*/
      proc_waitlist_t* info = &pbuf_proc_list_info[msginfo->from];

      do {
	assert(pbuf->vbuf == vbuf);
	if(msginfo->operation == PUT || ARMCI_ACC(msginfo->operation)) {
	  _armci_serv_pendbuf_progress_putacc(pbuf);
	}
	else if (msginfo->operation == GET) {
	  _armci_serv_pendbuf_progress_get(pbuf);
	}
	else {
	  armci_die("pending buffer processing for this op not yet implemented", msginfo->operation);
	}
	/*Move on to the head of this sender's order list. The header
	  must follow the buffer: dispatching the next iteration on the
	  previous request's header would hand, e.g., a deferred PUT to
	  the GET handler when the request just completed was a GET.*/
	pbuf = info->order_head;
	vbuf = pbuf ? pbuf->vbuf : NULL;
	if(pbuf) {
	  msginfo = (request_header_t *)pbuf->buf;
	}
      } while(info->order_head && info->order_head->commit_me);
    }
    /*See whether a waiting immediate buffer can now be processed*/
    vbuf = _armci_serv_pendbuf_promote();
    if(vbuf) {
      msginfo = (request_header_t *)vbuf->buf;
      if(!msginfo->tag.imm_msg) {
	/*non-immediate request: it needs a pending buffer of its own*/
	pbuf = _armci_serv_pendbuf_assignbuf(vbuf);
	assert(pbuf != NULL);
      }
    }
  } while(vbuf != NULL);
}
示例#4
0
文件: pendbufs.c 项目: jeffhammond/ga
/** Advance the state machine of a pending PUT or accumulate request.
 *
 * The action taken depends on pbuf->status:
 *  - INIT: if header plus descriptor fit in IMM_BUF_LEN, start fetching
 *    the payload; otherwise start fetching the remainder of the
 *    descriptor first.
 *  - RECV_DSCR_DONE: start fetching the payload.
 *  - RECV_DATA_DONE: complete and free the buffer, unless this is a PUT
 *    with a predecessor on the order list, in which case it is only
 *    flagged with commit_me.
 *  - any other state is an error and aborts through armci_die().
 *
 * @param pbuf IN Pending buffer containing the PUT/ACC request
 * @return none
 */
static void _armci_serv_pendbuf_progress_putacc(pendbuf_t *pbuf) {
  request_header_t *msginfo = (request_header_t *)pbuf->buf;
  /*payload area: right after the header and the descriptor*/
  void *buffer = ((char *)(msginfo+1))+msginfo->dscrlen;
  int index = (pbuf - serv_pendbuf_arr);

  assert(msginfo->operation==PUT || ARMCI_ACC(msginfo->operation));
  assert(sizeof(request_header_t)+msginfo->dscrlen+msginfo->datalen<PENDING_BUF_LEN);

  switch(pbuf->status) {
  case INIT:
    if(sizeof(request_header_t)+msginfo->dscrlen > IMM_BUF_LEN) {
      /*Only the first IMM_BUF_LEN bytes arrived; fetch the rest of the
	descriptor from the client before touching the payload*/
      const int dscr_rest = sizeof(request_header_t)+msginfo->dscrlen-IMM_BUF_LEN;
#warning "PEND_BUFS: Abusing msginfo->tag.ack_ptr for GETS with large descriptors!"
      assert(msginfo->tag.ack_ptr != NULL); /*sanity check. Should point to tag.ack on the client side*/
      void *local_dst = ((char *)msginfo)+IMM_BUF_LEN;
      /*step back from the ack field to the start of the client-side
	header, then forward past the part already received*/
      void *remote_src = ((char *)msginfo->tag.ack_ptr)
	- (int)(&((request_header_t *)0)->tag.ack)
	+ IMM_BUF_LEN;
      assert(IMM_BUF_LEN+dscr_rest < PENDING_BUF_LEN);
      armci_pbuf_start_get(msginfo,remote_src,local_dst,dscr_rest,msginfo->from,index);
      pbuf->status = RECV_DSCR_PENDING;
      break;
    }
    /*Have the header and descriptor; go process*/
    /* fallthrough */
  case RECV_DSCR_DONE:
    assert(sizeof(request_header_t)+msginfo->dscrlen+msginfo->tag.data_len < PENDING_BUF_LEN);
    armci_pbuf_start_get(msginfo,msginfo->tag.data_ptr,buffer,
			 msginfo->tag.data_len,msginfo->from,index);
    pbuf->status = RECV_DATA_PENDING;
    break;

  case RECV_DSCR_PENDING:
    armci_die("call_data_server should set status to RECV_DSCR_DONE before calling progress",pbuf->status);
    break;

  case RECV_DATA_PENDING:
    armci_die("call_data_server should set status to RECV_DONE before calling progress",pbuf->status);
    break;

  case RECV_DATA_DONE:
    if(msginfo->operation != PUT || pbuf->order_prev == NULL) {
      /*nothing ahead of us (or an accumulate): finish now*/
      pbuf->commit_me = 0;
      armci_complete_pendbuf(pbuf);
      _armci_serv_pendbuf_freebuf(pbuf);
    }
    else {
      /*PUT with a predecessor: only mark it; it must not be seen in
	this state twice*/
      assert(pbuf->commit_me == 0);
      pbuf->commit_me = 1;
    }
    break;

  case SEND_DATA_PENDING:
  case SEND_DATA_DONE:
  default:
    armci_die("pendbuf_progress_putacc: invalid status", pbuf->status);
  }
}
示例#5
0
文件: pendbufs.c 项目: jeffhammond/ga
/** Implement ordering between messages. This function needs to be
 * implemented in conjunction with _armci_serv_pendbuf_promote() to
 * ensure ordered processing of messages.
 *
 * The decision depends on the ordering rule selected in _pbufOrder;
 * each rule is handled by one self-contained branch below. An unknown
 * rule aborts through armci_die().
 *
 * @param vbuf IN Message in immediate buffer being checked
 * @return 1 if the message can be progressed (either in-place or
 * after copying to a pending buffer). 0 otherwise.
 */
static int _armci_serv_pendbuf_can_progress(immbuf_t *vbuf) {
  const request_header_t *msginfo=(request_header_t*)vbuf->buf;
  const int proc = msginfo->from;
  const proc_waitlist_t *info = &pbuf_proc_list_info[proc];

  if(_pbufOrder == ONE_PBUF_MESG) {
    /*Only one pending buffer used at any time*/
    if(_nPendBufsUsed>0)
      return 0;
    return 1;
  }
  if(_pbufOrder == ONE_PBUF_MESG_PER_PROC) {
    /*Only one non-immediate mesg can be assigned to the pending
      buffers at any time*/
    if(info->order_head
       || (info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf)) {
      return 0;/*other requests from this process remain*/
    }
    if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
      return 0; /*This buffer needs a free pending buffer*/
    }
    assert(info->n_pending == 0 || info->immbuf_wlist_head==vbuf);
    return 1;
  }
  if(_pbufOrder == ACC_NO_ORDER) {
    /*Messages are processed in-place in immediate buffers or issued
      into pending buffers for progress in order (like
      ONE_PBUF_PER_MESG). This rule relaxes ONE_PBUF_PER_MESG by
      allowing a sequence of ACCs to be processed in-place/issued
      without waiting for the prior ones to complete*/
    pendbuf_t *ptr;
    if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
      return 0; /*This buffer needs a free pending buffer*/
    }
#if 1 /*shortcut currently enabled: immediate ACCs skip the ordering checks*/
    if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
      return 1;
    }
#endif
    if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
      return 0; /*in order issue*/
    }

    if(!ARMCI_ACC(msginfo->operation)) {
      if(info->order_head)
	return 0;
      return 1;
    }

    /*An ACC may go ahead only if everything already on this sender's
      order list is an ACC too; stop at the first entry that is not*/
    assert(ARMCI_ACC(msginfo->operation));
    for(ptr=info->order_head; ptr!=NULL; ptr=ptr->order_next) {
      request_header_t *m = (request_header_t *)ptr->buf;
      assert(m->from == msginfo->from);
      if(!ARMCI_ACC(m->operation))
	break;
    }
    if(ptr != NULL)
      return 0;
    return 1;
  }
  if(_pbufOrder == PUTACC_SPLIT_ORDER) {
    if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
      return 0; /*This buffer needs a free pending buffer*/
    }
    if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
      return 0; /*another immediate buffer from this sender is queued first*/
    }
    if(msginfo->operation!=PUT && !ARMCI_ACC(msginfo->operation)) {
      /*neither PUT nor ACC: wait for an empty order list*/
      if(info->order_head)
	return 0;
      return 1;
    }
    if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
      return 1;
    }
    if(IS_IMM_MSG(*msginfo) && info->order_head)
      return 0;
    return 1;
  }
  if(_pbufOrder == GET_GET_REORDER) {
    if(!IS_IMM_MSG(*msginfo) && _nPendBufsUsed==PENDING_BUF_NUM) {
      return 0; /*This buffer needs a free pending buffer*/
    }
    if(IS_IMM_MSG(*msginfo) && ARMCI_ACC(msginfo->operation)) {
      return 1;
    }
    if(info->immbuf_wlist_head && info->immbuf_wlist_head!=vbuf) {
      return 0;
    }
    if(msginfo->operation!=PUT && !ARMCI_ACC(msginfo->operation)) {
      if(info->order_tail) {
	/*Something is still in flight: only a GET directly behind
	  another GET (the current tail) may be issued*/
	request_header_t *m=(request_header_t*)info->order_tail->buf;
	if(msginfo->operation==GET && m->operation == GET) {
	  return 1;
	}
	return 0;
      }
      return 1;
    }
    if(IS_IMM_MSG(*msginfo) && info->order_head)
      return 0;
    return 1;
  }
  armci_die("Unknown pbuf ordering rule",_pbufOrder);
  return 0;
}
示例#6
0
文件: lapi.c 项目: bcernohous/ga
/* Send a request header (plus, when it fits, its descriptor and data) to
 * the process named in msginfo->to using LAPI_Amsend.
 *
 * proc    - only used in the debug message; the destination actually
 *           passed to LAPI_Amsend is msginfo->to.
 * msginfo - request to send; tag.cntr and tag.buf are filled in here, and
 *           dscrlen/datalen are negated when they do not fit in the
 *           message (see below).
 * len     - not used; the message length is recomputed locally in msglen.
 *
 * Aborts through armci_die() if LAPI_Amsend returns non-zero.
 */
void armci_send_req(int proc, request_header_t* msginfo, int len)
{
    /* msglen starts as the bare header and grows with whatever is sent inline */
    int msglen = sizeof(request_header_t);
    /* pcntr: counter of the event buffer associated with msginfo;
       pcmpl_cntr: set in every branch below before it is used */
    lapi_cntr_t *pcmpl_cntr, *pcntr = &(BUF_TO_EVBUF(msginfo)->cntr);
    int rc;

    /* recorded in the header before pcntr may be reset to NULL below */
    msginfo->tag.cntr= pcntr;
#if ARMCI_ENABLE_GPC_CALLS
    /* for GET/VECTOR requests with a non-zero ehlen, tag.buf points past the descriptor */
    if(msginfo->operation==GET && msginfo->format==VECTOR && msginfo->ehlen){ 
        msginfo->tag.buf = (char *)(msginfo+1)+msginfo->dscrlen;
    }
    else 
#endif
        /* default: tag.buf points right after the header (also the 'else' arm when GPC is enabled) */
        msginfo->tag.buf = msginfo+1;

    if(msginfo->operation==GET || msginfo->operation==LOCK){

        SET_COUNTER(*(lapi_cmpl_t*)pcntr,1);/*dataarrive in same buf*/
        /*The GPC case. Note that we don't use the parameter len*/
        if(msginfo->format==VECTOR && msginfo->ehlen > 0) 
            msglen += msginfo->datalen;
        if(lapi_max_uhdr_data_sz < msginfo->dscrlen){

            /* descriptor too large to send inline: flag that by negating its length */
            msginfo->dscrlen = -msginfo->dscrlen; /* no room for descriptor */
            pcntr = NULL; /* GET(descr) from CH will increment buf cntr */

        }else msglen += msginfo->dscrlen;

        /*
           we should send the mutex, too. When op==LOCK, Value of len parameter
           is already sizeof(reqest_header_t)+sizeof(int), since we dont use 
           len but construct our own msglen, we need to add sizeof(int).
           */
        if(msginfo->operation==LOCK) msglen += sizeof(int);

        pcmpl_cntr=NULL; /* don't trace completion status for load ops */

    }else if (msginfo->operation==UNLOCK){

        /* the descriptor is always sent inline for UNLOCK */
        msglen += msginfo->dscrlen;
        pcmpl_cntr=NULL; /* don't trace completion status for unlock */

    }else{

        /* all remaining operations: descriptor and data go inline only if both fit */
        if(lapi_max_uhdr_data_sz < (msginfo->datalen + msginfo->dscrlen)){

            /* neither is sent inline; both lengths are negated to flag that */
            msginfo->datalen = -msginfo->datalen;
            msginfo->dscrlen = -msginfo->dscrlen;
            pcntr = NULL; /* GET/LOCK from CH will increment buf cntr */

        }else msglen += msginfo->dscrlen+msginfo->datalen;

        /* trace completion of store ops */
        pcmpl_cntr = &cmpl_arr[msginfo->to].cntr; 

    }

    /* fence bookkeeping for PUT/accumulate happens before the message is sent */
    if(msginfo->operation==PUT || ARMCI_ACC(msginfo->operation)) 
        UPDATE_FENCE_STATE(msginfo->to, msginfo->operation, 1);

    /* the header itself is the message; pcntr and pcmpl_cntr are the last
       two counter arguments (either may be NULL) */
    if((rc=LAPI_Amsend(lapi_handle,(uint)msginfo->to,
                    (void*)armci_header_handler, msginfo, msglen, NULL, 0,
                    NULL, pcntr, pcmpl_cntr))) armci_die("AM failed",rc);

    if(DEBUG_) fprintf(stderr,"%d sending req=%d to %d\n",
            armci_me, msginfo->operation, proc);
}
示例#7
0
文件: pack.c 项目: jeffhammond/ga
/*\ The function decomposes a multi-dimensional patch so that it fits in the
 *  internal ARMCI buffer.
 *  It works by recursively reducing patch dimension until some portion of the
 *  subpatch fits in the buffer.
 *  The recursive process is controlled by "fit_level" and "nb" arguments, 
 *  which have to be set to -1 at the top-level of the recursion tree.
 *
 *  Argument last and variable looplast are used to indicate to sending/packing
 *  routine that we are dealing with the last portion of the request.
 *  Due to the recursive nature of packing code, the algorithm is as follows:
 *      if last=1  then internal for loop passes 1 for the last chunk
 *      else it passes 0
 *
 *  Arguments:
 *    op, scale, proc   - passed through unchanged to OP_STRIDED; op is also
 *                        compared against GET/PUT to pick buffer sizes and
 *                        chunk ordering.
 *    src_ptr, dst_ptr  - base addresses; strides are added to them as byte
 *                        offsets (the pointers are cast to char*).
 *    src_stride_arr, dst_stride_arr - per-level strides.
 *    count             - per-level counts; count[fit_level] is temporarily
 *                        overwritten while chunking and restored afterwards.
 *    stride_levels     - number of stride levels of the current (sub)patch.
 *    h                 - optional; when non-NULL its "last" field is set
 *                        before each OP_STRIDED call.
 *    fit_level, nb     - decomposition computed by armci_fit_buffer() on the
 *                        top-level call (fit_level < 0).
 *    last              - see above; forced to 1 on the top-level call.
 *    nb_handle         - optional; its bufid may be set to NB_MULTI.
 *
 *  Returns 0, or the first non-zero value returned by OP_STRIDED / a
 *  recursive call.
 *  
\*/
int armci_pack_strided(int op, void* scale, int proc,
                       void *src_ptr, int src_stride_arr[],
                       void* dst_ptr, int dst_stride_arr[],
                       int count[], int stride_levels, ext_header_t *h,
                       int fit_level, int nb, int last,armci_ihdl_t nb_handle)
{
    int rc=0, bufsize=BUFSIZE,noswap=0;
    long sn;
    void *src, *dst;
    /* flag is forwarded to OP_STRIDED; its value depends only on REMOTE_OP */
#ifdef REMOTE_OP
    int flag=0;
#else
    int flag=1;
#endif
    int b;
    /* NOTE(review): function-static, so shared by all calls; it is only
       ever incremented in this function, never reset here */
    static int call_count;

#ifdef STRIDED_GET_BUFLEN
    if(op==GET)bufsize=STRIDED_GET_BUFLEN;
#  ifdef HITACHI
    else 
	if(stride_levels || ARMCI_ACC(op))bufsize=MSG_BUFLEN_SMALL-PAGE_SIZE;
#  endif
#endif

#if (defined(GM_) || defined(VIA_) || defined(VAPI_))
    /*we cant assume that the entire available buffer will be used for data, 
      fact that the header and descriptor also go in the same buffer should be
      considered while packing.
    */
    bufsize-=(sizeof(request_header_t)+(MAX_STRIDE_LEVEL+4)*sizeof(int)+2*sizeof(void *));
#  if defined(PIPE_BUFSIZE) && defined(MAX_PIPELINE_CHUNKS)
    bufsize-=8*MAX_PIPELINE_CHUNKS;
#  endif
#endif

#ifdef BALANCE_FACTOR
    /* Added the following for balancing buffers */
    if(op==PUT){
        /* bytes = product of all counts, i.e. the size of the whole patch.
           NOTE(review): computed in int; presumably patches are small
           enough not to overflow - confirm */
        int bytes=1, i;
        for(i=0; i<= stride_levels; i++)
                bytes *= count[i];
        /* only rebalance when the patch needs 2 or 3 buffers and the last
           one would be nearly empty */
        if(bytes > bufsize && bytes/bufsize < 3 && bytes%bufsize < BALANCE_BUFSIZE){
        /* bytes div bufsize - 1 is to increase the balance factor for 3 buffer case */
                bufsize = bytes/ (bytes/bufsize - 1 + BALANCE_FACTOR);
                noswap = 1; /*** yuck: if set to 1, error in buffers.c ***/
        }
        /* round bufsize down to a multiple of 8 */
        bytes = bufsize%8;
        bufsize -= bytes;
    }
#endif

    /* determine decomposition of the patch to fit in the buffer */
    if(fit_level<0){
       armci_fit_buffer(count, stride_levels, &fit_level, &nb, bufsize);
       last = 1;
    }

    if(fit_level == stride_levels){

        /* we can fit subpatch into the buffer */
        int chunk = count[fit_level];
        int dst_stride, src_stride;

        if(nb == chunk){ /* take shortcut when whole patch fits in the buffer */
           if(h) h->last = last?1:0;
           /* only marked NB_MULTI if some earlier call already bumped call_count */
           if(nb_handle  && call_count ){
             nb_handle->bufid=NB_MULTI;
             call_count++;
           }
           return(OP_STRIDED(op, scale, proc, src_ptr, src_stride_arr,
                  dst_ptr,dst_stride_arr,count,stride_levels,h,flag,nb_handle));
        }

        /* distance between consecutive elements along fit_level: the
           stride of the level below, or 1 at level 0 */
        if(fit_level){
           dst_stride = dst_stride_arr[fit_level -1];
           src_stride = src_stride_arr[fit_level -1];
        }else{
           dst_stride = src_stride = 1;
        }
        /* size of the first slice: nb for GET (or when noswap is set),
           otherwise the remainder chunk%nb so the short piece goes first */
        if(op == GET || noswap == 1) b =nb; 
        else{ b = chunk%nb; if(b==0)b=nb; } /* put smallest piece first */

        /* sn is advanced at the bottom of the body by the slice just sent */
        for(sn = 0; sn < chunk; ){
           src = (char*)src_ptr + src_stride* sn;
           dst = (char*)dst_ptr + dst_stride* sn;
           count[fit_level] = ARMCI_MIN(b, chunk-sn); /*modify count for this level*/

           /* flag the final slice, but only if the caller said this subpatch is the last */
           if(h) h->last = (last && ((sn+b)>=chunk))? 1: 0 ;
           if(nb_handle)call_count++;
           rc = OP_STRIDED( op, scale, proc, src, src_stride_arr,
                           dst,dst_stride_arr,count,fit_level,h,flag,nb_handle);
           if(rc) break;

           sn += b;
           b = nb; /* every slice after the first uses the full nb */
        }
        count[fit_level] = chunk; /* restore original count */

    }
    else {
        /* patch does not fit at this level: peel off the outermost
           dimension and recurse on each of its slices */
        for(sn = 0; sn < count[stride_levels]; sn++){
           int looplast =0;
           src = (char*)src_ptr + src_stride_arr[stride_levels -1]* sn;
           dst = (char*)dst_ptr + dst_stride_arr[stride_levels -1]* sn;

           if(last && (sn == count[stride_levels]-1)) looplast =1;
           rc = armci_pack_strided(op, scale, proc, src, src_stride_arr,
                                   dst, dst_stride_arr, count, stride_levels -1,
                                   h,fit_level, nb, looplast,nb_handle);
           /* note: this early return skips the NB_MULTI marking below */
           if(rc) return rc;
        }
    }
    if(nb_handle && call_count )
       nb_handle->bufid=NB_MULTI;
    return rc;
}
示例#8
0
文件: pack.c 项目: jeffhammond/ga
/* Process an array of vector descriptors in pieces sized by
 * armci_split_dscr_array().
 *
 * Each pass asks armci_split_dscr_array() how many leading descriptors
 * (nlen) can be handled now, performs the operation on them, and advances.
 * If the last of those descriptors had to be split, its remainder ("extra")
 * is written back into the array so that it is processed on the next pass.
 * "save" carries a value that armci_split_dscr_array() asked to have
 * restored into the first slot after a non-split pass.
 *
 * op        - operation code (GET, PUT or an accumulate op)
 * scale     - passed through to the accumulate / remote routines
 * darr, len - descriptor array and its length; entries of darr may be
 *             overwritten while the array is being processed
 * proc      - passed through to the transfer routines
 * nb_handle - optional; its bufid is set to NB_MULTI whenever a descriptor
 *             had to be split
 *
 * Returns 0, or the first non-zero code returned by a transfer routine.
 */
int armci_pack_vector(int op, void *scale, armci_giov_t darr[],int len,
                      int proc,armci_ihdl_t nb_handle)
{
    armci_giov_t extra; /* keeps data remainder of set to be processed in chunks */
    armci_giov_t save;  /* keeps original value of set to be processed in chunks */
    armci_giov_t *ndarr; /* points to first array element to be processed now */
    int rc=0, nlen;

    armcip_init_giov_t(&extra);
    armcip_init_giov_t(&save);
    ndarr = darr;

    save.src_ptr_array=NULL; /* indicates that save slot is empty */
    while(len){

       armci_split_dscr_array(ndarr, len, &extra, &nlen, &save); 
#  if defined(REMOTE_OP) 
       /* A problem will occur if len is 1 and nlen is 0. This corresponds to a
        * situation where the size of an individual element is found to exceed
        * BUFSIZE1. Treat this as a single transfer of contiguous data using
        * the standard PARMCI_Get/Put/Acc call */
       if (len == 1 && nlen == 0) {
         if(ARMCI_ACC(op))rc=PARMCI_Acc(op, scale, ndarr[0].src_ptr_array[0],
            ndarr[0].dst_ptr_array[0],ndarr[0].bytes, proc);
         else if(op == GET)rc=PARMCI_Get(ndarr[0].src_ptr_array[0],
            ndarr[0].dst_ptr_array[0],ndarr[0].bytes, proc);
         else if(op == PUT)rc=PARMCI_Put(ndarr[0].src_ptr_array[0],
            ndarr[0].dst_ptr_array[0],ndarr[0].bytes, proc);
         else armci_die("Unknown op in armci_pack_vector",op);
         nlen = 1; /* the single descriptor has now been handled */
       } else {
         rc = armci_rem_vector(op, scale, ndarr,nlen,proc,0,nb_handle);
       }
#  else
       if(ARMCI_ACC(op))rc=armci_acc_vector(op,scale,ndarr,nlen,proc);
       else rc = armci_copy_vector(op,ndarr,nlen,proc);
#  endif
       if(rc) break;

       /* non-NULL pointer indicates that set was split */
       if(extra.src_ptr_array){

          if(nb_handle) {
             nb_handle->bufid = NB_MULTI; /*can be set multiple times here; but not reset here*/
          }

          ndarr[nlen-1]=extra; /* set the pointer to remainder of last set */
          nlen--; /* since last set not done in full need to process it again */

       }else{

          if(save.src_ptr_array){
             ndarr[0]=save;
             save.src_ptr_array=NULL; /* indicates that save slot is empty */
          }

          /* nothing was consumed and nothing was split: no progress is possible */
          if(nlen == 0)
            armci_die("vector packetization problem:buffer too small",BUFSIZE1);
       }

       /* skip the descriptors that were fully processed in this pass */
       len -=nlen;
       ndarr +=nlen;
    }

    return rc;
}