static int two_phase_fill_send_buffer(mca_io_ompio_file_t *fh,
				      void *buf,
				      Flatlist_node *flat_buf,
				      char **send_buf,
				      struct iovec *offset_length,
				      int *send_size,
				      MPI_Request *requests,
				      int *sent_to_proc,
				      int contig_access_count, 
				      OMPI_MPI_OFFSET_TYPE min_st_offset,
				      OMPI_MPI_OFFSET_TYPE fd_size,
				      OMPI_MPI_OFFSET_TYPE *fd_start,
				      OMPI_MPI_OFFSET_TYPE *fd_end,
				      int *send_buf_idx,
				      int *curr_to_proc, 
				      int *done_to_proc,
				      int iter, MPI_Aint buftype_extent,
				      int striping_unit, int *aggregator_list){

    int i, p, flat_buf_idx;
    OMPI_MPI_OFFSET_TYPE flat_buf_sz, size_in_buf, buf_incr, size;
    int jj, n_buftypes, ret=OMPI_SUCCESS;
    OMPI_MPI_OFFSET_TYPE off, len, rem_len, user_buf_idx;

    for (i=0; i < fh->f_size; i++) {
	send_buf_idx[i] = curr_to_proc[i] = 0;
	done_to_proc[i] = sent_to_proc[i];
    }
    jj = 0;
    
    user_buf_idx = flat_buf->indices[0];
    flat_buf_idx = 0;
    n_buftypes = 0;
    flat_buf_sz = flat_buf->blocklens[0];
    
    for (i=0; i<contig_access_count; i++) { 
	
      off     = (OMPI_MPI_OFFSET_TYPE) (intptr_t)offset_length[i].iov_base;
	rem_len = (OMPI_MPI_OFFSET_TYPE)offset_length[i].iov_len;
	

	while (rem_len != 0) {
	    len = rem_len;
	    p = mca_fcoll_two_phase_calc_aggregator(fh,
						    off,
						    min_st_offset,
						    &len,
						    fd_size,
						    fd_start,
						    fd_end,
						    striping_unit,
						    mca_fcoll_two_phase_num_io_procs,
						    aggregator_list);

	    if (send_buf_idx[p] < send_size[p]) {
		if (curr_to_proc[p]+len > done_to_proc[p]) {
		    if (done_to_proc[p] > curr_to_proc[p]) {
			size = OMPIO_MIN(curr_to_proc[p] + len - 
					 done_to_proc[p], send_size[p]-send_buf_idx[p]);
			buf_incr = done_to_proc[p] - curr_to_proc[p];
			TWO_PHASE_BUF_INCR
		        buf_incr = curr_to_proc[p] + len - done_to_proc[p];
			curr_to_proc[p] = done_to_proc[p] + size;
		        TWO_PHASE_BUF_COPY
		    }
		    else {
			size = OMPIO_MIN(len,send_size[p]-send_buf_idx[p]);
			buf_incr = len;
			curr_to_proc[p] += size;
			TWO_PHASE_BUF_COPY
		    }
		    if (send_buf_idx[p] == send_size[p]) {

		      ret = MCA_PML_CALL(isend(send_buf[p],
					       send_size[p],
					       MPI_BYTE,
					       p,
					       fh->f_rank+p+100*iter,
					       MCA_PML_BASE_SEND_STANDARD, 
					       fh->f_comm,
					       requests+jj));	
		      
		      if ( OMPI_SUCCESS != ret ){
			return ret;
		      }
		      jj++;
		    }
		}
		else {
		    curr_to_proc[p] += len;
		    buf_incr = len;
		    TWO_PHASE_BUF_INCR
		}
	    }
Пример #2
0
/**
 * Split this process's contiguous file accesses into per-process request
 * lists, one list for every process that
 * mca_fcoll_two_phase_calc_aggregator() assigns a piece to.
 *
 * Each non-empty entry of offset_len (iov_base holds a file offset, iov_len
 * a byte count) is cut into pieces by calling
 * mca_fcoll_two_phase_calc_aggregator() repeatedly.  The helper returns a
 * process index and updates the length passed by reference; this routine
 * treats the updated length as the size of the piece owned by that process
 * and continues with the remainder until the whole entry is consumed.
 *
 * @param fh                         File handle; f_size gives the length of
 *                                   every per-process array built here.
 * @param offset_len                 Array of (file offset, length) pairs.
 * @param contig_access_count        Number of entries in offset_len.
 * @param min_st_offset, fd_start, fd_end, fd_size, striping_unit,
 *        num_aggregators, aggregator_list
 *                                   Passed unchanged to
 *                                   mca_fcoll_two_phase_calc_aggregator().
 * @param count_my_req_procs_ptr     [out] Number of processes that received
 *                                   at least one piece.
 * @param count_my_req_per_proc_ptr  [out] Newly allocated array of f_size
 *                                   ints: number of pieces per process.
 * @param my_req_ptr                 [out] Newly allocated array of f_size
 *                                   access descriptors.  For a process with
 *                                   pieces, offsets/lens are allocated and
 *                                   filled and count is the piece count;
 *                                   otherwise offsets and lens are NULL and
 *                                   count is 0.
 * @param buf_indices                [out] Newly allocated array of f_size
 *                                   entries: the running total of bytes
 *                                   already handed out when the first piece
 *                                   for that process was met, or (size_t)-1
 *                                   if the process received no piece.
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if an allocation fails.
 *         On failure everything allocated here is released,
 *         *count_my_req_per_proc_ptr and *my_req_ptr are set to NULL and the
 *         remaining output arguments are left untouched.  On success the
 *         caller owns all returned arrays.
 */
int mca_fcoll_two_phase_calc_my_requests (mca_io_ompio_file_t *fh,
					  struct iovec *offset_len,
					  int contig_access_count,
					  OMPI_MPI_OFFSET_TYPE min_st_offset,
					  OMPI_MPI_OFFSET_TYPE *fd_start,
					  OMPI_MPI_OFFSET_TYPE *fd_end,
					  OMPI_MPI_OFFSET_TYPE fd_size,
					  int *count_my_req_procs_ptr,
					  int **count_my_req_per_proc_ptr,
					  mca_io_ompio_access_array_t **my_req_ptr,
					  size_t **buf_indices,
					  int striping_unit,
					  int num_aggregators,
					  int *aggregator_list)
{
    int *count_my_req_per_proc = NULL;
    mca_io_ompio_access_array_t *my_req = NULL;
    size_t *buf_idx = NULL;
    int count_my_req_procs = 0;
    int i, l, proc;
    OMPI_MPI_OFFSET_TYPE fd_len, rem_len, curr_idx, off;

    /* Outputs stay NULL unless the whole routine succeeds, so a caller can
     * never pick up a dangling or half-built array. */
    *count_my_req_per_proc_ptr = NULL;
    *my_req_ptr = NULL;

    /* calloc gives the zero-initialised counters directly. */
    count_my_req_per_proc = (int *) calloc (fh->f_size, sizeof(int));
    if (NULL == count_my_req_per_proc) {
        goto out_of_resource;
    }

    buf_idx = (size_t *) malloc (fh->f_size * sizeof(size_t));
    if (NULL == buf_idx) {
        goto out_of_resource;
    }
    /* (size_t)-1 marks "no piece seen for this process yet". */
    for (i = 0; i < fh->f_size; i++) {
        buf_idx[i] = (size_t) -1;
    }

    /* Pass 1: count how many pieces every process will receive. */
    for (i = 0; i < contig_access_count; i++) {
        if (0 == offset_len[i].iov_len) {
            continue;
        }
        off     = (OMPI_MPI_OFFSET_TYPE)(intptr_t) offset_len[i].iov_base;
        rem_len = (OMPI_MPI_OFFSET_TYPE) offset_len[i].iov_len;

        while (0 != rem_len) {
            fd_len = rem_len;   /* in: bytes left, out: size of this piece */
            proc = mca_fcoll_two_phase_calc_aggregator(fh, off, min_st_offset,
                                                       &fd_len, fd_size,
                                                       fd_start, fd_end,
                                                       striping_unit,
                                                       num_aggregators,
                                                       aggregator_list);
            count_my_req_per_proc[proc]++;
            off     += fd_len;  /* first byte not yet assigned */
            rem_len -= fd_len;
        }
    }

    my_req = (mca_io_ompio_access_array_t *)
        malloc (fh->f_size * sizeof(mca_io_ompio_access_array_t));
    if (NULL == my_req) {
        goto out_of_resource;
    }

    /* Put every descriptor into a defined state before anything can fail,
     * so the cleanup path may free all of them unconditionally. */
    for (i = 0; i < fh->f_size; i++) {
        my_req[i].offsets = NULL;
        my_req[i].lens    = NULL;
        my_req[i].count   = 0;
    }

    for (i = 0; i < fh->f_size; i++) {
        if (0 == count_my_req_per_proc[i]) {
            continue;
        }
        my_req[i].offsets = (OMPI_MPI_OFFSET_TYPE *)
            malloc (count_my_req_per_proc[i] * sizeof(OMPI_MPI_OFFSET_TYPE));
        if (NULL == my_req[i].offsets) {
            goto out_of_resource;
        }
        my_req[i].lens = (int *)
            malloc (count_my_req_per_proc[i] * sizeof(int));
        if (NULL == my_req[i].lens) {
            goto out_of_resource;
        }
        count_my_req_procs++;
    }

    /* Pass 2: walk the accesses again, in the same order and with the same
     * skip rule as pass 1, and record every piece. */
    curr_idx = 0;
    for (i = 0; i < contig_access_count; i++) {
        if (0 == offset_len[i].iov_len) {
            continue;
        }
        off     = (OMPI_MPI_OFFSET_TYPE)(intptr_t) offset_len[i].iov_base;
        rem_len = (OMPI_MPI_OFFSET_TYPE) offset_len[i].iov_len;

        while (0 != rem_len) {
            fd_len = rem_len;
            proc = mca_fcoll_two_phase_calc_aggregator(fh, off, min_st_offset,
                                                       &fd_len, fd_size,
                                                       fd_start, fd_end,
                                                       striping_unit,
                                                       num_aggregators,
                                                       aggregator_list);

            /* Remember where the first piece for this process starts in
             * the running byte count; keep the full width of the index. */
            if ((size_t) -1 == buf_idx[proc]) {
                buf_idx[proc] = (size_t) curr_idx;
            }

            l = my_req[proc].count;
            my_req[proc].offsets[l] = off;
            my_req[proc].lens[l]    = (int) fd_len;
            my_req[proc].count++;

            curr_idx += fd_len;
            off      += fd_len;
            rem_len  -= fd_len;
        }
    }

#if 0
    for (i = 0; i < fh->f_size; i++) {
        if (count_my_req_per_proc[i] > 0) {
            fprintf(stdout, "data needed from %d (count = %d):\n", i,
                    my_req[i].count);
            for (l = 0; l < my_req[i].count; l++) {
                fprintf(stdout, " %d: off[%d] = %lld, len[%d] = %d\n",
                        fh->f_rank, l, (long long) my_req[i].offsets[l],
                        l, my_req[i].lens[l]);
            }
            fprintf(stdout, "%d: buf_idx[%d] = %zu\n", fh->f_rank, i,
                    buf_idx[i]);
        }
    }
#endif

    *count_my_req_per_proc_ptr = count_my_req_per_proc;
    *my_req_ptr = my_req;
    *count_my_req_procs_ptr = count_my_req_procs;
    *buf_indices = buf_idx;

    return OMPI_SUCCESS;

out_of_resource:
    /* Release whatever was obtained before the failing allocation. */
    if (NULL != my_req) {
        for (i = 0; i < fh->f_size; i++) {
            free (my_req[i].offsets);
            free (my_req[i].lens);
        }
        free (my_req);
    }
    free (buf_idx);
    free (count_my_req_per_proc);

    return OMPI_ERR_OUT_OF_RESOURCE;
}
static void two_phase_fill_user_buffer(mca_io_ompio_file_t *fh,
				       void *buf, 
				       Flatlist_node *flat_buf,
				       char **recv_buf,
				       struct iovec *offset_length, 
				       unsigned *recv_size, 
				       MPI_Request *requests, 
				       int *recd_from_proc,
				       int contig_access_count, 
				       OMPI_MPI_OFFSET_TYPE min_st_offset, 
				       OMPI_MPI_OFFSET_TYPE fd_size, 
				       OMPI_MPI_OFFSET_TYPE *fd_start, 
				       OMPI_MPI_OFFSET_TYPE *fd_end,
				       MPI_Aint buftype_extent,
				       int striping_unit, int *aggregator_list){
  
  int i = 0, p = 0, flat_buf_idx = 0;
  OMPI_MPI_OFFSET_TYPE flat_buf_sz = 0, size_in_buf = 0, buf_incr = 0, size = 0;
  int n_buftypes = 0;
  OMPI_MPI_OFFSET_TYPE off=0, len=0, rem_len=0, user_buf_idx=0;
  unsigned *curr_from_proc=NULL, *done_from_proc=NULL, *recv_buf_idx=NULL;
  
  curr_from_proc = (unsigned *) malloc (fh->f_size * sizeof(unsigned));
  done_from_proc = (unsigned *) malloc (fh->f_size * sizeof(unsigned));
  recv_buf_idx = (unsigned *) malloc (fh->f_size * sizeof(unsigned));
  
  
  for (i=0; i < fh->f_size; i++) {
    recv_buf_idx[i] = curr_from_proc[i] = 0;
    done_from_proc[i] = recd_from_proc[i];
  }
  
  

  
  user_buf_idx = flat_buf->indices[0];
  flat_buf_idx = 0;
  n_buftypes = 0;
  flat_buf_sz = flat_buf->blocklens[0];
  
  /* flat_buf_idx = current index into flattened buftype
     flat_buf_sz = size of current contiguous component in 
     flattened buf */
  
  for (i=0; i<contig_access_count; i++) { 
    
    off     = (OMPI_MPI_OFFSET_TYPE)(intptr_t)offset_length[i].iov_base;
    rem_len = (OMPI_MPI_OFFSET_TYPE)offset_length[i].iov_len;
    
    /* this request may span the file domains of more than one process */
    while (rem_len != 0) {
      len = rem_len;
      /* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no
       * longer than the single region that processor "p" is responsible
       * for.
       */
      p = mca_fcoll_two_phase_calc_aggregator(fh,
					      off,
					      min_st_offset,
					      &len,
					      fd_size,
					      fd_start,
					      fd_end,
					      striping_unit,
					      mca_fcoll_two_phase_num_io_procs,
					      aggregator_list);
      
      if (recv_buf_idx[p] < recv_size[p]) {
	if (curr_from_proc[p]+len > done_from_proc[p]) {
	  if (done_from_proc[p] > curr_from_proc[p]) {
	    size = OMPIO_MIN(curr_from_proc[p] + len - 
			     done_from_proc[p], recv_size[p]-recv_buf_idx[p]);
	    buf_incr = done_from_proc[p] - curr_from_proc[p];
	    TWO_PHASE_BUF_INCR
	    buf_incr = curr_from_proc[p]+len-done_from_proc[p];
	    curr_from_proc[p] = done_from_proc[p] + size;
	    TWO_PHASE_BUF_COPY
	   }
	  else {
	    size = OMPIO_MIN(len,recv_size[p]-recv_buf_idx[p]);
	    buf_incr = len;
	    curr_from_proc[p] += (unsigned) size;
	    TWO_PHASE_BUF_COPY
	  }
	}
	else {
	  curr_from_proc[p] += (unsigned) len;
	  buf_incr = len;
	  TWO_PHASE_BUF_INCR
	}
      }