static int two_phase_fill_send_buffer(mca_io_ompio_file_t *fh, void *buf, Flatlist_node *flat_buf, char **send_buf, struct iovec *offset_length, int *send_size, MPI_Request *requests, int *sent_to_proc, int contig_access_count, OMPI_MPI_OFFSET_TYPE min_st_offset, OMPI_MPI_OFFSET_TYPE fd_size, OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent, int striping_unit, int *aggregator_list){ int i, p, flat_buf_idx; OMPI_MPI_OFFSET_TYPE flat_buf_sz, size_in_buf, buf_incr, size; int jj, n_buftypes, ret=OMPI_SUCCESS; OMPI_MPI_OFFSET_TYPE off, len, rem_len, user_buf_idx; for (i=0; i < fh->f_size; i++) { send_buf_idx[i] = curr_to_proc[i] = 0; done_to_proc[i] = sent_to_proc[i]; } jj = 0; user_buf_idx = flat_buf->indices[0]; flat_buf_idx = 0; n_buftypes = 0; flat_buf_sz = flat_buf->blocklens[0]; for (i=0; i<contig_access_count; i++) { off = (OMPI_MPI_OFFSET_TYPE) (intptr_t)offset_length[i].iov_base; rem_len = (OMPI_MPI_OFFSET_TYPE)offset_length[i].iov_len; while (rem_len != 0) { len = rem_len; p = mca_fcoll_two_phase_calc_aggregator(fh, off, min_st_offset, &len, fd_size, fd_start, fd_end, striping_unit, mca_fcoll_two_phase_num_io_procs, aggregator_list); if (send_buf_idx[p] < send_size[p]) { if (curr_to_proc[p]+len > done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { size = OMPIO_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; TWO_PHASE_BUF_INCR buf_incr = curr_to_proc[p] + len - done_to_proc[p]; curr_to_proc[p] = done_to_proc[p] + size; TWO_PHASE_BUF_COPY } else { size = OMPIO_MIN(len,send_size[p]-send_buf_idx[p]); buf_incr = len; curr_to_proc[p] += size; TWO_PHASE_BUF_COPY } if (send_buf_idx[p] == send_size[p]) { ret = MCA_PML_CALL(isend(send_buf[p], send_size[p], MPI_BYTE, p, fh->f_rank+p+100*iter, MCA_PML_BASE_SEND_STANDARD, fh->f_comm, requests+jj)); if ( OMPI_SUCCESS != ret ){ return ret; } jj++; } } else { curr_to_proc[p] += len; buf_incr = len; TWO_PHASE_BUF_INCR } }
int mca_fcoll_two_phase_calc_my_requests (mca_io_ompio_file_t *fh, struct iovec *offset_len, int contig_access_count, OMPI_MPI_OFFSET_TYPE min_st_offset, OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, OMPI_MPI_OFFSET_TYPE fd_size, int *count_my_req_procs_ptr, int **count_my_req_per_proc_ptr, mca_io_ompio_access_array_t **my_req_ptr, size_t **buf_indices, int striping_unit, int num_aggregators, int *aggregator_list) { int *count_my_req_per_proc, count_my_req_procs; size_t *buf_idx = NULL; int i, l, proc; OMPI_MPI_OFFSET_TYPE fd_len, rem_len, curr_idx, off; mca_io_ompio_access_array_t *my_req; *count_my_req_per_proc_ptr = (int*)malloc(fh->f_size*sizeof(int)); if ( NULL == count_my_req_per_proc_ptr ){ return OMPI_ERR_OUT_OF_RESOURCE; } count_my_req_per_proc = *count_my_req_per_proc_ptr; for (i=0;i<fh->f_size;i++){ count_my_req_per_proc[i] = 0; } buf_idx = (size_t *) malloc (fh->f_size * sizeof(size_t)); if ( NULL == buf_idx ){ return OMPI_ERR_OUT_OF_RESOURCE; } for (i=0; i < fh->f_size; i++) buf_idx[i] = -1; for (i=0;i<contig_access_count; i++){ if (offset_len[i].iov_len==0) continue; off = (OMPI_MPI_OFFSET_TYPE)(intptr_t)offset_len[i].iov_base; fd_len = (OMPI_MPI_OFFSET_TYPE)offset_len[i].iov_len; proc = mca_fcoll_two_phase_calc_aggregator(fh, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end, striping_unit, num_aggregators,aggregator_list); count_my_req_per_proc[proc]++; rem_len = offset_len[i].iov_len - fd_len; while (rem_len != 0) { off += fd_len; /* point to first remaining byte */ fd_len = rem_len; /* save remaining size, pass to calc */ proc = mca_fcoll_two_phase_calc_aggregator(fh, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end, striping_unit, num_aggregators, aggregator_list); count_my_req_per_proc[proc]++; rem_len -= fd_len; /* reduce remaining length by amount from fd */ } } /* printf("%d: fh->f_size : %d\n", fh->f_rank,fh->f_size);*/ *my_req_ptr = (mca_io_ompio_access_array_t *) malloc (fh->f_size * sizeof(mca_io_ompio_access_array_t)); if ( NULL == *my_req_ptr ) { return OMPI_ERR_OUT_OF_RESOURCE; } my_req = *my_req_ptr; count_my_req_procs = 0; for (i = 0; i < fh->f_size; i++){ if(count_my_req_per_proc[i]) { my_req[i].offsets = (OMPI_MPI_OFFSET_TYPE *) malloc(count_my_req_per_proc[i] * sizeof(OMPI_MPI_OFFSET_TYPE)); if ( NULL == my_req[i].offsets ) { return OMPI_ERR_OUT_OF_RESOURCE; } my_req[i].lens = (int *) malloc(count_my_req_per_proc[i] * sizeof(int)); if ( NULL == my_req[i].lens ) { return OMPI_ERR_OUT_OF_RESOURCE; } count_my_req_procs++; } my_req[i].count = 0; } curr_idx = 0; for (i=0; i<contig_access_count; i++) { if ((int)offset_len[i].iov_len == 0) continue; off = (OMPI_MPI_OFFSET_TYPE)(intptr_t)offset_len[i].iov_base; fd_len = (OMPI_MPI_OFFSET_TYPE)offset_len[i].iov_len; proc = mca_fcoll_two_phase_calc_aggregator(fh, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end, striping_unit, num_aggregators, aggregator_list); if (buf_idx[proc] == (size_t) -1){ buf_idx[proc] = (int) curr_idx; } l = my_req[proc].count; curr_idx += fd_len; rem_len = offset_len[i].iov_len - fd_len; my_req[proc].offsets[l] = off; my_req[proc].lens[l] = (int)fd_len; my_req[proc].count++; while (rem_len != 0) { off += fd_len; fd_len = rem_len; proc = mca_fcoll_two_phase_calc_aggregator(fh, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end, striping_unit, num_aggregators, aggregator_list); if (buf_idx[proc] == (size_t) -1){ buf_idx[proc] = (int) curr_idx; } l = my_req[proc].count; curr_idx += fd_len; rem_len -= fd_len; my_req[proc].offsets[l] = off; my_req[proc].lens[l] = (int) fd_len; my_req[proc].count++; } } #if 0 for (i=0; i<fh->f_size; i++) { if (count_my_req_per_proc[i] > 0) { fprintf(stdout, "data needed from %d (count = %d):\n", i, my_req[i].count); for (l=0; l < my_req[i].count; l++) { fprintf(stdout, " %d: off[%d] = %lld, len[%d] = %d\n", fh->f_rank, l, my_req[i].offsets[l], l, my_req[i].lens[l]); } fprintf(stdout, "%d: buf_idx[%d] = 0x%x\n", fh->f_rank, i, buf_idx[i]); } } #endif *count_my_req_procs_ptr = count_my_req_procs; *buf_indices = buf_idx; return OMPI_SUCCESS; }
static void two_phase_fill_user_buffer(mca_io_ompio_file_t *fh, void *buf, Flatlist_node *flat_buf, char **recv_buf, struct iovec *offset_length, unsigned *recv_size, MPI_Request *requests, int *recd_from_proc, int contig_access_count, OMPI_MPI_OFFSET_TYPE min_st_offset, OMPI_MPI_OFFSET_TYPE fd_size, OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, MPI_Aint buftype_extent, int striping_unit, int *aggregator_list){ int i = 0, p = 0, flat_buf_idx = 0; OMPI_MPI_OFFSET_TYPE flat_buf_sz = 0, size_in_buf = 0, buf_incr = 0, size = 0; int n_buftypes = 0; OMPI_MPI_OFFSET_TYPE off=0, len=0, rem_len=0, user_buf_idx=0; unsigned *curr_from_proc=NULL, *done_from_proc=NULL, *recv_buf_idx=NULL; curr_from_proc = (unsigned *) malloc (fh->f_size * sizeof(unsigned)); done_from_proc = (unsigned *) malloc (fh->f_size * sizeof(unsigned)); recv_buf_idx = (unsigned *) malloc (fh->f_size * sizeof(unsigned)); for (i=0; i < fh->f_size; i++) { recv_buf_idx[i] = curr_from_proc[i] = 0; done_from_proc[i] = recd_from_proc[i]; } user_buf_idx = flat_buf->indices[0]; flat_buf_idx = 0; n_buftypes = 0; flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype flat_buf_sz = size of current contiguous component in flattened buf */ for (i=0; i<contig_access_count; i++) { off = (OMPI_MPI_OFFSET_TYPE)(intptr_t)offset_length[i].iov_base; rem_len = (OMPI_MPI_OFFSET_TYPE)offset_length[i].iov_len; /* this request may span the file domains of more than one process */ while (rem_len != 0) { len = rem_len; /* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no * longer than the single region that processor "p" is responsible * for. */ p = mca_fcoll_two_phase_calc_aggregator(fh, off, min_st_offset, &len, fd_size, fd_start, fd_end, striping_unit, mca_fcoll_two_phase_num_io_procs, aggregator_list); if (recv_buf_idx[p] < recv_size[p]) { if (curr_from_proc[p]+len > done_from_proc[p]) { if (done_from_proc[p] > curr_from_proc[p]) { size = OMPIO_MIN(curr_from_proc[p] + len - done_from_proc[p], recv_size[p]-recv_buf_idx[p]); buf_incr = done_from_proc[p] - curr_from_proc[p]; TWO_PHASE_BUF_INCR buf_incr = curr_from_proc[p]+len-done_from_proc[p]; curr_from_proc[p] = done_from_proc[p] + size; TWO_PHASE_BUF_COPY } else { size = OMPIO_MIN(len,recv_size[p]-recv_buf_idx[p]); buf_incr = len; curr_from_proc[p] += (unsigned) size; TWO_PHASE_BUF_COPY } } else { curr_from_proc[p] += (unsigned) len; buf_incr = len; TWO_PHASE_BUF_INCR } }