/* ADIOI_Calc_my_req() - calculate what portions of the access requests * of this process are located in the file domains of various processes * (including this one) */ void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIO_Offset fd_size, int nprocs, int *count_my_req_procs_ptr, int **count_my_req_per_proc_ptr, ADIOI_Access **my_req_ptr, int **buf_idx_ptr) /* Possibly reconsider if buf_idx's are ok as int's, or should they be aints/offsets? They are used as memory buffer indices so it seems like the 2G limit is in effect */ { int *count_my_req_per_proc, count_my_req_procs, *buf_idx; int i, l, proc; ADIO_Offset fd_len, rem_len, curr_idx, off; ADIOI_Access *my_req; #ifdef AGGREGATION_PROFILE MPE_Log_event (5024, 0, NULL); #endif *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int)); count_my_req_per_proc = *count_my_req_per_proc_ptr; /* count_my_req_per_proc[i] gives the no. of contig. requests of this process in process i's file domain. calloc initializes to zero. I'm allocating memory of size nprocs, so that I can do an MPI_Alltoall later on.*/ buf_idx = (int *) ADIOI_Malloc(nprocs*sizeof(int)); /* buf_idx is relevant only if buftype_is_contig. buf_idx[i] gives the index into user_buf where data received from proc. i should be placed. This allows receives to be done without extra buffer. This can't be done if buftype is not contig. */ /* initialize buf_idx to -1 */ for (i=0; i < nprocs; i++) buf_idx[i] = -1; /* one pass just to calculate how much space to allocate for my_req; * contig_access_count was calculated way back in ADIOI_Calc_my_off_len() */ for (i=0; i < contig_access_count; i++) { /* short circuit offset/len processing if len == 0 * (zero-byte read/write */ if (len_list[i] == 0) continue; off = offset_list[i]; fd_len = len_list[i]; /* note: we set fd_len to be the total size of the access. then * ADIOI_Calc_aggregator() will modify the value to return the * amount that was available from the file domain that holds the * first part of the access. */ proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end); count_my_req_per_proc[proc]++; /* figure out how much data is remaining in the access (i.e. wasn't * part of the file domain that had the starting byte); we'll take * care of this data (if there is any) in the while loop below. */ rem_len = len_list[i] - fd_len; while (rem_len != 0) { off += fd_len; /* point to first remaining byte */ fd_len = rem_len; /* save remaining size, pass to calc */ proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end); count_my_req_per_proc[proc]++; rem_len -= fd_len; /* reduce remaining length by amount from fd */ } } /* now allocate space for my_req, offset, and len */ *my_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs*sizeof(ADIOI_Access)); my_req = *my_req_ptr; count_my_req_procs = 0; for (i=0; i < nprocs; i++) { if (count_my_req_per_proc[i]) { my_req[i].offsets = (ADIO_Offset *) ADIOI_Malloc(count_my_req_per_proc[i] * sizeof(ADIO_Offset)); my_req[i].lens = (int *) ADIOI_Malloc(count_my_req_per_proc[i] * sizeof(int)); count_my_req_procs++; } my_req[i].count = 0; /* will be incremented where needed later */ } /* now fill in my_req */ curr_idx = 0; for (i=0; i<contig_access_count; i++) { /* short circuit offset/len processing if len == 0 * (zero-byte read/write */ if (len_list[i] == 0) continue; off = offset_list[i]; fd_len = len_list[i]; proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end); /* for each separate contiguous access from this process */ if (buf_idx[proc] == -1) { ADIOI_Assert(curr_idx == (int) curr_idx); buf_idx[proc] = (int) curr_idx; } l = my_req[proc].count; curr_idx += fd_len; rem_len = len_list[i] - fd_len; /* store the proc, offset, and len information in an array * of structures, my_req. Each structure contains the * offsets and lengths located in that process's FD, * and the associated count. */ my_req[proc].offsets[l] = off; ADIOI_Assert(fd_len == (int) fd_len); my_req[proc].lens[l] = (int) fd_len; my_req[proc].count++; while (rem_len != 0) { off += fd_len; fd_len = rem_len; proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end); if (buf_idx[proc] == -1) { ADIOI_Assert(curr_idx == (int) curr_idx); buf_idx[proc] = (int) curr_idx; } l = my_req[proc].count; curr_idx += fd_len; rem_len -= fd_len; my_req[proc].offsets[l] = off; ADIOI_Assert(fd_len == (int) fd_len); my_req[proc].lens[l] = (int) fd_len; my_req[proc].count++; } } #ifdef AGG_DEBUG for (i=0; i<nprocs; i++) { if (count_my_req_per_proc[i] > 0) { FPRINTF(stdout, "data needed from %d (count = %d):\n", i, my_req[i].count); for (l=0; l < my_req[i].count; l++) { FPRINTF(stdout, " off[%d] = %lld, len[%d] = %d\n", l, my_req[i].offsets[l], l, my_req[i].lens[l]); } FPRINTF(stdout, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]); } } #endif *count_my_req_procs_ptr = count_my_req_procs; *buf_idx_ptr = buf_idx; #ifdef AGGREGATION_PROFILE MPE_Log_event (5025, 0, NULL); #endif }
void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node * flat_buf, char **send_buf, ADIO_Offset * offset_list, ADIO_Offset * len_list, int *send_size, MPI_Request * requests, int *sent_to_proc, int nprocs, int myrank, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset * fd_start, ADIO_Offset * fd_end, int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent) { /* this function is only called if buftype is not contig */ int i, p, flat_buf_idx; ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size; int jj, n_buftypes; ADIO_Offset off, len, rem_len, user_buf_idx; /* curr_to_proc[p] = amount of data sent to proc. p that has already been accounted for so far done_to_proc[p] = amount of data already sent to proc. p in previous iterations user_buf_idx = current location in user buffer send_buf_idx[p] = current location in send_buf of proc. p */ for (i = 0; i < nprocs; i++) { send_buf_idx[i] = curr_to_proc[i] = 0; done_to_proc[i] = sent_to_proc[i]; } jj = 0; user_buf_idx = flat_buf->indices[0]; flat_buf_idx = 0; n_buftypes = 0; flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype * flat_buf_sz = size of current contiguous component in * flattened buf */ for (i = 0; i < contig_access_count; i++) { off = offset_list[i]; rem_len = len_list[i]; /*this request may span the file domains of more than one process */ while (rem_len != 0) { len = rem_len; /* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no * longer than the single region that processor "p" is responsible * for. */ p = ADIOI_Calc_aggregator(fd, off, min_st_offset, &len, fd_size, fd_start, fd_end); if (send_buf_idx[p] < send_size[p]) { if (curr_to_proc[p] + len > done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { size = MPL_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p] - send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; ADIOI_BUF_INCR ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned) (curr_to_proc[p] + len - done_to_proc[p])); buf_incr = curr_to_proc[p] + len - done_to_proc[p]; ADIOI_Assert((done_to_proc[p] + size) == (unsigned) (done_to_proc[p] + size)); /* ok to cast: bounded by cb buffer size */ curr_to_proc[p] = done_to_proc[p] + (int) size; ADIOI_BUF_COPY} else { size = MPL_MIN(len, send_size[p] - send_buf_idx[p]); buf_incr = len; ADIOI_Assert((curr_to_proc[p] + size) == (unsigned) ((ADIO_Offset) curr_to_proc[p] + size)); curr_to_proc[p] += size; ADIOI_BUF_COPY} if (send_buf_idx[p] == send_size[p]) { MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, myrank + p + 100 * iter, fd->comm, requests + jj); jj++; } } else { ADIOI_Assert((curr_to_proc[p] + len) == (unsigned) ((ADIO_Offset) curr_to_proc[p] + len)); curr_to_proc[p] += len; buf_incr = len; ADIOI_BUF_INCR} } else {
void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **recv_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, MPI_Aint buftype_extent) { /* this function is only called if buftype is not contig */ int i, p, flat_buf_idx; ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size; int n_buftypes; ADIO_Offset off, len, rem_len, user_buf_idx; /* Not sure unsigned is necessary, but it makes the math safer */ unsigned *curr_from_proc, *done_from_proc, *recv_buf_idx; ADIOI_UNREFERENCED_ARG(requests); ADIOI_UNREFERENCED_ARG(statuses); /* curr_from_proc[p] = amount of data recd from proc. p that has already been accounted for so far done_from_proc[p] = amount of data already recd from proc. p and filled into user buffer in previous iterations user_buf_idx = current location in user buffer recv_buf_idx[p] = current location in recv_buf of proc. p */ curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); recv_buf_idx = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); for (i=0; i < nprocs; i++) { recv_buf_idx[i] = curr_from_proc[i] = 0; done_from_proc[i] = recd_from_proc[i]; } user_buf_idx = flat_buf->indices[0]; flat_buf_idx = 0; n_buftypes = 0; flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype flat_buf_sz = size of current contiguous component in flattened buf */ for (i=0; i<contig_access_count; i++) { off = offset_list[i]; rem_len = len_list[i]; /* this request may span the file domains of more than one process */ while (rem_len != 0) { len = rem_len; /* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no * longer than the single region that processor "p" is responsible * for. */ p = ADIOI_Calc_aggregator(fd, off, min_st_offset, &len, fd_size, fd_start, fd_end); if (recv_buf_idx[p] < recv_size[p]) { if (curr_from_proc[p]+len > done_from_proc[p]) { if (done_from_proc[p] > curr_from_proc[p]) { size = ADIOI_MIN(curr_from_proc[p] + len - done_from_proc[p], recv_size[p]-recv_buf_idx[p]); buf_incr = done_from_proc[p] - curr_from_proc[p]; ADIOI_BUF_INCR buf_incr = curr_from_proc[p]+len-done_from_proc[p]; ADIOI_Assert((done_from_proc[p] + size) == (unsigned)((ADIO_Offset)done_from_proc[p] + size)); curr_from_proc[p] = done_from_proc[p] + size; ADIOI_BUF_COPY } else { size = ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]); buf_incr = len; ADIOI_Assert((curr_from_proc[p] + size) == (unsigned)((ADIO_Offset)curr_from_proc[p] + size)); curr_from_proc[p] += (unsigned) size; ADIOI_BUF_COPY } } else { ADIOI_Assert((curr_from_proc[p] + len) == (unsigned)((ADIO_Offset)curr_from_proc[p] + len)); curr_from_proc[p] += (unsigned) len; buf_incr = len; ADIOI_BUF_INCR } }