예제 #1
0
void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset)
{
    ADIOI_Flatlist_node *flat_file;
    int i, flag;
    MPI_Count filetype_size, etype_size;
    int filetype_is_contig;
    MPI_Aint filetype_extent;
    ADIO_Offset disp, byte_offset, sum=0, size_in_file, n_filetypes, frd_size;
    
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
    etype_size = fd->etype_size;

    if (filetype_is_contig) *offset = (fd->fp_ind - fd->disp)/etype_size;
    else {
	flat_file = ADIOI_Flatten_and_find(fd->filetype);

	MPI_Type_size_x(fd->filetype, &filetype_size);
	MPI_Type_extent(fd->filetype, &filetype_extent);

	disp = fd->disp;
	byte_offset = fd->fp_ind;
	n_filetypes = -1;
	flag = 0;
	while (!flag) {
	    sum = 0;
	    n_filetypes++;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (disp + flat_file->indices[i] + 
	     	    n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] 
		    >= byte_offset) {
		    frd_size = disp + flat_file->indices[i] + 
			n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent
			+ flat_file->blocklens[i] - byte_offset;
		    sum -= frd_size;
		    flag = 1;
		    break;
		}
	    }
	}
	size_in_file = n_filetypes * (ADIO_Offset)filetype_size + sum;
	*offset = size_in_file/etype_size;
    }
}
예제 #2
0
void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset * disp)
{
    ADIOI_Flatlist_node *flat_file;
    int i;
    ADIO_Offset n_filetypes, etype_in_filetype, sum, abs_off_in_filetype = 0, size_in_filetype;
    MPI_Count n_etypes_in_filetype, filetype_size, etype_size;
    int filetype_is_contig;
    MPI_Aint filetype_extent;

    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
    etype_size = fd->etype_size;

    if (filetype_is_contig)
        *disp = fd->disp + etype_size * offset;
    else {
        flat_file = ADIOI_Flatten_and_find(fd->filetype);

        MPI_Type_size_x(fd->filetype, &filetype_size);
        n_etypes_in_filetype = filetype_size / etype_size;
        n_filetypes = offset / n_etypes_in_filetype;
        etype_in_filetype = offset % n_etypes_in_filetype;
        size_in_filetype = etype_in_filetype * etype_size;

        sum = 0;
        for (i = 0; i < flat_file->count; i++) {
            sum += flat_file->blocklens[i];
            if (sum > size_in_filetype) {
                abs_off_in_filetype = flat_file->indices[i] +
                    size_in_filetype - (sum - flat_file->blocklens[i]);
                break;
            }
        }

        /* abs. offset in bytes in the file */
        MPI_Type_extent(fd->filetype, &filetype_extent);
        *disp =
            fd->disp + n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent +
            abs_off_in_filetype;
    }
}
예제 #3
0
/* If successful, error_code is set to MPI_SUCCESS.  Otherwise an error
 * code is created and returned in error_code.
 */
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
                                 datatype, int nprocs,
                                 int myrank,
                                 ADIOI_Access
                                 * others_req, ADIO_Offset * offset_list,
                                 ADIO_Offset * len_list, int contig_access_count,
                                 ADIO_Offset min_st_offset, ADIO_Offset fd_size,
                                 ADIO_Offset * fd_start, ADIO_Offset * fd_end,
                                 MPI_Aint * buf_idx, int *error_code)
{
/* Send data to appropriate processes and write in sizes of no more
   than coll_bufsize.
   The idea is to reduce the amount of extra memory required for
   collective I/O. If all data were written all at once, which is much
   easier, it would require temp space more than the size of user_buf,
   which is often unacceptable. For example, to write a distributed
   array to a file, where each local array is 8Mbytes, requiring
   at least another 8Mbytes of temp space is unacceptable. */

    /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets */
    ADIO_Offset size = 0;
    int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig;
    ADIO_Offset st_loc = -1, end_loc = -1, off, done, req_off;
    char *write_buf = NULL;
    int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size;
    int *partial_recv, *sent_to_proc, *start_pos, flag;
    int *send_buf_idx, *curr_to_proc, *done_to_proc;
    MPI_Status status;
    ADIOI_Flatlist_node *flat_buf = NULL;
    MPI_Aint buftype_extent;
    int info_flag, coll_bufsize;
    char *value;
    static char myname[] = "ADIOI_EXCH_AND_WRITE";

    *error_code = MPI_SUCCESS;  /* changed below if error */
    /* only I/O errors are currently reported */

/* calculate the number of writes of size coll_bufsize
   to be done by each process and the max among all processes.
   That gives the no. of communication phases as well. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
    ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag);
    coll_bufsize = atoi(value);
    ADIOI_Free(value);


    for (i = 0; i < nprocs; i++) {
        if (others_req[i].count) {
            st_loc = others_req[i].offsets[0];
            end_loc = others_req[i].offsets[0];
            break;
        }
    }

    for (i = 0; i < nprocs; i++)
        for (j = 0; j < others_req[i].count; j++) {
            st_loc = MPL_MIN(st_loc, others_req[i].offsets[j]);
            end_loc = MPL_MAX(end_loc, (others_req[i].offsets[j]
                                        + others_req[i].lens[j] - 1));
        }

/* ntimes=ceiling_div(end_loc - st_loc + 1, coll_bufsize)*/

    ntimes = (int) ((end_loc - st_loc + coll_bufsize) / coll_bufsize);

    if ((st_loc == -1) && (end_loc == -1)) {
        ntimes = 0;     /* this process does no writing. */
    }

    MPI_Allreduce(&ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX, fd->comm);

    write_buf = fd->io_buf;

    curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs * 10, sizeof(int));
    /* its use is explained below. calloc initializes to 0. */

    count = curr_offlen_ptr + nprocs;
    /* to store count of how many off-len pairs per proc are satisfied
     * in an iteration. */

    partial_recv = count + nprocs;
    /* if only a portion of the last off-len pair is recd. from a process
     * in a particular iteration, the length recd. is stored here.
     * calloc initializes to 0. */

    send_size = partial_recv + nprocs;
    /* total size of data to be sent to each proc. in an iteration.
     * Of size nprocs so that I can use MPI_Alltoall later. */

    recv_size = send_size + nprocs;
    /* total size of data to be recd. from each proc. in an iteration. */

    sent_to_proc = recv_size + nprocs;
    /* amount of data sent to each proc so far. Used in
     * ADIOI_Fill_send_buffer. initialized to 0 here. */

    send_buf_idx = sent_to_proc + nprocs;
    curr_to_proc = send_buf_idx + nprocs;
    done_to_proc = curr_to_proc + nprocs;
    /* Above three are used in ADIOI_Fill_send_buffer */

    start_pos = done_to_proc + nprocs;
    /* used to store the starting value of curr_offlen_ptr[i] in
     * this iteration */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    if (!buftype_is_contig) {
        flat_buf = ADIOI_Flatten_and_find(datatype);
    }
    MPI_Type_extent(datatype, &buftype_extent);


/* I need to check if there are any outstanding nonblocking writes to
   the file, which could potentially interfere with the writes taking
   place in this collective write call. Since this is not likely to be
   common, let me do the simplest thing possible here: Each process
   completes all pending nonblocking operations before completing. */

    /*ADIOI_Complete_async(error_code);
     * if (*error_code != MPI_SUCCESS) return;
     * MPI_Barrier(fd->comm);
     */

    done = 0;
    off = st_loc;

    for (m = 0; m < ntimes; m++) {
        /* go through all others_req and check which will be satisfied
         * by the current write */

        /* Note that MPI guarantees that displacements in filetypes are in
         * monotonically nondecreasing order and that, for writes, the
         * filetypes cannot specify overlapping regions in the file. This
         * simplifies implementation a bit compared to reads. */

        /* off = start offset in the file for the data to be written in
         * this iteration
         * size = size of data written (bytes) corresponding to off
         * req_off = off in file for a particular contiguous request
         * minus what was satisfied in previous iteration
         * req_size = size corresponding to req_off */

        /* first calculate what should be communicated */

        for (i = 0; i < nprocs; i++)
            count[i] = recv_size[i] = 0;

        size = MPL_MIN((unsigned) coll_bufsize, end_loc - st_loc + 1 - done);

        for (i = 0; i < nprocs; i++) {
            if (others_req[i].count) {
                start_pos[i] = curr_offlen_ptr[i];
                for (j = curr_offlen_ptr[i]; j < others_req[i].count; j++) {
                    if (partial_recv[i]) {
                        /* this request may have been partially
                         * satisfied in the previous iteration. */
                        req_off = others_req[i].offsets[j] + partial_recv[i];
                        req_len = others_req[i].lens[j] - partial_recv[i];
                        partial_recv[i] = 0;
                        /* modify the off-len pair to reflect this change */
                        others_req[i].offsets[j] = req_off;
                        others_req[i].lens[j] = req_len;
                    } else {
                        req_off = others_req[i].offsets[j];
                        req_len = others_req[i].lens[j];
                    }
                    if (req_off < off + size) {
                        count[i]++;
                        ADIOI_Assert((((ADIO_Offset) (uintptr_t) write_buf) + req_off - off) ==
                                     (ADIO_Offset) (uintptr_t) (write_buf + req_off - off));
                        MPI_Address(write_buf + req_off - off, &(others_req[i].mem_ptrs[j]));
                        ADIOI_Assert((off + size - req_off) == (int) (off + size - req_off));
                        recv_size[i] += (int) (MPL_MIN(off + size - req_off, (unsigned) req_len));

                        if (off + size - req_off < (unsigned) req_len) {
                            partial_recv[i] = (int) (off + size - req_off);

                            /* --BEGIN ERROR HANDLING-- */
                            if ((j + 1 < others_req[i].count) &&
                                (others_req[i].offsets[j + 1] < off + size)) {
                                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                                   MPIR_ERR_RECOVERABLE,
                                                                   myname,
                                                                   __LINE__,
                                                                   MPI_ERR_ARG,
                                                                   "Filetype specifies overlapping write regions (which is illegal according to the MPI-2 specification)",
                                                                   0);
                                /* allow to continue since additional
                                 * communication might have to occur
                                 */
                            }
                            /* --END ERROR HANDLING-- */
                            break;
                        }
                    } else
                        break;
                }
                curr_offlen_ptr[i] = j;
            }
        }

        ADIOI_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
                              len_list, send_size, recv_size, off, size, count,
                              start_pos, partial_recv,
                              sent_to_proc, nprocs, myrank,
                              buftype_is_contig, contig_access_count,
                              min_st_offset, fd_size, fd_start, fd_end,
                              others_req, send_buf_idx, curr_to_proc,
                              done_to_proc, &hole, m, buftype_extent, buf_idx, error_code);
        if (*error_code != MPI_SUCCESS)
            return;

        flag = 0;
        for (i = 0; i < nprocs; i++)
            if (count[i])
                flag = 1;

        if (flag) {
            ADIOI_Assert(size == (int) size);
            ADIO_WriteContig(fd, write_buf, (int) size, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                             off, &status, error_code);
            if (*error_code != MPI_SUCCESS)
                return;
        }

        off += size;
        done += size;
    }

    for (i = 0; i < nprocs; i++)
        count[i] = recv_size[i] = 0;
    for (m = ntimes; m < max_ntimes; m++) {
        ADIOI_Assert(size == (int) size);
        /* nothing to recv, but check for send. */
        ADIOI_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
                              len_list, send_size, recv_size, off, (int) size, count,
                              start_pos, partial_recv,
                              sent_to_proc, nprocs, myrank,
                              buftype_is_contig, contig_access_count,
                              min_st_offset, fd_size, fd_start, fd_end,
                              others_req, send_buf_idx,
                              curr_to_proc, done_to_proc, &hole, m,
                              buftype_extent, buf_idx, error_code);
        if (*error_code != MPI_SUCCESS)
            return;
    }

    ADIOI_Free(curr_offlen_ptr);
}
예제 #4
0
/* If successful, error_code is set to MPI_SUCCESS.  Otherwise an error
 * code is created and returned in error_code.
 */
static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, const void *buf,
					MPI_Datatype datatype, int nprocs,
					int myrank, ADIOI_Access *others_req,
                                        ADIOI_Access *my_req,
					ADIO_Offset *offset_list,
                                        ADIO_Offset *len_list, 
					int contig_access_count,
                                        int *striping_info, int **buf_idx,
                                        int *error_code)
{
    /* Send data to appropriate processes and write in sizes of no more
     * than lustre stripe_size.
     * The idea is to reduce the amount of extra memory required for
     * collective I/O. If all data were written all at once, which is much
     * easier, it would require temp space more than the size of user_buf,
     * which is often unacceptable. For example, to write a distributed
     * array to a file, where each local array is 8Mbytes, requiring
     * at least another 8Mbytes of temp space is unacceptable.
     */

    int hole, i, j, m, flag, ntimes = 1 , max_ntimes, buftype_is_contig;
    ADIO_Offset st_loc = -1, end_loc = -1, min_st_loc, max_end_loc;
    ADIO_Offset off, req_off, send_off, iter_st_off, *off_list;
    ADIO_Offset max_size, step_size = 0;
    int real_size, req_len, send_len;
    int *recv_curr_offlen_ptr, *recv_count, *recv_size;
    int *send_curr_offlen_ptr, *send_size;
    int *sent_to_proc, *recv_start_pos;
    int *send_buf_idx, *curr_to_proc, *done_to_proc;
    int *this_buf_idx;
    char *write_buf = NULL;
    MPI_Status status;
    ADIOI_Flatlist_node *flat_buf = NULL;
    MPI_Aint buftype_extent;
    int stripe_size = striping_info[0], avail_cb_nodes = striping_info[2];
    int data_sieving = 0;
    ADIO_Offset *srt_off = NULL;
    int *srt_len = NULL;
    int srt_num = 0;
    ADIO_Offset block_offset;
    int block_len;

    *error_code = MPI_SUCCESS;	/* changed below if error */
    /* only I/O errors are currently reported */

    /* calculate the number of writes of stripe size to be done.
     * That gives the no. of communication phases as well.
     * Note:
     *   Because we redistribute data in stripe-contiguous pattern for Lustre,
     *   each process has the same no. of communication phases.
     */

    for (i = 0; i < nprocs; i++) {
	if (others_req[i].count) {
	    st_loc = others_req[i].offsets[0];
	    end_loc = others_req[i].offsets[0];
	    break;
	}
    }
    for (i = 0; i < nprocs; i++) {
	for (j = 0; j < others_req[i].count; j++) {
	    st_loc = MPL_MIN(st_loc, others_req[i].offsets[j]);
	    end_loc = MPL_MAX(end_loc, (others_req[i].offsets[j] +
                                          others_req[i].lens[j] - 1));
	}
    }
    /* this process does no writing. */
    if ((st_loc == -1) && (end_loc == -1))
	ntimes = 0;
    MPI_Allreduce(&end_loc, &max_end_loc, 1, MPI_LONG_LONG_INT, MPI_MAX, fd->comm);
    /* avoid min_st_loc be -1 */
    if (st_loc == -1)
        st_loc = max_end_loc;
    MPI_Allreduce(&st_loc, &min_st_loc, 1, MPI_LONG_LONG_INT, MPI_MIN, fd->comm);
    /* align downward */
    min_st_loc -= min_st_loc % (ADIO_Offset)stripe_size;

    /* Each time, only avail_cb_nodes number of IO clients perform IO,
     * so, step_size=avail_cb_nodes*stripe_size IO will be performed at most,
     * and ntimes=whole_file_portion/step_size
     */
    step_size = (ADIO_Offset) avail_cb_nodes * stripe_size;
    max_ntimes = (max_end_loc - min_st_loc + 1) / step_size
        + (((max_end_loc - min_st_loc + 1) % step_size) ? 1 : 0);
/*     max_ntimes = (int)((max_end_loc - min_st_loc) / step_size + 1); */
    if (ntimes)
	write_buf = (char *) ADIOI_Malloc(stripe_size);

    /* calculate the start offset for each iteration */
    off_list = (ADIO_Offset *) ADIOI_Malloc(max_ntimes * sizeof(ADIO_Offset));
    for (m = 0; m < max_ntimes; m ++)
        off_list[m] = max_end_loc;
    for (i = 0; i < nprocs; i++) {
        for (j = 0; j < others_req[i].count; j ++) {
            req_off = others_req[i].offsets[j];
            m = (int)((req_off - min_st_loc) / step_size);
            off_list[m] = MPL_MIN(off_list[m], req_off);
        }
    }

    recv_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    send_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* their use is explained below. calloc initializes to 0. */

    recv_count = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* to store count of how many off-len pairs per proc are satisfied
       in an iteration. */

    send_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be sent to each proc. in an iteration.
       Of size nprocs so that I can use MPI_Alltoall later. */

    recv_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be recd. from each proc. in an iteration. */

    sent_to_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* amount of data sent to each proc so far. Used in
       ADIOI_Fill_send_buffer. initialized to 0 here. */

    send_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    curr_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    done_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* Above three are used in ADIOI_Fill_send_buffer */

    this_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));

    recv_start_pos = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* used to store the starting value of recv_curr_offlen_ptr[i] in
       this iteration */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    if (!buftype_is_contig) {
	flat_buf = ADIOI_Flatten_and_find(datatype);
    }
    MPI_Type_extent(datatype, &buftype_extent);
    /* I need to check if there are any outstanding nonblocking writes to
     * the file, which could potentially interfere with the writes taking
     * place in this collective write call. Since this is not likely to be
     * common, let me do the simplest thing possible here: Each process
     * completes all pending nonblocking operations before completing.
     */
    /*ADIOI_Complete_async(error_code);
    if (*error_code != MPI_SUCCESS) return;
    MPI_Barrier(fd->comm);
    */

    iter_st_off = min_st_loc;

    /* Although we have recognized the data according to OST index,
     * a read-modify-write will be done if there is a hole between the data.
     * For example: if blocksize=60, xfersize=30 and stripe_size=100,
     * then rank0 will collect data [0, 30] and [60, 90] then write. There
     * is a hole in [30, 60], which will cause a read-modify-write in [0, 90].
     *
     * To reduce its impact on the performance, we can disable data sieving
     * by hint "ds_in_coll".
     */
    /* check the hint for data sieving */
    data_sieving = fd->hints->fs_hints.lustre.ds_in_coll;

    for (m = 0; m < max_ntimes; m++) {
	/* go through all others_req and my_req to check which will be received
         * and sent in this iteration.
         */

	/* Note that MPI guarantees that displacements in filetypes are in
	   monotonically nondecreasing order and that, for writes, the
	   filetypes cannot specify overlapping regions in the file. This
	   simplifies implementation a bit compared to reads. */

	/*
           off         = start offset in the file for the data to be written in
                         this iteration
           iter_st_off = start offset of this iteration
           real_size   = size of data written (bytes) corresponding to off
           max_size    = possible maximum size of data written in this iteration
           req_off     = offset in the file for a particular contiguous request minus
                         what was satisfied in previous iteration
           send_off    = offset the request needed by other processes in this iteration
           req_len     = size corresponding to req_off
           send_len    = size corresponding to send_off
         */

	/* first calculate what should be communicated */
	for (i = 0; i < nprocs; i++)
	    recv_count[i] = recv_size[i] = send_size[i] = 0;

        off = off_list[m];
        max_size = MPL_MIN(step_size, max_end_loc - iter_st_off + 1);
        real_size = (int) MPL_MIN((off / stripe_size + 1) * stripe_size -
                                    off,
                                    end_loc - off + 1);

	for (i = 0; i < nprocs; i++) {
            if (my_req[i].count) {
                this_buf_idx[i] = buf_idx[i][send_curr_offlen_ptr[i]];
                for (j = send_curr_offlen_ptr[i]; j < my_req[i].count; j++) {
                    send_off = my_req[i].offsets[j];
                    send_len = my_req[i].lens[j];
                    if (send_off < iter_st_off + max_size) {
                        send_size[i] += send_len;
                    } else {
                        break;
                    }
                }
                send_curr_offlen_ptr[i] = j;
            }
	    if (others_req[i].count) {
		recv_start_pos[i] = recv_curr_offlen_ptr[i];
		for (j = recv_curr_offlen_ptr[i]; j < others_req[i].count; j++) {
                    req_off = others_req[i].offsets[j];
                    req_len = others_req[i].lens[j];
		    if (req_off < iter_st_off + max_size) {
			recv_count[i]++;
                        ADIOI_Assert((((ADIO_Offset)(MPIU_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIU_Upint)(write_buf+req_off-off));
			MPI_Address(write_buf + req_off - off,
				    &(others_req[i].mem_ptrs[j]));
                        recv_size[i] += req_len;
		    } else {
			break;
                    }
		}
		recv_curr_offlen_ptr[i] = j;
	    }
	}
        /* use variable "hole" to pass data_sieving flag into W_Exchange_data */
        hole = data_sieving;
	ADIOI_LUSTRE_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
                                     len_list, send_size, recv_size, off, real_size,
                                     recv_count, recv_start_pos,
                                     sent_to_proc, nprocs, myrank,
                                     buftype_is_contig, contig_access_count,
                                     striping_info, others_req, send_buf_idx,
                                     curr_to_proc, done_to_proc, &hole, m,
                                  buftype_extent, this_buf_idx,
                                  &srt_off, &srt_len, &srt_num, error_code);

	if (*error_code != MPI_SUCCESS)
            goto over;

	flag = 0;
	for (i = 0; i < nprocs; i++)
	    if (recv_count[i]) {
		flag = 1;
		break;
	    }
	if (flag) {
            /* check whether to do data sieving */
            if(data_sieving == ADIOI_HINT_ENABLE) {
	        ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
			         ADIO_EXPLICIT_OFFSET, off, &status,
			         error_code);
            } else {
                /* if there is no hole, write data in one time;
                 * otherwise, write data in several times */
                if (!hole) {
                    ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
                                     ADIO_EXPLICIT_OFFSET, off, &status,
                                     error_code);
                } else {
                    block_offset = -1;
                    block_len = 0;
                    for (i = 0; i < srt_num; ++i) {
                        if (srt_off[i] < off + real_size &&
                            srt_off[i] >= off) {
                            if (block_offset == -1) {
                                block_offset = srt_off[i];
                                block_len = srt_len[i];
                            } else {
                                if (srt_off[i] == block_offset + block_len) {
                                    block_len += srt_len[i];
                                } else {
                                    ADIO_WriteContig(fd,
                                                     write_buf + block_offset - off,
                                                     block_len,
                                                     MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                                                     block_offset, &status,
                                                     error_code);
	                            if (*error_code != MPI_SUCCESS)
		                        goto over;
                                    block_offset = srt_off[i];
                                    block_len = srt_len[i];
                                }
                            }
                        }
                    }
                    if (block_offset != -1) {
                        ADIO_WriteContig(fd,
                                         write_buf + block_offset - off,
                                         block_len,
                                         MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                                         block_offset, &status,
                                         error_code);
                        if (*error_code != MPI_SUCCESS)
                            goto over;
                    }
                }
            }
	    if (*error_code != MPI_SUCCESS)
		goto over;
	}
        iter_st_off += max_size;
    }
over:
    if (srt_off)
        ADIOI_Free(srt_off);
    if (srt_len)
        ADIOI_Free(srt_len);
    if (ntimes)
	ADIOI_Free(write_buf);
    ADIOI_Free(recv_curr_offlen_ptr);
    ADIOI_Free(send_curr_offlen_ptr);
    ADIOI_Free(recv_count);
    ADIOI_Free(send_size);
    ADIOI_Free(recv_size);
    ADIOI_Free(sent_to_proc);
    ADIOI_Free(recv_start_pos);
    ADIOI_Free(send_buf_idx);
    ADIOI_Free(curr_to_proc);
    ADIOI_Free(done_to_proc);
    ADIOI_Free(this_buf_idx);
    ADIOI_Free(off_list);
}
예제 #5
0
void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Status * status, int
                            *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err = -1, bwr_size, st_index = 0;
    ADIO_Offset i_offset, sum, size_in_filetype;
    ADIO_Offset num, size, n_etypes_in_filetype;
    MPI_Count bufsize;
    ADIO_Offset n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype = 0;
    int req_len;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset = 0, writebuf_off, start_off;
    char *writebuf = NULL, *value;
    int st_n_filetypes, writebuf_len, write_sz;
    ADIO_Offset fwr_size = 0, new_fwr_size, st_fwr_size;
    int new_bwr_size, err_flag = 0, info_flag, max_bufsize;
    static char myname[] = "ADIOI_NFS_WRITESTRIDED";

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if (!filetype_size) {
#ifdef HAVE_STATUS_SET_BYTES
        MPIR_Status_set_bytes(status, datatype, 0);
#endif
        *error_code = MPI_SUCCESS;
        return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

/* get max_bufsize from the info object. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
    ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */

        flat_buf = ADIOI_Flatten_and_find(datatype);

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : fd->disp + etype_size * offset;

        start_off = off;
        end_offset = off + bufsize - 1;
        writebuf_off = off;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (int) (MPL_MIN(max_bufsize, end_offset - writebuf_off + 1));

/* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1);

        for (j = 0; j < count; j++)
            for (i = 0; i < flat_buf->count; i++) {
                userbuf_off = j * buftype_extent + flat_buf->indices[i];
                req_off = off;
                req_len = flat_buf->blocklens[i];
                ADIOI_BUFFERED_WRITE_WITHOUT_READ off += flat_buf->blocklens[i];
            }

        /* write the buffer out finally */
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_lseek_a, 0, NULL);
#endif
        lseek(fd->fd_sys, writebuf_off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_lseek_b, 0, NULL);
#endif
        if (!(fd->atomicity))
            ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
#endif
        err = write(fd->fd_sys, writebuf, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
#endif
        if (!(fd->atomicity))
            ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        if (err == -1)
            err_flag = 1;

        if (fd->atomicity)
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1);

        if (file_ptr_type == ADIO_INDIVIDUAL)
            fd->fp_ind = off;
        if (err_flag) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        } else
            *error_code = MPI_SUCCESS;
    }

    else {      /* noncontiguous in file */

        flat_file = ADIOI_Flatten_and_find(fd->filetype);
        disp = fd->disp;

        if (file_ptr_type == ADIO_INDIVIDUAL) {
            /* Wei-keng reworked type processing to be a bit more efficient */
            offset = fd->fp_ind - disp;
            n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
            offset -= (ADIO_Offset) n_filetypes *filetype_extent;
            /* now offset is local to this extent */

            /* find the block where offset is located, skip blocklens[i]==0 */
            for (i = 0; i < flat_file->count; i++) {
                ADIO_Offset dist;
                if (flat_file->blocklens[i] == 0)
                    continue;
                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
                /* fwr_size is from offset to the end of block i */
                if (dist == 0) {
                    i++;
                    offset = flat_file->indices[i];
                    fwr_size = flat_file->blocklens[i];
                    break;
                }
                if (dist > 0) {
                    fwr_size = dist;
                    break;
                }
            }
            st_index = i;       /* starting index in flat_file->indices[] */
            offset += disp + (ADIO_Offset) n_filetypes *filetype_extent;
        } else {
            n_etypes_in_filetype = filetype_size / etype_size;
            n_filetypes = offset / n_etypes_in_filetype;
            etype_in_filetype = offset % n_etypes_in_filetype;
            size_in_filetype = etype_in_filetype * etype_size;

            sum = 0;
            for (i = 0; i < flat_file->count; i++) {
                sum += flat_file->blocklens[i];
                if (sum > size_in_filetype) {
                    st_index = i;
                    fwr_size = sum - size_in_filetype;
                    abs_off_in_filetype = flat_file->indices[i] +
                        size_in_filetype - (sum - flat_file->blocklens[i]);
                    break;
                }
            }

            /* abs. offset in bytes in the file */
            offset = disp + (ADIO_Offset) n_filetypes *filetype_extent + abs_off_in_filetype;
        }

        start_off = offset;
        /* Wei-keng Liao:write request is within single flat_file contig block */
        /* this could happen, for example, with subarray types that are
         * actually fairly contiguous */
        if (buftype_is_contig && bufsize <= fwr_size) {
            /* though MPI api has an integer 'count' parameter, derived
             * datatypes might describe more bytes than can fit into an integer.
             * if we've made it this far, we can pass a count of original
             * datatypes, instead of a count of bytes (which might overflow)
             * Other WriteContig calls in this path are operating on data
             * sieving buffer */
            ADIO_WriteContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET,
                             offset, status, error_code);

            if (file_ptr_type == ADIO_INDIVIDUAL) {
                /* update MPI-IO file pointer to point to the first byte
                 * that can be accessed in the fileview. */
                fd->fp_ind = offset + bufsize;
                if (bufsize == fwr_size) {
                    do {
                        st_index++;
                        if (st_index == flat_file->count) {
                            st_index = 0;
                            n_filetypes++;
                        }
                    } while (flat_file->blocklens[st_index] == 0);
                    fd->fp_ind = disp + flat_file->indices[st_index]
                    + (ADIO_Offset) n_filetypes *filetype_extent;
                }
            }
            fd->fp_sys_posn = -1;       /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
            MPIR_Status_set_bytes(status, datatype, bufsize);
#endif
            goto fn_exit;
        }

        /* Calculate end_offset, the last byte-offset that will be accessed.
         * e.g., if start_offset=0 and 100 bytes to be write, end_offset=99 */

        st_fwr_size = fwr_size;
        st_n_filetypes = n_filetypes;
        i_offset = 0;
        j = st_index;
        off = offset;
        fwr_size = MPL_MIN(st_fwr_size, bufsize);
        while (i_offset < bufsize) {
            i_offset += fwr_size;
            end_offset = off + fwr_size - 1;

            j = (j + 1) % flat_file->count;
            n_filetypes += (j == 0) ? 1 : 0;
            while (flat_file->blocklens[j] == 0) {
                j = (j + 1) % flat_file->count;
                n_filetypes += (j == 0) ? 1 : 0;
            }

            off = disp + flat_file->indices[j] + n_filetypes * (ADIO_Offset) filetype_extent;
            fwr_size = MPL_MIN(flat_file->blocklens[j], bufsize - i_offset);
        }

/* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1);

        /* initial read for the read-modify-write */
        writebuf_off = offset;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        memset(writebuf, -1, max_bufsize);
        writebuf_len = (int) (MPL_MIN(max_bufsize, end_offset - writebuf_off + 1));
        if (!(fd->atomicity))
            ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_lseek_a, 0, NULL);
#endif
        lseek(fd->fd_sys, writebuf_off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_lseek_b, 0, NULL);
#endif
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_read_a, 0, NULL);
#endif
        err = read(fd->fd_sys, writebuf, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_read_b, 0, NULL);
#endif
        if (err == -1) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_IO,
                                               "ADIOI_NFS_WriteStrided: ROMIO tries to optimize this access by doing a read-modify-write, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.",
                                               0);
            goto fn_exit;
        }

        if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

            i_offset = 0;
            j = st_index;
            off = offset;
            n_filetypes = st_n_filetypes;
            fwr_size = MPL_MIN(st_fwr_size, bufsize);
            while (i_offset < bufsize) {
                if (fwr_size) {
                    /* TYPE_UB and TYPE_LB can result in
                     * fwr_size = 0. save system call in such cases */
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                     * err = write(fd->fd_sys, ((char *) buf) + i, fwr_size); */

                    req_off = off;
                    req_len = fwr_size;
                    userbuf_off = i_offset;
                ADIOI_BUFFERED_WRITE}
                i_offset += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                    flat_file->blocklens[j] + n_filetypes * (ADIO_Offset) filetype_extent)
                    off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                 * no more I/O needed. off is incremented by fwr_size. */
                else {
                    j = (j + 1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j] == 0) {
                        j = (j + 1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
                    }
                    off = disp + flat_file->indices[j] +
                        n_filetypes * (ADIO_Offset) filetype_extent;
                    fwr_size = MPL_MIN(flat_file->blocklens[j], bufsize - i_offset);
                }
            }
        } else {
예제 #6
0
/* ADIOI_Exchange_file_views - Sends all the aggregators the file
 * views and file view states of the clients.  It fills in the
 * client_file_view_state_arr for the aggregators and the
 * my_mem_view_state for the client.  It also initializes the
 * agg_file_view_state for all clients, which is the view for each
 * aggregator of a client's filetype. */
void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type,
                           ADIO_File fd, int count,
                           MPI_Datatype datatype, ADIO_Offset off,
                           view_state * my_mem_view_state_arr,
                           view_state * agg_file_view_state_arr,
                           view_state * client_file_view_state_arr)
{
    /* Convert my own fileview to an ADIOI_Flattened type and a
     * disp. MPI_Alltoall the count of ADIOI_Flatlist nodes.
     * MPI_Isend/Irecv the block_lens, indices of ADIOI_Flatlist node
     * to/from each of the aggregators with the rest of the file view
     * state. */

    int i = -1, j = -1;
    amount_and_extra_data_t *send_count_arr = NULL;
    amount_and_extra_data_t *recv_count_arr = NULL;
    int send_req_arr_sz = 0;
    int recv_req_arr_sz = 0;
    MPI_Request *send_req_arr = NULL, *recv_req_arr = NULL;
    MPI_Status *statuses = NULL;
    ADIO_Offset disp_off_sz_ext_typesz[6];
    MPI_Aint memtype_extent, filetype_extent;
    int ret = -1;

    /* parameters for datatypes */
    ADIOI_Flatlist_node *flat_mem_p = NULL, *flat_file_p = NULL;
    MPI_Count memtype_sz = -1;
    int memtype_is_contig = -1;
    ADIO_Offset filetype_sz = -1;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5014, 0, NULL);
#endif
    /* The memtype will be freed after the call.  The filetype will be
     * freed in the close and should have been flattened in the file
     * view. */
    MPI_Type_size_x(datatype, &memtype_sz);
    MPI_Type_extent(datatype, &memtype_extent);
    if (memtype_sz == memtype_extent) {
        memtype_is_contig = 1;
        flat_mem_p = ADIOI_Flatten_and_find(datatype);
        flat_mem_p->blocklens[0] = memtype_sz * count;
    } else {
        flat_mem_p = ADIOI_Flatten_and_find(datatype);
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(fd->filetype, &filetype_sz);
    flat_file_p = ADIOI_Flatten_and_find(fd->filetype);
    if (filetype_extent == filetype_sz) {
        flat_file_p->blocklens[0] = memtype_sz * count;
        filetype_extent = memtype_sz * count;
        filetype_sz = filetype_extent;
    }

    disp_off_sz_ext_typesz[0] = fd->fp_ind;
    disp_off_sz_ext_typesz[1] = fd->disp;
    disp_off_sz_ext_typesz[2] = off;
    disp_off_sz_ext_typesz[3] = memtype_sz * count;
    disp_off_sz_ext_typesz[4] = (ADIO_Offset) filetype_extent;
    disp_off_sz_ext_typesz[5] = (ADIO_Offset) filetype_sz;

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
        send_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
    } else {
        send_count_arr = ADIOI_Calloc(fd->hints->cb_nodes, sizeof(amount_and_extra_data_t));

        /* only aggregators receive data */
        if (fd->is_agg) {
            recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
            recv_req_arr = ADIOI_Malloc(nprocs * sizeof(MPI_Request));
            for (i = 0; i < nprocs; i++)
                MPI_Irecv(&recv_count_arr[i], sizeof(amount_and_extra_data_t),
                          MPI_BYTE, i, COUNT_EXCH, fd->comm, &recv_req_arr[i]);
        }

        /* only send data to aggregators */
        send_req_arr = ADIOI_Calloc(fd->hints->cb_nodes, sizeof(MPI_Request));
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            send_count_arr[i].count = flat_file_p->count;
            send_count_arr[i].fp_ind = disp_off_sz_ext_typesz[0];
            send_count_arr[i].disp = disp_off_sz_ext_typesz[1];
            send_count_arr[i].byte_off = disp_off_sz_ext_typesz[2];
            send_count_arr[i].sz = disp_off_sz_ext_typesz[3];
            send_count_arr[i].ext = disp_off_sz_ext_typesz[4];
            send_count_arr[i].type_sz = disp_off_sz_ext_typesz[5];
            MPI_Isend(&send_count_arr[i], sizeof(amount_and_extra_data_t),
                      MPI_BYTE, fd->hints->ranklist[i], COUNT_EXCH, fd->comm, &send_req_arr[i]);
        }
    }


    /* Every client has to build mem and file view_states for each aggregator.
     * We initialize their values here.  and we also initialize
     * send_count_arr */

    if (memtype_is_contig) {
        /* if memory is contigous, we now replace memtype_sz and
         * memtype_extent with the full access size */
        memtype_sz *= count;
        memtype_extent = memtype_sz;
    }

    for (i = 0; i < fd->hints->cb_nodes; i++) {
        int tmp_agg_idx = fd->hints->ranklist[i];
        memset(&(my_mem_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
        my_mem_view_state_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
        my_mem_view_state_arr[tmp_agg_idx].ext = (ADIO_Offset) memtype_extent;
        my_mem_view_state_arr[tmp_agg_idx].type_sz = (ADIO_Offset) memtype_sz;
        my_mem_view_state_arr[tmp_agg_idx].flat_type_p = flat_mem_p;
        ADIOI_init_view_state(file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), REAL_OFF);

        memset(&(agg_file_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
        agg_file_view_state_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0];
        agg_file_view_state_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1];
        agg_file_view_state_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2];
        agg_file_view_state_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
        agg_file_view_state_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4];
        agg_file_view_state_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5];
        agg_file_view_state_arr[tmp_agg_idx].flat_type_p = flat_file_p;

        ADIOI_init_view_state(file_ptr_type, 1, &(agg_file_view_state_arr[tmp_agg_idx]), TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, 1, &(agg_file_view_state_arr[tmp_agg_idx]), REAL_OFF);

        if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
            send_count_arr[tmp_agg_idx].count = flat_file_p->count;
            send_count_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0];
            send_count_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1];
            send_count_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2];
            send_count_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
            send_count_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4];
            send_count_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5];
        }
    }

#ifdef DEBUG2
    fprintf(stderr, "my own flattened memtype: ");
    ADIOI_Print_flatlist_node(flat_mem_p);
    fprintf(stderr, "my own flattened filetype: ");
    ADIOI_Print_flatlist_node(flat_file_p);
#endif

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        ret = MPI_Alltoall(send_count_arr, sizeof(amount_and_extra_data_t),
                           MPI_BYTE,
                           recv_count_arr, sizeof(amount_and_extra_data_t), MPI_BYTE, fd->comm);
        if (ret != MPI_SUCCESS) {
            fprintf(stderr, "ADIOI_Exchange_file_views: MPI_Alltoall failed " "with error %d", ret);
            return;
        }
    } else {
#ifdef MPI_STATUSES_IGNORE
        statuses = MPI_STATUSES_IGNORE;
#else
        statuses = (MPI_Status *) ADIOI_Malloc(1 + nprocs * sizeof(MPI_Status));
#endif
        if (fd->is_agg) {
            MPI_Waitall(nprocs, recv_req_arr, statuses);
            ADIOI_Free(recv_req_arr);
        }
        MPI_Waitall(fd->hints->cb_nodes, send_req_arr, statuses);
#ifndef MPI_STATUSES_IGNORE
        ADIOI_Free(statuses);
#endif
        ADIOI_Free(send_req_arr);
    }
#ifdef DEBUG2
    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        fprintf(stderr, "send_count_arr:");
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
        }
        fprintf(stderr, "\n");
        fprintf(stderr, "recv_count_arr:");
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
        }
        fprintf(stderr, "\n");
    } else {
        fprintf(stderr, "send_count_arr:");
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
        }
        fprintf(stderr, "\n");
        if (fd->is_agg) {
            fprintf(stderr, "recv_count_arr:");
            for (i = 0; i < nprocs; i++) {
                fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
            }
            fprintf(stderr, "\n");
        }
    }
#endif

    if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) {
        for (i = 0; i < fd->hints->cb_nodes; i++)
            if (send_count_arr[i].count > 0)
                send_req_arr_sz++;
    }
    /* Figure out how many counts to send/recv */
    for (i = 0; i < nprocs; i++) {
        if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
            if (send_count_arr[i].count > 0)
                send_req_arr_sz++;
        }
        /* Only aggregators should recv */
        if (fd->is_agg) {
            if (recv_count_arr[i].count > 0) {
                if ((client_file_view_state_arr[i].flat_type_p =
                     (ADIOI_Flatlist_node *) ADIOI_Malloc(sizeof(ADIOI_Flatlist_node))) == NULL) {
                    fprintf(stderr, "ADIOI_Exchange_file_views: malloc " "flat_type_p failed\n");
                }
                client_file_view_state_arr[i].flat_type_p->count = recv_count_arr[i].count;
                client_file_view_state_arr[i].flat_type_p->indices =
                    (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset));
                client_file_view_state_arr[i].flat_type_p->blocklens =
                    (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset));

                /* Copy the extra data out of the stuff we Alltoall'd */
                memcpy(&client_file_view_state_arr[i].fp_ind,
                       &recv_count_arr[i].fp_ind, 6 * sizeof(ADIO_Offset));

                recv_req_arr_sz++;
            }
        }
    }

    /* Since ADIOI_Calloc may do other things we add the +1
     * to avoid a 0-size malloc */
    send_req_arr = (MPI_Request *) ADIOI_Calloc(2 * (send_req_arr_sz) + 1, sizeof(MPI_Request));

    j = 0;
    if (recv_req_arr_sz > 0) {
        assert(fd->is_agg);
        recv_req_arr = (MPI_Request *) ADIOI_Calloc(2 * (recv_req_arr_sz), sizeof(MPI_Request));
        for (i = 0; i < nprocs; i++) {
            if (recv_count_arr[i].count > 0) {
                MPI_Irecv(client_file_view_state_arr[i].flat_type_p->indices,
                          recv_count_arr[i].count, ADIO_OFFSET, i,
                          INDICES, fd->comm, &recv_req_arr[j]);
                j++;
                MPI_Irecv(client_file_view_state_arr[i].flat_type_p->blocklens,
                          recv_count_arr[i].count, ADIO_OFFSET, i,
                          BLOCK_LENS, fd->comm, &recv_req_arr[j]);
                j++;
            }
        }
    }

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        j = 0;
        for (i = 0; i < nprocs; i++) {
            if (send_count_arr[i].count > 0) {
                MPI_Isend(flat_file_p->indices,
                          send_count_arr[i].count, ADIO_OFFSET, i,
                          INDICES, fd->comm, &send_req_arr[j]);
                j++;
                MPI_Isend(flat_file_p->blocklens,
                          send_count_arr[i].count, ADIO_OFFSET, i,
                          BLOCK_LENS, fd->comm, &send_req_arr[j]);
                j++;
            }
        }
    } else {
        j = 0;
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            if (send_count_arr[i].count > 0) {
                MPI_Isend(flat_file_p->indices,
                          send_count_arr[i].count, ADIO_OFFSET,
                          fd->hints->ranklist[i], INDICES, fd->comm, &send_req_arr[j]);
                j++;
                MPI_Isend(flat_file_p->blocklens,
                          send_count_arr[i].count, ADIO_OFFSET,
                          fd->hints->ranklist[i], BLOCK_LENS, fd->comm, &send_req_arr[j]);
                j++;
            }
        }
    }

    /* Since ADIOI_Malloc may do other things we add the +1
     * to avoid a 0-size malloc */
#ifdef MPI_STATUSES_IGNORE
    statuses = MPI_STATUSES_IGNORE;
#else
    statuses = (MPI_Status *)
        ADIOI_Malloc(1 + 2 * MPL_MAX(send_req_arr_sz, recv_req_arr_sz)
                     * sizeof(MPI_Status));
#endif

    if (send_req_arr_sz > 0) {
        MPI_Waitall(2 * send_req_arr_sz, send_req_arr, statuses);
        ADIOI_Free(send_count_arr);
        ADIOI_Free(send_req_arr);
    }
    if (recv_req_arr_sz > 0) {
        MPI_Waitall(2 * recv_req_arr_sz, recv_req_arr, statuses);
        ADIOI_Free(recv_count_arr);
        ADIOI_Free(recv_req_arr);
    }
#ifndef MPI_STATUSES_IGNORE
    ADIOI_Free(statuses);
#endif

    if (fd->is_agg == 1) {
        ADIOI_init_view_state(file_ptr_type, nprocs, client_file_view_state_arr, TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, nprocs, client_file_view_state_arr, REAL_OFF);
    }
#ifdef DEBUG
    if (fd->is_agg == 1) {
        ADIOI_Flatlist_node *fr_node_p;
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "client_file_view_state_arr[%d]=(fp_ind=%Ld,"
                    "disp=%Ld,byte_off=%Ld,sz=%Ld,ext=%Ld\n", i,
                    client_file_view_state_arr[i].fp_ind,
                    client_file_view_state_arr[i].disp,
                    client_file_view_state_arr[i].byte_off,
                    client_file_view_state_arr[i].sz, client_file_view_state_arr[i].ext);
        }

        fr_node_p = ADIOI_Flatten_and_find(fd->file_realm_types[fd->my - cb_nodes_index]);
        assert(fr_node_p != NULL);

        fprintf(stderr, "my file realm (idx=%d,st_off=%Ld) ",
                fd->my_cb_nodes_index, fd->file_realm_st_offs[fd->my_cb_nodes_index]);
        ADIOI_Print_flatlist_node(fr_node_p);
    }
#endif

#ifdef DEBUG2
    if (fd->is_agg == 1) {
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "client_file_view_state_arr[%d]: ", i);
            ADIOI_Print_flatlist_node(client_file_view_state_arr[i].flat_type_p);
        }
    }
#endif
#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5015, 0, NULL);
#endif
}
예제 #7
0
ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, int whence, int *error_code)
{
/* implemented for whence=SEEK_SET only. SEEK_CUR and SEEK_END must
   be converted to the equivalent with SEEK_SET before calling this
   routine. */
/* offset is in units of etype relative to the filetype */

    ADIO_Offset off;
    ADIOI_Flatlist_node *flat_file;

    int i;
    ADIO_Offset n_etypes_in_filetype, n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype = 0;
    ADIO_Offset size_in_filetype, sum;
    MPI_Count filetype_size, etype_size;
    int filetype_is_contig;
    MPI_Aint filetype_extent;

    MPL_UNREFERENCED_ARG(whence);

    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
    etype_size = fd->etype_size;

    if (filetype_is_contig)
        off = fd->disp + etype_size * offset;
    else {
        flat_file = ADIOI_Flatten_and_find(fd->filetype);

        MPI_Type_extent(fd->filetype, &filetype_extent);
        MPI_Type_size_x(fd->filetype, &filetype_size);
        if (!filetype_size) {
            /* Since offset relative to the filetype size, we can't
             * do compute the offset when that result is zero.
             * Return zero for the offset for now */
            *error_code = MPI_SUCCESS;
            return 0;
        }

        n_etypes_in_filetype = filetype_size / etype_size;
        n_filetypes = offset / n_etypes_in_filetype;
        etype_in_filetype = offset % n_etypes_in_filetype;
        size_in_filetype = etype_in_filetype * etype_size;

        sum = 0;
        for (i = 0; i < flat_file->count; i++) {
            sum += flat_file->blocklens[i];
            if (sum > size_in_filetype) {
                abs_off_in_filetype = flat_file->indices[i] +
                    size_in_filetype - (sum - flat_file->blocklens[i]);
                break;
            }
        }

        /* abs. offset in bytes in the file */
        off = fd->disp + n_filetypes * filetype_extent + abs_off_in_filetype;
    }

/*
 * we used to call lseek here and update both fp_ind and fp_sys_posn, but now
 * we don't seek and only update fp_ind (ROMIO's idea of where we are in the
 * file).  We leave the system file descriptor and fp_sys_posn alone.
 * The fs-specifc ReadContig and WriteContig will seek to the correct place in
 * the file before reading/writing if the 'offset' parameter doesn't match
 * fp_sys_posn
 */
    fd->fp_ind = off;

    *error_code = MPI_SUCCESS;

    return off;
}
예제 #8
0
/* Some of this code is from the old Calc_my_off_len() function.
 * It calculates the 1st and last byte accessed */
void ADIOI_Calc_bounds(ADIO_File fd, int count, MPI_Datatype buftype,
                       int file_ptr_type, ADIO_Offset offset,
                       ADIO_Offset * st_offset, ADIO_Offset * end_offset)
{
    MPI_Count filetype_size, buftype_size, etype_size;
    int sum;
    MPI_Aint filetype_extent;
    ADIO_Offset total_io;
    int filetype_is_contig;
    ADIO_Offset i, remainder;
    ADIOI_Flatlist_node *flat_file;

    ADIO_Offset st_byte_off, end_byte_off;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5000, 0, NULL);
#endif

    if (!count) {
        /* Max signed positive value for ADIO_Offset
         * (arch. dependent?).  is there a better way? */
        memset(st_offset, 8, sizeof(ADIO_Offset));
        *st_offset = *st_offset / 2;
        *end_offset = -1;
        return;
    }

    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    ADIOI_Assert(filetype_size != 0);
    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(fd->etype, &etype_size);
    MPI_Type_size_x(buftype, &buftype_size);

    total_io = buftype_size * count;

    if (filetype_is_contig) {
        if (file_ptr_type == ADIO_INDIVIDUAL)
            st_byte_off = fd->fp_ind;
        else
            st_byte_off = fd->disp + etype_size * offset;

        end_byte_off = st_byte_off + total_io - 1;
    } else {
        flat_file = ADIOI_Flatten_and_find(fd->filetype);

        /* we need to take care of some weirdness since fd->fp_ind
         * points at an accessible byte in file.  the first accessible
         * byte in the file is not necessarily the first byte, nor is
         * it necessarily the first off/len pair in the filetype. */
        if (file_ptr_type == ADIO_INDIVIDUAL) {
            st_byte_off = fd->fp_ind;
            /* find end byte of I/O (may be in middle of an etype) */

            /* calculate byte starting point of first filetype */
            end_byte_off = (ADIO_Offset)
                ((fd->fp_ind - fd->disp - flat_file->indices[0]) /
                 filetype_extent) * filetype_extent + fd->disp + flat_file->indices[0];
            /* number of absolute bytes into first filetype */
            remainder = (fd->fp_ind - fd->disp - flat_file->indices[0]) % filetype_extent;
            if (remainder) {
                /* find how many file viewable bytes into first filetype */
                sum = 0;
                for (i = 0; i < flat_file->count; i++) {
                    sum += flat_file->blocklens[i];
                    if ((flat_file->indices[i] - flat_file->indices[0] +
                         flat_file->blocklens[i]) >= remainder) {
                        sum -= (flat_file->blocklens[i] - (sum - remainder));
                        break;
                    }
                }
                total_io += sum;
            }
            /* byte starting point of last filetype */
            end_byte_off += (total_io - 1) / filetype_size * filetype_extent;
            /* number of bytes into last filetype */
            remainder = total_io % filetype_size;
            if (!remainder) {
                for (i = flat_file->count - 1; i >= 0; i--) {
                    if (flat_file->blocklens[i])
                        break;
                }
                ADIOI_Assert(i > -1);
                end_byte_off += flat_file->indices[i] + flat_file->blocklens[i] - 1;
                end_byte_off -= flat_file->indices[0];
            } else {
                sum = 0;
                for (i = 0; i < flat_file->count; i++) {
                    sum += flat_file->blocklens[i];
                    if (sum >= remainder) {
                        end_byte_off += flat_file->indices[i] +
                            flat_file->blocklens[i] - sum + remainder - 1;
                        break;
                    }
                }
                end_byte_off -= flat_file->indices[0];
            }
        } else {
            /* find starting byte of I/O (must be aligned with an etype) */
            /* byte starting point of starting filetype */
            st_byte_off = fd->disp + ((offset * etype_size) / filetype_size) * filetype_extent;
            /* number of file viewable bytes into starting filetype */
            remainder = (etype_size * offset) % filetype_size;

            sum = 0;
            for (i = 0; i < flat_file->count; i++) {
                sum += flat_file->blocklens[i];
                if (sum >= remainder) {
                    if (sum == remainder)
                        st_byte_off += flat_file->indices[i + 1];
                    else
                        st_byte_off += flat_file->indices[i] +
                            flat_file->blocklens[i] - sum + remainder;
                    break;
                }
            }

            /* find end byte of I/O (may be in middle of an etype) */
            /* byte starting point of last filetype */
            end_byte_off = fd->disp + (offset * etype_size + total_io) /
                filetype_size * filetype_extent;
            /* number of bytes into last filetype */
            remainder = (offset * etype_size + total_io) % filetype_size;

            if (!remainder) {
                /* the last non-zero off/len pair */
                for (i = flat_file->count - 1; i >= 0; i--) {
                    if (flat_file->blocklens[i])
                        break;
                }
                ADIOI_Assert(i >= 0);
                /* back up a whole filetype, and put back up to the
                 * last byte of the last non-zero offlen pair */
                /* end_byte_off = (end_byte_off - filetype_extent) +
                 * flat_file->indices[i] +
                 * flat_file->blocklens[i] - 1; */
                /* equivalent of above commented out equation */
                end_byte_off -= filetype_extent - flat_file->indices[i] -
                    flat_file->blocklens[i] + 1;
            } else {
                sum = 0;
                for (i = 0; i < flat_file->count; i++) {
                    sum += flat_file->blocklens[i];
                    if (sum >= remainder) {
                        end_byte_off += flat_file->indices[i] +
                            flat_file->blocklens[i] - sum + remainder - 1;
                        break;
                    }
                }
            }
        }
    }

    *st_offset = st_byte_off;
    *end_offset = end_byte_off;
#ifdef DEBUG
    printf("st_offset = %lld\nend_offset = %lld\n", st_byte_off, end_byte_off);
#endif
#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5001, 0, NULL);
#endif
}
예제 #9
0
/* Copied from ADIOI_PVFS2_OldReadStrided.  It would be good to have fewer
 * copies of this code... */
void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count,
			     MPI_Datatype datatype, int file_ptr_type,
			     ADIO_Offset offset, ADIO_Status *status, int
			     *error_code)
{
    /* offset is in units of etype relative to the filetype. */
    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k,  brd_size, frd_size=0, st_index=0;
    int sum, n_etypes_in_filetype, size_in_filetype;
    MPI_Count bufsize;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp, start_off, initial_off;
    int flag, st_frd_size, st_n_filetypes;

    size_t mem_list_count, file_list_count;
    void ** mem_offsets;
    uint64_t *file_offsets;
    size_t *mem_lengths;
    uint64_t *file_lengths;
    int total_blks_to_read;

    int max_mem_list, max_file_list;

    int b_blks_read;
    int f_data_read;
    int size_read=0, n_read_lists, extra_blks;

    int end_brd_size, end_frd_size;
    int start_k, start_j, new_file_read, new_buffer_read;
    int start_mem_offset;
    ADIOI_ZOIDFS_object * zoidfs_obj_ptr;
    int err_flag=0;
    MPI_Offset total_bytes_read = 0;
    static char myname[] = "ADIOI_ZOIDFS_ReadStrided";

    /* note: I don't know what zoidfs will do if you pass it a super-long list,
     * so let's keep with the PVFS limit for now */
#define MAX_ARRAY_SIZE 64

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    /* the HDF5 tests showed a bug in this list processing code (see many many
     * lines down below).  We added a workaround, but common HDF5 file types
     * are actually contiguous and do not need the expensive workarond */
    if (!filetype_is_contig) {
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	if (flat_file->count == 1 && !buftype_is_contig)
	    filetype_is_contig = 1;
    }

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;
    
    zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *)fd->fs_ptr;

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */
        uint64_t file_offsets;
	uint64_t file_lengths;

	flat_buf = ADIOI_Flatten_and_find(datatype);

	off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
	    fd->disp + etype_size * offset;

	file_list_count = 1;
	file_offsets = off;
	file_lengths = 0;
	total_blks_to_read = count*flat_buf->count;
	b_blks_read = 0;

	/* allocate arrays according to max usage */
	if (total_blks_to_read > MAX_ARRAY_SIZE)
	    mem_list_count = MAX_ARRAY_SIZE;
	else mem_list_count = total_blks_to_read;
	mem_offsets = (void*)ADIOI_Malloc(mem_list_count*sizeof(void*));
	mem_lengths = (size_t*)ADIOI_Malloc(mem_list_count*sizeof(size_t));

	/* TODO: CHECK RESULTS OF MEMORY ALLOCATION */

	j = 0;
	/* step through each block in memory, filling memory arrays */
	while (b_blks_read < total_blks_to_read) {
	    for (i=0; i<flat_buf->count; i++) {
		mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = 
		    buf + j*buftype_extent + flat_buf->indices[i];
		mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = 
		    flat_buf->blocklens[i];
		file_lengths += flat_buf->blocklens[i];
		b_blks_read++;
		if (!(b_blks_read % MAX_ARRAY_SIZE) ||
		    (b_blks_read == total_blks_to_read)) {

		    /* in the case of the last read list call,
		       adjust mem_list_count */
		    if (b_blks_read == total_blks_to_read) {
		        mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
			/* in case last read list call fills max arrays */
			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
		    }
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
		    NO_STALE(err_flag, fd, zoidfs_obj_ptr,
				    zoidfs_read(zoidfs_obj_ptr,
					    mem_list_count,
					    mem_offsets, mem_lengths,
					    1, &file_offsets, &file_lengths, ZOIDFS_NO_OP_HINT));
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
		    /* --BEGIN ERROR HANDLING-- */
		    if (err_flag != ZFS_OK) {
			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
							   MPIR_ERR_RECOVERABLE,
							   myname, __LINE__,
							   ADIOI_ZOIDFS_error_convert(err_flag),
							   "Error in zoidfs_read", 0);
			goto error_state;
		    }
		    total_bytes_read += file_lengths;
		    /* --END ERROR HANDLING-- */
		  
		    /* in the case of error or the last read list call, 
		     * leave here */
		    if (err_flag || b_blks_read == total_blks_to_read) break;

		    file_offsets += file_lengths;
		    file_lengths = 0;
		} 
	    } /* for (i=0; i<flat_buf->count; i++) */
	    j++;
	} /* while (b_blks_read < total_blks_to_read) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);

        if (file_ptr_type == ADIO_INDIVIDUAL) 
	    fd->fp_ind += total_bytes_read;

	fd->fp_sys_posn = -1;  /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, bufsize);
	/* This isa temporary way of filling in status.  The right way is to
	   keep tracke of how much data was actually read adn placed in buf
	   by ADIOI_BUFFERED_READ. */
#endif
	ADIOI_Delete_flattened(datatype);

	return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* know file is noncontiguous from above */
    /* noncontiguous in file */

    /* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;
    initial_off = offset;


    /* for each case - ADIO_Individual pointer or explicit, find the file
       offset in bytes (offset), n_filetypes (how many filetypes into
       file to start), frd_size (remaining amount of data in present
       file block), and st_index (start point in terms of blocks in
       starting filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
	n_filetypes = -1;
	flag = 0;
	while (!flag) {
	    n_filetypes++;
	    for (i=0; i<flat_file->count; i++) {
	        if (disp + flat_file->indices[i] + 
		    ((ADIO_Offset) n_filetypes)*filetype_extent +
		    flat_file->blocklens[i]  >= offset) {
		    st_index = i;
		    frd_size = disp + flat_file->indices[i] + 
				    ((ADIO_Offset) n_filetypes)*filetype_extent
				      + flat_file->blocklens[i] - offset;
		    flag = 1;
		    break;
		}
	    }
	} /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
	n_filetypes = (int) (offset / n_etypes_in_filetype);
	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	size_in_filetype = etype_in_filetype * etype_size;
	
	sum = 0;
	for (i=0; i<flat_file->count; i++) {
	    sum += flat_file->blocklens[i];
	    if (sum > size_in_filetype) {
	        st_index = i;
		frd_size = sum - size_in_filetype;
		abs_off_in_filetype = flat_file->indices[i] +
		    size_in_filetype - (sum - flat_file->blocklens[i]);
		break;
	    }
	}
	
	/* abs. offset in bytes in the file */
	offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + 
	    abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    start_off = offset;
    st_frd_size = frd_size;
    st_n_filetypes = n_filetypes;
    
    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	/* only one memory off-len pair, so no array here */
        size_t mem_lengths;
	size_t mem_offsets;
	
	i = 0;
	j = st_index;
	n_filetypes = st_n_filetypes;
	
	mem_list_count = 1;
	
	/* determine how many blocks in file to read */
	f_data_read = ADIOI_MIN(st_frd_size, bufsize);
	total_blks_to_read = 1;
	if (j < (flat_file->count-1)) j++;
	else {
	    j = 0;
	    n_filetypes++;
	}
	while (f_data_read < bufsize) {
	    f_data_read += flat_file->blocklens[j];
	    total_blks_to_read++;
	    if (j<(flat_file->count-1)) j++;
	    else j = 0;	
	}
      
	j = st_index;
	n_filetypes = st_n_filetypes;
	n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
	extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
	
	mem_offsets = (size_t)buf;
	mem_lengths = 0;
	
	/* if at least one full readlist, allocate file arrays
	   at max array size and don't free until very end */
	if (n_read_lists) {
	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int64_t));
	    file_lengths = (uint64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(uint64_t));
	}
	/* if there's no full readlist allocate file arrays according
	   to needed size (extra_blks) */
	else {
	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
						  sizeof(int64_t));
	    file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks*
						  sizeof(uint64_t));
	}
	
	/* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
	for (i=0; i<n_read_lists; i++) {
	    file_list_count = MAX_ARRAY_SIZE;
	    if(!i) {
	        file_offsets[0] = offset;
		file_lengths[0] = st_frd_size;
		mem_lengths = st_frd_size;
	    }
	    for (k=0; k<MAX_ARRAY_SIZE; k++) {
	        if (i || k) {
		    file_offsets[k] = disp + 
			((ADIO_Offset)n_filetypes)*filetype_extent
		      + flat_file->indices[j];
		    file_lengths[k] = flat_file->blocklens[j];
		    mem_lengths += file_lengths[k];
		}
		if (j<(flat_file->count - 1)) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
	    /* --END ERROR HANDLING-- */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
	    NO_STALE(err_flag, fd, zoidfs_obj_ptr,
			    zoidfs_read(zoidfs_obj_ptr,
				    1, buf, &mem_lengths,
				    file_list_count,
				    file_offsets, file_lengths, ZOIDFS_NO_OP_HINT));
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != ZFS_OK) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_ZOIDFS_error_convert(err_flag),
						   "Error in zoidfs_read", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDING-- */
	    total_bytes_read += mem_lengths;

	    mem_offsets += mem_lengths;
	    mem_lengths = 0;
	} /* for (i=0; i<n_read_lists; i++) */

	/* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
	if (extra_blks) {
	    file_list_count = extra_blks;
	    if(!i) {
	        file_offsets[0] = offset;
		file_lengths[0] = ADIOI_MIN(st_frd_size, bufsize);
	    }
	    for (k=0; k<extra_blks; k++) {
	        if(i || k) {
		    file_offsets[k] = disp + 
			((ADIO_Offset)n_filetypes)*filetype_extent +
			flat_file->indices[j];
		    if (k == (extra_blks - 1)) {
		        file_lengths[k] = bufsize - mem_lengths
			  - mem_offsets + (size_t)buf;
		    }
		    else file_lengths[k] = flat_file->blocklens[j];
		} /* if(i || k) */
		mem_lengths += file_lengths[k];
		if (j<(flat_file->count - 1)) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (k=0; k<extra_blks; k++) */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
	    NO_STALE(err_flag, fd, zoidfs_obj_ptr,
			    zoidfs_read(zoidfs_obj_ptr, 1,
				   (void **)&mem_offsets,
				   &mem_lengths,
				   file_list_count,
				   file_offsets, file_lengths, ZOIDFS_NO_OP_HINT));
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_ZOIDFS_error_convert(err_flag),
						   "Error in zoidfs_read", 0);		
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */
	    total_bytes_read += mem_lengths;
	}
    }
    else {
/* noncontiguous in memory as well as in file */
      
	flat_buf = ADIOI_Flatten_and_find(datatype);

	size_read = 0;
	n_filetypes = st_n_filetypes;
	frd_size = st_frd_size;
	brd_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;
	max_mem_list = 0;
	max_file_list = 0;

	/* run through and file max_file_list and max_mem_list so that you 
	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
	   if possible */

	while (size_read < bufsize) {
	    k = start_k;
	    new_buffer_read = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_read < bufsize-size_read)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_read + flat_buf->blocklens[k] + 
			size_read) > bufsize) {
		        end_brd_size = new_buffer_read + 
			    flat_buf->blocklens[k] - (bufsize - size_read);
			new_buffer_read = bufsize - size_read;
		    }
		    else {
		        new_buffer_read += flat_buf->blocklens[k];
			end_brd_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (brd_size > (bufsize - size_read)) {
		        new_buffer_read = bufsize - size_read;
			brd_size = new_buffer_read;
		    }
		    else new_buffer_read = brd_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_read < bufsize-size_read)) */
	    j = start_j;
	    new_file_read = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_read < new_buffer_read)) {
	        if(file_list_count) {
		    if((new_file_read + flat_file->blocklens[j]) > 
		       new_buffer_read) {
		        end_frd_size = new_buffer_read - new_file_read;
			new_file_read = new_buffer_read;
			j--;
		    }
		    else {
		        new_file_read += flat_file->blocklens[j];
			end_frd_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (frd_size > new_buffer_read) {
		        new_file_read = new_buffer_read;
			frd_size = new_file_read;
		    }
		    else new_file_read = frd_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_read < new_buffer_read) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_read = 0;
		    mem_list_count = 0;
		    while (new_buffer_read < new_file_read) {
		        if(mem_list_count) {
			    if((new_buffer_read + flat_buf->blocklens[k]) >
			       new_file_read) {
			        end_brd_size = new_file_read - new_buffer_read;
				new_buffer_read = new_file_read;
				k--;
			    }
			    else {
			        new_buffer_read += flat_buf->blocklens[k];
				end_brd_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_read = brd_size;
			    if (brd_size > (bufsize - size_read)) {
			        new_buffer_read = bufsize - size_read;
				brd_size = new_buffer_read;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_read < new_file_read) */
		} /* if ((new_file_read < new_buffer_read) && (file_list_count
		     == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_read < bufsize-size_read)) */

	    /*  fakes filling the readlist arrays of lengths found above  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
		if(i) {
		    if (i == (mem_list_count - 1)) {
			if (flat_buf->blocklens[k] == end_brd_size)
			    brd_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    brd_size = flat_buf->blocklens[k] - end_brd_size;
			    k--;
			    buf_count--;
			}
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
		if (i) {
		    if (i == (file_list_count - 1)) {
			if (flat_file->blocklens[j] == end_frd_size)
			    frd_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    frd_size = flat_file->blocklens[j] - end_frd_size;
			    j--;
			}
		    }
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    size_read += new_buffer_read;
	    start_k = k;
	    start_j = j;
	    if (max_mem_list < mem_list_count)
	        max_mem_list = mem_list_count;
	    if (max_file_list < file_list_count)
	        max_file_list = file_list_count;
	} /* while (size_read < bufsize) */

	/* one last check before we actually carry out the operation:
	 * this code has hard-to-fix bugs when a noncontiguous file type has
	 * such large pieces that the sum of the lengths of the memory type is
	 * not larger than one of those pieces (and vice versa for large memory
	 * types and many pices of file types.  In these cases, give up and
	 * fall back to naive reads and writes.  The testphdf5 test created a
	 * type with two very large memory regions and 600 very small file
	 * regions.  The same test also created a type with one very large file
	 * region and many (700) very small memory regions.  both cases caused
	 * problems for this code */

	if ( ( (file_list_count == 1) && 
		    (new_file_read < flat_file->blocklens[0] ) ) ||
		((mem_list_count == 1) && 
		    (new_buffer_read < flat_buf->blocklens[0]) ) ||
		((file_list_count == MAX_ARRAY_SIZE) && 
		    (new_file_read < flat_buf->blocklens[0]) ) ||
		( (mem_list_count == MAX_ARRAY_SIZE) &&
		    (new_buffer_read < flat_file->blocklens[0])) )
	{

	    ADIOI_Delete_flattened(datatype);
	    ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
		    file_ptr_type, initial_off, status, error_code);
	    return;
	}

	mem_offsets = (void *)ADIOI_Malloc(max_mem_list*sizeof(void *));
	mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t));
	file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
	file_lengths = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
	    
	size_read = 0;
	n_filetypes = st_n_filetypes;
	frd_size = st_frd_size;
	brd_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;

	/*  this section calculates mem_list_count and file_list_count
	    and also finds the possibly odd sized last array elements
	    in new_frd_size and new_brd_size  */
	
	while (size_read < bufsize) {
	    k = start_k;
	    new_buffer_read = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_read < bufsize-size_read)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_read + flat_buf->blocklens[k] + 
			size_read) > bufsize) {
		        end_brd_size = new_buffer_read + 
			    flat_buf->blocklens[k] - (bufsize - size_read);
			new_buffer_read = bufsize - size_read;
		    }
		    else {
		        new_buffer_read += flat_buf->blocklens[k];
			end_brd_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (brd_size > (bufsize - size_read)) {
		        new_buffer_read = bufsize - size_read;
			brd_size = new_buffer_read;
		    }
		    else new_buffer_read = brd_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_read < bufsize-size_read)) */
	    j = start_j;
	    new_file_read = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_read < new_buffer_read)) {
	        if(file_list_count) {
		    if((new_file_read + flat_file->blocklens[j]) > 
		       new_buffer_read) {
		        end_frd_size = new_buffer_read - new_file_read;
			new_file_read = new_buffer_read;
			j--;
		    }
		    else {
		        new_file_read += flat_file->blocklens[j];
			end_frd_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (frd_size > new_buffer_read) {
		        new_file_read = new_buffer_read;
			frd_size = new_file_read;
		    }
		    else new_file_read = frd_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_read < new_buffer_read) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_read = 0;
		    mem_list_count = 0;
		    while (new_buffer_read < new_file_read) {
		        if(mem_list_count) {
			    if((new_buffer_read + flat_buf->blocklens[k]) >
			       new_file_read) {
			        end_brd_size = new_file_read - new_buffer_read;
				new_buffer_read = new_file_read;
				k--;
			    }
			    else {
			        new_buffer_read += flat_buf->blocklens[k];
				end_brd_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_read = brd_size;
			    if (brd_size > (bufsize - size_read)) {
			        new_buffer_read = bufsize - size_read;
				brd_size = new_buffer_read;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_read < new_file_read) */
		} /* if ((new_file_read < new_buffer_read) && (file_list_count
		     == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_read < bufsize-size_read)) */

	    /*  fills the allocated readlist arrays  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
	        mem_offsets[i] = buf + 
			buftype_extent* (buf_count/flat_buf->count) +
					 flat_buf->indices[k];
		if(!i) {
		    mem_lengths[0] = brd_size;
		    mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
		}
		else {
		    if (i == (mem_list_count - 1)) {
		        mem_lengths[i] = end_brd_size;
			if (flat_buf->blocklens[k] == end_brd_size)
			    brd_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    brd_size = flat_buf->blocklens[k] - end_brd_size;
			    k--;
			    buf_count--;
			}
		    }
		    else {
		        mem_lengths[i] = flat_buf->blocklens[k];
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
	        file_offsets[i] = disp + flat_file->indices[j] + 
		    ((ADIO_Offset)n_filetypes) * filetype_extent;
	        if (!i) {
		    file_lengths[0] = frd_size;
		    file_offsets[0] += flat_file->blocklens[j] - frd_size;
		}
		else {
		    if (i == (file_list_count - 1)) {
		        file_lengths[i] = end_frd_size;
			if (flat_file->blocklens[j] == end_frd_size)
			    frd_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    frd_size = flat_file->blocklens[j] - end_frd_size;
			    j--;
			}
		    }
		    else file_lengths[i] = flat_file->blocklens[j];
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */

#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
	    NO_STALE(err_flag, fd, zoidfs_obj_ptr,
			    zoidfs_read(zoidfs_obj_ptr,
				    mem_list_count, mem_offsets, mem_lengths,
				    file_list_count,
				    file_offsets, file_lengths, ZOIDFS_NO_OP_HINT));
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != ZFS_OK) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_ZOIDFS_error_convert(err_flag),
						   "Error in zoidfs_read", 0);
	    }
	    /* --END ERROR HANDLING-- */
	    size_read += new_buffer_read;
	    total_bytes_read += new_buffer_read; /* XXX: is this right? */
	    start_k = k;
	    start_j = j;
	} /* while (size_read < bufsize) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);
    }
    /* Other ADIO routines will convert absolute bytes into counts of datatypes */
    /* when incrementing fp_ind, need to also take into account the file type:
     * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
     * if we wrote N elements, offset needs to point at beginning of type, not
     * at empty region at offset N+1) 
     *
     * As we discussed on mpich-discuss in may/june 2009, the code below might
     * look wierd, but by putting fp_ind at the last byte written, the next
     * time we run through the strided code we'll update the fp_ind to the
     * right location. */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
	fd->fp_ind = file_offsets[file_list_count-1]+
	    file_lengths[file_list_count-1];
    }
    
    ADIOI_Free(file_offsets);
    ADIOI_Free(file_lengths);
    
    if (err_flag == 0) *error_code = MPI_SUCCESS;

error_state:
    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
    /* This is a temporary way of filling in status. The right way is to 
       keep track of how much data was actually read and placed in buf 
       by ADIOI_BUFFERED_READ. */
#endif
    
    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
예제 #10
0
void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count,
			       MPI_Datatype datatype, int file_ptr_type,
			       ADIO_Offset offset, ADIO_Status * status,
			       int *error_code)
{
    /* offset is in units of etype relative to the filetype. */
    ADIOI_Flatlist_node *flat_buf, *flat_file;
    ADIO_Offset i_offset, sum, size_in_filetype;
    int i, j, k, st_index=0;
    int n_etypes_in_filetype;
    ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
    ADIO_Offset abs_off_in_filetype=0;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
    char *writebuf;
    unsigned bufsize, writebuf_len, write_sz;
    ADIO_Status status1;
    ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
    int stripe_size;
    static char myname[] = "ADIOI_LUSTRE_WriteStrided";

    if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
	/* if user has disabled data sieving on writes, use naive
	 * approach instead.
	 */
	ADIOI_GEN_WriteStrided_naive(fd,
				     buf,
				     count,
				     datatype,
				     file_ptr_type,
				     offset, status, error_code);
	return;
    }

    *error_code = MPI_SUCCESS;	/* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if (!filetype_size) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS;
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
    bufsize = buftype_size * count;

    /* get striping info */
    stripe_size = fd->hints->striping_unit;

    /* Different buftype to different filetype */
    if (!buftype_is_contig && filetype_is_contig) {
        /* noncontiguous in memory, contiguous in file. */
	flat_buf = ADIOI_Flatten_and_find(datatype);

	off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
            fd->disp + (ADIO_Offset)etype_size * offset;

	start_off = off;
	end_offset = start_off + bufsize - 1;
        /* write stripe size buffer each time */
	writebuf = (char *) ADIOI_Malloc(MPL_MIN(bufsize, stripe_size));
        writebuf_off = 0;
        writebuf_len = 0;

        /* if atomicity is true, lock the region to be accessed */
	if (fd->atomicity)
	    ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize);

	for (j = 0; j < count; j++) {
	    for (i = 0; i < flat_buf->count; i++) {
                userbuf_off = (ADIO_Offset)j * (ADIO_Offset)buftype_extent +
                    flat_buf->indices[i];
		req_off = off;
		req_len = flat_buf->blocklens[i];
		ADIOI_BUFFERED_WRITE_WITHOUT_READ
		off += flat_buf->blocklens[i];
	    }
        }

	/* write the buffer out finally */
	ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
			 ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
			 error_code);

	if (fd->atomicity)
	    ADIOI_UNLOCK(fd, start_off, SEEK_SET, bufsize);
	if (*error_code != MPI_SUCCESS) {
            ADIOI_Free(writebuf);
	    return;
        }
	ADIOI_Free(writebuf);
	if (file_ptr_type == ADIO_INDIVIDUAL)
	    fd->fp_ind = off;
    } else {
        /* noncontiguous in file */
	flat_file = ADIOI_Flatten_and_find(fd->filetype);
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
            /* Wei-keng reworked type processing to be a bit more efficient */
            offset       = fd->fp_ind - disp;
            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
            offset      -= (ADIO_Offset)n_filetypes * filetype_extent;
            /* now offset is local to this extent */

            /* find the block where offset is located, skip blocklens[i]==0 */
            for (i=0; i<flat_file->count; i++) {
                ADIO_Offset dist;
                if (flat_file->blocklens[i] == 0) continue;
                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
                /* fwr_size is from offset to the end of block i */
                if (dist == 0) {
                    i++;
                    offset   = flat_file->indices[i];
                    fwr_size = flat_file->blocklens[i];
			break;
		    }
                if (dist > 0) {
                    fwr_size = dist;
                    break;
		}
	    }
            st_index = i;  /* starting index in flat_file->indices[] */
            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
        }
        else {
            n_etypes_in_filetype = filetype_size/etype_size;
            n_filetypes = offset / n_etypes_in_filetype;
            etype_in_filetype = offset % n_etypes_in_filetype;
	    size_in_filetype = etype_in_filetype * etype_size;

	    sum = 0;
	    for (i = 0; i < flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    offset = disp + (ADIO_Offset) n_filetypes *filetype_extent +
		     abs_off_in_filetype;
	}

	start_off = offset;

        /* Wei-keng Liao:write request is within single flat_file
         * contig block*/
        /* this could happen, for example, with subarray types that are
         * actually fairly contiguous */
        if (buftype_is_contig && bufsize <= fwr_size) {
            req_off = start_off;
            req_len = bufsize;
            end_offset = start_off + bufsize - 1;
	    writebuf = (char *) ADIOI_Malloc(MPL_MIN(bufsize, stripe_size));
	    memset(writebuf, -1, MPL_MIN(bufsize, stripe_size));
            writebuf_off = 0;
            writebuf_len = 0;
            userbuf_off = 0;
            ADIOI_BUFFERED_WRITE_WITHOUT_READ
            /* write the buffer out finally */
            ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
                             ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
                             error_code);

            if (file_ptr_type == ADIO_INDIVIDUAL) {
                /* update MPI-IO file pointer to point to the first byte
                 * that can be accessed in the fileview. */
                fd->fp_ind = offset + bufsize;
                if (bufsize == fwr_size) {
                    do {
                        st_index++;
                        if (st_index == flat_file->count) {
                            st_index = 0;
                            n_filetypes++;
                        }
                    } while (flat_file->blocklens[st_index] == 0);
                    fd->fp_ind = disp + flat_file->indices[st_index]
                        + (ADIO_Offset)n_filetypes*filetype_extent;
                }
            }
            fd->fp_sys_posn = -1;   /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
            MPIR_Status_set_bytes(status, datatype, bufsize);
#endif
            ADIOI_Free(writebuf);
            return;
        }

	    /* Calculate end_offset, the last byte-offset that will be accessed.
           e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/

	    st_fwr_size = fwr_size;
	    st_n_filetypes = n_filetypes;
        i_offset = 0;
	    j = st_index;
	    off = offset;
	    fwr_size = MPL_MIN(st_fwr_size, bufsize);
        while (i_offset < bufsize) {
            i_offset += fwr_size;
		end_offset = off + fwr_size - 1;

            j = (j+1) % flat_file->count;
            n_filetypes += (j == 0) ? 1 : 0;
            while (flat_file->blocklens[j]==0) {
                j = (j+1) % flat_file->count;
                n_filetypes += (j == 0) ? 1 : 0;
		}

		off = disp + flat_file->indices[j] +
                n_filetypes*(ADIO_Offset)filetype_extent;
            fwr_size = MPL_MIN(flat_file->blocklens[j], bufsize-i_offset);
	    }

/* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

	    writebuf_off = 0;
	    writebuf_len = 0;
	    writebuf = (char *) ADIOI_Malloc(stripe_size);
	    memset(writebuf, -1, stripe_size);

	    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
		   common case. */

            i_offset = 0;
		j = st_index;
		off = offset;
		n_filetypes = st_n_filetypes;
		fwr_size = MPL_MIN(st_fwr_size, bufsize);
            while (i_offset < bufsize) {
		    if (fwr_size) {
			/* TYPE_UB and TYPE_LB can result in
			   fwr_size = 0. save system call in such cases */
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                       err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/

			req_off = off;
			req_len = fwr_size;
                    userbuf_off = i_offset;
			ADIOI_BUFFERED_WRITE
                    }
                i_offset += fwr_size;

		    if (off + fwr_size < disp + flat_file->indices[j] +
		                         flat_file->blocklens[j] +
                    n_filetypes*(ADIO_Offset)filetype_extent)
		        off += fwr_size;
		    /* did not reach end of contiguous block in filetype.
		    no more I/O needed. off is incremented by fwr_size. */
		    else {
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
			}
			off = disp + flat_file->indices[j] +
                        n_filetypes*(ADIO_Offset)filetype_extent;
			fwr_size = MPL_MIN(flat_file->blocklens[j],
                                         bufsize-i_offset);
		    }
		}
        }
        else {
예제 #11
0
void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count,
			     MPI_Datatype datatype, int file_ptr_type,
			     ADIO_Offset offset, ADIO_Status *status, int
			     *error_code)
{
/* Since PVFS does not support file locking, can't do buffered writes
   as on Unix */

/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
    int num, size, sum, n_etypes_in_filetype, size_in_filetype;
    MPI_Count bufsize;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent, indx;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp;
    int flag, new_bwr_size, new_fwr_size, err_flag=0;
    static char myname[] = "ADIOI_PVFS_WRITESTRIDED";

#ifdef HAVE_PVFS_LISTIO
    if ( fd->hints->fs_hints.pvfs.listio_write == ADIOI_HINT_ENABLE ) {
	    ADIOI_PVFS_WriteStridedListIO(fd, buf, count, datatype, 
			    file_ptr_type, offset, status, error_code);
	    return;
    }
#endif
    /* if hint set to DISABLE or AUTOMATIC, don't use listio */

    /* --BEGIN ERROR HANDLING-- */
    if (fd->atomicity) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_INTERN,
					   "Atomic mode set in PVFS I/O function", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;
    
    bufsize = buftype_size * count;

    if (!buftype_is_contig && filetype_is_contig) {
	char *combine_buf, *combine_buf_ptr;
	ADIO_Offset combine_buf_remain;
/* noncontiguous in memory, contiguous in file. use writev */

	flat_buf = ADIOI_Flatten_and_find(datatype);

	/* allocate our "combine buffer" to pack data into before writing */
	combine_buf = (char *) ADIOI_Malloc(fd->hints->ind_wr_buffer_size);
	combine_buf_ptr = combine_buf;
	combine_buf_remain = fd->hints->ind_wr_buffer_size;

	/* seek to the right spot in the file */
	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
	    off = fd->disp + etype_size * offset;
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
	    pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
	}
	else {
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
            off = pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
        }

	/* loop through all the flattened pieces.  combine into buffer until
	 * no more will fit, then write.
	 *
	 * special case of a given piece being bigger than the combine buffer
	 * is also handled.
	 */
	for (j=0; j<count; j++) {
	    for (i=0; i<flat_buf->count; i++) {
		if (flat_buf->blocklens[i] > combine_buf_remain && combine_buf != combine_buf_ptr) {
		    /* there is data in the buffer; write out the buffer so far */
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
		    err = pvfs_write(fd->fd_sys,
				     combine_buf,
				     fd->hints->ind_wr_buffer_size - combine_buf_remain);
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
		    if (err == -1) err_flag = 1;

		    /* reset our buffer info */
		    combine_buf_ptr = combine_buf;
		    combine_buf_remain = fd->hints->ind_wr_buffer_size;
		}

		/* TODO: heuristic for when to not bother to use combine buffer? */
		if (flat_buf->blocklens[i] >= combine_buf_remain) {
		    /* special case: blocklen is as big as or bigger than the combine buf;
		     * write directly
		     */
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
		    err = pvfs_write(fd->fd_sys,
				     ((char *) buf) + j*buftype_extent + flat_buf->indices[i],
				     flat_buf->blocklens[i]);
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
		    if (err == -1) err_flag = 1;
		    off += flat_buf->blocklens[i]; /* keep up with the final file offset too */
		}
		else {
		    /* copy more data into combine buffer */
		    memcpy(combine_buf_ptr,
			   ((char *) buf) + j*buftype_extent + flat_buf->indices[i],
			   flat_buf->blocklens[i]);
		    combine_buf_ptr += flat_buf->blocklens[i];
		    combine_buf_remain -= flat_buf->blocklens[i];
		    off += flat_buf->blocklens[i]; /* keep up with the final file offset too */
		}
	    }
	}

	if (combine_buf_ptr != combine_buf) {
	    /* data left in buffer to write */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
	    err = pvfs_write(fd->fd_sys,
			     combine_buf,
			     fd->hints->ind_wr_buffer_size - combine_buf_remain);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
	    if (err == -1) err_flag = 1;
	}

	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	ADIOI_Free(combine_buf);

	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    } /* if (!buftype_is_contig && filetype_is_contig)  ... */

    else {  /* noncontiguous in file */

/* split up into several contiguous writes */

/* find starting location in the file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
        disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    offset = fd->fp_ind; /* in bytes */
            n_filetypes = -1;
            flag = 0;
            while (!flag) {
                n_filetypes++;
                for (i=0; i<flat_file->count; i++) {
                    if (disp + flat_file->indices[i] + 
                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
                            >= offset) {
                        st_index = i;
                        fwr_size = disp + flat_file->indices[i] + 
                                (ADIO_Offset) n_filetypes*filetype_extent
                                 + flat_file->blocklens[i] - offset;
                        flag = 1;
                        break;
                    }
                }
            }
	}
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
            offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
	}

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i = 0;
	    j = st_index;
	    off = offset;
	    fwr_size = MPL_MIN(fwr_size, bufsize);
	    while (i < bufsize) {
                if (fwr_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       fwr_size = 0. save system call in such cases */ 
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
		    pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
		    err = pvfs_write(fd->fd_sys, ((char *) buf) + i, fwr_size);
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
		    if (err == -1) err_flag = 1;
		}
		i += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                       off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by fwr_size. */
                else {
		    if (j < (flat_file->count - 1)) j++;
		    else {
			j = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[j] + 
                                        (ADIO_Offset) n_filetypes*filetype_extent;
		    fwr_size = MPL_MIN(flat_file->blocklens[j], bufsize-i);
		}
	    }
	}
	else {
/* noncontiguous in memory as well as in file */

	    flat_buf = ADIOI_Flatten_and_find(datatype);

	    k = num = buf_count = 0;
	    indx = flat_buf->indices[0];
	    j = st_index;
	    off = offset;
	    bwr_size = flat_buf->blocklens[0];

	    while (num < bufsize) {
		size = MPL_MIN(fwr_size, bwr_size);
		if (size) {
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
		    pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
		    err = pvfs_write(fd->fd_sys, ((char *) buf) + indx, size);
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
		    if (err == -1) err_flag = 1;
		}

		new_fwr_size = fwr_size;
		new_bwr_size = bwr_size;

		if (size == fwr_size) {
/* reached end of contiguous block in file */
                    if (j < (flat_file->count - 1)) j++;
                    else {
                        j = 0;
                        n_filetypes++;
                    }

                    off = disp + flat_file->indices[j] + 
                                   (ADIO_Offset) n_filetypes*filetype_extent;

		    new_fwr_size = flat_file->blocklens[j];
		    if (size != bwr_size) {
			indx += size;
			new_bwr_size -= size;
		    }
		}

		if (size == bwr_size) {
/* reached end of contiguous block in memory */

		    k = (k + 1)%flat_buf->count;
		    buf_count++;
		    indx = buftype_extent*(buf_count/flat_buf->count) +
			flat_buf->indices[k]; 
		    new_bwr_size = flat_buf->blocklens[k];
		    if (size != fwr_size) {
			off += size;
			new_fwr_size -= size;
		    }
		}
		num += size;
		fwr_size = new_fwr_size;
                bwr_size = new_bwr_size;
	    }
	}

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    }

    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
예제 #12
0
void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code) 
{
/* Since PVFS does not support file locking, can't do buffered writes
   as on Unix */

/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
    int size, sum, n_etypes_in_filetype, size_in_filetype;
    MPI_Count bufsize;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, disp, start_off;
    int flag, st_fwr_size, st_n_filetypes;
    int new_bwr_size, new_fwr_size, err_flag=0;

    int mem_list_count, file_list_count;
    char ** mem_offsets;
    int64_t *file_offsets;
    int *mem_lengths;
    int32_t *file_lengths;
    int total_blks_to_write;

    int max_mem_list, max_file_list;

    int b_blks_wrote;
    int f_data_wrote;
    int size_wrote=0, n_write_lists, extra_blks;

    int end_bwr_size, end_fwr_size;
    int start_k, start_j, new_file_write, new_buffer_write;
    int start_mem_offset;
#define MAX_ARRAY_SIZE 1024
    static char myname[] = "ADIOI_PVFS_WRITESTRIDED";

/* PFS file pointer modes are not relevant here, because PFS does
   not support strided accesses. */

    /* --BEGIN ERROR HANDLING-- */
    if (fd->atomicity) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_INTERN,
					   "Atomic mode set in PVFS I/O function", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;
    
    bufsize = buftype_size * count;

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file.  */
        int64_t file_offsets;
	int32_t file_lengths;

	flat_buf = ADIOI_Flatten_and_find(datatype);
	
	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
	    off = fd->disp + etype_size * offset;
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
	    pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
	}
	else {
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
            off = pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
        }

	file_list_count = 1;
	file_offsets = off;
	file_lengths = 0;
	total_blks_to_write = count*flat_buf->count;
	b_blks_wrote = 0;

	/* allocate arrays according to max usage */
	if (total_blks_to_write > MAX_ARRAY_SIZE)
	    mem_list_count = MAX_ARRAY_SIZE;
	else mem_list_count = total_blks_to_write;
	mem_offsets = (char**)ADIOI_Malloc(mem_list_count*sizeof(char*));
	mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));

	j = 0;
	/* step through each block in memory, filling memory arrays */
	while (b_blks_wrote < total_blks_to_write) {
	    for (i=0; i<flat_buf->count; i++) {
		mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = 
		    ((char*)buf + j*buftype_extent + flat_buf->indices[i]);
		mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = 
		    flat_buf->blocklens[i];
		file_lengths += flat_buf->blocklens[i];
		b_blks_wrote++;
		if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
		    (b_blks_wrote == total_blks_to_write)) {

		    /* in the case of the last read list call,
		       adjust mem_list_count */
		    if (b_blks_wrote == total_blks_to_write) {
		        mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
			/* in case last read list call fills max arrays */
			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
		    }

		    pvfs_write_list(fd->fd_sys ,mem_list_count, mem_offsets,
				   mem_lengths, file_list_count,
				   &file_offsets, &file_lengths);
		  
		    /* in the case of the last read list call, leave here */
		    if (b_blks_wrote == total_blks_to_write) break;

		    file_offsets += file_lengths;
		    file_lengths = 0;
		} 
	    } /* for (i=0; i<flat_buf->count; i++) */
	    j++;
	} /* while (b_blks_wrote < total_blks_to_write) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);

	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;

	fd->fp_sys_posn = -1;   /* clear this. */

#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

	ADIOI_Delete_flattened(datatype);
	return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* already know that file is noncontiguous from above */
    /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;

    /* for each case - ADIO_Individual pointer or explicit, find offset
       (file offset in bytes), n_filetypes (how many filetypes into file 
       to start), fwr_size (remaining amount of data in present file
       block), and st_index (start point in terms of blocks in starting
       filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
	n_filetypes = -1;
	flag = 0;
	while (!flag) {
	    n_filetypes++;
	    for (i=0; i<flat_file->count; i++) {
	        if (disp + flat_file->indices[i] + 
		    (ADIO_Offset) n_filetypes*filetype_extent +
		      flat_file->blocklens[i] >= offset) {
		  st_index = i;
		  fwr_size = disp + flat_file->indices[i] + 
		    (ADIO_Offset) n_filetypes*filetype_extent
		    + flat_file->blocklens[i] - offset;
		  flag = 1;
		  break;
		}
	    }
	} /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
	n_filetypes = (int) (offset / n_etypes_in_filetype);
	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	size_in_filetype = etype_in_filetype * etype_size;
	
	sum = 0;
	for (i=0; i<flat_file->count; i++) {
	    sum += flat_file->blocklens[i];
	    if (sum > size_in_filetype) {
	        st_index = i;
		fwr_size = sum - size_in_filetype;
		abs_off_in_filetype = flat_file->indices[i] +
		    size_in_filetype - (sum - flat_file->blocklens[i]);
		break;
	    }
	}

	/* abs. offset in bytes in the file */
	offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
	    abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    start_off = offset;
    st_fwr_size = fwr_size;
    st_n_filetypes = n_filetypes;
    
    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

        int mem_lengths;
	char *mem_offsets;
        
	i = 0;
	j = st_index;
	off = offset;
	n_filetypes = st_n_filetypes;
        
	mem_list_count = 1;
        
	/* determine how many blocks in file to read */
	f_data_wrote = MPL_MIN(st_fwr_size, bufsize);
	total_blks_to_write = 1;
	j++;
	while (f_data_wrote < bufsize) {
	    f_data_wrote += flat_file->blocklens[j];
	    total_blks_to_write++;
	    if (j<(flat_file->count-1)) j++;
	    else j = 0; 
	}
	    
	j = st_index;
	n_filetypes = st_n_filetypes;
	n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
	extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
        
	mem_offsets = buf;
	mem_lengths = 0;
        
	/* if at least one full readlist, allocate file arrays
	   at max array size and don't free until very end */
	if (n_write_lists) {
	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int64_t));
	    file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int32_t));
	}
	/* if there's no full readlist allocate file arrays according
	   to needed size (extra_blks) */
	else {
	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
                                                  sizeof(int64_t));
            file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
                                                  sizeof(int32_t));
        }
        
        /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
        for (i=0; i<n_write_lists; i++) {
            file_list_count = MAX_ARRAY_SIZE;
            if(!i) {
                file_offsets[0] = offset;
                file_lengths[0] = st_fwr_size;
                mem_lengths = st_fwr_size;
            }
            for (k=0; k<MAX_ARRAY_SIZE; k++) {
                if (i || k) {
                    file_offsets[k] = disp + n_filetypes*filetype_extent
                      + flat_file->indices[j];
                    file_lengths[k] = flat_file->blocklens[j];
                    mem_lengths += file_lengths[k];
                }
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
            pvfs_write_list(fd->fd_sys, mem_list_count,
                           &mem_offsets, &mem_lengths,
                           file_list_count, file_offsets,
                           file_lengths);
            mem_offsets += mem_lengths;
            mem_lengths = 0;
        } /* for (i=0; i<n_write_lists; i++) */

        /* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
        if (extra_blks) {
            file_list_count = extra_blks;
            if(!i) {
                file_offsets[0] = offset;
                file_lengths[0] = st_fwr_size;
            }
            for (k=0; k<extra_blks; k++) {
                if(i || k) {
                    file_offsets[k] = disp + n_filetypes*filetype_extent +
                      flat_file->indices[j];
                    if (k == (extra_blks - 1)) {
                        file_lengths[k] = bufsize - (int32_t) mem_lengths
                          - (int32_t) mem_offsets + (int32_t)  buf;
                    }
                    else file_lengths[k] = flat_file->blocklens[j];
                } /* if(i || k) */
                mem_lengths += file_lengths[k];
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<extra_blks; k++) */
            pvfs_write_list(fd->fd_sys, mem_list_count, &mem_offsets,
                           &mem_lengths, file_list_count, file_offsets,
                           file_lengths);
        }
    } 
    else {
        /* noncontiguous in memory as well as in file */

	flat_buf = ADIOI_Flatten_and_find(datatype);

	size_wrote = 0;
	n_filetypes = st_n_filetypes;
	fwr_size = st_fwr_size;
	bwr_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;
	max_mem_list = 0;
	max_file_list = 0;

	/* run through and file max_file_list and max_mem_list so that you 
	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
	   if possible */

	while (size_wrote < bufsize) {
	    k = start_k;
	    new_buffer_write = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_write < bufsize-size_wrote)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_write + flat_buf->blocklens[k] + 
			size_wrote) > bufsize) {
		        end_bwr_size = new_buffer_write + 
			    flat_buf->blocklens[k] - (bufsize - size_wrote);
			new_buffer_write = bufsize - size_wrote;
		    }
		    else {
		        new_buffer_write += flat_buf->blocklens[k];
			end_bwr_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (bwr_size > (bufsize - size_wrote)) {
		        new_buffer_write = bufsize - size_wrote;
			bwr_size = new_buffer_write;
		    }
		    else new_buffer_write = bwr_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_write < bufsize-size_wrote)) */
	    j = start_j;
	    new_file_write = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_write < new_buffer_write)) {
	        if(file_list_count) {
		    if((new_file_write + flat_file->blocklens[j]) > 
		       new_buffer_write) {
		        end_fwr_size = new_buffer_write - new_file_write;
			new_file_write = new_buffer_write;
			j--;
		    }
		    else {
		        new_file_write += flat_file->blocklens[j];
			end_fwr_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (fwr_size > new_buffer_write) {
		        new_file_write = new_buffer_write;
			fwr_size = new_file_write;
		    }
		    else new_file_write = fwr_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_write < new_buffer_write) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_write = 0;
		    mem_list_count = 0;
		    while (new_buffer_write < new_file_write) {
		        if(mem_list_count) {
			    if((new_buffer_write + flat_buf->blocklens[k]) >
			       new_file_write) {
			        end_bwr_size = new_file_write - 
				    new_buffer_write;
				new_buffer_write = new_file_write;
				k--;
			    }
			    else {
			        new_buffer_write += flat_buf->blocklens[k];
				end_bwr_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_write = bwr_size;
			    if (bwr_size > (bufsize - size_wrote)) {
			        new_buffer_write = bufsize - size_wrote;
				bwr_size = new_buffer_write;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_write < new_file_write) */
		} /* if ((new_file_write < new_buffer_write) &&
		     (file_list_count == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_write < bufsize-size_wrote)) */

	    /*  fakes filling the writelist arrays of lengths found above  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
		if(i) {
		    if (i == (mem_list_count - 1)) {
			if (flat_buf->blocklens[k] == end_bwr_size)
			    bwr_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
			    k--;
			    buf_count--;
			}
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
		if (i) {
		    if (i == (file_list_count - 1)) {
			if (flat_file->blocklens[j] == end_fwr_size)
			    fwr_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
			    j--;
			}
		    }
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    size_wrote += new_buffer_write;
	    start_k = k;
	    start_j = j;
	    if (max_mem_list < mem_list_count)
	        max_mem_list = mem_list_count;
	    if (max_file_list < file_list_count)
	        max_file_list = file_list_count;
	} /* while (size_wrote < bufsize) */

	mem_offsets = (char **)ADIOI_Malloc(max_mem_list*sizeof(char *));
	mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
	file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
	file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
	    
	size_wrote = 0;
	n_filetypes = st_n_filetypes;
	fwr_size = st_fwr_size;
	bwr_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;

	/*  this section calculates mem_list_count and file_list_count
	    and also finds the possibly odd sized last array elements
	    in new_fwr_size and new_bwr_size  */
	
	while (size_wrote < bufsize) {
	    k = start_k;
	    new_buffer_write = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_write < bufsize-size_wrote)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_write + flat_buf->blocklens[k] + 
			size_wrote) > bufsize) {
		        end_bwr_size = new_buffer_write + 
			    flat_buf->blocklens[k] - (bufsize - size_wrote);
			new_buffer_write = bufsize - size_wrote;
		    }
		    else {
		        new_buffer_write += flat_buf->blocklens[k];
			end_bwr_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (bwr_size > (bufsize - size_wrote)) {
		        new_buffer_write = bufsize - size_wrote;
			bwr_size = new_buffer_write;
		    }
		    else new_buffer_write = bwr_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_write < bufsize-size_wrote)) */
	    j = start_j;
	    new_file_write = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_write < new_buffer_write)) {
	        if(file_list_count) {
		    if((new_file_write + flat_file->blocklens[j]) > 
		       new_buffer_write) {
		        end_fwr_size = new_buffer_write - new_file_write;
			new_file_write = new_buffer_write;
			j--;
		    }
		    else {
		        new_file_write += flat_file->blocklens[j];
			end_fwr_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (fwr_size > new_buffer_write) {
		        new_file_write = new_buffer_write;
			fwr_size = new_file_write;
		    }
		    else new_file_write = fwr_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_write < new_buffer_write) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_write = 0;
		    mem_list_count = 0;
		    while (new_buffer_write < new_file_write) {
		        if(mem_list_count) {
			    if((new_buffer_write + flat_buf->blocklens[k]) >
			       new_file_write) {
			        end_bwr_size = new_file_write -
				  new_buffer_write;
				new_buffer_write = new_file_write;
				k--;
			    }
			    else {
			        new_buffer_write += flat_buf->blocklens[k];
				end_bwr_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_write = bwr_size;
			    if (bwr_size > (bufsize - size_wrote)) {
			        new_buffer_write = bufsize - size_wrote;
				bwr_size = new_buffer_write;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_write < new_file_write) */
		} /* if ((new_file_write < new_buffer_write) &&
		     (file_list_count == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_write < bufsize-size_wrote)) */

	    /*  fills the allocated readlist arrays  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
	        mem_offsets[i] = ((char*)buf + buftype_extent*
					 (buf_count/flat_buf->count) +
					 (int)flat_buf->indices[k]);
		
		if(!i) {
		    mem_lengths[0] = bwr_size;
		    mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
		}
		else {
		    if (i == (mem_list_count - 1)) {
		        mem_lengths[i] = end_bwr_size;
			if (flat_buf->blocklens[k] == end_bwr_size)
			    bwr_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
			    k--;
			    buf_count--;
			}
		    }
		    else {
		        mem_lengths[i] = flat_buf->blocklens[k];
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
	        file_offsets[i] = disp + flat_file->indices[j] + n_filetypes *
		    filetype_extent;
	        if (!i) {
		    file_lengths[0] = fwr_size;
		    file_offsets[0] += flat_file->blocklens[j] - fwr_size;
		}
		else {
		    if (i == (file_list_count - 1)) {
		        file_lengths[i] = end_fwr_size;
			if (flat_file->blocklens[j] == end_fwr_size)
			    fwr_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
			    j--;
			}
		    }
		    else file_lengths[i] = flat_file->blocklens[j];
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    pvfs_write_list(fd->fd_sys,mem_list_count, mem_offsets,
			   mem_lengths, file_list_count, file_offsets,
			   file_lengths);
	    size_wrote += new_buffer_write;
	    start_k = k;
	    start_j = j;
	} /* while (size_wrote < bufsize) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);
    }
    ADIOI_Free(file_offsets);
    ADIOI_Free(file_lengths);

    if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
    if (err_flag) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					   myname, __LINE__, MPI_ERR_IO,
					   "**io",
					   "**io %s", strerror(errno));
    }
    else *error_code = MPI_SUCCESS;

    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
예제 #13
0
void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, l, brd_size, frd_size=0, st_index=0;
    int sum, n_etypes_in_filetype, size_in_filetype;
    MPI_Count bufsize;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, disp, start_off;
    int flag, st_frd_size, st_n_filetypes;
    int new_brd_size, new_frd_size;

    int mem_list_count, file_list_count;
    char **mem_offsets;
    int64_t *file_offsets;
    int *mem_lengths;
    int32_t *file_lengths;
    int total_blks_to_read;

    int max_mem_list, max_file_list;

    int b_blks_read;
    int f_data_read;
    int size_read=0, n_read_lists, extra_blks;

    int end_brd_size, end_frd_size;
    int start_k, start_j, new_file_read, new_buffer_read;
    int start_mem_offset;

#define MAX_ARRAY_SIZE 1024

#ifndef PRINT_ERR_MESG
  static char myname[] = "ADIOI_PVFS_ReadStrided";
#endif

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */
        int64_t file_offsets;
	int32_t file_lengths;

	flat_buf = ADIOI_Flatten_and_find(datatype);

	off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
	    fd->disp + etype_size * offset;

	file_list_count = 1;
	file_offsets = off;
	file_lengths = 0;
	total_blks_to_read = count*flat_buf->count;
	b_blks_read = 0;

	/* allocate arrays according to max usage */
	if (total_blks_to_read > MAX_ARRAY_SIZE)
	    mem_list_count = MAX_ARRAY_SIZE;
	else mem_list_count = total_blks_to_read;
	mem_offsets = (char**)ADIOI_Malloc(mem_list_count*sizeof(char*));
	mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));

	j = 0;
	/* step through each block in memory, filling memory arrays */
	while (b_blks_read < total_blks_to_read) {
	    for (i=0; i<flat_buf->count; i++) {
		mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = 
		    (char*)((char *)buf + j*buftype_extent + flat_buf->indices[i]);
		mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = 
		    flat_buf->blocklens[i];
		file_lengths += flat_buf->blocklens[i];
		b_blks_read++;
		if (!(b_blks_read % MAX_ARRAY_SIZE) ||
		    (b_blks_read == total_blks_to_read)) {

		    /* in the case of the last read list call,
		       adjust mem_list_count */
		    if (b_blks_read == total_blks_to_read) {
		        mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
			/* in case last read list call fills max arrays */
			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
		    }

		    pvfs_read_list(fd->fd_sys ,mem_list_count, mem_offsets,
				   mem_lengths, file_list_count,
				   &file_offsets, &file_lengths);
		  
		    /* in the case of the last read list call, leave here */
		    if (b_blks_read == total_blks_to_read) break;

		    file_offsets += file_lengths;
		    file_lengths = 0;
		} 
	    } /* for (i=0; i<flat_buf->count; i++) */
	    j++;
	} /* while (b_blks_read < total_blks_to_read) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	fd->fp_sys_posn = -1;  /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, bufsize);
	/* This isa temporary way of filling in status.  The right way is to
	   keep tracke of how much data was actually read adn placed in buf
	   by ADIOI_BUFFERED_READ. */
#endif
	ADIOI_Delete_flattened(datatype);

	return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* know file is noncontiguous from above */
    /* noncontiguous in file */

    /* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;

    /* for each case - ADIO_Individual pointer or explicit, find the file
       offset in bytes (offset), n_filetypes (how many filetypes into
       file to start), frd_size (remaining amount of data in present
       file block), and st_index (start point in terms of blocks in
       starting filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
	n_filetypes = -1;
	flag = 0;
	while (!flag) {
	    n_filetypes++;
	    for (i=0; i<flat_file->count; i++) {
	        if (disp + flat_file->indices[i] + 
		    (ADIO_Offset) n_filetypes*filetype_extent +
		    flat_file->blocklens[i]  >= offset) {
		    st_index = i;
		    frd_size = (int) (disp + flat_file->indices[i] + 
				      (ADIO_Offset) n_filetypes*filetype_extent
				      + flat_file->blocklens[i] - offset);
		    flag = 1;
		    break;
		}
	    }
	} /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
	n_filetypes = (int) (offset / n_etypes_in_filetype);
	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	size_in_filetype = etype_in_filetype * etype_size;
	
	sum = 0;
	for (i=0; i<flat_file->count; i++) {
	    sum += flat_file->blocklens[i];
	    if (sum > size_in_filetype) {
	        st_index = i;
		frd_size = sum - size_in_filetype;
		abs_off_in_filetype = flat_file->indices[i] +
		    size_in_filetype - (sum - flat_file->blocklens[i]);
		break;
	    }
	}
	
	/* abs. offset in bytes in the file */
	offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
	    abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    start_off = offset;
    st_frd_size = frd_size;
    st_n_filetypes = n_filetypes;
    
    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

        int mem_lengths;
	char *mem_offsets;
	
	i = 0;
	j = st_index;
	n_filetypes = st_n_filetypes;
	
	mem_list_count = 1;
	
	/* determine how many blocks in file to read */
	f_data_read = ADIOI_MIN(st_frd_size, bufsize);
	total_blks_to_read = 1;
	j++;
	while (f_data_read < bufsize) {
	    f_data_read += flat_file->blocklens[j];
	    total_blks_to_read++;
	    if (j<(flat_file->count-1)) j++;
	    else j = 0;	
	}
      
	j = st_index;
	n_filetypes = st_n_filetypes;
	n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
	extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
	
	mem_offsets = buf;
	mem_lengths = 0;
	
	/* if at least one full readlist, allocate file arrays
	   at max array size and don't free until very end */
	if (n_read_lists) {
	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int64_t));
	    file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int32_t));
	}
	/* if there's no full readlist allocate file arrays according
	   to needed size (extra_blks) */
	else {
	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
						  sizeof(int64_t));
	    file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
						  sizeof(int32_t));
	}
	
	/* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
	for (i=0; i<n_read_lists; i++) {
	    file_list_count = MAX_ARRAY_SIZE;
	    if(!i) {
	        file_offsets[0] = offset;
		file_lengths[0] = st_frd_size;
		mem_lengths = st_frd_size;
	    }
	    for (k=0; k<MAX_ARRAY_SIZE; k++) {
	        if (i || k) {
		    file_offsets[k] = disp + n_filetypes*filetype_extent
		      + flat_file->indices[j];
		    file_lengths[k] = flat_file->blocklens[j];
		    mem_lengths += file_lengths[k];
		}
		if (j<(flat_file->count - 1)) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
	    pvfs_read_list(fd->fd_sys, mem_list_count,
			   &mem_offsets, &mem_lengths,
			   file_list_count, file_offsets,
			   file_lengths);
	    mem_offsets += mem_lengths;
	    mem_lengths = 0;
	} /* for (i=0; i<n_read_lists; i++) */

	/* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
	if (extra_blks) {
	    file_list_count = extra_blks;
	    if(!i) {
	        file_offsets[0] = offset;
		file_lengths[0] = st_frd_size;
	    }
	    for (k=0; k<extra_blks; k++) {
	        if(i || k) {
		    file_offsets[k] = disp + n_filetypes*filetype_extent +
		      flat_file->indices[j];
		    if (k == (extra_blks - 1)) {
		        file_lengths[k] = bufsize - (int32_t) mem_lengths
			  - (int32_t) mem_offsets + (int32_t)  buf;
		    }
		    else file_lengths[k] = flat_file->blocklens[j];
		} /* if(i || k) */
		mem_lengths += file_lengths[k];
		if (j<(flat_file->count - 1)) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (k=0; k<extra_blks; k++) */
	    pvfs_read_list(fd->fd_sys, mem_list_count, &mem_offsets,
			   &mem_lengths, file_list_count, file_offsets,
			   file_lengths);
	}
    }
    else {
/* noncontiguous in memory as well as in file */
      
	flat_buf = ADIOI_Flatten_and_find(datatype);

	size_read = 0;
	n_filetypes = st_n_filetypes;
	frd_size = st_frd_size;
	brd_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;
	max_mem_list = 0;
	max_file_list = 0;

	/* run through and file max_file_list and max_mem_list so that you 
	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
	   if possible */

	while (size_read < bufsize) {
	    k = start_k;
	    new_buffer_read = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_read < bufsize-size_read)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_read + flat_buf->blocklens[k] + 
			size_read) > bufsize) {
		        end_brd_size = new_buffer_read + 
			    flat_buf->blocklens[k] - (bufsize - size_read);
			new_buffer_read = bufsize - size_read;
		    }
		    else {
		        new_buffer_read += flat_buf->blocklens[k];
			end_brd_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (brd_size > (bufsize - size_read)) {
		        new_buffer_read = bufsize - size_read;
			brd_size = new_buffer_read;
		    }
		    else new_buffer_read = brd_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_read < bufsize-size_read)) */
	    j = start_j;
	    new_file_read = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_read < new_buffer_read)) {
	        if(file_list_count) {
		    if((new_file_read + flat_file->blocklens[j]) > 
		       new_buffer_read) {
		        end_frd_size = new_buffer_read - new_file_read;
			new_file_read = new_buffer_read;
			j--;
		    }
		    else {
		        new_file_read += flat_file->blocklens[j];
			end_frd_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (frd_size > new_buffer_read) {
		        new_file_read = new_buffer_read;
			frd_size = new_file_read;
		    }
		    else new_file_read = frd_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_read < new_buffer_read) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_read = 0;
		    mem_list_count = 0;
		    while (new_buffer_read < new_file_read) {
		        if(mem_list_count) {
			    if((new_buffer_read + flat_buf->blocklens[k]) >
			       new_file_read) {
			        end_brd_size = new_file_read - new_buffer_read;
				new_buffer_read = new_file_read;
				k--;
			    }
			    else {
			        new_buffer_read += flat_buf->blocklens[k];
				end_brd_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_read = brd_size;
			    if (brd_size > (bufsize - size_read)) {
			        new_buffer_read = bufsize - size_read;
				brd_size = new_buffer_read;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_read < new_file_read) */
		} /* if ((new_file_read < new_buffer_read) && (file_list_count
		     == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_read < bufsize-size_read)) */

	    /*  fakes filling the readlist arrays of lengths found above  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
		if(i) {
		    if (i == (mem_list_count - 1)) {
			if (flat_buf->blocklens[k] == end_brd_size)
			    brd_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    brd_size = flat_buf->blocklens[k] - end_brd_size;
			    k--;
			    buf_count--;
			}
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
		if (i) {
		    if (i == (file_list_count - 1)) {
			if (flat_file->blocklens[j] == end_frd_size)
			    frd_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    frd_size = flat_file->blocklens[j] - end_frd_size;
			    j--;
			}
		    }
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    size_read += new_buffer_read;
	    start_k = k;
	    start_j = j;
	    if (max_mem_list < mem_list_count)
	        max_mem_list = mem_list_count;
	    if (max_file_list < file_list_count)
	        max_file_list = file_list_count;
	} /* while (size_read < bufsize) */

	mem_offsets = (char **)ADIOI_Malloc(max_mem_list*sizeof(char *));
	mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
	file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
	file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
	    
	size_read = 0;
	n_filetypes = st_n_filetypes;
	frd_size = st_frd_size;
	brd_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;

	/*  this section calculates mem_list_count and file_list_count
	    and also finds the possibly odd sized last array elements
	    in new_frd_size and new_brd_size  */
	
	while (size_read < bufsize) {
	    k = start_k;
	    new_buffer_read = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_read < bufsize-size_read)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_read + flat_buf->blocklens[k] + 
			size_read) > bufsize) {
		        end_brd_size = new_buffer_read + 
			    flat_buf->blocklens[k] - (bufsize - size_read);
			new_buffer_read = bufsize - size_read;
		    }
		    else {
		        new_buffer_read += flat_buf->blocklens[k];
			end_brd_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (brd_size > (bufsize - size_read)) {
		        new_buffer_read = bufsize - size_read;
			brd_size = new_buffer_read;
		    }
		    else new_buffer_read = brd_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_read < bufsize-size_read)) */
	    j = start_j;
	    new_file_read = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_read < new_buffer_read)) {
	        if(file_list_count) {
		    if((new_file_read + flat_file->blocklens[j]) > 
		       new_buffer_read) {
		        end_frd_size = new_buffer_read - new_file_read;
			new_file_read = new_buffer_read;
			j--;
		    }
		    else {
		        new_file_read += flat_file->blocklens[j];
			end_frd_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (frd_size > new_buffer_read) {
		        new_file_read = new_buffer_read;
			frd_size = new_file_read;
		    }
		    else new_file_read = frd_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_read < new_buffer_read) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_read = 0;
		    mem_list_count = 0;
		    while (new_buffer_read < new_file_read) {
		        if(mem_list_count) {
			    if((new_buffer_read + flat_buf->blocklens[k]) >
			       new_file_read) {
			        end_brd_size = new_file_read - new_buffer_read;
				new_buffer_read = new_file_read;
				k--;
			    }
			    else {
			        new_buffer_read += flat_buf->blocklens[k];
				end_brd_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_read = brd_size;
			    if (brd_size > (bufsize - size_read)) {
			        new_buffer_read = bufsize - size_read;
				brd_size = new_buffer_read;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_read < new_file_read) */
		} /* if ((new_file_read < new_buffer_read) && (file_list_count
		     == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_read < bufsize-size_read)) */

	    /*  fills the allocated readlist arrays  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
	        mem_offsets[i] = (char*)((char *)buf + buftype_extent*
					 (buf_count/flat_buf->count) +
					 (int)flat_buf->indices[k]);
		if(!i) {
		    mem_lengths[0] = brd_size;
		    mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
		}
		else {
		    if (i == (mem_list_count - 1)) {
		        mem_lengths[i] = end_brd_size;
			if (flat_buf->blocklens[k] == end_brd_size)
			    brd_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    brd_size = flat_buf->blocklens[k] - end_brd_size;
			    k--;
			    buf_count--;
			}
		    }
		    else {
		        mem_lengths[i] = flat_buf->blocklens[k];
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
	        file_offsets[i] = disp + flat_file->indices[j] + n_filetypes *
		    filetype_extent;
	        if (!i) {
		    file_lengths[0] = frd_size;
		    file_offsets[0] += flat_file->blocklens[j] - frd_size;
		}
		else {
		    if (i == (file_list_count - 1)) {
		        file_lengths[i] = end_frd_size;
			if (flat_file->blocklens[j] == end_frd_size)
			    frd_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    frd_size = flat_file->blocklens[j] - end_frd_size;
			    j--;
			}
		    }
		    else file_lengths[i] = flat_file->blocklens[j];
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */

	    /* 
	    printf("about to call read_list in noncontig/noncontig\n");
	    printf("offsets and lengths in terms of integers\n");
	    printf("\nmem_list_count = %d\n", mem_list_count);
	    for (i=0; i<mem_list_count; i++) {
	      printf("mem_offsets[%2d] = %2d   ", i, (int)(mem_offsets[i] - (int)buf)/4);
	      printf("mem_lengths[%2d] = %2d\n", i, mem_lengths[i]/4);
	    }
	    printf("\nfile_list_count = %d\n", file_list_count);
	    for (i=0; i<file_list_count; i++) {
	      printf("file_offsets[%2d] = %2d   ", i, (int)file_offsets[i]/4);
	      printf("file_lengths[%2d] = %2d\n", i, file_lengths[i]/4);
	    }
	    printf("\n\n");
	    */
	    pvfs_read_list(fd->fd_sys,mem_list_count, mem_offsets,
			   mem_lengths, file_list_count, file_offsets,
			   file_lengths);
	    size_read += new_buffer_read;
	    start_k = k;
	    start_j = j;
	} /* while (size_read < bufsize) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);
    }
    ADIOI_Free(file_offsets);
    ADIOI_Free(file_lengths);
    
    if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
    fd->fp_sys_posn = -1;   /* set it to null. */
    
#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
    /* This is a temporary way of filling in status. The right way is to 
       keep track of how much data was actually read and placed in buf 
       by ADIOI_BUFFERED_READ. */
#endif
    
    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
예제 #14
0
void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count,
                       MPI_Datatype buftype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
    /* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    /* bwr == buffer write; fwr == file write */
    ADIO_Offset bwr_size, fwr_size=0, sum, size_in_filetype; 
    int b_index;
    MPI_Count bufsize;
    ADIO_Offset n_etypes_in_filetype;
    ADIO_Offset size, n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0, req_len;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent, lb; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, start_off;
    ADIO_Status status1;

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(buftype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, buftype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_get_extent(fd->filetype, &lb, &filetype_extent);
    MPI_Type_size_x(buftype, &buftype_size);
    MPI_Type_get_extent(buftype, &lb, &buftype_extent);
    etype_size = fd->etype_size;

    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
    bufsize = buftype_size * count;

    /* contiguous in buftype and filetype is handled elsewhere */

    if (!buftype_is_contig && filetype_is_contig) {
    	int b_count;
	/* noncontiguous in memory, contiguous in file. */

	flat_buf = ADIOI_Flatten_and_find(buftype);

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
              fd->disp + (ADIO_Offset)etype_size * offset;

	start_off = off;
	end_offset = off + bufsize - 1;

	/* if atomicity is true, lock (exclusive) the region to be accessed */
        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
	{
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
	}

	/* for each region in the buffer, grab the data and put it in
	 * place
	 */
        for (b_count=0; b_count < count; b_count++) {
            for (b_index=0; b_index < flat_buf->count; b_index++) {
                userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + 
		              flat_buf->indices[b_index];
		req_off = off;
		req_len = flat_buf->blocklens[b_index];

    ADIOI_Assert(req_len == (int) req_len);
    ADIOI_Assert((((ADIO_Offset)(MPIU_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIU_Upint)((MPIU_Upint)buf + userbuf_off));
		ADIO_WriteContig(fd, 
				(char *) buf + userbuf_off,
				(int)req_len, 
				MPI_BYTE, 
		    		ADIO_EXPLICIT_OFFSET,
				req_off,
				&status1,
				error_code);
		if (*error_code != MPI_SUCCESS) return;

		/* off is (potentially) used to save the final offset later */
                off += flat_buf->blocklens[b_index];
            }
	}

        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
	{
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
	}

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

    }

    else {  /* noncontiguous in file */
    	int f_index, st_index = 0;
      ADIO_Offset st_fwr_size, st_n_filetypes;
	int flag;

        /* First we're going to calculate a set of values for use in all
	 * the noncontiguous in file cases:
	 * start_off - starting byte position of data in file
	 * end_offset - last byte offset to be acessed in the file
	 * st_n_filetypes - how far into the file we start in terms of
	 *                  whole filetypes
	 * st_index - index of block in first filetype that we will be
	 *            starting in (?)
	 * st_fwr_size - size of the data in the first filetype block
	 *               that we will write (accounts for being part-way
	 *               into writing this block of the filetype
	 *
	 */

	/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    start_off = fd->fp_ind; /* in bytes */
	    n_filetypes = -1;
	    flag = 0;
	    while (!flag) {
                n_filetypes++;
		for (f_index=0; f_index < flat_file->count; f_index++) {
		    if (disp + flat_file->indices[f_index] + 
                       n_filetypes*(ADIO_Offset)filetype_extent + 
		       flat_file->blocklens[f_index] >= start_off) 
		    {
		    	/* this block contains our starting position */

			st_index = f_index;
			fwr_size = disp + flat_file->indices[f_index] + 
		 	           n_filetypes*(ADIO_Offset)filetype_extent + 
				   flat_file->blocklens[f_index] - start_off;
			flag = 1;
			break;
		    }
		}
	    }
	}
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = offset / n_etypes_in_filetype;
	    etype_in_filetype = offset % n_etypes_in_filetype;
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (f_index=0; f_index < flat_file->count; f_index++) {
		sum += flat_file->blocklens[f_index];
		if (sum > size_in_filetype) {
		    st_index = f_index;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[f_index] +
			                  size_in_filetype - 
			                  (sum - flat_file->blocklens[f_index]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + 
	    	        abs_off_in_filetype;
	}

	st_fwr_size = fwr_size;
	st_n_filetypes = n_filetypes;

	/* start_off, st_n_filetypes, st_index, and st_fwr_size are 
	 * all calculated at this point
	 */

        /* Calculate end_offset, the last byte-offset that will be accessed.
         * e.g., if start_off=0 and 100 bytes to be written, end_offset=99
	 */
	userbuf_off = 0;
	f_index = st_index;
	off = start_off;
	fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
	while (userbuf_off < bufsize) {
	    userbuf_off += fwr_size;
	    end_offset = off + fwr_size - 1;

	    if (f_index < (flat_file->count - 1)) f_index++;
	    else {
		f_index = 0;
		n_filetypes++;
	    }

	    off = disp + flat_file->indices[f_index] + 
	          n_filetypes*(ADIO_Offset)filetype_extent;
	    fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], 
	                         bufsize-(unsigned)userbuf_off);
	}

	/* End of calculations.  At this point the following values have
	 * been calculated and are ready for use:
	 * - start_off
	 * - end_offset
	 * - st_n_filetypes
	 * - st_index
	 * - st_fwr_size
	 */

	/* if atomicity is true, lock (exclusive) the region to be accessed */
        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
	{
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
	}

	if (buftype_is_contig && !filetype_is_contig) {
	    /* contiguous in memory, noncontiguous in file. should be the
	     * most common case.
	     */

	    userbuf_off = 0;
	    f_index = st_index;
	    off = start_off;
	    n_filetypes = st_n_filetypes;
	    fwr_size = ADIOI_MIN(st_fwr_size, bufsize);

	    /* while there is still space in the buffer, write more data */
	    while (userbuf_off < bufsize) {
                if (fwr_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       fwr_size = 0. save system call in such cases */ 
		    req_off = off;
		    req_len = fwr_size;

        ADIOI_Assert(req_len == (int) req_len);
        ADIOI_Assert((((ADIO_Offset)(MPIU_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIU_Upint)((MPIU_Upint)buf + userbuf_off));
		    ADIO_WriteContig(fd, 
				    (char *) buf + userbuf_off,
				    (int)req_len, 
				    MPI_BYTE, 
				    ADIO_EXPLICIT_OFFSET,
				    req_off,
				    &status1,
				    error_code);
		    if (*error_code != MPI_SUCCESS) return;
		}
		userbuf_off += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[f_index] +
                   flat_file->blocklens[f_index] + 
		   n_filetypes*(ADIO_Offset)filetype_extent)
		{
		    /* important that this value be correct, as it is
		     * used to set the offset in the fd near the end of
		     * this function.
		     */
                    off += fwr_size;
		}
                /* did not reach end of contiguous block in filetype.
                 * no more I/O needed. off is incremented by fwr_size.
		 */
                else {
		    if (f_index < (flat_file->count - 1)) f_index++;
		    else {
			f_index = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[f_index] + 
                          n_filetypes*(ADIO_Offset)filetype_extent;
		    fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], 
		                         bufsize-(unsigned)userbuf_off);
		}
	    }
	}
	else {
	    ADIO_Offset i_offset, tmp_bufsize = 0;
	    /* noncontiguous in memory as well as in file */

	    flat_buf = ADIOI_Flatten_and_find(buftype);

	    b_index = buf_count = 0;
	    i_offset = flat_buf->indices[0];
	    f_index = st_index;
	    off = start_off;
	    n_filetypes = st_n_filetypes;
	    fwr_size = st_fwr_size;
	    bwr_size = flat_buf->blocklens[0];

	    /* while we haven't read size * count bytes, keep going */
	    while (tmp_bufsize < bufsize) {
    		ADIO_Offset new_bwr_size = bwr_size, new_fwr_size = fwr_size;

		size = ADIOI_MIN(fwr_size, bwr_size);
		if (size) {
		    req_off = off;
		    req_len = size;
		    userbuf_off = i_offset;

        ADIOI_Assert(req_len == (int) req_len);
        ADIOI_Assert((((ADIO_Offset)(MPIU_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIU_Upint)((MPIU_Upint)buf + userbuf_off));
		    ADIO_WriteContig(fd, 
				    (char *) buf + userbuf_off,
				    (int)req_len, 
				    MPI_BYTE, 
				    ADIO_EXPLICIT_OFFSET,
				    req_off,
				    &status1,
				    error_code);
		    if (*error_code != MPI_SUCCESS) return;
		}

		if (size == fwr_size) {
		    /* reached end of contiguous block in file */
		    if (f_index < (flat_file->count - 1)) f_index++;
		    else {
			f_index = 0;
			n_filetypes++;
		    }

		    off = disp + flat_file->indices[f_index] + 
                          n_filetypes*(ADIO_Offset)filetype_extent;

		    new_fwr_size = flat_file->blocklens[f_index];
		    if (size != bwr_size) {
			i_offset += size;
			new_bwr_size -= size;
		    }
		}

		if (size == bwr_size) {
		    /* reached end of contiguous block in memory */

		    b_index = (b_index + 1)%flat_buf->count;
		    buf_count++;
		    i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
			flat_buf->indices[b_index];
		    new_bwr_size = flat_buf->blocklens[b_index];
		    if (size != fwr_size) {
			off += size;
			new_fwr_size -= size;
		    }
		}
		tmp_bufsize += size;
		fwr_size = new_fwr_size;
                bwr_size = new_bwr_size;
	    }
	}

	/* unlock the file region if we locked it */
        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
	{
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
	}

	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
    } /* end of (else noncontiguous in file) */

    fd->fp_sys_posn = -1;   /* mark it as invalid. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, buftype, bufsize);
    /* This is a temporary way of filling in status. The right way is to 
     * keep track of how much data was actually written and placed in buf 
     */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(buftype);
}
예제 #15
0
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
                    }
                    off = disp + flat_file->indices[j] +
                          n_filetypes*(ADIO_Offset)filetype_extent;
                    frd_size = MPL_MIN(flat_file->blocklens[j], bufsize-i_offset);
                }
            }
        }
        else {
            /* noncontiguous in memory as well as in file */

            flat_buf = ADIOI_Flatten_and_find(datatype);

            k = num = buf_count = 0;
            i_offset = flat_buf->indices[0];
            j = st_index;
            off = offset;
            n_filetypes = st_n_filetypes;
            frd_size = st_frd_size;
            brd_size = flat_buf->blocklens[0];

            while (num < bufsize) {
                size = MPL_MIN(frd_size, brd_size);
                if (size) {
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                    err = read(fd->fd_sys, ((char *) buf) + i, size); */
예제 #16
0
void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, ADIO_Status *status, int
                           *error_code)
{


    /* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    ADIO_Offset i_offset, new_brd_size, brd_size, size;
    int i, j, k, st_index=0;
    MPI_Count num, bufsize;
    int n_etypes_in_filetype;
    ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype;
    ADIO_Offset abs_off_in_filetype=0, new_frd_size, frd_size=0, st_frd_size;
    MPI_Count filetype_size, etype_size, buftype_size, partial_read;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off, req_len, sum;
    ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
    char *readbuf, *tmp_buf, *value;
    int info_flag;
    unsigned max_bufsize, readbuf_len;
    ADIO_Status status1;

    if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
        /* if user has disabled data sieving on reads, use naive
        * approach instead.
        */
        ADIOI_GEN_ReadStrided_naive(fd,
                                    buf,
                                    count,
                                    datatype,
                                    file_ptr_type,
                                    offset,
                                    status,
                                    error_code);
        return;
    }

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
        MPIR_Status_set_bytes(status, datatype, 0);
#endif
        *error_code = MPI_SUCCESS;
        return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(MPI_Count)buftype_size * (ADIO_Offset)count));
    bufsize = buftype_size * count;

    /* get max_bufsize from the info object. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
                   &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);


    if (!buftype_is_contig && filetype_is_contig) {

        /* noncontiguous in memory, contiguous in file. */

        flat_buf = ADIOI_Flatten_and_find(datatype);

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
              fd->disp + (ADIO_Offset)etype_size * offset;

        start_off = off;
        end_offset = off + bufsize - 1;
        readbuf_off = off;
        readbuf = (char *) ADIOI_Malloc(max_bufsize);
        readbuf_len = (unsigned) (MPL_MIN(max_bufsize, end_offset-readbuf_off+1));

        /* if atomicity is true, lock (exclusive) the region to be accessed */
        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE,
                        ADIO_EXPLICIT_OFFSET, readbuf_off, &status1, error_code);
        if (*error_code != MPI_SUCCESS) return;

        for (j=0; j<count; j++)
        {
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
                req_off = off;
                req_len = flat_buf->blocklens[i];
                ADIOI_BUFFERED_READ
                off += flat_buf->blocklens[i];
            }
        }

        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

        ADIOI_Free(readbuf);
    }

    else {  /* noncontiguous in file */

        flat_file = ADIOI_Flatten_and_find(fd->filetype);
        disp = fd->disp;

        if (file_ptr_type == ADIO_INDIVIDUAL) {
            /* Wei-keng reworked type processing to be a bit more efficient */
            offset       = fd->fp_ind - disp;
            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
            offset -= (ADIO_Offset)n_filetypes * filetype_extent;
            /* now offset is local to this extent */

            /* find the block where offset is located, skip blocklens[i]==0 */
            for (i=0; i<flat_file->count; i++) {
                ADIO_Offset dist;
                if (flat_file->blocklens[i] == 0) continue;
                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
                /* frd_size is from offset to the end of block i */
                if (dist == 0) {
                    i++;
                    offset   = flat_file->indices[i];
                    frd_size = flat_file->blocklens[i];
                    break;
                }
                if (dist > 0) {
                    frd_size = dist;
                    break;
                }
            }
            st_index = i;  /* starting index in flat_file->indices[] */
            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
        }
        else {
            n_etypes_in_filetype = filetype_size/etype_size;
            n_filetypes = offset / n_etypes_in_filetype;
            etype_in_filetype = offset % n_etypes_in_filetype;
            size_in_filetype = etype_in_filetype * etype_size;

            sum = 0;
            for (i=0; i<flat_file->count; i++) {
                sum += flat_file->blocklens[i];
                if (sum > size_in_filetype) {
                    st_index = i;
                    frd_size = sum - size_in_filetype;
                    abs_off_in_filetype = flat_file->indices[i] +
                                          size_in_filetype - (sum - flat_file->blocklens[i]);
                    break;
                }
            }

            /* abs. offset in bytes in the file */
            offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
                     abs_off_in_filetype;
        }

        start_off = offset;

        /* Wei-keng Liao: read request is within a single flat_file contig
         * block e.g. with subarray types that actually describe the whole
         * array */
        if (buftype_is_contig && bufsize <= frd_size) {
            /* a count of bytes can overflow. operate on original type instead */
            ADIO_ReadContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET,
                            offset, status, error_code);

            if (file_ptr_type == ADIO_INDIVIDUAL) {
                /* update MPI-IO file pointer to point to the first byte that
                * can be accessed in the fileview. */
                fd->fp_ind = offset + bufsize;
                if (bufsize == frd_size) {
                    do {
                        st_index++;
                        if (st_index == flat_file->count) {
                            st_index = 0;
                            n_filetypes++;
                        }
                    } while (flat_file->blocklens[st_index] == 0);
                    fd->fp_ind = disp + flat_file->indices[st_index]
                                 + n_filetypes*filetype_extent;
                }
            }
            fd->fp_sys_posn = -1;   /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
            MPIR_Status_set_bytes(status, datatype, bufsize);
#endif
            return;
        }

        /* Calculate end_offset, the last byte-offset that will be accessed.
          e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/

        st_frd_size = frd_size;
        st_n_filetypes = n_filetypes;
        i_offset = 0;
        j = st_index;
        off = offset;
        frd_size = MPL_MIN(st_frd_size, bufsize);
        while (i_offset < bufsize) {
            i_offset += frd_size;
            end_offset = off + frd_size - 1;

            j = (j+1) % flat_file->count;
            n_filetypes += (j == 0) ? 1 : 0;
            while (flat_file->blocklens[j]==0) {
                j = (j+1) % flat_file->count;
                n_filetypes += (j == 0) ? 1 : 0;
            }
            off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent;
            frd_size = MPL_MIN(flat_file->blocklens[j], bufsize-i_offset);
        }

        /* if atomicity is true, lock (exclusive) the region to be accessed */
        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        readbuf_off = 0;
        readbuf_len = 0;
        readbuf = (char *) ADIOI_Malloc(max_bufsize);

        if (buftype_is_contig && !filetype_is_contig) {

            /* contiguous in memory, noncontiguous in file. should be the most
               common case. */

            i_offset = 0;
            j = st_index;
            off = offset;
            n_filetypes = st_n_filetypes;
            frd_size = MPL_MIN(st_frd_size, bufsize);
            while (i_offset < bufsize) {
                if (frd_size) {
                    /* TYPE_UB and TYPE_LB can result in
                       frd_size = 0. save system call in such cases */
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                    err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/

                    req_off = off;
                    req_len = frd_size;
                    userbuf_off = i_offset;
                    ADIOI_BUFFERED_READ
                }
                i_offset += frd_size;

                if (off + frd_size < disp + flat_file->indices[j] +
                        flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
                    off += frd_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by frd_size. */
                else {
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
                    }
                    off = disp + flat_file->indices[j] +
                          n_filetypes*(ADIO_Offset)filetype_extent;
                    frd_size = MPL_MIN(flat_file->blocklens[j], bufsize-i_offset);
                }
            }
        }
        else {
예제 #17
0
void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, brd_size, frd_size=0, st_index=0;
    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int req_len, partial_read;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
    char *readbuf, *tmp_buf, *value;
    int st_frd_size, st_n_filetypes, readbuf_len;
    int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;

    static char myname[] = "ADIOI_NFS_READSTRIDED";

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

/* get max_bufsize from the info object. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
                 &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */

	flat_buf = ADIOI_Flatten_and_find(datatype);

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
                 fd->disp + etype_size * offset;

	start_off = off;
	end_offset = off + bufsize - 1;
        readbuf_off = off;
        readbuf = (char *) ADIOI_Malloc(max_bufsize);
        readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));

/* if atomicity is true, lock (exclusive) the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
	lseek(fd->fd_sys, readbuf_off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
        if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
        err = read(fd->fd_sys, readbuf, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
        if (err == -1) err_flag = 1;

        for (j=0; j<count; j++) 
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = j*buftype_extent + flat_buf->indices[i];
		req_off = off;
		req_len = flat_buf->blocklens[i];
		ADIOI_BUFFERED_READ
                off += flat_buf->blocklens[i];
            }

        if (fd->atomicity)
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	ADIOI_Free(readbuf); /* malloced in the buffered_read macro */

	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    }

    else {  /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
          /* Wei-keng reworked type processing to be a bit more efficient */
           offset       = fd->fp_ind - disp;
           n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
          offset -= (ADIO_Offset)n_filetypes * filetype_extent;
          /* now offset is local to this extent */
 
           /* find the block where offset is located, skip blocklens[i]==0 */
           for (i=0; i<flat_file->count; i++) {
               ADIO_Offset dist;
               if (flat_file->blocklens[i] == 0) continue;
               dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
               /* frd_size is from offset to the end of block i */
              if (dist == 0) {
                  i++;
                  offset   = flat_file->indices[i];
                  frd_size = flat_file->blocklens[i];
                  break;
              }
              if (dist > 0 ) { 
                   frd_size = dist;
		   break;
              }
          }
           st_index = i;  /* starting index in flat_file->indices[] */
           offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
       }
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    frd_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
		    abs_off_in_filetype;
	}

        start_off = offset;

       /* Wei-keng Liao: read request is within a single flat_file contig
        * block e.g. with subarray types that actually describe the whole
        * array */
       if (buftype_is_contig && bufsize <= frd_size) {
            ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                             offset, status, error_code);

           if (file_ptr_type == ADIO_INDIVIDUAL) {
                /* update MPI-IO file pointer to point to the first byte that 
                * can be accessed in the fileview. */
               fd->fp_ind = offset + bufsize;
               if (bufsize == frd_size) {
                   do {
                       st_index++;
                       if (st_index == flat_file->count) {
                           st_index = 0;
                           n_filetypes++;
                       }
                    } while (flat_file->blocklens[st_index] == 0);
                   fd->fp_ind = disp + flat_file->indices[st_index]
                               + n_filetypes*filetype_extent;
               }
           }
           fd->fp_sys_posn = -1;   /* set it to null. */ 
#ifdef HAVE_STATUS_SET_BYTES
           MPIR_Status_set_bytes(status, datatype, bufsize);
#endif 
            return;
       }

       /* Calculate end_offset, the last byte-offset that will be accessed.
         e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/

	st_frd_size = frd_size;
	st_n_filetypes = n_filetypes;
	i = 0;
	j = st_index;
	off = offset;
	frd_size = ADIOI_MIN(st_frd_size, bufsize);
	while (i < bufsize) {
	    i += frd_size;
	    end_offset = off + frd_size - 1;
            j = (j+1) % flat_file->count;
            n_filetypes += (j == 0) ? 1 : 0;
            while (flat_file->blocklens[j]==0) {
               j = (j+1) % flat_file->count;
               n_filetypes += (j == 0) ? 1 : 0;
	    }

	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
	}

/* if atomicity is true, lock (exclusive) the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        /* initial read into readbuf */
	readbuf_off = offset;
	readbuf = (char *) ADIOI_Malloc(max_bufsize);
	readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));

#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
	lseek(fd->fd_sys, offset, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
        if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
        err = read(fd->fd_sys, readbuf, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, offset, SEEK_SET, readbuf_len);

        if (err == -1) err_flag = 1;

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i = 0;
	    j = st_index;
	    off = offset;
	    n_filetypes = st_n_filetypes;
	    frd_size = ADIOI_MIN(st_frd_size, bufsize);
	    while (i < bufsize) {
                if (frd_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       frd_size = 0. save system call in such cases */ 
		    /* lseek(fd->fd_sys, off, SEEK_SET);
		    err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/

		    req_off = off;
		    req_len = frd_size;
		    userbuf_off = i;
		    ADIOI_BUFFERED_READ
		}
		i += frd_size;

                if (off + frd_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                       off += frd_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by frd_size. */
                else {
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
                    }
		    off = disp + flat_file->indices[j] + 
                                        (ADIO_Offset) n_filetypes*filetype_extent;
		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
		}
	    }
	}
	else {