Ejemplo n.º 1
0
void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
			       MPI_Datatype datatype, int file_ptr_type,
			       ADIO_Offset offset, ADIO_Status * status,
			       int *error_code)
{
    /* offset is in units of etype relative to the filetype. */
    ADIOI_Flatlist_node *flat_buf, *flat_file;
    ADIO_Offset i_offset, sum, size_in_filetype;
    int i, j, k, st_index=0;
    int n_etypes_in_filetype;
    ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
    char *writebuf;
    unsigned bufsize, writebuf_len, write_sz;
    ADIO_Status status1;
    ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
    int stripe_size;
    static char myname[] = "ADIOI_LUSTRE_WriteStrided";

    if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
	/* if user has disabled data sieving on writes, use naive
	 * approach instead.
	 */
	ADIOI_GEN_WriteStrided_naive(fd,
				     buf,
				     count,
				     datatype,
				     file_ptr_type,
				     offset, status, error_code);
	return;
    }

    *error_code = MPI_SUCCESS;	/* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if (!filetype_size) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS;
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
    bufsize = buftype_size * count;

    /* get striping info */
    stripe_size = fd->hints->striping_unit;

    /* Different buftype to different filetype */
    if (!buftype_is_contig && filetype_is_contig) {
        /* noncontiguous in memory, contiguous in file. */
	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype)
	    flat_buf = flat_buf->next;

	off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
            fd->disp + (ADIO_Offset)etype_size * offset;

	start_off = off;
	end_offset = start_off + bufsize - 1;
        /* write stripe size buffer each time */
	writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size));
        writebuf_off = 0;
        writebuf_len = 0;

        /* if atomicity is true, lock the region to be accessed */
	if (fd->atomicity)
	    ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize);

	for (j = 0; j < count; j++) {
	    for (i = 0; i < flat_buf->count; i++) {
                userbuf_off = (ADIO_Offset)j * (ADIO_Offset)buftype_extent +
                    flat_buf->indices[i];
		req_off = off;
		req_len = flat_buf->blocklens[i];
		ADIOI_BUFFERED_WRITE_WITHOUT_READ
		off += flat_buf->blocklens[i];
	    }
        }

	/* write the buffer out finally */
	ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
			 ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
			 error_code);

	if (fd->atomicity)
	    ADIOI_UNLOCK(fd, start_off, SEEK_SET, bufsize);
	if (*error_code != MPI_SUCCESS) {
            ADIOI_Free(writebuf);
	    return;
        }
	ADIOI_Free(writebuf);
	if (file_ptr_type == ADIO_INDIVIDUAL)
	    fd->fp_ind = off;
    } else {
        /* noncontiguous in file */
        /* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype)
	    flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
            /* Wei-keng reworked type processing to be a bit more efficient */
            offset       = fd->fp_ind - disp;
            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
            offset      -= (ADIO_Offset)n_filetypes * filetype_extent;
            /* now offset is local to this extent */

            /* find the block where offset is located, skip blocklens[i]==0 */
            for (i=0; i<flat_file->count; i++) {
                ADIO_Offset dist;
                if (flat_file->blocklens[i] == 0) continue;
                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
                /* fwr_size is from offset to the end of block i */
                if (dist == 0) {
                    i++;
                    offset   = flat_file->indices[i];
                    fwr_size = flat_file->blocklens[i];
			break;
		    }
                if (dist > 0) {
                    fwr_size = dist;
                    break;
		}
	    }
            st_index = i;  /* starting index in flat_file->indices[] */
            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
        }
        else {
            n_etypes_in_filetype = filetype_size/etype_size;
            n_filetypes = offset / n_etypes_in_filetype;
            etype_in_filetype = offset % n_etypes_in_filetype;
	    size_in_filetype = etype_in_filetype * etype_size;

	    sum = 0;
	    for (i = 0; i < flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    offset = disp + (ADIO_Offset) n_filetypes *filetype_extent +
		     abs_off_in_filetype;
	}

	start_off = offset;

        /* Wei-keng Liao:write request is within single flat_file
         * contig block*/
        /* this could happen, for example, with subarray types that are
         * actually fairly contiguous */
        if (buftype_is_contig && bufsize <= fwr_size) {
            req_off = start_off;
            req_len = bufsize;
            end_offset = start_off + bufsize - 1;
	    writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size));
	    memset(writebuf, -1, ADIOI_MIN(bufsize, stripe_size));
            writebuf_off = 0;
            writebuf_len = 0;
            userbuf_off = 0;
            ADIOI_BUFFERED_WRITE_WITHOUT_READ
            /* write the buffer out finally */
            ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
                             ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
                             error_code);

            if (file_ptr_type == ADIO_INDIVIDUAL) {
                /* update MPI-IO file pointer to point to the first byte
                 * that can be accessed in the fileview. */
                fd->fp_ind = offset + bufsize;
                if (bufsize == fwr_size) {
                    do {
                        st_index++;
                        if (st_index == flat_file->count) {
                            st_index = 0;
                            n_filetypes++;
                        }
                    } while (flat_file->blocklens[st_index] == 0);
                    fd->fp_ind = disp + flat_file->indices[st_index]
                        + (ADIO_Offset)n_filetypes*filetype_extent;
                }
            }
            fd->fp_sys_posn = -1;   /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
            MPIR_Status_set_bytes(status, datatype, bufsize);
#endif
            ADIOI_Free(writebuf);
            return;
        }

	    /* Calculate end_offset, the last byte-offset that will be accessed.
           e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/

	    st_fwr_size = fwr_size;
	    st_n_filetypes = n_filetypes;
        i_offset = 0;
	    j = st_index;
	    off = offset;
	    fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
        while (i_offset < bufsize) {
            i_offset += fwr_size;
		end_offset = off + fwr_size - 1;

            j = (j+1) % flat_file->count;
            n_filetypes += (j == 0) ? 1 : 0;
            while (flat_file->blocklens[j]==0) {
                j = (j+1) % flat_file->count;
                n_filetypes += (j == 0) ? 1 : 0;
		}

		off = disp + flat_file->indices[j] +
                n_filetypes*(ADIO_Offset)filetype_extent;
            fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
	    }

/* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

	    writebuf_off = 0;
	    writebuf_len = 0;
	    writebuf = (char *) ADIOI_Malloc(stripe_size);
	    memset(writebuf, -1, stripe_size);

	    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
		   common case. */

            i_offset = 0;
		j = st_index;
		off = offset;
		n_filetypes = st_n_filetypes;
		fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
            while (i_offset < bufsize) {
		    if (fwr_size) {
			/* TYPE_UB and TYPE_LB can result in
			   fwr_size = 0. save system call in such cases */
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                       err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/

			req_off = off;
			req_len = fwr_size;
                    userbuf_off = i_offset;
			ADIOI_BUFFERED_WRITE
                    }
                i_offset += fwr_size;

		    if (off + fwr_size < disp + flat_file->indices[j] +
		                         flat_file->blocklens[j] +
                    n_filetypes*(ADIO_Offset)filetype_extent)
		        off += fwr_size;
		    /* did not reach end of contiguous block in filetype.
		    no more I/O needed. off is incremented by fwr_size. */
		    else {
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
			}
			off = disp + flat_file->indices[j] +
                        n_filetypes*(ADIO_Offset)filetype_extent;
			fwr_size = ADIOI_MIN(flat_file->blocklens[j],
                                         bufsize-i_offset);
		    }
		}
        }
        else {
Ejemplo n.º 2
0
void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size, req_len;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
    char *writebuf, *value;
    int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
    int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
    static char myname[] = "ADIOI_NFS_WRITESTRIDED";

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

/* get max_bufsize from the info object. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, 
                 &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
                 fd->disp + etype_size * offset;

        start_off = off;
	end_offset = off + bufsize - 1;
        writebuf_off = off;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));

/* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity) 
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        for (j=0; j<count; j++) 
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = j*buftype_extent + flat_buf->indices[i];
		req_off = off;
		req_len = flat_buf->blocklens[i];
		ADIOI_BUFFERED_WRITE_WITHOUT_READ
                off += flat_buf->blocklens[i];
            }

        /* write the buffer out finally */
	lseek(fd->fd_sys, writebuf_off, SEEK_SET); 
	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
	err = write(fd->fd_sys, writebuf, writebuf_len); 
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        if (err == -1) err_flag = 1; 

        if (fd->atomicity) 
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

	ADIOI_Free(writebuf); /* malloced in the buffered_write macro */

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    }

    else {  /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    offset = fd->fp_ind; /* in bytes */
	    n_filetypes = -1;
	    flag = 0;
	    while (!flag) {
                n_filetypes++;
		for (i=0; i<flat_file->count; i++) {
		    if (disp + flat_file->indices[i] + 
                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
                            >= offset) {
			st_index = i;
			fwr_size = (int) (disp + flat_file->indices[i] + 
			        (ADIO_Offset) n_filetypes*filetype_extent
			         + flat_file->blocklens[i] - offset);
			flag = 1;
			break;
		    }
		}
	    }
	}
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
	}

        start_off = offset;

       /* Calculate end_offset, the last byte-offset that will be accessed.
         e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/

	st_fwr_size = fwr_size;
	st_n_filetypes = n_filetypes;
	i = 0;
	j = st_index;
	off = offset;
	fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
	while (i < bufsize) {
	    i += fwr_size;
	    end_offset = off + fwr_size - 1;

	    if (j < (flat_file->count - 1)) j++;
	    else {
		j = 0;
		n_filetypes++;
	    }

	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
	    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
	}

/* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity) 
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        /* initial read for the read-modify-write */
        writebuf_off = offset;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (int)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
	lseek(fd->fd_sys, writebuf_off, SEEK_SET); 
	err = read(fd->fd_sys, writebuf, writebuf_len); 
        if (err == -1) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE,
					       myname, __LINE__,
					       MPI_ERR_IO,
					       "ADIOI_NFS_WriteStrided: ROMIO tries to optimize this access by doing a read-modify-write, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.", 0);
	    return;
        } 

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i = 0;
	    j = st_index;
	    off = offset;
	    n_filetypes = st_n_filetypes;
	    fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
	    while (i < bufsize) {
                if (fwr_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       fwr_size = 0. save system call in such cases */ 
		    /* lseek(fd->fd_sys, off, SEEK_SET);
		    err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/

		    req_off = off;
		    req_len = fwr_size;
		    userbuf_off = i;
		    ADIOI_BUFFERED_WRITE
		}
		i += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                       off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by fwr_size. */
                else {
		    if (j < (flat_file->count - 1)) j++;
		    else {
			j = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[j] + 
                                        (ADIO_Offset) n_filetypes*filetype_extent;
		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
		}
	    }
	}
	else {
Ejemplo n.º 3
0
/* wrapper function for ADIO_WriteStrided and ADIO_ReadStrided.  Used
 * by new 2 phase code to pass an arbitrary file type directly to
 * WriteStrided call without affecting existing code.  For the new 2
 * phase code, we really only need to set a custom_ftype, and we can
 * assume that this uses MPI_BYTE for the etype, and disp is 0 */
void ADIOI_IOFiletype(ADIO_File fd, void *buf, int count,
                      MPI_Datatype datatype, int file_ptr_type,
                      ADIO_Offset offset, MPI_Datatype custom_ftype,
                      int rdwr, ADIO_Status * status, int *error_code)
{
    MPI_Datatype user_filetype;
    MPI_Datatype user_etype;
    ADIO_Offset user_disp;
    int user_ind_wr_buffer_size;
    int user_ind_rd_buffer_size;
    int f_is_contig, m_is_contig;
    int user_ds_read, user_ds_write;
    MPI_Aint f_extent;
    MPI_Count f_size;
    int f_ds_percent;           /* size/extent */

#ifdef AGGREGATION_PROFILE
    if (rdwr == ADIOI_READ)
        MPE_Log_event(5006, 0, NULL);
    else
        MPE_Log_event(5008, 0, NULL);
#endif
    MPI_Type_extent(custom_ftype, &f_extent);
    MPI_Type_size_x(custom_ftype, &f_size);
    f_ds_percent = 100 * f_size / f_extent;

    /* temporarily store file view information */
    user_filetype = fd->filetype;
    user_etype = fd->etype;
    user_disp = fd->disp;
    user_ds_read = fd->hints->ds_read;
    user_ds_write = fd->hints->ds_write;
    /* temporarily override the independent I/O datasieve buffer size */
    user_ind_wr_buffer_size = fd->hints->ind_wr_buffer_size;
    user_ind_rd_buffer_size = fd->hints->ind_rd_buffer_size;

    /* set new values for temporary file view */
    fd->filetype = custom_ftype;
    fd->etype = MPI_BYTE;
    /* set new values for independent I/O datasieve buffer size */
    fd->hints->ind_wr_buffer_size = fd->hints->cb_buffer_size;
    fd->hints->ind_rd_buffer_size = fd->hints->cb_buffer_size;
    /* decide whether or not to do datasieving */
#ifdef DEBUG
    printf("f_ds_percent = %d cb_ds_threshold = %d\n", f_ds_percent, fd->hints->cb_ds_threshold);
#endif
    if (f_ds_percent >= fd->hints->cb_ds_threshold) {
        fd->hints->ds_read = ADIOI_HINT_ENABLE;
        fd->hints->ds_write = ADIOI_HINT_ENABLE;
    } else {
        fd->hints->ds_read = ADIOI_HINT_DISABLE;
        fd->hints->ds_write = ADIOI_HINT_DISABLE;
    }

    /* flatten the new filetype since the strided calls expect it to
     * have been flattened in set file view.  in the two phase code,
     * the datatype passed down should always be MPI_BYTE, and
     * therefore contiguous, but just for completeness sake, we'll
     * check the memory datatype anyway */
    ADIOI_Datatype_iscontig(custom_ftype, &f_is_contig);
    ADIOI_Datatype_iscontig(datatype, &m_is_contig);
    if (!f_is_contig)
        ADIOI_Flatten_datatype(custom_ftype);

    /* make appropriate Read/Write calls.  Let ROMIO figure out file
     * system specific stuff. */
    if (f_is_contig && m_is_contig) {
        fd->disp = 0;
        if (rdwr == ADIOI_READ)
            ADIO_ReadContig(fd, buf, count, datatype, file_ptr_type, offset, status, error_code);
        else
            ADIO_WriteContig(fd, buf, count, datatype, file_ptr_type, offset, status, error_code);
    } else {
        fd->disp = offset;
        if (rdwr == ADIOI_READ)
            ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type, 0, status, error_code);
        else
            ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, 0, status, error_code);
    }


    /* restore the user specified file view to cover our tracks */
    fd->filetype = user_filetype;
    fd->etype = user_etype;
    fd->disp = user_disp;
    fd->hints->ds_read = user_ds_read;
    fd->hints->ds_write = user_ds_write;
    fd->hints->ind_wr_buffer_size = user_ind_wr_buffer_size;
    fd->hints->ind_rd_buffer_size = user_ind_rd_buffer_size;
#ifdef AGGREGATION_PROFILE
    if (rdwr == ADIOI_READ)
        MPE_Log_event(5007, 0, NULL);
    else
        MPE_Log_event(5009, 0, NULL);
#endif
}
Ejemplo n.º 4
0
void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, brd_size, st_index=0;
    int num, size, sum, n_etypes_in_filetype, size_in_filetype;
    MPI_Count bufsize;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int req_len, partial_read;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
    char *readbuf, *tmp_buf, *value;
    int st_n_filetypes, readbuf_len;
    ADIO_Offset frd_size=0, new_frd_size, st_frd_size;
    int new_brd_size, err_flag=0, info_flag, max_bufsize;

    static char myname[] = "ADIOI_NFS_READSTRIDED";

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS;
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

/* get max_bufsize from the info object. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
                 &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
                 fd->disp + etype_size * offset;

	start_off = off;
	end_offset = off + bufsize - 1;
        readbuf_off = off;
        readbuf = (char *) ADIOI_Malloc(max_bufsize);
        readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));

/* if atomicity is true, lock (exclusive) the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
	lseek(fd->fd_sys, readbuf_off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
        if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
        err = read(fd->fd_sys, readbuf, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
        if (err == -1) err_flag = 1;

        for (j=0; j<count; j++)
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = j*buftype_extent + flat_buf->indices[i];
		req_off = off;
		req_len = flat_buf->blocklens[i];
		ADIOI_BUFFERED_READ
                off += flat_buf->blocklens[i];
            }

        if (fd->atomicity)
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	ADIOI_Free(readbuf); /* malloced in the buffered_read macro */

	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    }

    else {  /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
          /* Wei-keng reworked type processing to be a bit more efficient */
           offset       = fd->fp_ind - disp;
           n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
          offset -= (ADIO_Offset)n_filetypes * filetype_extent;
          /* now offset is local to this extent */

           /* find the block where offset is located, skip blocklens[i]==0 */
           for (i=0; i<flat_file->count; i++) {
               ADIO_Offset dist;
               if (flat_file->blocklens[i] == 0) continue;
               dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
               /* frd_size is from offset to the end of block i */
              if (dist == 0) {
                  i++;
                  offset   = flat_file->indices[i];
                  frd_size = flat_file->blocklens[i];
                  break;
              }
              if (dist > 0 ) {
                   frd_size = dist;
		   break;
              }
          }
           st_index = i;  /* starting index in flat_file->indices[] */
           offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
       }
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;

	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    frd_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
		    abs_off_in_filetype;
	}

        start_off = offset;

       /* Wei-keng Liao: read request is within a single flat_file contig
        * block e.g. with subarray types that actually describe the whole
        * array */
       if (buftype_is_contig && bufsize <= frd_size) {
            ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                             offset, status, error_code);

           if (file_ptr_type == ADIO_INDIVIDUAL) {
                /* update MPI-IO file pointer to point to the first byte that
                * can be accessed in the fileview. */
               fd->fp_ind = offset + bufsize;
               if (bufsize == frd_size) {
                   do {
                       st_index++;
                       if (st_index == flat_file->count) {
                           st_index = 0;
                           n_filetypes++;
                       }
                    } while (flat_file->blocklens[st_index] == 0);
                   fd->fp_ind = disp + flat_file->indices[st_index]
                               + n_filetypes*filetype_extent;
               }
           }
           fd->fp_sys_posn = -1;   /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
           MPIR_Status_set_bytes(status, datatype, bufsize);
#endif
            return;
       }

       /* Calculate end_offset, the last byte-offset that will be accessed.
         e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/

	st_frd_size = frd_size;
	st_n_filetypes = n_filetypes;
	i = 0;
	j = st_index;
	off = offset;
	frd_size = ADIOI_MIN(st_frd_size, bufsize);
	while (i < bufsize) {
	    i += frd_size;
	    end_offset = off + frd_size - 1;
            j = (j+1) % flat_file->count;
            n_filetypes += (j == 0) ? 1 : 0;
            while (flat_file->blocklens[j]==0) {
               j = (j+1) % flat_file->count;
               n_filetypes += (j == 0) ? 1 : 0;
	    }

	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
	}

/* if atomicity is true, lock (exclusive) the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        /* initial read into readbuf */
	readbuf_off = offset;
	readbuf = (char *) ADIOI_Malloc(max_bufsize);
	readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));

#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
	lseek(fd->fd_sys, offset, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
        if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
        err = read(fd->fd_sys, readbuf, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, offset, SEEK_SET, readbuf_len);

        if (err == -1) err_flag = 1;

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i = 0;
	    j = st_index;
	    off = offset;
	    n_filetypes = st_n_filetypes;
	    frd_size = ADIOI_MIN(st_frd_size, bufsize);
	    while (i < bufsize) {
                if (frd_size) {
                    /* TYPE_UB and TYPE_LB can result in
                       frd_size = 0. save system call in such cases */
		    /* lseek(fd->fd_sys, off, SEEK_SET);
		    err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/

		    req_off = off;
		    req_len = frd_size;
		    userbuf_off = i;
		    ADIOI_BUFFERED_READ
		}
		i += frd_size;

                if (off + frd_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                       off += frd_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by frd_size. */
                else {
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
                    }
		    off = disp + flat_file->indices[j] +
                                        (ADIO_Offset) n_filetypes*filetype_extent;
		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
		}
	    }
	}
	else {
Ejemplo n.º 5
0
void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count,
			     MPI_Datatype datatype, int file_ptr_type,
			     ADIO_Offset offset, ADIO_Status *status, int
			     *error_code)
{
/* Since PVFS does not support file locking, can't do buffered writes
   as on Unix */

/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent, indx;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp;
    int flag, new_bwr_size, new_fwr_size, err_flag=0;
    static char myname[] = "ADIOI_PVFS_WRITESTRIDED";

#ifdef HAVE_PVFS_LISTIO
    if ( fd->hints->fs_hints.pvfs.listio_write == ADIOI_HINT_ENABLE ) {
	    ADIOI_PVFS_WriteStridedListIO(fd, buf, count, datatype, 
			    file_ptr_type, offset, status, error_code);
	    return;
    }
#endif
    /* if hint set to DISABLE or AUTOMATIC, don't use listio */

    /* --BEGIN ERROR HANDLING-- */
    if (fd->atomicity) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_INTERN,
					   "Atomic mode set in PVFS I/O function", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;
    
    bufsize = buftype_size * count;

    if (!buftype_is_contig && filetype_is_contig) {
	char *combine_buf, *combine_buf_ptr;
	ADIO_Offset combine_buf_remain;
/* noncontiguous in memory, contiguous in file. use writev */

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	/* allocate our "combine buffer" to pack data into before writing */
	combine_buf = (char *) ADIOI_Malloc(fd->hints->ind_wr_buffer_size);
	combine_buf_ptr = combine_buf;
	combine_buf_remain = fd->hints->ind_wr_buffer_size;

	/* seek to the right spot in the file */
	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
	    off = fd->disp + etype_size * offset;
	    pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
	}
	else off = pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);

	/* loop through all the flattened pieces.  combine into buffer until
	 * no more will fit, then write.
	 *
	 * special case of a given piece being bigger than the combine buffer
	 * is also handled.
	 */
	for (j=0; j<count; j++) {
	    for (i=0; i<flat_buf->count; i++) {
		if (flat_buf->blocklens[i] > combine_buf_remain && combine_buf != combine_buf_ptr) {
		    /* there is data in the buffer; write out the buffer so far */
		    err = pvfs_write(fd->fd_sys,
				     combine_buf,
				     fd->hints->ind_wr_buffer_size - combine_buf_remain);
		    if (err == -1) err_flag = 1;

		    /* reset our buffer info */
		    combine_buf_ptr = combine_buf;
		    combine_buf_remain = fd->hints->ind_wr_buffer_size;
		}

		/* TODO: heuristic for when to not bother to use combine buffer? */
		if (flat_buf->blocklens[i] >= combine_buf_remain) {
		    /* special case: blocklen is as big as or bigger than the combine buf;
		     * write directly
		     */
		    err = pvfs_write(fd->fd_sys,
				     ((char *) buf) + j*buftype_extent + flat_buf->indices[i],
				     flat_buf->blocklens[i]);
		    if (err == -1) err_flag = 1;
		    off += flat_buf->blocklens[i]; /* keep up with the final file offset too */
		}
		else {
		    /* copy more data into combine buffer */
		    memcpy(combine_buf_ptr,
			   ((char *) buf) + j*buftype_extent + flat_buf->indices[i],
			   flat_buf->blocklens[i]);
		    combine_buf_ptr += flat_buf->blocklens[i];
		    combine_buf_remain -= flat_buf->blocklens[i];
		    off += flat_buf->blocklens[i]; /* keep up with the final file offset too */
		}
	    }
	}

	if (combine_buf_ptr != combine_buf) {
	    /* data left in buffer to write */
	    err = pvfs_write(fd->fd_sys,
			     combine_buf,
			     fd->hints->ind_wr_buffer_size - combine_buf_remain);
	    if (err == -1) err_flag = 1;
	}

	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	ADIOI_Free(combine_buf);

	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    } /* if (!buftype_is_contig && filetype_is_contig)  ... */

    else {  /* noncontiguous in file */

/* split up into several contiguous writes */

/* find starting location in the file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
        disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    offset = fd->fp_ind; /* in bytes */
            n_filetypes = -1;
            flag = 0;
            while (!flag) {
                n_filetypes++;
                for (i=0; i<flat_file->count; i++) {
                    if (disp + flat_file->indices[i] + 
                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
                            >= offset) {
                        st_index = i;
                        fwr_size = disp + flat_file->indices[i] + 
                                (ADIO_Offset) n_filetypes*filetype_extent
                                 + flat_file->blocklens[i] - offset;
                        flag = 1;
                        break;
                    }
                }
            }
	}
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
            offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
	}

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i = 0;
	    j = st_index;
	    off = offset;
	    fwr_size = ADIOI_MIN(fwr_size, bufsize);
	    while (i < bufsize) {
                if (fwr_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       fwr_size = 0. save system call in such cases */ 
#ifdef PROFILE
		    MPE_Log_event(11, 0, "start seek");
#endif
		    pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
#ifdef PROFILE
		    MPE_Log_event(12, 0, "end seek");
		    MPE_Log_event(5, 0, "start write");
#endif
		    err = pvfs_write(fd->fd_sys, ((char *) buf) + i, fwr_size);
#ifdef PROFILE
		    MPE_Log_event(6, 0, "end write");
#endif
		    if (err == -1) err_flag = 1;
		}
		i += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                       off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by fwr_size. */
                else {
		    if (j < (flat_file->count - 1)) j++;
		    else {
			j = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[j] + 
                                        (ADIO_Offset) n_filetypes*filetype_extent;
		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
		}
	    }
	}
	else {
/* noncontiguous in memory as well as in file */

	    ADIOI_Flatten_datatype(datatype);
	    flat_buf = ADIOI_Flatlist;
	    while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	    k = num = buf_count = 0;
	    indx = flat_buf->indices[0];
	    j = st_index;
	    off = offset;
	    bwr_size = flat_buf->blocklens[0];

	    while (num < bufsize) {
		size = ADIOI_MIN(fwr_size, bwr_size);
		if (size) {
#ifdef PROFILE
		    MPE_Log_event(11, 0, "start seek");
#endif
		    pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
#ifdef PROFILE
		    MPE_Log_event(12, 0, "end seek");
		    MPE_Log_event(5, 0, "start write");
#endif
		    err = pvfs_write(fd->fd_sys, ((char *) buf) + indx, size);
#ifdef PROFILE
		    MPE_Log_event(6, 0, "end write");
#endif
		    if (err == -1) err_flag = 1;
		}

		new_fwr_size = fwr_size;
		new_bwr_size = bwr_size;

		if (size == fwr_size) {
/* reached end of contiguous block in file */
                    if (j < (flat_file->count - 1)) j++;
                    else {
                        j = 0;
                        n_filetypes++;
                    }

                    off = disp + flat_file->indices[j] + 
                                   (ADIO_Offset) n_filetypes*filetype_extent;

		    new_fwr_size = flat_file->blocklens[j];
		    if (size != bwr_size) {
			indx += size;
			new_bwr_size -= size;
		    }
		}

		if (size == bwr_size) {
/* reached end of contiguous block in memory */

		    k = (k + 1)%flat_buf->count;
		    buf_count++;
		    indx = buftype_extent*(buf_count/flat_buf->count) +
			flat_buf->indices[k]; 
		    new_bwr_size = flat_buf->blocklens[k];
		    if (size != fwr_size) {
			off += size;
			new_fwr_size -= size;
		    }
		}
		num += size;
		fwr_size = new_fwr_size;
                bwr_size = new_bwr_size;
	    }
	}

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    }

    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
Ejemplo n.º 6
0
void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* Since PIOFS does not support file locking, can't do buffered writes
   as on Unix */

/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    struct iovec *iov;
    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent, indx;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp;
    int flag, new_bwr_size, new_fwr_size, err_flag=0;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_PIOFS_WRITESTRIDED";
#endif

    if (fd->atomicity) {
	FPRINTF(stderr, "ROMIO cannot guarantee atomicity of noncontiguous accesses in atomic mode, as PIOFS doesn't support file locking. Use nonatomic mode and its associated semantics.\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;
    
    bufsize = buftype_size * count;

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. use writev */

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

/* There is a limit of 16 on the number of iovecs for readv/writev! */

	iov = (struct iovec *) ADIOI_Malloc(16*sizeof(struct iovec));

	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
	    off = fd->disp + etype_size * offset;
	    llseek(fd->fd_sys, off, SEEK_SET);
	}
	else off = llseek(fd->fd_sys, fd->fp_ind, SEEK_SET);

	k = 0;
	for (j=0; j<count; j++) 
	    for (i=0; i<flat_buf->count; i++) {
		iov[k].iov_base = ((char *) buf) + j*buftype_extent +
		    flat_buf->indices[i]; 
		iov[k].iov_len = flat_buf->blocklens[i];
		/*FPRINTF(stderr, "%d %d\n", iov[k].iov_base, iov[k].iov_len);*/

		off += flat_buf->blocklens[i];
		k = (k+1)%16;

		if (!k) {
		    err = writev(fd->fd_sys, iov, 16);
		    if (err == -1) err_flag = 1;
		}
	    }

	if (k) {
	    err = writev(fd->fd_sys, iov, k);
	    if (err == -1) err_flag = 1;
	}

	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	ADIOI_Free(iov);
	if (err_flag) {
#ifdef MPICH2
	    *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
		"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG) 
	    *error_code =  MPI_ERR_UNKNOWN;
#else /* MPICH-1 */
	    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
			      myname, "I/O Error", "%s", strerror(errno));
	    ADIOI_Error(fd, *error_code, myname);
#endif
	}
	else *error_code = MPI_SUCCESS;
    } /* if (!buftype_is_contig && filetype_is_contig) ... */

    else {  /* noncontiguous in file */

/* split up into several contiguous writes */

/* find starting location in the file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
        disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    offset = fd->fp_ind; /* in bytes */
            n_filetypes = -1;
            flag = 0;
            while (!flag) {
                n_filetypes++;
                for (i=0; i<flat_file->count; i++) {
                    if (disp + flat_file->indices[i] + 
                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
                            >= offset) {
                        st_index = i;
                        fwr_size = disp + flat_file->indices[i] + 
                                (ADIO_Offset) n_filetypes*filetype_extent
                                 + flat_file->blocklens[i] - offset;
                        flag = 1;
                        break;
                    }
                }
            }
	}
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
            offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
	}

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i = 0;
	    j = st_index;
	    off = offset;
	    fwr_size = ADIOI_MIN(fwr_size, bufsize);
	    while (i < bufsize) {
                if (fwr_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       fwr_size = 0. save system call in such cases */ 
#ifdef PROFILE
		    MPE_Log_event(11, 0, "start seek");
#endif
		    llseek(fd->fd_sys, off, SEEK_SET);
#ifdef PROFILE
		    MPE_Log_event(12, 0, "end seek");
		    MPE_Log_event(5, 0, "start write");
#endif
		    err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);
#ifdef PROFILE
		    MPE_Log_event(6, 0, "end write");
#endif
		    if (err == -1) err_flag = 1;
		}
		i += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                       off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by fwr_size. */
                else {
		    if (j < (flat_file->count - 1)) j++;
		    else {
			j = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[j] + 
                                        (ADIO_Offset) n_filetypes*filetype_extent;
		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
		}
	    }
	}
	else {
/* noncontiguous in memory as well as in file */

	    ADIOI_Flatten_datatype(datatype);
	    flat_buf = ADIOI_Flatlist;
	    while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	    k = num = buf_count = 0;
	    indx = flat_buf->indices[0];
	    j = st_index;
	    off = offset;
	    bwr_size = flat_buf->blocklens[0];

	    while (num < bufsize) {
		size = ADIOI_MIN(fwr_size, bwr_size);
		if (size) {
#ifdef PROFILE
		    MPE_Log_event(11, 0, "start seek");
#endif
		    llseek(fd->fd_sys, off, SEEK_SET);
#ifdef PROFILE
		    MPE_Log_event(12, 0, "end seek");
		    MPE_Log_event(5, 0, "start write");
#endif
		    err = write(fd->fd_sys, ((char *) buf) + indx, size);
#ifdef PROFILE
		    MPE_Log_event(6, 0, "end write");
#endif
		    if (err == -1) err_flag = 1;
		}

		new_fwr_size = fwr_size;
		new_bwr_size = bwr_size;

		if (size == fwr_size) {
/* reached end of contiguous block in file */
                    if (j < (flat_file->count - 1)) j++;
                    else {
                        j = 0;
                        n_filetypes++;
                    }

                    off = disp + flat_file->indices[j] + 
                                              (ADIO_Offset) n_filetypes*filetype_extent;

		    new_fwr_size = flat_file->blocklens[j];
		    if (size != bwr_size) {
			indx += size;
			new_bwr_size -= size;
		    }
		}

		if (size == bwr_size) {
/* reached end of contiguous block in memory */

		    k = (k + 1)%flat_buf->count;
		    buf_count++;
		    indx = buftype_extent*(buf_count/flat_buf->count) +
			flat_buf->indices[k]; 
		    new_bwr_size = flat_buf->blocklens[k];
		    if (size != fwr_size) {
			off += size;
			new_fwr_size -= size;
		    }
		}
		num += size;
		fwr_size = new_fwr_size;
                bwr_size = new_bwr_size;
	    }
	}

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
	if (err_flag) {
#ifdef MPICH2
	    *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
		"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)
	    *error_code = MPI_ERR_UNKNOWN;
#else /* MPICH-1 */
	    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
			      myname, "I/O Error", "%s", strerror(errno));
	    ADIOI_Error(fd, *error_code, myname);
#endif
	}
	else *error_code = MPI_SUCCESS;
    } 

    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
Ejemplo n.º 7
0
static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
			 datatype, int nprocs,
			 int myrank, ADIOI_Access
			 *others_req, ADIO_Offset *offset_list,
			 ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
                         min_st_offset, ADIO_Offset fd_size,
			 ADIO_Offset *fd_start, ADIO_Offset *fd_end,
                         int *buf_idx, int *error_code)
{
/* Read in sizes of no more than coll_bufsize, an info parameter.
   Send data to appropriate processes. 
   Place recd. data in user buf.
   The idea is to reduce the amount of extra memory required for
   collective I/O. If all data were read all at once, which is much
   easier, it would require temp space more than the size of user_buf,
   which is often unacceptable. For example, to read a distributed
   array from a file, where each local array is 8Mbytes, requiring
   at least another 8Mbytes of temp space is unacceptable. */

    int i, j, m, ntimes, max_ntimes, buftype_is_contig;
    ADIO_Offset st_loc=-1, end_loc=-1, off, done, real_off, req_off;
    char *read_buf = NULL, *tmp_buf;
    int *curr_offlen_ptr, *count, *send_size, *recv_size;
    int *partial_send, *recd_from_proc, *start_pos;
    /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
    ADIO_Offset real_size, size, for_curr_iter, for_next_iter;
    int req_len, flag, rank;
    MPI_Status status;
    ADIOI_Flatlist_node *flat_buf=NULL;
    MPI_Aint buftype_extent;
    int coll_bufsize;

    *error_code = MPI_SUCCESS;  /* changed below if error */
    /* only I/O errors are currently reported */
    
/* calculate the number of reads of size coll_bufsize
   to be done by each process and the max among all processes.
   That gives the no. of communication phases as well.
   coll_bufsize is obtained from the hints object. */

    coll_bufsize = fd->hints->cb_buffer_size;

    /* grab some initial values for st_loc and end_loc */
    for (i=0; i < nprocs; i++) {
	if (others_req[i].count) {
	    st_loc = others_req[i].offsets[0];
	    end_loc = others_req[i].offsets[0];
	    break;
	}
    }

    /* now find the real values */
    for (i=0; i < nprocs; i++)
	for (j=0; j<others_req[i].count; j++) {
	    st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]);
	    end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j]
					  + others_req[i].lens[j] - 1));
	}

    /* calculate ntimes, the number of times this process must perform I/O
     * operations in order to complete all the requests it has received.
     * the need for multiple I/O operations comes from the restriction that
     * we only use coll_bufsize bytes of memory for internal buffering.
     */
    if ((st_loc==-1) && (end_loc==-1)) {
	/* this process does no I/O. */
	ntimes = 0;
    }
    else {
	/* ntimes=ceiling_div(end_loc - st_loc + 1, coll_bufsize)*/
	ntimes = (int) ((end_loc - st_loc + coll_bufsize)/coll_bufsize);
    }

    MPI_Allreduce(&ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX, fd->comm); 

    read_buf = fd->io_buf;  /* Allocated at open time */

    curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); 
    /* its use is explained below. calloc initializes to 0. */

    count = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* to store count of how many off-len pairs per proc are satisfied
       in an iteration. */

    partial_send = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* if only a portion of the last off-len pair is sent to a process 
       in a particular iteration, the length sent is stored here.
       calloc initializes to 0. */

    send_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be sent to each proc. in an iteration */

    recv_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be recd. from each proc. in an iteration.
       Of size nprocs so that I can use MPI_Alltoall later. */

    recd_from_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* amount of data recd. so far from each proc. Used in
       ADIOI_Fill_user_buffer. initialized to 0 here. */

    start_pos = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    /* used to store the starting value of curr_offlen_ptr[i] in 
       this iteration */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    if (!buftype_is_contig) {
	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;
    }
    MPI_Type_extent(datatype, &buftype_extent);

    done = 0;
    off = st_loc;
    for_curr_iter = for_next_iter = 0;

    MPI_Comm_rank(fd->comm, &rank);

    for (m=0; m<ntimes; m++) {
       /* read buf of size coll_bufsize (or less) */
       /* go through all others_req and check if any are satisfied
          by the current read */

       /* since MPI guarantees that displacements in filetypes are in 
          monotonically nondecreasing order, I can maintain a pointer
	  (curr_offlen_ptr) to 
          current off-len pair for each process in others_req and scan
          further only from there. There is still a problem of filetypes
          such as:  (1, 2, 3 are not process nos. They are just numbers for
          three chunks of data, specified by a filetype.)

                   1  -------!--
                   2    -----!----
                   3       --!-----

          where ! indicates where the current read_size limitation cuts 
          through the filetype.  I resolve this by reading up to !, but
          filling the communication buffer only for 1. I copy the portion
          left over for 2 into a tmp_buf for use in the next
	  iteration. i.e., 2 and 3 will be satisfied in the next
	  iteration. This simplifies filling in the user's buf at the
	  other end, as only one off-len pair with incomplete data
	  will be sent. I also don't need to send the individual
	  offsets and lens along with the data, as the data is being
	  sent in a particular order. */ 

          /* off = start offset in the file for the data actually read in 
                   this iteration 
             size = size of data read corresponding to off
             real_off = off minus whatever data was retained in memory from
                  previous iteration for cases like 2, 3 illustrated above
             real_size = size plus the extra corresponding to real_off
             req_off = off in file for a particular contiguous request 
                       minus what was satisfied in previous iteration
             req_size = size corresponding to req_off */

	size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); 
	real_off = off - for_curr_iter;
	real_size = size + for_curr_iter;

	for (i=0; i<nprocs; i++) count[i] = send_size[i] = 0;
	for_next_iter = 0;

	for (i=0; i<nprocs; i++) {
#ifdef RDCOLL_DEBUG
	    DBG_FPRINTF(stderr, "rank %d, i %d, others_count %d\n", rank, i, others_req[i].count); 
#endif
	    if (others_req[i].count) {
		start_pos[i] = curr_offlen_ptr[i];
		for (j=curr_offlen_ptr[i]; j<others_req[i].count;
		     j++) {
		    if (partial_send[i]) {
			/* this request may have been partially
			   satisfied in the previous iteration. */
			req_off = others_req[i].offsets[j] +
			    partial_send[i]; 
                        req_len = others_req[i].lens[j] -
			    partial_send[i];
			partial_send[i] = 0;
			/* modify the off-len pair to reflect this change */
			others_req[i].offsets[j] = req_off;
			others_req[i].lens[j] = req_len;
		    }
		    else {
			req_off = others_req[i].offsets[j];
                        req_len = others_req[i].lens[j];
		    }
		    if (req_off < real_off + real_size) {
			count[i]++;
      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)read_buf)+req_off-real_off) == (ADIO_Offset)(MPIR_Upint)(read_buf+req_off-real_off));
			MPI_Address(read_buf+req_off-real_off, 
                               &(others_req[i].mem_ptrs[j]));
      ADIOI_Assert((real_off + real_size - req_off) == (int)(real_off + real_size - req_off));
			send_size[i] += (int)(ADIOI_MIN(real_off + real_size - req_off, 
                                      (ADIO_Offset)(unsigned)req_len)); 

			if (real_off+real_size-req_off < (ADIO_Offset)(unsigned)req_len) {
			    partial_send[i] = (int) (real_off + real_size - req_off);
			    if ((j+1 < others_req[i].count) && 
                                 (others_req[i].offsets[j+1] < 
                                     real_off+real_size)) { 
				/* this is the case illustrated in the
				   figure above. */
				for_next_iter = ADIOI_MAX(for_next_iter,
					  real_off + real_size - others_req[i].offsets[j+1]); 
				/* max because it must cover requests 
				   from different processes */
			    }
			    break;
			}
		    }
		    else break;
		}
		curr_offlen_ptr[i] = j;
	    }
	}

	flag = 0;
	for (i=0; i<nprocs; i++)
	    if (count[i]) flag = 1;

	if (flag) {
      ADIOI_Assert(size == (int)size);
	    ADIO_ReadContig(fd, read_buf+for_curr_iter, (int)size, MPI_BYTE,
			    ADIO_EXPLICIT_OFFSET, off, &status, error_code);
	    if (*error_code != MPI_SUCCESS) return;
	}
	
	for_curr_iter = for_next_iter;
	
	ADIOI_R_Exchange_data(fd, buf, flat_buf, offset_list, len_list,
			    send_size, recv_size, count, 
       			    start_pos, partial_send, recd_from_proc, nprocs,
			    myrank, 
			    buftype_is_contig, contig_access_count,
			    min_st_offset, fd_size, fd_start, fd_end,
			    others_req, 
                            m, buftype_extent, buf_idx); 


	if (for_next_iter) {
	    tmp_buf = (char *) ADIOI_Malloc(for_next_iter);
      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)read_buf)+real_size-for_next_iter) == (ADIO_Offset)(MPIR_Upint)(read_buf+real_size-for_next_iter));
      ADIOI_Assert((for_next_iter+coll_bufsize) == (size_t)(for_next_iter+coll_bufsize));
	    memcpy(tmp_buf, read_buf+real_size-for_next_iter, for_next_iter);
	    ADIOI_Free(fd->io_buf);
	    fd->io_buf = (char *) ADIOI_Malloc(for_next_iter+coll_bufsize);
	    memcpy(fd->io_buf, tmp_buf, for_next_iter);
	    read_buf = fd->io_buf;
	    ADIOI_Free(tmp_buf);
	}

	off += size;
	done += size;
    }

    for (i=0; i<nprocs; i++) count[i] = send_size[i] = 0;
    for (m=ntimes; m<max_ntimes; m++) 
/* nothing to send, but check for recv. */
	ADIOI_R_Exchange_data(fd, buf, flat_buf, offset_list, len_list,
			    send_size, recv_size, count, 
			    start_pos, partial_send, recd_from_proc, nprocs,
			    myrank, 
			    buftype_is_contig, contig_access_count,
			    min_st_offset, fd_size, fd_start, fd_end,
			    others_req, m,
                            buftype_extent, buf_idx); 

    ADIOI_Free(curr_offlen_ptr);
    ADIOI_Free(count);
    ADIOI_Free(partial_send);
    ADIOI_Free(send_size);
    ADIOI_Free(recv_size);
    ADIOI_Free(recd_from_proc);
    ADIOI_Free(start_pos);
}
Ejemplo n.º 8
0
void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count,
			     MPI_Datatype datatype, int file_ptr_type,
			     ADIO_Offset offset, ADIO_Status *status, int
			     *error_code)
{
    /* offset is in units of etype relative to the filetype. */
    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k,  brd_size, frd_size=0, st_index=0;
    int sum, n_etypes_in_filetype, size_in_filetype;
    MPI_Count bufsize;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp, start_off, initial_off;
    int flag, st_frd_size, st_n_filetypes;

    int mem_list_count, file_list_count;
    PVFS_size *mem_offsets;
    int64_t *file_offsets;
    int *mem_lengths;
    int32_t *file_lengths;
    int total_blks_to_read;

    int max_mem_list, max_file_list;

    int b_blks_read;
    int f_data_read;
    int size_read=0, n_read_lists, extra_blks;

    int end_brd_size, end_frd_size;
    int start_k, start_j, new_file_read, new_buffer_read;
    int start_mem_offset;
    PVFS_Request mem_req, file_req;
    ADIOI_PVFS2_fs * pvfs_fs;
    PVFS_sysresp_io resp_io;
    int err_flag=0;
    MPI_Offset total_bytes_read = 0;
    static char myname[] = "ADIOI_PVFS2_ReadStrided";

#define MAX_ARRAY_SIZE 64

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    /* the HDF5 tests showed a bug in this list processing code (see many many
     * lines down below).  We added a workaround, but common HDF5 file types
     * are actually contiguous and do not need the expensive workarond */
    if (!filetype_is_contig) {
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	if (flat_file->count == 1 && !buftype_is_contig)
	    filetype_is_contig = 1;
    }

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS;
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */
       int64_t file_offset;
	int32_t file_length;

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
	    fd->disp + etype_size * offset;

	file_list_count = 1;
	file_offset = off;
	file_length = 0;
	total_blks_to_read = count*flat_buf->count;
	b_blks_read = 0;

	/* allocate arrays according to max usage */
	if (total_blks_to_read > MAX_ARRAY_SIZE)
	    mem_list_count = MAX_ARRAY_SIZE;
	else mem_list_count = total_blks_to_read;
	mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
	mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));

	/* TODO: CHECK RESULTS OF MEMORY ALLOCATION */

	j = 0;
	/* step through each block in memory, filling memory arrays */
	while (b_blks_read < total_blks_to_read) {
	    for (i=0; i<flat_buf->count; i++) {
		mem_offsets[b_blks_read % MAX_ARRAY_SIZE] =
		    /* TODO: fix this compiler warning */
		    ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
		mem_lengths[b_blks_read % MAX_ARRAY_SIZE] =
		    flat_buf->blocklens[i];
		file_length += flat_buf->blocklens[i];
		b_blks_read++;
		if (!(b_blks_read % MAX_ARRAY_SIZE) ||
		    (b_blks_read == total_blks_to_read)) {

		    /* in the case of the last read list call,
		       adjust mem_list_count */
		    if (b_blks_read == total_blks_to_read) {
		        mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
			/* in case last read list call fills max arrays */
			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
		    }
		    err_flag = PVFS_Request_hindexed(mem_list_count,
			    mem_lengths, mem_offsets, PVFS_BYTE, &mem_req);
		    if (err_flag < 0) break;
		    err_flag = PVFS_Request_contiguous(file_length,
			    PVFS_BYTE, &file_req);
		    if (err_flag < 0) break;
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
		    err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req,
			    file_offset, PVFS_BOTTOM, mem_req,
			    &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
		    /* --BEGIN ERROR HANDLING-- */
		    if (err_flag != 0) {
			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
							   MPIR_ERR_RECOVERABLE,
							   myname, __LINE__,
							   ADIOI_PVFS2_error_convert(err_flag),
							   "Error in PVFS_sys_read", 0);
			goto error_state;
		    }
		    PVFS_Request_free(&mem_req);
		    PVFS_Request_free(&file_req);
		    total_bytes_read += resp_io.total_completed;
		    /* --END ERROR HANDLING-- */

		    /* in the case of error or the last read list call,
		     * leave here */
		    if (err_flag || b_blks_read == total_blks_to_read) break;

		    file_offset += file_length;
		    file_length = 0;
		}
	    } /* for (i=0; i<flat_buf->count; i++) */
	    j++;
	} /* while (b_blks_read < total_blks_to_read) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);

        if (file_ptr_type == ADIO_INDIVIDUAL)
	    fd->fp_ind += total_bytes_read;

	fd->fp_sys_posn = -1;  /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, bufsize);
	/* This isa temporary way of filling in status.  The right way is to
	   keep tracke of how much data was actually read adn placed in buf
	   by ADIOI_BUFFERED_READ. */
#endif
	ADIOI_Delete_flattened(datatype);

	return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* know file is noncontiguous from above */
    /* noncontiguous in file */

    /* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;
    initial_off = offset;


    /* for each case - ADIO_Individual pointer or explicit, find the file
       offset in bytes (offset), n_filetypes (how many filetypes into
       file to start), frd_size (remaining amount of data in present
       file block), and st_index (start point in terms of blocks in
       starting filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
	n_filetypes = -1;
	flag = 0;
	while (!flag) {
	    n_filetypes++;
	    for (i=0; i<flat_file->count; i++) {
	        if (disp + flat_file->indices[i] +
		    ((ADIO_Offset) n_filetypes)*filetype_extent +
		    flat_file->blocklens[i]  >= offset) {
		    st_index = i;
		    frd_size = (int) (disp + flat_file->indices[i] +
				    ((ADIO_Offset) n_filetypes)*filetype_extent
				      + flat_file->blocklens[i] - offset);
		    flag = 1;
		    break;
		}
	    }
	} /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
	n_filetypes = (int) (offset / n_etypes_in_filetype);
	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	size_in_filetype = etype_in_filetype * etype_size;

	sum = 0;
	for (i=0; i<flat_file->count; i++) {
	    sum += flat_file->blocklens[i];
	    if (sum > size_in_filetype) {
	        st_index = i;
		frd_size = sum - size_in_filetype;
		abs_off_in_filetype = flat_file->indices[i] +
		    size_in_filetype - (sum - flat_file->blocklens[i]);
		break;
	    }
	}

	/* abs. offset in bytes in the file */
	offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
	    abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    start_off = offset;
    st_frd_size = frd_size;
    st_n_filetypes = n_filetypes;

    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

       int mem_length=0;
	intptr_t mem_offset;

	i = 0;
	j = st_index;
	n_filetypes = st_n_filetypes;

	mem_list_count = 1;

	/* determine how many blocks in file to read */
	f_data_read = ADIOI_MIN(st_frd_size, bufsize);
	total_blks_to_read = 1;
	if (j < (flat_file->count-1)) j++;
	else {
	    j = 0;
	    n_filetypes++;
	}
	while (f_data_read < bufsize) {
	    f_data_read += flat_file->blocklens[j];
	    total_blks_to_read++;
	    if (j<(flat_file->count-1)) j++;
	    else j = 0;
	}

	j = st_index;
	n_filetypes = st_n_filetypes;
	n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
	extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;

	mem_offset = (intptr_t)buf;
	mem_lengths = 0;

	/* if at least one full readlist, allocate file arrays
	   at max array size and don't free until very end */
	if (n_read_lists) {
	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int64_t));
	    file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int32_t));
	}
	/* if there's no full readlist allocate file arrays according
	   to needed size (extra_blks) */
	else {
	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
						  sizeof(int64_t));
	    file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
						  sizeof(int32_t));
	}

	/* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
	for (i=0; i<n_read_lists; i++) {
	    file_list_count = MAX_ARRAY_SIZE;
	    if(!i) {
	        file_offsets[0] = offset;
		file_lengths[0] = st_frd_size;
		mem_length = st_frd_size;
	    }
	    for (k=0; k<MAX_ARRAY_SIZE; k++) {
	        if (i || k) {
		    file_offsets[k] = disp +
			((ADIO_Offset)n_filetypes)*filetype_extent
		      + flat_file->indices[j];
		    file_lengths[k] = flat_file->blocklens[j];
		    mem_lengths += file_lengths[k];
		}
		if (j<(flat_file->count - 1)) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
	    err_flag = PVFS_Request_contiguous(mem_length,
					       PVFS_BYTE, &mem_req);
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_Request_contiguous (memory)", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */

	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
					     file_offsets, PVFS_BYTE,
					     &file_req);
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_Request_hindexed (file)", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */

	    /* PVFS_Request_hindexed already expresses the offsets into the
	     * file, so we should not pass in an offset if we are using
	     * hindexed for the file type */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
	    err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
				     (void *)mem_offset, mem_req,
				     &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_sys_read", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDING-- */
	    PVFS_Request_free(&mem_req);
	    PVFS_Request_free(&file_req);

	    total_bytes_read += resp_io.total_completed;

	    mem_offset += mem_length;
	    mem_lengths = 0;
	} /* for (i=0; i<n_read_lists; i++) */

	/* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
	if (extra_blks) {
	    file_list_count = extra_blks;
	    if(!i) {
	        file_offsets[0] = offset;
		file_lengths[0] = ADIOI_MIN(st_frd_size, bufsize);
	    }
	    for (k=0; k<extra_blks; k++) {
	        if(i || k) {
		    file_offsets[k] = disp +
			((ADIO_Offset)n_filetypes)*filetype_extent +
			flat_file->indices[j];
		    if (k == (extra_blks - 1)) {
		        file_lengths[k] = bufsize - (int32_t) mem_lengths
			  - mem_offset + (int32_t)  buf;
		    }
		    else file_lengths[k] = flat_file->blocklens[j];
		} /* if(i || k) */
		mem_lengths += file_lengths[k];
		if (j<(flat_file->count - 1)) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (k=0; k<extra_blks; k++) */
	    err_flag = PVFS_Request_contiguous(mem_length,
					       PVFS_BYTE, &mem_req);
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_Request_contiguous (memory)", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */

	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
		    file_offsets, PVFS_BYTE, &file_req);
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_Request_hindexed (file)", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */

	    /* as above, use 0 for 'offset' when using hindexed file type */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
	    err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
		    (void *)mem_offset, mem_req, &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_sys_read", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */
	    PVFS_Request_free(&mem_req);
	    PVFS_Request_free(&file_req);
	    total_bytes_read += resp_io.total_completed;
	}
    }
    else {
/* noncontiguous in memory as well as in file */

        ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	size_read = 0;
	n_filetypes = st_n_filetypes;
	frd_size = st_frd_size;
	brd_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;
	max_mem_list = 0;
	max_file_list = 0;

	/* run through and file max_file_list and max_mem_list so that you
	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
	   if possible */

	while (size_read < bufsize) {
	    k = start_k;
	    new_buffer_read = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) &&
		   (new_buffer_read < bufsize-size_read)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_read + flat_buf->blocklens[k] +
			size_read) > bufsize) {
		        end_brd_size = new_buffer_read +
			    flat_buf->blocklens[k] - (bufsize - size_read);
			new_buffer_read = bufsize - size_read;
		    }
		    else {
		        new_buffer_read += flat_buf->blocklens[k];
			end_brd_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (brd_size > (bufsize - size_read)) {
		        new_buffer_read = bufsize - size_read;
			brd_size = new_buffer_read;
		    }
		    else new_buffer_read = brd_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
	       (new_buffer_read < bufsize-size_read)) */
	    j = start_j;
	    new_file_read = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) &&
		   (new_file_read < new_buffer_read)) {
	        if(file_list_count) {
		    if((new_file_read + flat_file->blocklens[j]) >
		       new_buffer_read) {
		        end_frd_size = new_buffer_read - new_file_read;
			new_file_read = new_buffer_read;
			j--;
		    }
		    else {
		        new_file_read += flat_file->blocklens[j];
			end_frd_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (frd_size > new_buffer_read) {
		        new_file_read = new_buffer_read;
			frd_size = new_file_read;
		    }
		    else new_file_read = frd_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;

		k = start_k;
		if ((new_file_read < new_buffer_read) &&
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_read = 0;
		    mem_list_count = 0;
		    while (new_buffer_read < new_file_read) {
		        if(mem_list_count) {
			    if((new_buffer_read + flat_buf->blocklens[k]) >
			       new_file_read) {
			        end_brd_size = new_file_read - new_buffer_read;
				new_buffer_read = new_file_read;
				k--;
			    }
			    else {
			        new_buffer_read += flat_buf->blocklens[k];
				end_brd_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_read = brd_size;
			    if (brd_size > (bufsize - size_read)) {
			        new_buffer_read = bufsize - size_read;
				brd_size = new_buffer_read;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_read < new_file_read) */
		} /* if ((new_file_read < new_buffer_read) && (file_list_count
		     == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
		 (new_buffer_read < bufsize-size_read)) */

	    /*  fakes filling the readlist arrays of lengths found above  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {
		if(i) {
		    if (i == (mem_list_count - 1)) {
			if (flat_buf->blocklens[k] == end_brd_size)
			    brd_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    brd_size = flat_buf->blocklens[k] - end_brd_size;
			    k--;
			    buf_count--;
			}
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
		if (i) {
		    if (i == (file_list_count - 1)) {
			if (flat_file->blocklens[j] == end_frd_size)
			    frd_size = flat_file->blocklens[(j+1)%
							  flat_file->count];
			else {
			    frd_size = flat_file->blocklens[j] - end_frd_size;
			    j--;
			}
		    }
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    size_read += new_buffer_read;
	    start_k = k;
	    start_j = j;
	    if (max_mem_list < mem_list_count)
	        max_mem_list = mem_list_count;
	    if (max_file_list < file_list_count)
	        max_file_list = file_list_count;
	} /* while (size_read < bufsize) */

	/* one last check before we actually carry out the operation:
	 * this code has hard-to-fix bugs when a noncontiguous file type has
	 * such large pieces that the sum of the lengths of the memory type is
	 * not larger than one of those pieces (and vice versa for large memory
	 * types and many pices of file types.  In these cases, give up and
	 * fall back to naive reads and writes.  The testphdf5 test created a
	 * type with two very large memory regions and 600 very small file
	 * regions.  The same test also created a type with one very large file
	 * region and many (700) very small memory regions.  both cases caused
	 * problems for this code */

	if ( ( (file_list_count == 1) &&
		    (new_file_read < flat_file->blocklens[0] ) ) ||
		((mem_list_count == 1) &&
		    (new_buffer_read < flat_buf->blocklens[0]) ) ||
		((file_list_count == MAX_ARRAY_SIZE) &&
		    (new_file_read < flat_buf->blocklens[0]) ) ||
		( (mem_list_count == MAX_ARRAY_SIZE) &&
		    (new_buffer_read < flat_file->blocklens[0])) )
	{

	    ADIOI_Delete_flattened(datatype);
	    ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
		    file_ptr_type, initial_off, status, error_code);
	    return;
	}

	mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
	mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
	file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
	file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));

	size_read = 0;
	n_filetypes = st_n_filetypes;
	frd_size = st_frd_size;
	brd_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;

	/*  this section calculates mem_list_count and file_list_count
	    and also finds the possibly odd sized last array elements
	    in new_frd_size and new_brd_size  */

	while (size_read < bufsize) {
	    k = start_k;
	    new_buffer_read = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) &&
		   (new_buffer_read < bufsize-size_read)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_read + flat_buf->blocklens[k] +
			size_read) > bufsize) {
		        end_brd_size = new_buffer_read +
			    flat_buf->blocklens[k] - (bufsize - size_read);
			new_buffer_read = bufsize - size_read;
		    }
		    else {
		        new_buffer_read += flat_buf->blocklens[k];
			end_brd_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (brd_size > (bufsize - size_read)) {
		        new_buffer_read = bufsize - size_read;
			brd_size = new_buffer_read;
		    }
		    else new_buffer_read = brd_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
	       (new_buffer_read < bufsize-size_read)) */
	    j = start_j;
	    new_file_read = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) &&
		   (new_file_read < new_buffer_read)) {
	        if(file_list_count) {
		    if((new_file_read + flat_file->blocklens[j]) >
		       new_buffer_read) {
		        end_frd_size = new_buffer_read - new_file_read;
			new_file_read = new_buffer_read;
			j--;
		    }
		    else {
		        new_file_read += flat_file->blocklens[j];
			end_frd_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (frd_size > new_buffer_read) {
		        new_file_read = new_buffer_read;
			frd_size = new_file_read;
		    }
		    else new_file_read = frd_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;

		k = start_k;
		if ((new_file_read < new_buffer_read) &&
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_read = 0;
		    mem_list_count = 0;
		    while (new_buffer_read < new_file_read) {
		        if(mem_list_count) {
			    if((new_buffer_read + flat_buf->blocklens[k]) >
			       new_file_read) {
			        end_brd_size = new_file_read - new_buffer_read;
				new_buffer_read = new_file_read;
				k--;
			    }
			    else {
			        new_buffer_read += flat_buf->blocklens[k];
				end_brd_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_read = brd_size;
			    if (brd_size > (bufsize - size_read)) {
			        new_buffer_read = bufsize - size_read;
				brd_size = new_buffer_read;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_read < new_file_read) */
		} /* if ((new_file_read < new_buffer_read) && (file_list_count
		     == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
		 (new_buffer_read < bufsize-size_read)) */

	    /*  fills the allocated readlist arrays  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {
	        mem_offsets[i] = ((PVFS_size)buf + buftype_extent*
					 (buf_count/flat_buf->count) +
					 (int)flat_buf->indices[k]);
		if(!i) {
		    mem_lengths[0] = brd_size;
		    mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
		}
		else {
		    if (i == (mem_list_count - 1)) {
		        mem_lengths[i] = end_brd_size;
			if (flat_buf->blocklens[k] == end_brd_size)
			    brd_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    brd_size = flat_buf->blocklens[k] - end_brd_size;
			    k--;
			    buf_count--;
			}
		    }
		    else {
		        mem_lengths[i] = flat_buf->blocklens[k];
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
	        file_offsets[i] = disp + flat_file->indices[j] +
		    ((ADIO_Offset)n_filetypes) * filetype_extent;
	        if (!i) {
		    file_lengths[0] = frd_size;
		    file_offsets[0] += flat_file->blocklens[j] - frd_size;
		}
		else {
		    if (i == (file_list_count - 1)) {
		        file_lengths[i] = end_frd_size;
			if (flat_file->blocklens[j] == end_frd_size)
			    frd_size = flat_file->blocklens[(j+1)%
							  flat_file->count];
			else {
			    frd_size = flat_file->blocklens[j] - end_frd_size;
			    j--;
			}
		    }
		    else file_lengths[i] = flat_file->blocklens[j];
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths,
		    mem_offsets, PVFS_BYTE, &mem_req);
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0 ) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_Request_hindexed (memory)", 0);
		goto error_state;
	    }
	    /* -- END ERROR HANDLING-- */
	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
		    file_offsets, PVFS_BYTE, &file_req);
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_Request_hindexed (file)", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */

	    /* offset will be expressed in memory and file datatypes */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
	    err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
		    PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_PVFS2_error_convert(err_flag),
						   "Error in PVFS_sys_read", 0);
	    }
	    /* --END ERROR HANDLING-- */
	    PVFS_Request_free(&mem_req);
	    PVFS_Request_free(&file_req);
	    total_bytes_read += resp_io.total_completed;
	    size_read += new_buffer_read;
	    start_k = k;
	    start_j = j;
	} /* while (size_read < bufsize) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);
    }
    /* Other ADIO routines will convert absolute bytes into counts of datatypes */
    /* when incrementing fp_ind, need to also take into account the file type:
     * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
     * if we wrote N elements, offset needs to point at beginning of type, not
     * at empty region at offset N+1)
     *
     * As we discussed on mpich-discuss in may/june 2009, the code below might
     * look wierd, but by putting fp_ind at the last byte written, the next
     * time we run through the strided code we'll update the fp_ind to the
     * right location. */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
	fd->fp_ind = file_offsets[file_list_count-1]+
	    file_lengths[file_list_count-1];
    }

    ADIOI_Free(file_offsets);
    ADIOI_Free(file_lengths);

    if (err_flag == 0) *error_code = MPI_SUCCESS;

error_state:
    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
    /* This is a temporary way of filling in status. The right way is to
       keep track of how much data was actually read and placed in buf
       by ADIOI_BUFFERED_READ. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
Ejemplo n.º 9
0
/* #define IO_DEBUG 1 */
void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count,
			     MPI_Datatype datatype, int file_ptr_type,
			     ADIO_Offset offset, ADIO_Status *status, int
			     *error_code)
{
/* borrowed from old-school PVFS (v1) code. A driver for file systems that
 * cannot or do not support client-side buffering
 * Does not do data sieving optimization
 * Does contain write-combining optimization for noncontig in memory, contig in
 * file 
 */

/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int j, k, st_index=0;
    off_t err_lseek=-1;
    ssize_t err=-1;
    ADIO_Offset fwr_size=0, bwr_size, new_bwr_size, new_fwr_size, i_offset, num;
    ADIO_Offset bufsize, n_etypes_in_filetype;
    ADIO_Offset n_filetypes, etype_in_filetype, size, sum;
    ADIO_Offset abs_off_in_filetype=0, size_in_filetype;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent, indx;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp;
    int flag, err_flag=0;
    static char myname[] = "ADIOI_NOLOCK_WRITESTRIDED";
#ifdef IO_DEBUG
    int rank,nprocs;
#endif

    /* --BEGIN ERROR HANDLING-- */
    if (fd->atomicity) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_INTERN,
					   "Atomic mode set in I/O function", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

#ifdef IO_DEBUG
    MPI_Comm_rank(fd->comm, &rank);
    MPI_Comm_size(fd->comm, &nprocs);
#endif

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;
    
    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
    bufsize = buftype_size * count;

    if (!buftype_is_contig && filetype_is_contig) {
	char *combine_buf, *combine_buf_ptr;
	ADIO_Offset combine_buf_remain;
/* noncontiguous in memory, contiguous in file. use writev */

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	/* allocate our "combine buffer" to pack data into before writing */
	combine_buf = (char *) ADIOI_Malloc(fd->hints->ind_wr_buffer_size);
	combine_buf_ptr = combine_buf;
	combine_buf_remain = fd->hints->ind_wr_buffer_size;

	/* seek to the right spot in the file */
	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
	    off = fd->disp + etype_size * offset;
	    lseek(fd->fd_sys, off, SEEK_SET);
	}
	else off = lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);

	/* loop through all the flattened pieces.  combine into buffer until
	 * no more will fit, then write.
	 *
	 * special case of a given piece being bigger than the combine buffer
	 * is also handled.
	 */
	for (j=0; j<count; j++) {
    int i;
	    for (i=0; i<flat_buf->count; i++) {
		if (flat_buf->blocklens[i] > combine_buf_remain && combine_buf != combine_buf_ptr) {
		    /* there is data in the buffer; write out the buffer so far */
#ifdef IO_DEBUG
		    printf("[%d/%d] nc mem c file (0) writing loc = %Ld sz = %Ld\n", 
				    rank, nprocs, off, 
				    fd->hints->ind_wr_buffer_size-combine_buf_remain);
#endif
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
		    err = write(fd->fd_sys,
				     combine_buf,
				     fd->hints->ind_wr_buffer_size - combine_buf_remain);
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
		    if (err == -1) err_flag = 1;

		    /* reset our buffer info */
		    combine_buf_ptr = combine_buf;
		    combine_buf_remain = fd->hints->ind_wr_buffer_size;
		}

		/* TODO: heuristic for when to not bother to use combine buffer? */
		if (flat_buf->blocklens[i] >= combine_buf_remain) {
		    /* special case: blocklen is as big as or bigger than the combine buf;
		     * write directly
		     */
#ifdef IO_DEBUG
		    printf("[%d/%d] nc mem c file (1) writing loc = %Ld sz = %d\n", 
				    rank, nprocs, off, 
				    flat_buf->blocklens[i]);
#endif
        ADIOI_Assert(flat_buf->blocklens[i] == (unsigned)flat_buf->blocklens[i]);
        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]) == (ADIO_Offset)((MPIR_Upint)buf + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]));
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
		    err = write(fd->fd_sys,
				     ((char *) buf) + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i],
				     (unsigned)flat_buf->blocklens[i]);
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
		    if (err == -1) err_flag = 1;
		    off += flat_buf->blocklens[i]; /* keep up with the final file offset too */
		}
		else {
		    /* copy more data into combine buffer */
		    memcpy(combine_buf_ptr,
			   ((char *) buf) + j*buftype_extent + flat_buf->indices[i],
			   flat_buf->blocklens[i]);
		    combine_buf_ptr += flat_buf->blocklens[i];
		    combine_buf_remain -= flat_buf->blocklens[i];
		    off += flat_buf->blocklens[i]; /* keep up with the final file offset too */
		}
	    }
	}

	if (combine_buf_ptr != combine_buf) {
	    /* data left in buffer to write */
#ifdef IO_DEBUG
	    printf("[%d/%d] nc mem c file (2) writing loc = %Ld sz = %Ld\n", 
			    rank, nprocs, off, 
			     fd->hints->ind_wr_buffer_size-combine_buf_remain);
#endif
#ifdef ADIOI_MPE_LOGGING
	    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
	    err = write(fd->fd_sys,
			     combine_buf,
			     fd->hints->ind_wr_buffer_size - combine_buf_remain);
#ifdef ADIOI_MPE_LOGGING
	    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
	    if (err == -1) err_flag = 1;
	}

	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	ADIOI_Free(combine_buf);

	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    } /* if (!buftype_is_contig && filetype_is_contig)  ... */

    else {  /* noncontiguous in file */

/* split up into several contiguous writes */

/* find starting location in the file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
        disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    offset = fd->fp_ind; /* in bytes */
            n_filetypes = -1;
            flag = 0;
            while (!flag) {
                int i;
                n_filetypes++;
                for (i=0; i<flat_file->count; i++) {
                    if (disp + flat_file->indices[i] + 
                        n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[i] 
                            >= offset) {
                        st_index = i;
                        fwr_size = disp + flat_file->indices[i] + 
                                n_filetypes*(ADIO_Offset)filetype_extent
                                 + flat_file->blocklens[i] - offset;
                        flag = 1;
                        break;
                    }
                }
            }
	}
	else {
            int i;
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = offset / n_etypes_in_filetype;
	    etype_in_filetype = offset % n_etypes_in_filetype;
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
            offset = disp + n_filetypes*(ADIO_Offset)filetype_extent + abs_off_in_filetype;
	}

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i_offset = 0;
	    j = st_index;
	    off = offset;
	    fwr_size = ADIOI_MIN(fwr_size, bufsize);
	    while (i_offset < bufsize) {
                if (fwr_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       fwr_size = 0. save system call in such cases */ 
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event(ADIOI_MPE_lseek_a, 0, NULL);
#endif
#ifdef IO_DEBUG
		    printf("[%d/%d] c mem nc file writing loc = %Ld sz = %d\n", 
			    rank, nprocs, off, fwr_size);
#endif
		    err_lseek = lseek(fd->fd_sys, off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event(ADIOI_MPE_lseek_b, 0, NULL);
#endif
		    if (err_lseek == -1) err_flag = 1;
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
#endif
		    err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
#endif
		    if (err == -1) err_flag = 1;
		}
		i_offset += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
                       off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by fwr_size. */
                else {
		    if (j < (flat_file->count - 1)) j++;
		    else {
			j = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[j] + 
                                        n_filetypes*(ADIO_Offset)filetype_extent;
		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
		}
	    }
	}
	else {
/* noncontiguous in memory as well as in file */

	    ADIOI_Flatten_datatype(datatype);
	    flat_buf = ADIOI_Flatlist;
	    while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	    k = num = buf_count = 0;
	    indx = flat_buf->indices[0];
	    j = st_index;
	    off = offset;
	    bwr_size = flat_buf->blocklens[0];

	    while (num < bufsize) {
		size = ADIOI_MIN(fwr_size, bwr_size);
		if (size) {
#ifdef IO_DEBUG
		    printf("[%d/%d] nc mem nc file writing loc = %Ld sz = %d\n", 
				    rank, nprocs, off, size);
#endif
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
		    lseek(fd->fd_sys, off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING 
		    MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
		    if (err == -1) err_flag = 1;
#ifdef ADIOI_MPE_LOGGING 
		    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
                    ADIOI_Assert(size == (size_t) size);
                    ADIOI_Assert(off == (off_t) off);
		    err = write(fd->fd_sys, ((char *) buf) + indx, size);
#ifdef ADIOI_MPE_LOGGING
		    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
		    if (err == -1) err_flag = 1;
		}

		new_fwr_size = fwr_size;
		new_bwr_size = bwr_size;

		if (size == fwr_size) {
/* reached end of contiguous block in file */
                    if (j < (flat_file->count - 1)) j++;
                    else {
                        j = 0;
                        n_filetypes++;
                    }

                    off = disp + flat_file->indices[j] + 
                                   n_filetypes*(ADIO_Offset)filetype_extent;

		    new_fwr_size = flat_file->blocklens[j];
		    if (size != bwr_size) {
			indx += size;
			new_bwr_size -= size;
		    }
		}

		if (size == bwr_size) {
/* reached end of contiguous block in memory */

		    k = (k + 1)%flat_buf->count;
		    buf_count++;
		    indx = buftype_extent*(buf_count/flat_buf->count) +
			flat_buf->indices[k]; 
		    new_bwr_size = flat_buf->blocklens[k];
		    if (size != fwr_size) {
			off += size;
			new_fwr_size -= size;
		    }
		}
		num += size;
		fwr_size = new_fwr_size;
                bwr_size = new_bwr_size;
	    }
	}

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    }

    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
Ejemplo n.º 10
0
void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, brd_size, frd_size=0, st_index=0;
    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size, req_len, partial_read;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset, readbuf_off, start_off;
    char *readbuf, *tmp_buf, *value;
    int flag, st_frd_size, st_n_filetypes, readbuf_len;
    int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_NFS_READSTRIDED";
#endif

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

/* get max_bufsize from the info object. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
                 &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
                 fd->disp + etype_size * offset;

	start_off = off;
	end_offset = off + bufsize - 1;
        readbuf_off = off;
        readbuf = (char *) ADIOI_Malloc(max_bufsize);
        readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));

/* if atomicity is true, lock (exclusive) the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

	lseek(fd->fd_sys, readbuf_off, SEEK_SET);
        if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
        err = read(fd->fd_sys, readbuf, readbuf_len);
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
        if (err == -1) err_flag = 1;

        for (j=0; j<count; j++) 
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = j*buftype_extent + flat_buf->indices[i];
		req_off = off;
		req_len = flat_buf->blocklens[i];
		ADIOI_BUFFERED_READ
                off += flat_buf->blocklens[i];
            }

        if (fd->atomicity)
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	ADIOI_Free(readbuf); /* malloced in the buffered_read macro */

#ifdef PRINT_ERR_MSG
        *error_code = (err_flag) ? MPI_ERR_UNKNOWN : MPI_SUCCESS;
#else
	if (err_flag) {
	    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
			      myname, "I/O Error", "%s", strerror(errno));
	    ADIOI_Error(fd, *error_code, myname);	    
	}
	else *error_code = MPI_SUCCESS;
#endif
    }

    else {  /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    offset = fd->fp_ind; /* in bytes */
	    n_filetypes = -1;
	    flag = 0;
	    while (!flag) {
                n_filetypes++;
		for (i=0; i<flat_file->count; i++) {
		    if (disp + flat_file->indices[i] + 
                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
                            >= offset) {
			st_index = i;
			frd_size = (int) (disp + flat_file->indices[i] + 
			        (ADIO_Offset) n_filetypes*filetype_extent
			         + flat_file->blocklens[i] - offset);
			flag = 1;
			break;
		    }
		}
	    }
	}
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    frd_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
	}

        start_off = offset;

       /* Calculate end_offset, the last byte-offset that will be accessed.
         e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/

	st_frd_size = frd_size;
	st_n_filetypes = n_filetypes;
	i = 0;
	j = st_index;
	off = offset;
	frd_size = ADIOI_MIN(st_frd_size, bufsize);
	while (i < bufsize) {
	    i += frd_size;
	    end_offset = off + frd_size - 1;

	    if (j < (flat_file->count - 1)) j++;
	    else {
		j = 0;
		n_filetypes++;
	    }

	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
	}

/* if atomicity is true, lock (exclusive) the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        /* initial read into readbuf */
	readbuf_off = offset;
	readbuf = (char *) ADIOI_Malloc(max_bufsize);
	readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));

	lseek(fd->fd_sys, offset, SEEK_SET);
        if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
        err = read(fd->fd_sys, readbuf, readbuf_len);
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, offset, SEEK_SET, readbuf_len);

        if (err == -1) err_flag = 1;

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i = 0;
	    j = st_index;
	    off = offset;
	    n_filetypes = st_n_filetypes;
	    frd_size = ADIOI_MIN(st_frd_size, bufsize);
	    while (i < bufsize) {
                if (frd_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       frd_size = 0. save system call in such cases */ 
		    /* lseek(fd->fd_sys, off, SEEK_SET);
		    err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/

		    req_off = off;
		    req_len = frd_size;
		    userbuf_off = i;
		    ADIOI_BUFFERED_READ
		}
		i += frd_size;

                if (off + frd_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                       off += frd_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by frd_size. */
                else {
		    if (j < (flat_file->count - 1)) j++;
		    else {
			j = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[j] + 
                                        (ADIO_Offset) n_filetypes*filetype_extent;
		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
		}
	    }
	}
	else {
Ejemplo n.º 11
0
static void ADIOI_Iread_and_exch(ADIOI_NBC_Request *nbc_req, int *error_code)
{
    ADIOI_Iread_and_exch_vars *vars = nbc_req->data.rd.rae_vars;
    ADIO_File fd = vars->fd;
    MPI_Datatype datatype = vars->datatype;
    int nprocs = vars->nprocs;
    ADIOI_Access *others_req = vars->others_req;

    /* Read in sizes of no more than coll_bufsize, an info parameter.
       Send data to appropriate processes.
       Place recd. data in user buf.
       The idea is to reduce the amount of extra memory required for
       collective I/O. If all data were read all at once, which is much
       easier, it would require temp space more than the size of user_buf,
       which is often unacceptable. For example, to read a distributed
       array from a file, where each local array is 8Mbytes, requiring
       at least another 8Mbytes of temp space is unacceptable. */

    int i, j;
    ADIO_Offset st_loc = -1, end_loc = -1;
    ADIOI_Flatlist_node *flat_buf = NULL;
    int coll_bufsize;

    *error_code = MPI_SUCCESS;  /* changed below if error */
    /* only I/O errors are currently reported */

    /* calculate the number of reads of size coll_bufsize
       to be done by each process and the max among all processes.
       That gives the no. of communication phases as well.
       coll_bufsize is obtained from the hints object. */

    coll_bufsize = fd->hints->cb_buffer_size;
    vars->coll_bufsize = coll_bufsize;

    /* grab some initial values for st_loc and end_loc */
    for (i = 0; i < nprocs; i++) {
        if (others_req[i].count) {
            st_loc = others_req[i].offsets[0];
            end_loc = others_req[i].offsets[0];
            break;
        }
    }

    /* now find the real values */
    for (i = 0; i < nprocs; i++)
        for (j = 0; j < others_req[i].count; j++) {
            st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]);
            end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j]
                          + others_req[i].lens[j] - 1));
        }

    vars->st_loc = st_loc;
    vars->end_loc = end_loc;

    /* calculate ntimes, the number of times this process must perform I/O
     * operations in order to complete all the requests it has received.
     * the need for multiple I/O operations comes from the restriction that
     * we only use coll_bufsize bytes of memory for internal buffering.
     */
    if ((st_loc == -1) && (end_loc == -1)) {
        /* this process does no I/O. */
        vars->ntimes = 0;
    }
    else {
        /* ntimes=ceiling_div(end_loc - st_loc + 1, coll_bufsize)*/
        vars->ntimes = (int)((end_loc - st_loc + coll_bufsize) / coll_bufsize);
    }

    *error_code = MPI_Iallreduce(&vars->ntimes, &vars->max_ntimes, 1, MPI_INT,
                                 MPI_MAX, fd->comm, &vars->req1);

    vars->read_buf = fd->io_buf;  /* Allocated at open time */

    vars->curr_offlen_ptr = (int *)ADIOI_Calloc(nprocs, sizeof(int));
    /* its use is explained below. calloc initializes to 0. */

    vars->count = (int *)ADIOI_Malloc(nprocs * sizeof(int));
    /* to store count of how many off-len pairs per proc are satisfied
       in an iteration. */

    vars->partial_send = (int *)ADIOI_Calloc(nprocs, sizeof(int));
    /* if only a portion of the last off-len pair is sent to a process
       in a particular iteration, the length sent is stored here.
       calloc initializes to 0. */

    vars->send_size = (int *)ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be sent to each proc. in an iteration */

    vars->recv_size = (int *)ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be recd. from each proc. in an iteration.
       Of size nprocs so that I can use MPI_Alltoall later. */

    vars->recd_from_proc = (int *)ADIOI_Calloc(nprocs, sizeof(int));
    /* amount of data recd. so far from each proc. Used in
       ADIOI_Fill_user_buffer. initialized to 0 here. */

    vars->start_pos = (int *)ADIOI_Malloc(nprocs*sizeof(int));
    /* used to store the starting value of curr_offlen_ptr[i] in
       this iteration */

    ADIOI_Datatype_iscontig(datatype, &vars->buftype_is_contig);
    if (!vars->buftype_is_contig) {
        ADIOI_Flatten_datatype(datatype);
        flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;
        vars->flat_buf = flat_buf;
    }
    MPI_Type_extent(datatype, &vars->buftype_extent);

    vars->done = 0;
    vars->off = st_loc;
    vars->for_curr_iter = vars->for_next_iter = 0;

    /* set the state to wait until MPI_Ialltoall finishes. */
    nbc_req->data.rd.state = ADIOI_IRC_STATE_IREAD_AND_EXCH;
}
Ejemplo n.º 12
0
/* If successful, error_code is set to MPI_SUCCESS.  Otherwise an error
 * code is created and returned in error_code.
 */
static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
					MPI_Datatype datatype, int nprocs,
					int myrank, ADIOI_Access *others_req,
                                        ADIOI_Access *my_req,
					ADIO_Offset *offset_list,
                                        ADIO_Offset *len_list, 
					int contig_access_count,
                                        int *striping_info, int **buf_idx,
                                        int *error_code)
{
    /* Send data to appropriate processes and write in sizes of no more
     * than lustre stripe_size.
     * The idea is to reduce the amount of extra memory required for
     * collective I/O. If all data were written all at once, which is much
     * easier, it would require temp space more than the size of user_buf,
     * which is often unacceptable. For example, to write a distributed
     * array to a file, where each local array is 8Mbytes, requiring
     * at least another 8Mbytes of temp space is unacceptable.
     */

    int hole, i, j, m, flag, ntimes = 1 , max_ntimes, buftype_is_contig;
    ADIO_Offset st_loc = -1, end_loc = -1, min_st_loc, max_end_loc;
    ADIO_Offset off, req_off, send_off, iter_st_off, *off_list;
    ADIO_Offset max_size, step_size = 0;
    int real_size, req_len, send_len;
    int *recv_curr_offlen_ptr, *recv_count, *recv_size;
    int *send_curr_offlen_ptr, *send_size;
    int *sent_to_proc, *recv_start_pos;
    int *send_buf_idx, *curr_to_proc, *done_to_proc;
    int *this_buf_idx;
    char *write_buf = NULL;
    MPI_Status status;
    ADIOI_Flatlist_node *flat_buf = NULL;
    MPI_Aint buftype_extent;
    int stripe_size = striping_info[0], avail_cb_nodes = striping_info[2];
    int data_sieving = 0;
    ADIO_Offset *srt_off = NULL;
    int *srt_len = NULL;
    int srt_num = 0;
    ADIO_Offset block_offset;
    int block_len;

    *error_code = MPI_SUCCESS;	/* changed below if error */
    /* only I/O errors are currently reported */

    /* calculate the number of writes of stripe size to be done.
     * That gives the no. of communication phases as well.
     * Note:
     *   Because we redistribute data in stripe-contiguous pattern for Lustre,
     *   each process has the same no. of communication phases.
     */

    for (i = 0; i < nprocs; i++) {
	if (others_req[i].count) {
	    st_loc = others_req[i].offsets[0];
	    end_loc = others_req[i].offsets[0];
	    break;
	}
    }
    for (i = 0; i < nprocs; i++) {
	for (j = 0; j < others_req[i].count; j++) {
	    st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]);
	    end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j] +
                                          others_req[i].lens[j] - 1));
	}
    }
    /* this process does no writing. */
    if ((st_loc == -1) && (end_loc == -1))
	ntimes = 0;
    MPI_Allreduce(&end_loc, &max_end_loc, 1, MPI_LONG_LONG_INT, MPI_MAX, fd->comm);
    /* avoid min_st_loc be -1 */
    if (st_loc == -1)
        st_loc = max_end_loc;
    MPI_Allreduce(&st_loc, &min_st_loc, 1, MPI_LONG_LONG_INT, MPI_MIN, fd->comm);
    /* align downward */
    min_st_loc -= min_st_loc % (ADIO_Offset)stripe_size;

    /* Each time, only avail_cb_nodes number of IO clients perform IO,
     * so, step_size=avail_cb_nodes*stripe_size IO will be performed at most,
     * and ntimes=whole_file_portion/step_size
     */
    step_size = (ADIO_Offset) avail_cb_nodes * stripe_size;
    max_ntimes = (max_end_loc - min_st_loc + 1) / step_size
        + (((max_end_loc - min_st_loc + 1) % step_size) ? 1 : 0);
/*     max_ntimes = (int)((max_end_loc - min_st_loc) / step_size + 1); */
    if (ntimes)
	write_buf = (char *) ADIOI_Malloc(stripe_size);

    /* calculate the start offset for each iteration */
    off_list = (ADIO_Offset *) ADIOI_Malloc(max_ntimes * sizeof(ADIO_Offset));
    for (m = 0; m < max_ntimes; m ++)
        off_list[m] = max_end_loc;
    for (i = 0; i < nprocs; i++) {
        for (j = 0; j < others_req[i].count; j ++) {
            req_off = others_req[i].offsets[j];
            m = (int)((req_off - min_st_loc) / step_size);
            off_list[m] = ADIOI_MIN(off_list[m], req_off);
        }
    }

    recv_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    send_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* their use is explained below. calloc initializes to 0. */

    recv_count = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* to store count of how many off-len pairs per proc are satisfied
       in an iteration. */

    send_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be sent to each proc. in an iteration.
       Of size nprocs so that I can use MPI_Alltoall later. */

    recv_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be recd. from each proc. in an iteration. */

    sent_to_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* amount of data sent to each proc so far. Used in
       ADIOI_Fill_send_buffer. initialized to 0 here. */

    send_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    curr_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    done_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* Above three are used in ADIOI_Fill_send_buffer */

    this_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));

    recv_start_pos = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* used to store the starting value of recv_curr_offlen_ptr[i] in
       this iteration */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    if (!buftype_is_contig) {
	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype)
	    flat_buf = flat_buf->next;
    }
    MPI_Type_extent(datatype, &buftype_extent);
    /* I need to check if there are any outstanding nonblocking writes to
     * the file, which could potentially interfere with the writes taking
     * place in this collective write call. Since this is not likely to be
     * common, let me do the simplest thing possible here: Each process
     * completes all pending nonblocking operations before completing.
     */
    /*ADIOI_Complete_async(error_code);
    if (*error_code != MPI_SUCCESS) return;
    MPI_Barrier(fd->comm);
    */

    iter_st_off = min_st_loc;

    /* Although we have recognized the data according to OST index,
     * a read-modify-write will be done if there is a hole between the data.
     * For example: if blocksize=60, xfersize=30 and stripe_size=100,
     * then rank0 will collect data [0, 30] and [60, 90] then write. There
     * is a hole in [30, 60], which will cause a read-modify-write in [0, 90].
     *
     * To reduce its impact on the performance, we can disable data sieving
     * by hint "ds_in_coll".
     */
    /* check the hint for data sieving */
    data_sieving = fd->hints->fs_hints.lustre.ds_in_coll;

    for (m = 0; m < max_ntimes; m++) {
	/* go through all others_req and my_req to check which will be received
         * and sent in this iteration.
         */

	/* Note that MPI guarantees that displacements in filetypes are in
	   monotonically nondecreasing order and that, for writes, the
	   filetypes cannot specify overlapping regions in the file. This
	   simplifies implementation a bit compared to reads. */

	/*
           off         = start offset in the file for the data to be written in
                         this iteration
           iter_st_off = start offset of this iteration
           real_size   = size of data written (bytes) corresponding to off
           max_size    = possible maximum size of data written in this iteration
           req_off     = offset in the file for a particular contiguous request minus
                         what was satisfied in previous iteration
           send_off    = offset the request needed by other processes in this iteration
           req_len     = size corresponding to req_off
           send_len    = size corresponding to send_off
         */

	/* first calculate what should be communicated */
	for (i = 0; i < nprocs; i++)
	    recv_count[i] = recv_size[i] = send_size[i] = 0;

        off = off_list[m];
        max_size = ADIOI_MIN(step_size, max_end_loc - iter_st_off + 1);
        real_size = (int) ADIOI_MIN((off / stripe_size + 1) * stripe_size -
                                    off,
                                    end_loc - off + 1);

	for (i = 0; i < nprocs; i++) {
            if (my_req[i].count) {
                this_buf_idx[i] = buf_idx[i][send_curr_offlen_ptr[i]];
                for (j = send_curr_offlen_ptr[i]; j < my_req[i].count; j++) {
                    send_off = my_req[i].offsets[j];
                    send_len = my_req[i].lens[j];
                    if (send_off < iter_st_off + max_size) {
                        send_size[i] += send_len;
                    } else {
                        break;
                    }
                }
                send_curr_offlen_ptr[i] = j;
            }
	    if (others_req[i].count) {
		recv_start_pos[i] = recv_curr_offlen_ptr[i];
		for (j = recv_curr_offlen_ptr[i]; j < others_req[i].count; j++) {
                    req_off = others_req[i].offsets[j];
                    req_len = others_req[i].lens[j];
		    if (req_off < iter_st_off + max_size) {
			recv_count[i]++;
                        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
			MPI_Address(write_buf + req_off - off,
				    &(others_req[i].mem_ptrs[j]));
                        recv_size[i] += req_len;
		    } else {
			break;
                    }
		}
		recv_curr_offlen_ptr[i] = j;
	    }
	}
        /* use variable "hole" to pass data_sieving flag into W_Exchange_data */
        hole = data_sieving;
	ADIOI_LUSTRE_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
                                     len_list, send_size, recv_size, off, real_size,
                                     recv_count, recv_start_pos,
                                     sent_to_proc, nprocs, myrank,
                                     buftype_is_contig, contig_access_count,
                                     striping_info, others_req, send_buf_idx,
                                     curr_to_proc, done_to_proc, &hole, m,
                                  buftype_extent, this_buf_idx,
                                  &srt_off, &srt_len, &srt_num, error_code);

	if (*error_code != MPI_SUCCESS)
            goto over;

	flag = 0;
	for (i = 0; i < nprocs; i++)
	    if (recv_count[i]) {
		flag = 1;
		break;
	    }
	if (flag) {
            /* check whether to do data sieving */
            if(data_sieving == ADIOI_HINT_ENABLE) {
	        ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
			         ADIO_EXPLICIT_OFFSET, off, &status,
			         error_code);
            } else {
                /* if there is no hole, write data in one time;
                 * otherwise, write data in several times */
                if (!hole) {
                    ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
                                     ADIO_EXPLICIT_OFFSET, off, &status,
                                     error_code);
                } else {
                    block_offset = -1;
                    block_len = 0;
                    for (i = 0; i < srt_num; ++i) {
                        if (srt_off[i] < off + real_size &&
                            srt_off[i] >= off) {
                            if (block_offset == -1) {
                                block_offset = srt_off[i];
                                block_len = srt_len[i];
                            } else {
                                if (srt_off[i] == block_offset + block_len) {
                                    block_len += srt_len[i];
                                } else {
                                    ADIO_WriteContig(fd,
                                                     write_buf + block_offset - off,
                                                     block_len,
                                                     MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                                                     block_offset, &status,
                                                     error_code);
	                            if (*error_code != MPI_SUCCESS)
		                        goto over;
                                    block_offset = srt_off[i];
                                    block_len = srt_len[i];
                                }
                            }
                        }
                    }
                    if (block_offset != -1) {
                        ADIO_WriteContig(fd,
                                         write_buf + block_offset - off,
                                         block_len,
                                         MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                                         block_offset, &status,
                                         error_code);
                        if (*error_code != MPI_SUCCESS)
                            goto over;
                    }
                }
            }
	    if (*error_code != MPI_SUCCESS)
		goto over;
	}
        iter_st_off += max_size;
    }
over:
    if (srt_off)
        ADIOI_Free(srt_off);
    if (srt_len)
        ADIOI_Free(srt_len);
    if (ntimes)
	ADIOI_Free(write_buf);
    ADIOI_Free(recv_curr_offlen_ptr);
    ADIOI_Free(send_curr_offlen_ptr);
    ADIOI_Free(recv_count);
    ADIOI_Free(send_size);
    ADIOI_Free(recv_size);
    ADIOI_Free(sent_to_proc);
    ADIOI_Free(recv_start_pos);
    ADIOI_Free(send_buf_idx);
    ADIOI_Free(curr_to_proc);
    ADIOI_Free(done_to_proc);
    ADIOI_Free(this_buf_idx);
    ADIOI_Free(off_list);
}
Ejemplo n.º 13
0
void ADIOI_GRIDFTP_ReadDiscontig(ADIO_File fd, void *buf, int count,
				 MPI_Datatype datatype, int file_ptr_type,
				 ADIO_Offset offset, ADIO_Status *status, int
				 *error_code)
{
    char myname[]="ADIOI_GRIDFTP_ReadDiscontig";
    int myrank,nprocs;
    /* size and extent of buffer in memory */
    MPI_Aint btype_size,btype_extent;
    /* size and extent of file record layout */
    MPI_Aint ftype_size,ftype_extent;
    /* size of file elemental type; seeks are done in units of this */
    MPI_Aint etype_size;
    MPI_Aint extent;
    ADIOI_Flatlist_node *flat_file;
    int i,buf_contig,boff,nblks;
    globus_off_t start,end,goff;
    globus_size_t bytes_read;
    globus_result_t result;
    globus_byte_t *tmp;

    if ( fd->access_mode&MPI_MODE_WRONLY )
	{
	    *error_code=MPIR_ERR_MODE_WRONLY;
	    return;
	}

    *error_code=MPI_SUCCESS;

    MPI_Comm_rank(fd->comm,&myrank);
    MPI_Comm_size(fd->comm,&nprocs);

    etype_size=fd->etype_size;
    MPI_Type_size(fd->filetype,&ftype_size);
    MPI_Type_extent(fd->filetype,&ftype_extent);
    /* This is arguably unnecessary, as this routine assumes that the
       buffer in memory is contiguous */
    MPI_Type_size(datatype,&btype_size);
    MPI_Type_extent(datatype,&btype_extent);
    ADIOI_Datatype_iscontig(datatype,&buf_contig);
    
    if ( ( btype_extent!=btype_size ) || ( ! buf_contig ) )
	{
	    FPRINTF(stderr,"[%d/%d] %s called with discontigous memory buffer\n",
		    myrank,nprocs,myname);
	    fflush(stderr);
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS, 
			    MPIR_ERR_RECOVERABLE, myname, __LINE__, 
			    MPI_ERR_IO, "**io", 0 );
	    return;
	}
    /* from here we can assume btype_extent==btype_size */

    /* Flatten out fd->filetype so we know which blocks to skip */
    ADIOI_Flatten_datatype(fd->filetype);
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype && flat_file->next!=NULL)
	flat_file = flat_file->next;

    /* Figure out how big the area to read is */
    start=(globus_off_t)(offset*etype_size);
    goff=start;
    boff=0;
    extent=0;
    nblks=0;
    while ( boff < (count*btype_size) )
	{
	    int blklen=0;

	    for (i=0;i<flat_file->count;i++)
		{
		    /* find the length of the next block */
		    if ( (boff+flat_file->blocklens[i]) < (count*btype_size) )
			blklen=flat_file->blocklens[i];
		    else
			blklen=(count*btype_size)-boff;
		    /* increment buffer size to be used */
		    boff+=blklen;
		    /* compute extent -- the nblks*ftype_extent bit is
		       there so we remember how many ftypes we've already
		       been through */
		    extent=MAX(extent,nblks*ftype_extent+flat_file->indices[i]+blklen);
		    if ( boff>=(count*btype_size) )
			break;
		}
	    nblks++;
	}
    if ( extent < count*btype_size )
	{
	    fprintf(stderr,"[%d/%d] %s error in computing extent -- extent %d is smaller than total bytes requested %d!\n",
		    myrank,nprocs,myname,extent,count*btype_size);
	    fflush(stderr);
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS, 
			    MPIR_ERR_RECOVERABLE, myanem, __LINE__, 
			    MPI_ERR_IO, "**io", 0);
	    return;
	}
    end=start+(globus_off_t)extent;
    tmp=(globus_byte_t *)malloc((size_t)extent*sizeof(globus_byte_t));

    /* start up the globus partial read */
    globus_mutex_init(&readdiscontig_ctl_lock, GLOBUS_NULL);
    globus_cond_init(&readdiscontig_ctl_cond, GLOBUS_NULL);
    readdiscontig_ctl_done=GLOBUS_FALSE;
    if ( (result=globus_ftp_client_partial_get(&(gridftp_fh[fd->fd_sys]),
					       fd->filename,
					       &(oattr[fd->fd_sys]),
					       GLOBUS_NULL,
					       start,
					       end,
					       readdiscontig_ctl_cb,
					       GLOBUS_NULL))!=GLOBUS_SUCCESS )
	{
	    globus_err_handler("globus_ftp_client_partial_get",myname,result);
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS, 
			    MPIR_ERR_RECOVERABLE, myanem, __LINE__, 
			    MPI_ERR_IO, "**io", "**io %s", 
			    globus_object_printable_to_string(result));
	    return;
	}

    /* Do all the actual I/Os */
    /* Since globus_ftp_client_register_read() is brain-dead and doesn't
       let you specify an offset, we have to slurp the entire extent into
       memory and then parse out the pieces we want...  Sucks, doesn't it?

       This should probably be done in chunks (preferably of a size
       set using a file hint), but that'll have to come later.
       --TB */
    if ( (result=globus_ftp_client_register_read(&(gridftp_fh[fd->fd_sys]),
						 tmp,
						 (globus_size_t)extent,
						 readdiscontig_data_cb,
						 (void *)(&bytes_read)))!=GLOBUS_SUCCESS )
	{
	    globus_err_handler("globus_ftp_client_register_read",myname,result);
	    *error_code = MPIO_Err_create_code(MPI_SUCESS, MPIR_ERR_RECOVERABLE,
		    myname, __LINE__, MPI_ERR_IO,
		    "**io",
		    "**io %s", globus_object_printable_to_string(result));
	    return;
	}
    /* The ctl callback won't start till the data callbacks complete, so it's
       safe to wait on just the ctl callback */
    globus_mutex_lock(&readdiscontig_ctl_lock);
    while ( readdiscontig_ctl_done!=GLOBUS_TRUE )
	globus_cond_wait(&readdiscontig_ctl_cond,&readdiscontig_ctl_lock);
    globus_mutex_unlock(&readdiscontig_ctl_lock);

    globus_mutex_destroy(&readdiscontig_ctl_lock);
    globus_cond_destroy(&readdiscontig_ctl_cond);

    boff=0;
    nblks=0;
    goff=0;
    while ( boff < (count*btype_size) )
	{
	    int i,blklen;

	    for (i=0;i<flat_file->count;i++)
		{
		    if ( (boff+flat_file->blocklens[i]) < (count*btype_size) )
			blklen=flat_file->blocklens[i];
		    else
			blklen=(count*btype_size)-boff;
		    if ( blklen > 0 )
			{
			    goff=nblks*ftype_extent+flat_file->indices[i];
			    memcpy((globus_byte_t *)buf+boff,tmp+goff,(size_t)blklen);
			    boff+=blklen;
			    if ( boff>=(count*btype_size) )
				break;
			}
		}
	    nblks++;
	}
    free(tmp);

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bytes_read);
#endif
    if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
    {
	fd->fp_ind += extent;
	fd->fp_sys_posn = fd->fp_ind;
    }
    else {
	fd->fp_sys_posn = offset + extent;
    }
}
Ejemplo n.º 14
0
void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, brd_size, frd_size=0, st_index=0;
    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size, req_len, partial_read;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
    char *readbuf, *tmp_buf, *value;
    int flag, st_frd_size, st_n_filetypes, readbuf_len;
    int new_brd_size, new_frd_size, info_flag, max_bufsize;
    ADIO_Status status1;

    if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
    	/* if user has disabled data sieving on reads, use naive
	 * approach instead.
	 */
	ADIOI_GEN_ReadStrided_naive(fd, 
				    buf,
				    count,
				    datatype,
				    file_ptr_type,
				    offset,
				    status,
				    error_code);
    	return;
    }

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

/* get max_bufsize from the info object. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
                 &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);


    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
                 fd->disp + etype_size * offset;

	start_off = off;
	end_offset = off + bufsize - 1;
        readbuf_off = off;
        readbuf = (char *) ADIOI_Malloc(max_bufsize);
        readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));

/* if atomicity is true, lock (exclusive) the region to be accessed */
        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, 
            ADIO_EXPLICIT_OFFSET, readbuf_off, &status1, error_code);
	if (*error_code != MPI_SUCCESS) return;

        for (j=0; j<count; j++) 
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = j*buftype_extent + flat_buf->indices[i];
		req_off = off;
		req_len = flat_buf->blocklens[i];
		ADIOI_BUFFERED_READ
                off += flat_buf->blocklens[i];
            }

        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	ADIOI_Free(readbuf); 
    }

    else {  /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    offset = fd->fp_ind; /* in bytes */
	    n_filetypes = -1;
	    flag = 0;
	    while (!flag) {
                n_filetypes++;
		for (i=0; i<flat_file->count; i++) {
		    if (disp + flat_file->indices[i] + 
                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
                            >= offset) {
			st_index = i;
			frd_size = (int) (disp + flat_file->indices[i] + 
			        (ADIO_Offset) n_filetypes*filetype_extent
			         + flat_file->blocklens[i] - offset);
			flag = 1;
			break;
		    }
		}
	    }
	}
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    frd_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
	}

        start_off = offset;

       /* Calculate end_offset, the last byte-offset that will be accessed.
         e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/

	st_frd_size = frd_size;
	st_n_filetypes = n_filetypes;
	i = 0;
	j = st_index;
	off = offset;
	frd_size = ADIOI_MIN(st_frd_size, bufsize);
	while (i < bufsize) {
	    i += frd_size;
	    end_offset = off + frd_size - 1;

	    if (j < (flat_file->count - 1)) j++;
	    else {
		j = 0;
		n_filetypes++;
	    }

	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
	}

/* if atomicity is true, lock (exclusive) the region to be accessed */
        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

	readbuf_off = 0;
	readbuf_len = 0;
	readbuf = (char *) ADIOI_Malloc(max_bufsize);

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i = 0;
	    j = st_index;
	    off = offset;
	    n_filetypes = st_n_filetypes;
	    frd_size = ADIOI_MIN(st_frd_size, bufsize);
	    while (i < bufsize) {
                if (frd_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       frd_size = 0. save system call in such cases */ 
		    /* lseek(fd->fd_sys, off, SEEK_SET);
		    err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/

		    req_off = off;
		    req_len = frd_size;
		    userbuf_off = i;
		    ADIOI_BUFFERED_READ
		}
		i += frd_size;

                if (off + frd_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                       off += frd_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by frd_size. */
                else {
		    if (j < (flat_file->count - 1)) j++;
		    else {
			j = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[j] + 
                                        (ADIO_Offset) n_filetypes*filetype_extent;
		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
		}
	    }
	}
	else {
Ejemplo n.º 15
0
/* this used to be implemented in every file system as an fcntl.  It makes
 * deferred open easier if we know ADIO_Fcntl will always need a file to really
 * be open. set_view doesn't modify anything related to the open files.
 */
void ADIO_Set_view(ADIO_File fd, ADIO_Offset disp, MPI_Datatype etype, 
		MPI_Datatype filetype, MPI_Info info,  int *error_code) 
{
	int combiner, i, j, k, err, filetype_is_contig;
	MPI_Datatype copy_etype, copy_filetype;
	ADIOI_Flatlist_node *flat_file;
	/* free copies of old etypes and filetypes and delete flattened 
       version of filetype if necessary */

	MPI_Type_get_envelope(fd->etype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->etype));

	ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (!filetype_is_contig) ADIOI_Delete_flattened(fd->filetype);

	MPI_Type_get_envelope(fd->filetype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->filetype));

	/* set new info */
	ADIO_SetInfo(fd, info, &err);

        /* set new etypes and filetypes */

	MPI_Type_get_envelope(etype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) fd->etype = etype;
	else {
	    MPI_Type_contiguous(1, etype, &copy_etype);
	    MPI_Type_commit(&copy_etype);
	    fd->etype = copy_etype;
	}
	MPI_Type_get_envelope(filetype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) 
	    fd->filetype = filetype;
	else {
	    MPI_Type_contiguous(1, filetype, &copy_filetype);
	    MPI_Type_commit(&copy_filetype);
	    fd->filetype = copy_filetype;
	    ADIOI_Flatten_datatype(fd->filetype);
            /* this function will not flatten the filetype if it turns out
               to be all contiguous. */
	}

	MPI_Type_size(fd->etype, &(fd->etype_size));
	fd->disp = disp;

        /* reset MPI-IO file pointer to point to the first byte that can
           be accessed in this view. */

        ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (filetype_is_contig) fd->fp_ind = disp;
	else {
	    flat_file = ADIOI_Flatlist;
	    while (flat_file->type != fd->filetype) 
		flat_file = flat_file->next;
	    for (i=0; i<flat_file->count; i++) {
		if (flat_file->blocklens[i]) {
		    fd->fp_ind = disp + flat_file->indices[i];
		    break;
		}
	    }
	}
	*error_code = MPI_SUCCESS;
}
Ejemplo n.º 16
0
void ADIOI_SCI_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    MPI_Datatype copy_etype, copy_filetype;
    int combiner, i, j, k, filetype_is_contig, ntimes, err;
    ADIOI_Flatlist_node *flat_file;
    ADIO_Offset curr_fsize, alloc_size, size, len, done;
    ADIO_Status status;
    char *buf;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_SCI_FCNTL";
#endif

    switch (flag) {
    case ADIO_FCNTL_SET_VIEW:
        /* free copies of old etypes and filetypes and delete flattened 
           version of filetype if necessary */

	MPI_Type_get_envelope(fd->etype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) 
	    MPI_Type_free(&(fd->etype));

	ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (!filetype_is_contig) 
	    ADIOI_Delete_flattened(fd->filetype);

	MPI_Type_get_envelope(fd->filetype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) 
	    MPI_Type_free(&(fd->filetype));

	/* set new info */
	ADIO_SetInfo(fd, fcntl_struct->info, &err);

        /* set new etypes and filetypes */

	MPI_Type_get_envelope(fcntl_struct->etype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) {
	    fd->etype = fcntl_struct->etype;
	} else {
	    MPI_Type_contiguous(1, fcntl_struct->etype, &copy_etype);
	    MPI_Type_commit(&copy_etype);
	    fd->etype = copy_etype;
	}
	MPI_Type_get_envelope(fcntl_struct->filetype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) {
	    fd->filetype = fcntl_struct->filetype;
	} else {
	    MPI_Type_contiguous(1, fcntl_struct->filetype, &copy_filetype);
	    MPI_Type_commit(&copy_filetype);
	    fd->filetype = copy_filetype;
	    ADIOI_Flatten_datatype(fd->filetype);
            /* this function will not flatten the filetype if it turns out
               to be all contiguous. */
	}

	MPI_Type_size(fd->etype, &(fd->etype_size));
	fd->disp = fcntl_struct->disp;

        /* reset MPI-IO file pointer to point to the first byte that can
           be accessed in this view. */

        ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (filetype_is_contig) {
	    fd->fp_ind = fcntl_struct->disp;
	} else {
	    flat_file = ADIOI_Flatlist;
	    while (flat_file->type != fd->filetype) 
		flat_file = flat_file->next;
	    for (i=0; i<flat_file->count; i++) {
		if (flat_file->blocklens[i]) {
		    fd->fp_ind = fcntl_struct->disp + flat_file->indices[i];
		    break;
		}
	    }
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_GET_FSIZE:
	break;

    case ADIO_FCNTL_SET_DISKSPACE:
	/* will be called by one process only */
	/* On file systems with no preallocation function, I have to 
           explicitly write 
           to allocate space. Since there could be holes in the file, 
           I need to read up to the current file size, write it back, 
           and then write beyond that depending on how much 
           preallocation is needed.
           read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */

	break;

    case ADIO_FCNTL_SET_IOMODE:
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_ATOMICITY:
	fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
	*error_code = MPI_SUCCESS;
	break;

    default:
	FPRINTF(stderr, "Unknown flag passed to ADIOI_SCI_Fcntl\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
}
Ejemplo n.º 17
0
void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
                       MPI_Datatype buftype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
    /* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int brd_size, frd_size=0, b_index;
    int bufsize, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size, req_len;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, start_off;
    ADIO_Status status1;

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(buftype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(buftype, &buftype_size);
    MPI_Type_extent(buftype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

    /* contiguous in buftype and filetype is handled elsewhere */

    if (!buftype_is_contig && filetype_is_contig) {
    	int b_count;
	/* noncontiguous in memory, contiguous in file. */

	ADIOI_Flatten_datatype(buftype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != buftype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
              fd->disp + etype_size * offset;

	start_off = off;
	end_offset = off + bufsize - 1;

	/* if atomicity is true, lock (exclusive) the region to be accessed */
        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
	   (fd->file_system != ADIO_PVFS))
	{
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
	}

	/* for each region in the buffer, grab the data and put it in
	 * place
	 */
        for (b_count=0; b_count < count; b_count++) {
            for (b_index=0; b_index < flat_buf->count; b_index++) {
                userbuf_off = b_count*buftype_extent + 
		              flat_buf->indices[b_index];
		req_off = off;
		req_len = flat_buf->blocklens[b_index];

		ADIO_ReadContig(fd, 
				(char *) buf + userbuf_off,
				req_len, 
				MPI_BYTE, 
		    		ADIO_EXPLICIT_OFFSET,
				req_off,
				&status1,
				error_code);
		if (*error_code != MPI_SUCCESS) return;

		/* off is (potentially) used to save the final offset later */
                off += flat_buf->blocklens[b_index];
            }
	}

        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
	   (fd->file_system != ADIO_PVFS))
	{
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
	}

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

    }

    else {  /* noncontiguous in file */
    	int f_index, st_frd_size, st_index = 0, st_n_filetypes;
	int flag;

        /* First we're going to calculate a set of values for use in all
	 * the noncontiguous in file cases:
	 * start_off - starting byte position of data in file
	 * end_offset - last byte offset to be acessed in the file
	 * st_n_filetypes - how far into the file we start in terms of
	 *                  whole filetypes
	 * st_index - index of block in first filetype that we will be
	 *            starting in (?)
	 * st_frd_size - size of the data in the first filetype block
	 *               that we will read (accounts for being part-way
	 *               into reading this block of the filetype
	 *
	 */

	/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	    start_off = fd->fp_ind; /* in bytes */
	    n_filetypes = -1;
	    flag = 0;
	    while (!flag) {
                n_filetypes++;
		for (f_index=0; f_index < flat_file->count; f_index++) {
		    if (disp + flat_file->indices[f_index] + 
                       (ADIO_Offset) n_filetypes*filetype_extent + 
		       flat_file->blocklens[f_index] >= start_off) 
		    {
		    	/* this block contains our starting position */

			st_index = f_index;
			frd_size = (int) (disp + flat_file->indices[f_index] + 
		 	           (ADIO_Offset) n_filetypes*filetype_extent + 
				   flat_file->blocklens[f_index] - start_off);
			flag = 1;
			break;
		    }
		}
	    }
	}
	else {
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = (int) (offset / n_etypes_in_filetype);
	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (f_index=0; f_index < flat_file->count; f_index++) {
		sum += flat_file->blocklens[f_index];
		if (sum > size_in_filetype) {
		    st_index = f_index;
		    frd_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[f_index] +
			                  size_in_filetype - 
			                  (sum - flat_file->blocklens[f_index]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    start_off = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
	    	        abs_off_in_filetype;
	}

	st_frd_size = frd_size;
	st_n_filetypes = n_filetypes;

	/* start_off, st_n_filetypes, st_index, and st_frd_size are 
	 * all calculated at this point
	 */

        /* Calculate end_offset, the last byte-offset that will be accessed.
         * e.g., if start_off=0 and 100 bytes to be read, end_offset=99
	 */
	userbuf_off = 0;
	f_index = st_index;
	off = start_off;
	frd_size = ADIOI_MIN(st_frd_size, bufsize);
	while (userbuf_off < bufsize) {
	    userbuf_off += frd_size;
	    end_offset = off + frd_size - 1;

	    if (f_index < (flat_file->count - 1)) f_index++;
	    else {
		f_index = 0;
		n_filetypes++;
	    }

	    off = disp + flat_file->indices[f_index] + 
	          (ADIO_Offset) n_filetypes*filetype_extent;
	    frd_size = ADIOI_MIN(flat_file->blocklens[f_index], 
	                         bufsize-(int)userbuf_off);
	}

	/* End of calculations.  At this point the following values have
	 * been calculated and are ready for use:
	 * - start_off
	 * - end_offset
	 * - st_n_filetypes
	 * - st_index
	 * - st_frd_size
	 */

	/* if atomicity is true, lock (exclusive) the region to be accessed */
        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
	   (fd->file_system != ADIO_PVFS))
	{
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
	}

	if (buftype_is_contig && !filetype_is_contig) {
	    /* contiguous in memory, noncontiguous in file. should be the
	     * most common case.
	     */

	    userbuf_off = 0;
	    f_index = st_index;
	    off = start_off;
	    n_filetypes = st_n_filetypes;
	    frd_size = ADIOI_MIN(st_frd_size, bufsize);

	    /* while there is still space in the buffer, read more data */
	    while (userbuf_off < bufsize) {
                if (frd_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       frd_size = 0. save system call in such cases */ 
		    req_off = off;
		    req_len = frd_size;

		    ADIO_ReadContig(fd, 
				    (char *) buf + userbuf_off,
				    req_len, 
				    MPI_BYTE, 
				    ADIO_EXPLICIT_OFFSET,
				    req_off,
				    &status1,
				    error_code);
		    if (*error_code != MPI_SUCCESS) return;
		}
		userbuf_off += frd_size;

                if (off + frd_size < disp + flat_file->indices[f_index] +
                   flat_file->blocklens[f_index] + 
		   (ADIO_Offset) n_filetypes*filetype_extent)
		{
		    /* important that this value be correct, as it is
		     * used to set the offset in the fd near the end of
		     * this function.
		     */
                    off += frd_size;
		}
                /* did not reach end of contiguous block in filetype.
                 * no more I/O needed. off is incremented by frd_size.
		 */
                else {
		    if (f_index < (flat_file->count - 1)) f_index++;
		    else {
			f_index = 0;
			n_filetypes++;
		    }
		    off = disp + flat_file->indices[f_index] + 
                          (ADIO_Offset) n_filetypes*filetype_extent;
		    frd_size = ADIOI_MIN(flat_file->blocklens[f_index], 
		                         bufsize-(int)userbuf_off);
		}
	    }
	}
	else {
	    int i, tmp_bufsize = 0;
	    /* noncontiguous in memory as well as in file */

	    ADIOI_Flatten_datatype(buftype);
	    flat_buf = ADIOI_Flatlist;
	    while (flat_buf->type != buftype) flat_buf = flat_buf->next;

	    b_index = buf_count = 0;
	    i = (int) (flat_buf->indices[0]);
	    f_index = st_index;
	    off = start_off;
	    n_filetypes = st_n_filetypes;
	    frd_size = st_frd_size;
	    brd_size = flat_buf->blocklens[0];

	    /* while we haven't read size * count bytes, keep going */
	    while (tmp_bufsize < bufsize) {
    		int new_brd_size = brd_size, new_frd_size = frd_size;

		size = ADIOI_MIN(frd_size, brd_size);
		if (size) {
		    req_off = off;
		    req_len = size;
		    userbuf_off = i;

		    ADIO_ReadContig(fd, 
				    (char *) buf + userbuf_off,
				    req_len, 
				    MPI_BYTE, 
				    ADIO_EXPLICIT_OFFSET,
				    req_off,
				    &status1,
				    error_code);
		    if (*error_code != MPI_SUCCESS) return;
		}

		if (size == frd_size) {
		    /* reached end of contiguous block in file */
		    if (f_index < (flat_file->count - 1)) f_index++;
		    else {
			f_index = 0;
			n_filetypes++;
		    }

		    off = disp + flat_file->indices[f_index] + 
                          (ADIO_Offset) n_filetypes*filetype_extent;

		    new_frd_size = flat_file->blocklens[f_index];
		    if (size != brd_size) {
			i += size;
			new_brd_size -= size;
		    }
		}

		if (size == brd_size) {
		    /* reached end of contiguous block in memory */

		    b_index = (b_index + 1)%flat_buf->count;
		    buf_count++;
		    i = (int) (buftype_extent*(buf_count/flat_buf->count) +
			flat_buf->indices[b_index]);
		    new_brd_size = flat_buf->blocklens[b_index];
		    if (size != frd_size) {
			off += size;
			new_frd_size -= size;
		    }
		}
		tmp_bufsize += size;
		frd_size = new_frd_size;
                brd_size = new_brd_size;
	    }
	}

	/* unlock the file region if we locked it */
        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
	   (fd->file_system != ADIO_PVFS))
	{
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
	}

	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
    } /* end of (else noncontiguous in file) */

    fd->fp_sys_posn = -1;   /* mark it as invalid. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, buftype, bufsize);
    /* This is a temporary way of filling in status. The right way is to 
     * keep track of how much data was actually read and placed in buf 
     */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(buftype);
}
Ejemplo n.º 18
0
void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* offset is in units of etype relative to the filetype. */



    ADIOI_Flatlist_node *flat_buf, *flat_file;
    ADIO_Offset i_offset, sum, size_in_filetype;
    int i, j, k, err=-1, st_index=0;
    int n_etypes_in_filetype;
    ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
    char *writebuf, *value;
    unsigned bufsize, writebuf_len, max_bufsize, write_sz;
    int err_flag=0, info_flag;
    ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
    static char myname[] = "ADIOI_BGL_WRITESTRIDED";

    if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
    	/* if user has disabled data sieving on reads, use naive
	 * approach instead.
	 */
      /*FPRINTF(stderr, "ADIOI_GEN_WriteStrided_naive(%d):\n", __LINE__);*/
      ADIOI_GEN_WriteStrided_naive(fd, 
				    buf,
				    count,
				    datatype,
				    file_ptr_type,
				    offset,
				    status,
				    error_code);
    	return;
    }
    /*FPRINTF(stderr, "%s(%d):\n",myname, __LINE__);*/

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
    bufsize = buftype_size * count;

/* get max_bufsize from the info object. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, 
                 &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
                 fd->disp + etype_size * offset;

        start_off = off;
	end_offset = off + bufsize - 1;
        writebuf_off = off;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));

/* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity) 
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        for (j=0; j<count; j++) 
        {
          int i;
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
		req_off = off;
		req_len = flat_buf->blocklens[i];
		ADIOI_BUFFERED_WRITE_WITHOUT_READ
                off += flat_buf->blocklens[i];
            }
        }

        /* write the buffer out finally */
	lseek(fd->fd_sys, writebuf_off, SEEK_SET); 
	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
	err = write(fd->fd_sys, writebuf, writebuf_len); 
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        if (err == -1) err_flag = 1; 

        if (fd->atomicity) 
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

	ADIOI_Free(writebuf); /* malloced in the buffered_write macro */

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;
    }

    else {  /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	disp = fd->disp;

	if (file_ptr_type == ADIO_INDIVIDUAL) {
	/* Wei-keng reworked type processing to be a bit more efficient */
            offset       = fd->fp_ind - disp;
            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
            offset      -= (ADIO_Offset)n_filetypes * filetype_extent;
            /* now offset is local to this extent */

            /* find the block where offset is located, skip blocklens[i]==0 */
            for (i=0; i<flat_file->count; i++) {
                ADIO_Offset dist;
                if (flat_file->blocklens[i] == 0) continue;
                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
                /* fwr_size is from offset to the end of block i */
                if (dist == 0) {
                    i++;
                    offset   = flat_file->indices[i];
                    fwr_size = flat_file->blocklens[i];
                    break;
                }
                if (dist > 0) {
                    fwr_size = dist;
                    break;
                }
            }
            st_index = i;  /* starting index in flat_file->indices[] */
            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
	}
	else {
    int i;
	    n_etypes_in_filetype = filetype_size/etype_size;
	    n_filetypes = offset / n_etypes_in_filetype;
	    etype_in_filetype = offset % n_etypes_in_filetype;
	    size_in_filetype = etype_in_filetype * etype_size;
 
	    sum = 0;
	    for (i=0; i<flat_file->count; i++) {
		sum += flat_file->blocklens[i];
		if (sum > size_in_filetype) {
		    st_index = i;
		    fwr_size = sum - size_in_filetype;
		    abs_off_in_filetype = flat_file->indices[i] +
			size_in_filetype - (sum - flat_file->blocklens[i]);
		    break;
		}
	    }

	    /* abs. offset in bytes in the file */
	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
		    abs_off_in_filetype;
	}

        start_off = offset;
        /* Wei-keng Liao:write request is within single flat_file contig block*/
	/* this could happen, for example, with subarray types that are
	 * actually fairly contiguous */
        if (buftype_is_contig && bufsize <= fwr_size) {
            ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                             offset, status, error_code);

	    if (file_ptr_type == ADIO_INDIVIDUAL) {
                /* update MPI-IO file pointer to point to the first byte 
		 * that can be accessed in the fileview. */
                fd->fp_ind = offset + bufsize;
                if (bufsize == fwr_size) {
                    do {
                        st_index++;
                        if (st_index == flat_file->count) {
                            st_index = 0;
                            n_filetypes++;
                        }
                    } while (flat_file->blocklens[st_index] == 0);
                    fd->fp_ind = disp + flat_file->indices[st_index]
                               + (ADIO_Offset)n_filetypes*filetype_extent;
                }
            }
	    fd->fp_sys_posn = -1;   /* set it to null. */ 
#ifdef HAVE_STATUS_SET_BYTES
	    MPIR_Status_set_bytes(status, datatype, bufsize);
#endif 
            return;
        }

       /* Calculate end_offset, the last byte-offset that will be accessed.
         e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/

	st_fwr_size = fwr_size;
	st_n_filetypes = n_filetypes;
	i_offset = 0;
	j = st_index;
	off = offset;
	fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
	while (i_offset < bufsize) {
	    i_offset += fwr_size;
	    end_offset = off + fwr_size - 1;

            j = (j+1) % flat_file->count;
            n_filetypes += (j == 0) ? 1 : 0;
            while (flat_file->blocklens[j]==0) {
                j = (j+1) % flat_file->count;
                n_filetypes += (j == 0) ? 1 : 0;
            }

	    off = disp + flat_file->indices[j] + 
		    n_filetypes*(ADIO_Offset)filetype_extent;
	    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
	}

/* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity) 
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        /* initial read for the read-modify-write */
        writebuf_off = offset;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (unsigned)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
	lseek(fd->fd_sys, writebuf_off, SEEK_SET); 
	err = read(fd->fd_sys, writebuf, writebuf_len); 
        if (err == -1) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE,
					       myname, __LINE__,
					       MPI_ERR_IO,
					       "ADIOI_BGL_WriteStrided: ROMIO tries to optimize this access by doing a read-modify-write, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.", 0);
	    return;
        } 

	if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	    i_offset = 0;
	    j = st_index;
	    off = offset;
	    n_filetypes = st_n_filetypes;
	    fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
	    while (i_offset < bufsize) {
                if (fwr_size) { 
                    /* TYPE_UB and TYPE_LB can result in 
                       fwr_size = 0. save system call in such cases */ 
		    /* lseek(fd->fd_sys, off, SEEK_SET);
		    err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/

		    req_off = off;
		    req_len = fwr_size;
		    userbuf_off = i_offset;
		    ADIOI_BUFFERED_WRITE
		}
		i_offset += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                   flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
                       off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by fwr_size. */
                else {
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
                    }
		    off = disp + flat_file->indices[j] + 
                                    n_filetypes*(ADIO_Offset)filetype_extent;
		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], 
				    bufsize-i_offset);
		}
	    }
	}
	else {
Ejemplo n.º 19
0
/* If successful, error_code is set to MPI_SUCCESS.  Otherwise an error
 * code is created and returned in error_code.
 */
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
				 datatype, int nprocs,
				 int myrank,
				 ADIOI_Access
				 *others_req, ADIO_Offset *offset_list,
				 ADIO_Offset *len_list, int contig_access_count,
				 ADIO_Offset min_st_offset, ADIO_Offset fd_size,
				 ADIO_Offset *fd_start, ADIO_Offset *fd_end,
				 int *buf_idx, int *error_code)
{
/* Send data to appropriate processes and write in sizes of no more
   than coll_bufsize.
   The idea is to reduce the amount of extra memory required for
   collective I/O. If all data were written all at once, which is much
   easier, it would require temp space more than the size of user_buf,
   which is often unacceptable. For example, to write a distributed
   array to a file, where each local array is 8Mbytes, requiring
   at least another 8Mbytes of temp space is unacceptable. */

    /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
    ADIO_Offset size=0;
    int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig;
    ADIO_Offset st_loc=-1, end_loc=-1, off, done, req_off;
    char *write_buf=NULL;
    int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size;
    int *partial_recv, *sent_to_proc, *start_pos, flag;
    int *send_buf_idx, *curr_to_proc, *done_to_proc;
    MPI_Status status;
    ADIOI_Flatlist_node *flat_buf=NULL;
    MPI_Aint buftype_extent;
    int info_flag, coll_bufsize;
    char *value;
    static char myname[] = "ADIOI_EXCH_AND_WRITE";

    *error_code = MPI_SUCCESS;  /* changed below if error */
    /* only I/O errors are currently reported */

/* calculate the number of writes of size coll_bufsize
   to be done by each process and the max among all processes.
   That gives the no. of communication phases as well. */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value,
                 &info_flag);
    coll_bufsize = atoi(value);
    ADIOI_Free(value);


    for (i=0; i < nprocs; i++) {
	if (others_req[i].count) {
	    st_loc = others_req[i].offsets[0];
	    end_loc = others_req[i].offsets[0];
	    break;
	}
    }

    for (i=0; i < nprocs; i++)
	for (j=0; j < others_req[i].count; j++) {
	    st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]);
	    end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j]
				       + others_req[i].lens[j] - 1));
	}

/* ntimes=ceiling_div(end_loc - st_loc + 1, coll_bufsize)*/

    ntimes = (int) ((end_loc - st_loc + coll_bufsize)/coll_bufsize);

    if ((st_loc==-1) && (end_loc==-1)) {
	ntimes = 0; /* this process does no writing. */
    }

    MPI_Allreduce(&ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX,
		  fd->comm);

    write_buf = fd->io_buf;

    curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* its use is explained below. calloc initializes to 0. */

    count = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    /* to store count of how many off-len pairs per proc are satisfied
       in an iteration. */

    partial_recv = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* if only a portion of the last off-len pair is recd. from a process
       in a particular iteration, the length recd. is stored here.
       calloc initializes to 0. */

    send_size = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    /* total size of data to be sent to each proc. in an iteration.
       Of size nprocs so that I can use MPI_Alltoall later. */

    recv_size = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    /* total size of data to be recd. from each proc. in an iteration.*/

    sent_to_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* amount of data sent to each proc so far. Used in
       ADIOI_Fill_send_buffer. initialized to 0 here. */

    send_buf_idx = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    curr_to_proc = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    done_to_proc = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    /* Above three are used in ADIOI_Fill_send_buffer*/

    start_pos = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    /* used to store the starting value of curr_offlen_ptr[i] in
       this iteration */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    if (!buftype_is_contig) {
	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;
    }
    MPI_Type_extent(datatype, &buftype_extent);


/* I need to check if there are any outstanding nonblocking writes to
   the file, which could potentially interfere with the writes taking
   place in this collective write call. Since this is not likely to be
   common, let me do the simplest thing possible here: Each process
   completes all pending nonblocking operations before completing. */

    /*ADIOI_Complete_async(error_code);
    if (*error_code != MPI_SUCCESS) return;
    MPI_Barrier(fd->comm);
    */

    done = 0;
    off = st_loc;

    for (m=0; m < ntimes; m++) {
       /* go through all others_req and check which will be satisfied
          by the current write */

       /* Note that MPI guarantees that displacements in filetypes are in
          monotonically nondecreasing order and that, for writes, the
	  filetypes cannot specify overlapping regions in the file. This
	  simplifies implementation a bit compared to reads. */

          /* off = start offset in the file for the data to be written in
                   this iteration
             size = size of data written (bytes) corresponding to off
             req_off = off in file for a particular contiguous request
                       minus what was satisfied in previous iteration
             req_size = size corresponding to req_off */

	/* first calculate what should be communicated */

	for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0;

	size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done);

	for (i=0; i < nprocs; i++) {
	    if (others_req[i].count) {
		start_pos[i] = curr_offlen_ptr[i];
		for (j=curr_offlen_ptr[i]; j<others_req[i].count; j++) {
		    if (partial_recv[i]) {
			/* this request may have been partially
			   satisfied in the previous iteration. */
			req_off = others_req[i].offsets[j] +
			    partial_recv[i];
                        req_len = others_req[i].lens[j] -
			    partial_recv[i];
			partial_recv[i] = 0;
			/* modify the off-len pair to reflect this change */
			others_req[i].offsets[j] = req_off;
			others_req[i].lens[j] = req_len;
		    }
		    else {
			req_off = others_req[i].offsets[j];
                        req_len = others_req[i].lens[j];
		    }
		    if (req_off < off + size) {
			count[i]++;
      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
			MPI_Address(write_buf+req_off-off,
                               &(others_req[i].mem_ptrs[j]));
      ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off));
			recv_size[i] += (int)(ADIOI_MIN(off + size - req_off,
                                      (unsigned)req_len));

			if (off+size-req_off < (unsigned)req_len)
			{
			    partial_recv[i] = (int) (off + size - req_off);

			    /* --BEGIN ERROR HANDLING-- */
			    if ((j+1 < others_req[i].count) &&
                                 (others_req[i].offsets[j+1] < off+size))
			    {
				*error_code = MPIO_Err_create_code(MPI_SUCCESS,
								   MPIR_ERR_RECOVERABLE,
								   myname,
								   __LINE__,
								   MPI_ERR_ARG,
								   "Filetype specifies overlapping write regions (which is illegal according to the MPI-2 specification)", 0);
				/* allow to continue since additional
				 * communication might have to occur
				 */
			    }
			    /* --END ERROR HANDLING-- */
			    break;
			}
		    }
		    else break;
		}
		curr_offlen_ptr[i] = j;
	    }
	}

	ADIOI_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
                            len_list, send_size, recv_size, off, size, count,
                            start_pos, partial_recv,
                            sent_to_proc, nprocs, myrank,
			    buftype_is_contig, contig_access_count,
			    min_st_offset, fd_size, fd_start, fd_end,
			    others_req, send_buf_idx, curr_to_proc,
                            done_to_proc, &hole, m, buftype_extent, buf_idx,
			    error_code);
        if (*error_code != MPI_SUCCESS) return;

	flag = 0;
	for (i=0; i<nprocs; i++)
	    if (count[i]) flag = 1;

	if (flag) {
      ADIOI_Assert(size == (int)size);
	    ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                        off, &status, error_code);
	    if (*error_code != MPI_SUCCESS) return;
	}

	off += size;
	done += size;
    }

    for (i=0; i<nprocs; i++) count[i] = recv_size[i] = 0;
    for (m=ntimes; m<max_ntimes; m++) {
	ADIOI_Assert(size == (int)size);
	/* nothing to recv, but check for send. */
	ADIOI_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
                            len_list, send_size, recv_size, off, (int)size, count,
                            start_pos, partial_recv,
                            sent_to_proc, nprocs, myrank,
			    buftype_is_contig, contig_access_count,
			    min_st_offset, fd_size, fd_start, fd_end,
			    others_req, send_buf_idx,
                            curr_to_proc, done_to_proc, &hole, m,
                            buftype_extent, buf_idx, error_code);
        if (*error_code != MPI_SUCCESS) return;
    }

    ADIOI_Free(curr_offlen_ptr);
    ADIOI_Free(count);
    ADIOI_Free(partial_recv);
    ADIOI_Free(send_size);
    ADIOI_Free(recv_size);
    ADIOI_Free(sent_to_proc);
    ADIOI_Free(start_pos);
    ADIOI_Free(send_buf_idx);
    ADIOI_Free(curr_to_proc);
    ADIOI_Free(done_to_proc);
}
Ejemplo n.º 20
0
void ADIOI_SVM_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    MPI_Datatype copy_etype, copy_filetype;
    int combiner, i, j, k, filetype_is_contig, ntimes, err;
    ADIOI_Flatlist_node *flat_file;
    ADIO_Offset curr_fsize, alloc_size, size, len, done;
    ADIO_Status status;
    char *buf;
    /* added by RAY */
    FileTable  files_local;
    /* end RAY */
    

    /*printf("Entering ADIOI_SVM_Fcntl().\n");*/
    
    switch(flag) {
    case ADIO_FCNTL_SET_VIEW:
        /* free copies of old etypes and filetypes and delete flattened 
           version of filetype if necessary */

	MPI_Type_get_envelope(fd->etype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->etype));

	ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (!filetype_is_contig) ADIOI_Delete_flattened(fd->filetype);

	MPI_Type_get_envelope(fd->filetype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->filetype));

	/* set new info */
	ADIO_SetInfo(fd, fcntl_struct->info, &err);

        /* set new etypes and filetypes */

	MPI_Type_get_envelope(fcntl_struct->etype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) fd->etype = fcntl_struct->etype;
	else {
	    MPI_Type_contiguous(1, fcntl_struct->etype, &copy_etype);
	    MPI_Type_commit(&copy_etype);
	    fd->etype = copy_etype;
	}
	MPI_Type_get_envelope(fcntl_struct->filetype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) 
	    fd->filetype = fcntl_struct->filetype;
	else {
	    MPI_Type_contiguous(1, fcntl_struct->filetype, &copy_filetype);
	    MPI_Type_commit(&copy_filetype);
	    fd->filetype = copy_filetype;
	    ADIOI_Flatten_datatype(fd->filetype);
            /* this function will not flatten the filetype if it turns out
               to be all contiguous. */
	}

	MPI_Type_size(fd->etype, &(fd->etype_size));
	fd->disp = fcntl_struct->disp;

        /* reset MPI-IO file pointer to point to the first byte that can
           be accessed in this view. */

        ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (filetype_is_contig) fd->fp_ind = fcntl_struct->disp;
	else {
	    flat_file = ADIOI_Flatlist;
	    while (flat_file->type != fd->filetype) 
		flat_file = flat_file->next;
	    for (i=0; i<flat_file->count; i++) {
		if (flat_file->blocklens[i]) {
		    fd->fp_ind = fcntl_struct->disp + flat_file->indices[i];
		    break;
		}
	    }
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_GET_FSIZE:
    	/* changed by RAY */
	/*fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);*/
	files_local = (FileTable)NULL;
	if (ADIOI_SVM_Lookup_fd(fd->fd_sys,&files_local)==MPI_SUCCESS)
	   fcntl_struct->fsize = files_local->size;
	else
	   fcntl_struct->fsize = -1;
	/*if (fd->fp_sys_posn != -1) */
	     /*lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);*/
	free(files_local);
	/* end RAY */
	*error_code = (fcntl_struct->fsize == -1) ? MPI_ERR_UNKNOWN : MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_DISKSPACE:
	/* will be called by one process only */
	/* added by RAY */
	/* !!! Not with AD_SVM - here every process calls it !!! */
	/* end RAY */
	/* On file systems with no preallocation function, I have to 
           explicitly write 
           to allocate space. Since there could be holes in the file, 
           I need to read up to the current file size, write it back, 
           and then write beyond that depending on how much 
           preallocation is needed.
           read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */

	/* changed by RAY */
	/*curr_fsize = lseek(fd->fd_sys, 0, SEEK_END);*/
	files_local = (FileTable)NULL;
	if (ADIOI_SVM_Lookup_fd(fd->fd_sys,&files_local)==MPI_SUCCESS)
	   curr_fsize = files_local->size;
	else
	   curr_fsize = -1;

	free(files_local);

	if (curr_fsize < fcntl_struct->diskspace)
	   ADIOI_SVM_Resize(fd,fcntl_struct->diskspace,error_code);
		
	if (fd->fp_sys_posn != -1)
	   /*lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);*/
	   ADIOI_SVM_Lseek(fd,fd->fp_sys_posn);
	/* end RAY */ 
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_IOMODE:
        /* for implementing PFS I/O modes. will not occur in MPI-IO
           implementation.*/
	if (fd->iomode != fcntl_struct->iomode) {
	    fd->iomode = fcntl_struct->iomode;
	    MPI_Barrier(MPI_COMM_WORLD);
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_ATOMICITY:
	fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
	*error_code = MPI_SUCCESS;
	break;

    default:
	printf("Unknown flag passed to ADIOI_SVM_Fcntl\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
}
/* Copied from ADIOI_PVFS2_OldWriteStrided.  It would be good to have fewer
 * copies of this code... */
void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count,
			MPI_Datatype datatype, int file_ptr_type,
			ADIO_Offset offset, ADIO_Status *status,
			int *error_code)
{
    /* as with all the other WriteStrided functions, offset is in units of
     * etype relative to the filetype */

    /* Since zoidfs does not support file locking, can't do buffered writes
       as on Unix */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, bwr_size, fwr_size=0, st_index=0;
    int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp, start_off, initial_off;
    int flag, st_fwr_size, st_n_filetypes;
    int err_flag=0;

    size_t mem_list_count, file_list_count;
    const void ** mem_offsets;
    uint64_t *file_offsets;
    size_t *mem_lengths;
    uint64_t *file_lengths;
    int total_blks_to_write;

    int max_mem_list, max_file_list;

    int b_blks_wrote;
    int f_data_wrote;
    int size_wrote=0, n_write_lists, extra_blks;

    int end_bwr_size, end_fwr_size;
    int start_k, start_j, new_file_write, new_buffer_write;
    int start_mem_offset;
    ADIOI_ZOIDFS_object *zoidfs_obj_ptr;
    MPI_Offset total_bytes_written=0;
    static char myname[] = "ADIOI_ZOIDFS_WRITESTRIDED";

    /* note: I don't know what zoidfs will do if you pass it a super-long list,
     * so let's keep with the PVFS limit for now */
#define MAX_ARRAY_SIZE 64

    /* --BEGIN ERROR HANDLING-- */
    if (fd->atomicity) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
					   MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_ARG,
					   "Atomic noncontiguous writes are not supported by ZOIDFS", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    /* the HDF5 tests showed a bug in this list processing code (see many many
     * lines down below).  We added a workaround, but common HDF5 file types
     * are actually contiguous and do not need the expensive workarond */
    if (!filetype_is_contig) {
	flat_file = ADIOI_Flatlist;
	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
	if (flat_file->count == 1 && !buftype_is_contig)
	    filetype_is_contig = 1;
    }

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;
    
    bufsize = buftype_size * count;

    zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr;

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file.  */
        uint64_t file_offsets;
	uint64_t file_lengths;

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
	
	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
	    off = fd->disp + etype_size * offset;
	}
	else off = fd->fp_ind;

	file_list_count = 1;
	file_offsets = off;
	file_lengths = 0;
	total_blks_to_write = count*flat_buf->count;
	b_blks_wrote = 0;

	/* allocate arrays according to max usage */
	if (total_blks_to_write > MAX_ARRAY_SIZE)
	    mem_list_count = MAX_ARRAY_SIZE;
	else mem_list_count = total_blks_to_write;
	mem_offsets = (void*)ADIOI_Malloc(mem_list_count*sizeof(void*));
	mem_lengths = (size_t*)ADIOI_Malloc(mem_list_count*sizeof(size_t));

	j = 0;
	/* step through each block in memory, filling memory arrays */
	while (b_blks_wrote < total_blks_to_write) {
	    for (i=0; i<flat_buf->count; i++) {
		mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = 
		    buf + 
		     j*buftype_extent + 
		     flat_buf->indices[i];
		mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = 
		    flat_buf->blocklens[i];
		file_lengths += flat_buf->blocklens[i];
		b_blks_wrote++;
		if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
		    (b_blks_wrote == total_blks_to_write)) {

		    /* in the case of the last write list call,
		       adjust mem_list_count */
		    if (b_blks_wrote == total_blks_to_write) {
		        mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
			/* in case last write list call fills max arrays */
			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
		    }
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
		    NO_STALE(err_flag, fd, zoidfs_obj_ptr,
				    zoidfs_write(zoidfs_obj_ptr, 
					    mem_list_count,
					    mem_offsets, mem_lengths, 
					    1, &file_offsets, &file_lengths, ZOIDFS_NO_OP_HINT));

		    /* --BEGIN ERROR HANDLING-- */
		    if (err_flag != ZFS_OK) {
			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
							   MPIR_ERR_RECOVERABLE,
							   myname, __LINE__,
							   ADIOI_ZOIDFS_error_convert(err_flag),
							   "Error in zoidfs_write", 0);
			break;
		    }
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
		    total_bytes_written += file_lengths;
		  
		    /* in the case of error or the last write list call, 
		     * leave here */
		    /* --BEGIN ERROR HANDLING-- */
		    if (err_flag) {
			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
							   MPIR_ERR_RECOVERABLE,
							   myname, __LINE__,
							   ADIOI_ZOIDFS_error_convert(err_flag),
							   "Error in zoidfs_write", 0);
			break;
		    }
		    /* --END ERROR HANDLING-- */
		    if (b_blks_wrote == total_blks_to_write) break;

		    file_offsets += file_lengths;
		    file_lengths = 0;
		} 
	    } /* for (i=0; i<flat_buf->count; i++) */
	    j++;
	} /* while (b_blks_wrote < total_blks_to_write) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);

	if (file_ptr_type == ADIO_INDIVIDUAL) 
	    fd->fp_ind += total_bytes_written;

	if (!err_flag)  *error_code = MPI_SUCCESS;

	fd->fp_sys_posn = -1;   /* clear this. */

#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

	ADIOI_Delete_flattened(datatype);
	return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* already know that file is noncontiguous from above */
    /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;
    initial_off = offset;

    /* for each case - ADIO_Individual pointer or explicit, find offset
       (file offset in bytes), n_filetypes (how many filetypes into file 
       to start), fwr_size (remaining amount of data in present file
       block), and st_index (start point in terms of blocks in starting
       filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
	n_filetypes = -1;
	flag = 0;
	while (!flag) {
	    n_filetypes++;
	    for (i=0; i<flat_file->count; i++) {
	        if (disp + flat_file->indices[i] + 
		    ((ADIO_Offset) n_filetypes)*filetype_extent +
		      flat_file->blocklens[i] >= offset) {
		  st_index = i;
		  fwr_size = disp + flat_file->indices[i] + 
		    ((ADIO_Offset) n_filetypes)*filetype_extent
		    + flat_file->blocklens[i] - offset;
		  flag = 1;
		  break;
		}
	    }
	} /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
	n_filetypes = (int) (offset / n_etypes_in_filetype);
	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	size_in_filetype = etype_in_filetype * etype_size;
	
	sum = 0;
	for (i=0; i<flat_file->count; i++) {
	    sum += flat_file->blocklens[i];
	    if (sum > size_in_filetype) {
	        st_index = i;
		fwr_size = sum - size_in_filetype;
		abs_off_in_filetype = flat_file->indices[i] +
		    size_in_filetype - (sum - flat_file->blocklens[i]);
		break;
	    }
	}

	/* abs. offset in bytes in the file */
	offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
	    abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    start_off = offset;
    st_fwr_size = fwr_size;
    st_n_filetypes = n_filetypes;
    
    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

	/* only one memory off-len pair, so no array */
        size_t mem_lengths;
	size_t mem_offsets;
        
	i = 0;
	j = st_index;
	off = offset;
	n_filetypes = st_n_filetypes;
        
	mem_list_count = 1;
        
	/* determine how many blocks in file to write */
	f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
	total_blks_to_write = 1;
	if (j < (flat_file->count -1)) j++;
	else {
	    j = 0;
	    n_filetypes++;
	}
	while (f_data_wrote < bufsize) {
	    f_data_wrote += flat_file->blocklens[j];
	    total_blks_to_write++;
	    if (j<(flat_file->count-1)) j++;
	    else j = 0; 
	}
	    
	j = st_index;
	n_filetypes = st_n_filetypes;
	n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
	extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
        
	mem_offsets = (size_t)buf;
	mem_lengths = 0;
        
	/* if at least one full writelist, allocate file arrays
	   at max array size and don't free until very end */
	if (n_write_lists) {
	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int64_t));
	    file_lengths = (uint64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(uint64_t));
	}
	/* if there's no full writelist allocate file arrays according
	   to needed size (extra_blks) */
	else {
	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
                                                  sizeof(int64_t));
            file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks*
                                                  sizeof(uint64_t));
        }
        
        /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
        for (i=0; i<n_write_lists; i++) {
            file_list_count = MAX_ARRAY_SIZE;
            if(!i) {
                file_offsets[0] = offset;
                file_lengths[0] = st_fwr_size;
                mem_lengths = st_fwr_size;
            }
            for (k=0; k<MAX_ARRAY_SIZE; k++) {
                if (i || k) {
                    file_offsets[k] = disp + 
			((ADIO_Offset)n_filetypes)*filetype_extent
			+ flat_file->indices[j];
                    file_lengths[k] = flat_file->blocklens[j];
                    mem_lengths += file_lengths[k];
                }
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
	    NO_STALE(err_flag, fd, zoidfs_obj_ptr,
			    zoidfs_write(zoidfs_obj_ptr,
				    1, buf, &mem_lengths,
				    file_list_count, 
				    file_offsets, file_lengths, ZOIDFS_NO_OP_HINT));

#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != ZFS_OK) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_ZOIDFS_error_convert(err_flag),
						   "Error in zoidfs_write", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */
	    total_bytes_written += mem_lengths;

            mem_offsets += mem_lengths;
            mem_lengths = 0;

        } /* for (i=0; i<n_write_lists; i++) */

        /* for file arrays smaller than MAX_ARRAY_SIZE (last write_list call) */
        if (extra_blks) {
            file_list_count = extra_blks;
            if(!i) {
                file_offsets[0] = offset;
                file_lengths[0] = ADIOI_MIN(st_fwr_size, bufsize);
            }
            for (k=0; k<extra_blks; k++) {
                if(i || k) {
                    file_offsets[k] = disp + 
			((ADIO_Offset)n_filetypes)*filetype_extent +
			flat_file->indices[j];
		    /* XXX: double-check these casts  */
                    if (k == (extra_blks - 1)) {
                        file_lengths[k] = bufsize 
				- mem_lengths - mem_offsets +  (size_t)buf;
                    }
                    else file_lengths[k] = flat_file->blocklens[j];
                } /* if(i || k) */
                mem_lengths += file_lengths[k];
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<extra_blks; k++) */

#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
	    NO_STALE(err_flag, fd, zoidfs_obj_ptr, 
			    zoidfs_write(zoidfs_obj_ptr, 1, 
				    (const void **)&mem_offsets, 
				    &mem_lengths,
				    file_list_count, 
				    file_offsets, file_lengths, ZOIDFS_NO_OP_HINT));

#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != 0) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_ZOIDFS_error_convert(err_flag),
						   "Error in zoidfs_write", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */
	    total_bytes_written += mem_lengths;
        }
    } 
    else {
        /* noncontiguous in memory as well as in file */

        ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	size_wrote = 0;
	n_filetypes = st_n_filetypes;
	fwr_size = st_fwr_size;
	bwr_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;
	max_mem_list = 0;
	max_file_list = 0;

	/* run through and file max_file_list and max_mem_list so that you 
	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
	   if possible */

	while (size_wrote < bufsize) {
	    k = start_k;
	    new_buffer_write = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_write < bufsize-size_wrote)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data written and data to be
		   written in the next immediate write list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_write + flat_buf->blocklens[k] + 
			size_wrote) > bufsize) {
		        end_bwr_size = new_buffer_write + 
			    flat_buf->blocklens[k] - (bufsize - size_wrote);
			new_buffer_write = bufsize - size_wrote;
		    }
		    else {
		        new_buffer_write += flat_buf->blocklens[k];
			end_bwr_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (bwr_size > (bufsize - size_wrote)) {
		        new_buffer_write = bufsize - size_wrote;
			bwr_size = new_buffer_write;
		    }
		    else new_buffer_write = bwr_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_write < bufsize-size_wrote)) */
	    j = start_j;
	    new_file_write = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_write < new_buffer_write)) { 
	        if(file_list_count) {
		    if((new_file_write + flat_file->blocklens[j]) > 
		       new_buffer_write) {
		        end_fwr_size = new_buffer_write - new_file_write;
			new_file_write = new_buffer_write;
			j--;
		    }
		    else {
		        new_file_write += flat_file->blocklens[j];
			end_fwr_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (fwr_size > new_buffer_write) {
		        new_file_write = new_buffer_write;
			fwr_size = new_file_write;
		    }
		    else new_file_write = fwr_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_write < new_buffer_write) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_write = 0;
		    mem_list_count = 0;
		    while (new_buffer_write < new_file_write) {
		        if(mem_list_count) {
			    if((new_buffer_write + flat_buf->blocklens[k]) >
			       new_file_write) {
			        end_bwr_size = new_file_write - 
				    new_buffer_write;
				new_buffer_write = new_file_write;
				k--;
			    }
			    else {
			        new_buffer_write += flat_buf->blocklens[k];
				end_bwr_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_write = bwr_size;
			    if (bwr_size > (bufsize - size_wrote)) {
			        new_buffer_write = bufsize - size_wrote;
				bwr_size = new_buffer_write;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_write < new_file_write) */
		} /* if ((new_file_write < new_buffer_write) &&
		     (file_list_count == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_write < bufsize-size_wrote)) */

	    /*  fakes filling the writelist arrays of lengths found above  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
		if(i) {
		    if (i == (mem_list_count - 1)) {
			if (flat_buf->blocklens[k] == end_bwr_size)
			    bwr_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
			    k--;
			    buf_count--;
			}
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
		if (i) {
		    if (i == (file_list_count - 1)) {
			if (flat_file->blocklens[j] == end_fwr_size)
			    fwr_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
			    j--;
			}
		    }
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    size_wrote += new_buffer_write;
	    start_k = k;
	    start_j = j;
	    if (max_mem_list < mem_list_count)
	        max_mem_list = mem_list_count;
	    if (max_file_list < file_list_count)
	        max_file_list = file_list_count;
	} /* while (size_wrote < bufsize) */

	/* one last check before we actually carry out the operation:
	 * this code has hard-to-fix bugs when a noncontiguous file type has
	 * such large pieces that the sum of the lengths of the memory type is
	 * not larger than one of those pieces (and vice versa for large memory
	 * types and many pices of file types.  In these cases, give up and
	 * fall back to naive reads and writes.  The testphdf5 test created a
	 * type with two very large memory regions and 600 very small file
	 * regions.  The same test also created a type with one very large file
	 * region and many (700) very small memory regions.  both cases caused
	 * problems for this code */

	if ( ( (file_list_count == 1) && 
		    (new_file_write < flat_file->blocklens[0] ) ) ||
		((mem_list_count == 1) && 
		    (new_buffer_write < flat_buf->blocklens[0]) ) ||
		((file_list_count == MAX_ARRAY_SIZE) && 
		    (new_file_write < flat_buf->blocklens[0]) ) ||
		( (mem_list_count == MAX_ARRAY_SIZE) &&
		    (new_buffer_write < flat_file->blocklens[0])) )
	{
	    ADIOI_Delete_flattened(datatype);
	    ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
		    file_ptr_type, initial_off, status, error_code);
	    return;
	}


	mem_offsets = (void *)ADIOI_Malloc(max_mem_list*sizeof(void *));
	mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t));
	file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
	file_lengths = (uint64_t*)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
	    
	size_wrote = 0;
	n_filetypes = st_n_filetypes;
	fwr_size = st_fwr_size;
	bwr_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;

	/*  this section calculates mem_list_count and file_list_count
	    and also finds the possibly odd sized last array elements
	    in new_fwr_size and new_bwr_size  */
	
	while (size_wrote < bufsize) {
	    k = start_k;
	    new_buffer_write = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_write < bufsize-size_wrote)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data written and data to be
		   written in the next immediate write list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_write + flat_buf->blocklens[k] + 
			size_wrote) > bufsize) {
		        end_bwr_size = new_buffer_write + 
			    flat_buf->blocklens[k] - (bufsize - size_wrote);
			new_buffer_write = bufsize - size_wrote;
		    }
		    else {
		        new_buffer_write += flat_buf->blocklens[k];
			end_bwr_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (bwr_size > (bufsize - size_wrote)) {
		        new_buffer_write = bufsize - size_wrote;
			bwr_size = new_buffer_write;
		    }
		    else new_buffer_write = bwr_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_write < bufsize-size_wrote)) */
	    j = start_j;
	    new_file_write = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_write < new_buffer_write)) {
	        if(file_list_count) {
		    if((new_file_write + flat_file->blocklens[j]) > 
		       new_buffer_write) {
		        end_fwr_size = new_buffer_write - new_file_write;
			new_file_write = new_buffer_write;
			j--;
		    }
		    else {
		        new_file_write += flat_file->blocklens[j];
			end_fwr_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (fwr_size > new_buffer_write) {
		        new_file_write = new_buffer_write;
			fwr_size = new_file_write;
		    }
		    else new_file_write = fwr_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_write < new_buffer_write) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_write = 0;
		    mem_list_count = 0;
		    while (new_buffer_write < new_file_write) {
		        if(mem_list_count) {
			    if((new_buffer_write + flat_buf->blocklens[k]) >
			       new_file_write) {
			        end_bwr_size = new_file_write -
				  new_buffer_write;
				new_buffer_write = new_file_write;
				k--;
			    }
			    else {
			        new_buffer_write += flat_buf->blocklens[k];
				end_bwr_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_write = bwr_size;
			    if (bwr_size > (bufsize - size_wrote)) {
			        new_buffer_write = bufsize - size_wrote;
				bwr_size = new_buffer_write;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_write < new_file_write) */
		} /* if ((new_file_write < new_buffer_write) &&
		     (file_list_count == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_write < bufsize-size_wrote)) */

	    /*  fills the allocated writelist arrays  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
	        mem_offsets[i] = buf + 
			buftype_extent* (buf_count/flat_buf->count) +
				  flat_buf->indices[k];
		
		if(!i) {
		    mem_lengths[0] = bwr_size;
		    mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
		}
		else {
		    if (i == (mem_list_count - 1)) {
		        mem_lengths[i] = end_bwr_size;
			if (flat_buf->blocklens[k] == end_bwr_size)
			    bwr_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
			    k--;
			    buf_count--;
			}
		    }
		    else {
		        mem_lengths[i] = flat_buf->blocklens[k];
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
	        file_offsets[i] = disp + flat_file->indices[j] + 
		    ((ADIO_Offset)n_filetypes) * filetype_extent;
	        if (!i) {
		    file_lengths[0] = fwr_size;
		    file_offsets[0] += flat_file->blocklens[j] - fwr_size;
		}
		else {
		    if (i == (file_list_count - 1)) {
		        file_lengths[i] = end_fwr_size;
			if (flat_file->blocklens[j] == end_fwr_size)
			    fwr_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
			    j--;
			}
		    }
		    else file_lengths[i] = flat_file->blocklens[j];
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */

#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
	    NO_STALE(err_flag, fd, zoidfs_obj_ptr,
			    zoidfs_write(zoidfs_obj_ptr, 
				    mem_list_count, mem_offsets, mem_lengths, 
				    file_list_count, 
				    file_offsets, file_lengths, ZOIDFS_NO_OP_HINT));
	    /* --BEGIN ERROR HANDLING-- */
	    if (err_flag != ZFS_OK) {
		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
						   MPIR_ERR_RECOVERABLE,
						   myname, __LINE__,
						   ADIOI_ZOIDFS_error_convert(err_flag),
						   "Error in zoidfs_write", 0);
		goto error_state;
	    }
	    /* --END ERROR HANDLING-- */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
	    size_wrote += new_buffer_write;
	    total_bytes_written += new_buffer_write; /* XXX: is this right? */
	    start_k = k;
	    start_j = j;
	} /* while (size_wrote < bufsize) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);
    }
    /* when incrementing fp_ind, need to also take into account the file type:
     * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
     * if we wrote N elements, offset needs to point at beginning of type, not
     * at empty region at offset N+1).  
     *
     * As we discussed on mpich-discuss in may/june 2009, the code below might
     * look wierd, but by putting fp_ind at the last byte written, the next
     * time we run through the strided code we'll update the fp_ind to the
     * right location. */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
	fd->fp_ind = file_offsets[file_list_count-1]+
	    file_lengths[file_list_count-1];
    }
    ADIOI_Free(file_offsets);
    ADIOI_Free(file_lengths);

    *error_code = MPI_SUCCESS;

error_state:
    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
Ejemplo n.º 22
0
void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    MPI_Datatype copy_etype, copy_filetype;
    int combiner, i, j, k, filetype_is_contig, err;
    ADIOI_Flatlist_node *flat_file;
    struct flock64 fl;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_XFS_FCNTL";
#endif

    switch(flag) {
    case ADIO_FCNTL_SET_VIEW:
        /* free copies of old etypes and filetypes and delete flattened 
           version of filetype if necessary */

	MPI_Type_get_envelope(fd->etype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->etype));

	ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (!filetype_is_contig) ADIOI_Delete_flattened(fd->filetype);

	MPI_Type_get_envelope(fd->filetype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->filetype));

	/* set new info */
	ADIO_SetInfo(fd, fcntl_struct->info, &err);

        /* set new etypes and filetypes */

	MPI_Type_get_envelope(fcntl_struct->etype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) fd->etype = fcntl_struct->etype;
	else {
	    MPI_Type_contiguous(1, fcntl_struct->etype, &copy_etype);
	    MPI_Type_commit(&copy_etype);
	    fd->etype = copy_etype;
	}
	MPI_Type_get_envelope(fcntl_struct->filetype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) 
	    fd->filetype = fcntl_struct->filetype;
	else {
	    MPI_Type_contiguous(1, fcntl_struct->filetype, &copy_filetype);
	    MPI_Type_commit(&copy_filetype);
	    fd->filetype = copy_filetype;
	    ADIOI_Flatten_datatype(fd->filetype);
            /* this function will not flatten the filetype if it turns out
               to be all contiguous. */
	}

	MPI_Type_size(fd->etype, &(fd->etype_size));
	fd->disp = fcntl_struct->disp;

        /* reset MPI-IO file pointer to point to the first byte that can
           be accessed in this view. */

        ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (filetype_is_contig) fd->fp_ind = fcntl_struct->disp;
	else {
	    flat_file = ADIOI_Flatlist;
	    while (flat_file->type != fd->filetype) 
		flat_file = flat_file->next;
	    for (i=0; i<flat_file->count; i++) {
		if (flat_file->blocklens[i]) {
		    fd->fp_ind = fcntl_struct->disp + flat_file->indices[i];
		    break;
		}
	    }
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_GET_FSIZE:
	fcntl_struct->fsize = lseek64(fd->fd_sys, 0, SEEK_END);
#ifdef PRINT_ERR_MSG
	*error_code = (fcntl_struct->fsize == -1) ? MPI_ERR_UNKNOWN : MPI_SUCCESS;
#else
	if (fcntl_struct->fsize == -1) {
	    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
			      myname, "I/O Error", "%s", strerror(errno));
	    ADIOI_Error(fd, *error_code, myname);	    
	}
	else *error_code = MPI_SUCCESS;
#endif
	break;

    case ADIO_FCNTL_SET_DISKSPACE:
	i = 0;
	fl.l_start = 0;
	fl.l_whence = SEEK_SET;
	fl.l_len = fcntl_struct->diskspace;
	err = fcntl(fd->fd_sys, F_RESVSP64, &fl);
	if (err) i = 1;
	if (fcntl_struct->diskspace > lseek64(fd->fd_sys, 0, SEEK_END)) {
	    /* also need to set the file size */
	    err = ftruncate64(fd->fd_sys, fcntl_struct->diskspace);
	    if (err) i = 1;
	}
#ifdef PRINT_ERR_MSG
	*error_code = (i == 0) ? MPI_SUCCESS : MPI_ERR_UNKNOWN;
#else
	if (i == 1) {
	    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
			      myname, "I/O Error", "%s", strerror(errno));
	    ADIOI_Error(fd, *error_code, myname);	    
	}
	else *error_code = MPI_SUCCESS;
#endif
	break;

    case ADIO_FCNTL_SET_IOMODE:
        /* for implementing PFS I/O modes. will not occur in MPI-IO
           implementation.*/
	if (fd->iomode != fcntl_struct->iomode) {
	    fd->iomode = fcntl_struct->iomode;
	    MPI_Barrier(MPI_COMM_WORLD);
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_ATOMICITY:
	fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
	*error_code = MPI_SUCCESS;
	break;

    default:
	FPRINTF(stderr, "Unknown flag passed to ADIOI_XFS_Fcntl\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
}
Ejemplo n.º 23
0
void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code) 
{
/* Since PVFS does not support file locking, can't do buffered writes
   as on Unix */

/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
    int bufsize, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, disp, start_off;
    int flag, st_fwr_size, st_n_filetypes;
    int new_bwr_size, new_fwr_size, err_flag=0;

    int mem_list_count, file_list_count;
    char ** mem_offsets;
    int64_t *file_offsets;
    int *mem_lengths;
    int32_t *file_lengths;
    int total_blks_to_write;

    int max_mem_list, max_file_list;

    int b_blks_wrote;
    int f_data_wrote;
    int size_wrote=0, n_write_lists, extra_blks;

    int end_bwr_size, end_fwr_size;
    int start_k, start_j, new_file_write, new_buffer_write;
    int start_mem_offset;
#define MAX_ARRAY_SIZE 1024
    static char myname[] = "ADIOI_PVFS_WRITESTRIDED";

/* PFS file pointer modes are not relevant here, because PFS does
   not support strided accesses. */

    /* --BEGIN ERROR HANDLING-- */
    if (fd->atomicity) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_INTERN,
					   "Atomic mode set in PVFS I/O function", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;
    
    bufsize = buftype_size * count;

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file.  */
        int64_t file_offsets;
	int32_t file_lengths;

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
	
	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
	    off = fd->disp + etype_size * offset;
	    pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
	}
	else off = pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);

	file_list_count = 1;
	file_offsets = off;
	file_lengths = 0;
	total_blks_to_write = count*flat_buf->count;
	b_blks_wrote = 0;

	/* allocate arrays according to max usage */
	if (total_blks_to_write > MAX_ARRAY_SIZE)
	    mem_list_count = MAX_ARRAY_SIZE;
	else mem_list_count = total_blks_to_write;
	mem_offsets = (char**)ADIOI_Malloc(mem_list_count*sizeof(char*));
	mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));

	j = 0;
	/* step through each block in memory, filling memory arrays */
	while (b_blks_wrote < total_blks_to_write) {
	    for (i=0; i<flat_buf->count; i++) {
		mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = 
		    ((char*)buf + j*buftype_extent + flat_buf->indices[i]);
		mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = 
		    flat_buf->blocklens[i];
		file_lengths += flat_buf->blocklens[i];
		b_blks_wrote++;
		if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
		    (b_blks_wrote == total_blks_to_write)) {

		    /* in the case of the last read list call,
		       adjust mem_list_count */
		    if (b_blks_wrote == total_blks_to_write) {
		        mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
			/* in case last read list call fills max arrays */
			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
		    }

		    pvfs_write_list(fd->fd_sys ,mem_list_count, mem_offsets,
				   mem_lengths, file_list_count,
				   &file_offsets, &file_lengths);
		  
		    /* in the case of the last read list call, leave here */
		    if (b_blks_wrote == total_blks_to_write) break;

		    file_offsets += file_lengths;
		    file_lengths = 0;
		} 
	    } /* for (i=0; i<flat_buf->count; i++) */
	    j++;
	} /* while (b_blks_wrote < total_blks_to_write) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);

	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	if (err_flag) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
	else *error_code = MPI_SUCCESS;

	fd->fp_sys_posn = -1;   /* clear this. */

#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

	ADIOI_Delete_flattened(datatype);
	return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* already know that file is noncontiguous from above */
    /* noncontiguous in file */

/* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;

    /* for each case - ADIO_Individual pointer or explicit, find offset
       (file offset in bytes), n_filetypes (how many filetypes into file 
       to start), fwr_size (remaining amount of data in present file
       block), and st_index (start point in terms of blocks in starting
       filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
	n_filetypes = -1;
	flag = 0;
	while (!flag) {
	    n_filetypes++;
	    for (i=0; i<flat_file->count; i++) {
	        if (disp + flat_file->indices[i] + 
		    (ADIO_Offset) n_filetypes*filetype_extent +
		      flat_file->blocklens[i] >= offset) {
		  st_index = i;
		  fwr_size = disp + flat_file->indices[i] + 
		    (ADIO_Offset) n_filetypes*filetype_extent
		    + flat_file->blocklens[i] - offset;
		  flag = 1;
		  break;
		}
	    }
	} /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
	n_filetypes = (int) (offset / n_etypes_in_filetype);
	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	size_in_filetype = etype_in_filetype * etype_size;
	
	sum = 0;
	for (i=0; i<flat_file->count; i++) {
	    sum += flat_file->blocklens[i];
	    if (sum > size_in_filetype) {
	        st_index = i;
		fwr_size = sum - size_in_filetype;
		abs_off_in_filetype = flat_file->indices[i] +
		    size_in_filetype - (sum - flat_file->blocklens[i]);
		break;
	    }
	}

	/* abs. offset in bytes in the file */
	offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
	    abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    start_off = offset;
    st_fwr_size = fwr_size;
    st_n_filetypes = n_filetypes;
    
    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

        int mem_lengths;
	char *mem_offsets;
        
	i = 0;
	j = st_index;
	off = offset;
	n_filetypes = st_n_filetypes;
        
	mem_list_count = 1;
        
	/* determine how many blocks in file to read */
	f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
	total_blks_to_write = 1;
	j++;
	while (f_data_wrote < bufsize) {
	    f_data_wrote += flat_file->blocklens[j];
	    total_blks_to_write++;
	    if (j<(flat_file->count-1)) j++;
	    else j = 0; 
	}
	    
	j = st_index;
	n_filetypes = st_n_filetypes;
	n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
	extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
        
	mem_offsets = buf;
	mem_lengths = 0;
        
	/* if at least one full readlist, allocate file arrays
	   at max array size and don't free until very end */
	if (n_write_lists) {
	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int64_t));
	    file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int32_t));
	}
	/* if there's no full readlist allocate file arrays according
	   to needed size (extra_blks) */
	else {
	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
                                                  sizeof(int64_t));
            file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
                                                  sizeof(int32_t));
        }
        
        /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
        for (i=0; i<n_write_lists; i++) {
            file_list_count = MAX_ARRAY_SIZE;
            if(!i) {
                file_offsets[0] = offset;
                file_lengths[0] = st_fwr_size;
                mem_lengths = st_fwr_size;
            }
            for (k=0; k<MAX_ARRAY_SIZE; k++) {
                if (i || k) {
                    file_offsets[k] = disp + n_filetypes*filetype_extent
                      + flat_file->indices[j];
                    file_lengths[k] = flat_file->blocklens[j];
                    mem_lengths += file_lengths[k];
                }
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
            pvfs_write_list(fd->fd_sys, mem_list_count,
                           &mem_offsets, &mem_lengths,
                           file_list_count, file_offsets,
                           file_lengths);
            mem_offsets += mem_lengths;
            mem_lengths = 0;
        } /* for (i=0; i<n_write_lists; i++) */

        /* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
        if (extra_blks) {
            file_list_count = extra_blks;
            if(!i) {
                file_offsets[0] = offset;
                file_lengths[0] = st_fwr_size;
            }
            for (k=0; k<extra_blks; k++) {
                if(i || k) {
                    file_offsets[k] = disp + n_filetypes*filetype_extent +
                      flat_file->indices[j];
                    if (k == (extra_blks - 1)) {
                        file_lengths[k] = bufsize - (int32_t) mem_lengths
                          - (int32_t) mem_offsets + (int32_t)  buf;
                    }
                    else file_lengths[k] = flat_file->blocklens[j];
                } /* if(i || k) */
                mem_lengths += file_lengths[k];
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<extra_blks; k++) */
            pvfs_write_list(fd->fd_sys, mem_list_count, &mem_offsets,
                           &mem_lengths, file_list_count, file_offsets,
                           file_lengths);
        }
    } 
    else {
        /* noncontiguous in memory as well as in file */

        ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	size_wrote = 0;
	n_filetypes = st_n_filetypes;
	fwr_size = st_fwr_size;
	bwr_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;
	max_mem_list = 0;
	max_file_list = 0;

	/* run through and file max_file_list and max_mem_list so that you 
	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
	   if possible */

	while (size_wrote < bufsize) {
	    k = start_k;
	    new_buffer_write = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_write < bufsize-size_wrote)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_write + flat_buf->blocklens[k] + 
			size_wrote) > bufsize) {
		        end_bwr_size = new_buffer_write + 
			    flat_buf->blocklens[k] - (bufsize - size_wrote);
			new_buffer_write = bufsize - size_wrote;
		    }
		    else {
		        new_buffer_write += flat_buf->blocklens[k];
			end_bwr_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (bwr_size > (bufsize - size_wrote)) {
		        new_buffer_write = bufsize - size_wrote;
			bwr_size = new_buffer_write;
		    }
		    else new_buffer_write = bwr_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_write < bufsize-size_wrote)) */
	    j = start_j;
	    new_file_write = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_write < new_buffer_write)) {
	        if(file_list_count) {
		    if((new_file_write + flat_file->blocklens[j]) > 
		       new_buffer_write) {
		        end_fwr_size = new_buffer_write - new_file_write;
			new_file_write = new_buffer_write;
			j--;
		    }
		    else {
		        new_file_write += flat_file->blocklens[j];
			end_fwr_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (fwr_size > new_buffer_write) {
		        new_file_write = new_buffer_write;
			fwr_size = new_file_write;
		    }
		    else new_file_write = fwr_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_write < new_buffer_write) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_write = 0;
		    mem_list_count = 0;
		    while (new_buffer_write < new_file_write) {
		        if(mem_list_count) {
			    if((new_buffer_write + flat_buf->blocklens[k]) >
			       new_file_write) {
			        end_bwr_size = new_file_write - 
				    new_buffer_write;
				new_buffer_write = new_file_write;
				k--;
			    }
			    else {
			        new_buffer_write += flat_buf->blocklens[k];
				end_bwr_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_write = bwr_size;
			    if (bwr_size > (bufsize - size_wrote)) {
			        new_buffer_write = bufsize - size_wrote;
				bwr_size = new_buffer_write;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_write < new_file_write) */
		} /* if ((new_file_write < new_buffer_write) &&
		     (file_list_count == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_write < bufsize-size_wrote)) */

	    /*  fakes filling the writelist arrays of lengths found above  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
		if(i) {
		    if (i == (mem_list_count - 1)) {
			if (flat_buf->blocklens[k] == end_bwr_size)
			    bwr_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
			    k--;
			    buf_count--;
			}
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
		if (i) {
		    if (i == (file_list_count - 1)) {
			if (flat_file->blocklens[j] == end_fwr_size)
			    fwr_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
			    j--;
			}
		    }
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    size_wrote += new_buffer_write;
	    start_k = k;
	    start_j = j;
	    if (max_mem_list < mem_list_count)
	        max_mem_list = mem_list_count;
	    if (max_file_list < file_list_count)
	        max_file_list = file_list_count;
	    if (max_mem_list == max_mem_list == MAX_ARRAY_SIZE)
	        break;
	} /* while (size_wrote < bufsize) */

	mem_offsets = (char **)ADIOI_Malloc(max_mem_list*sizeof(char *));
	mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
	file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
	file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
	    
	size_wrote = 0;
	n_filetypes = st_n_filetypes;
	fwr_size = st_fwr_size;
	bwr_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;

	/*  this section calculates mem_list_count and file_list_count
	    and also finds the possibly odd sized last array elements
	    in new_fwr_size and new_bwr_size  */
	
	while (size_wrote < bufsize) {
	    k = start_k;
	    new_buffer_write = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_write < bufsize-size_wrote)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_write + flat_buf->blocklens[k] + 
			size_wrote) > bufsize) {
		        end_bwr_size = new_buffer_write + 
			    flat_buf->blocklens[k] - (bufsize - size_wrote);
			new_buffer_write = bufsize - size_wrote;
		    }
		    else {
		        new_buffer_write += flat_buf->blocklens[k];
			end_bwr_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (bwr_size > (bufsize - size_wrote)) {
		        new_buffer_write = bufsize - size_wrote;
			bwr_size = new_buffer_write;
		    }
		    else new_buffer_write = bwr_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_write < bufsize-size_wrote)) */
	    j = start_j;
	    new_file_write = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_write < new_buffer_write)) {
	        if(file_list_count) {
		    if((new_file_write + flat_file->blocklens[j]) > 
		       new_buffer_write) {
		        end_fwr_size = new_buffer_write - new_file_write;
			new_file_write = new_buffer_write;
			j--;
		    }
		    else {
		        new_file_write += flat_file->blocklens[j];
			end_fwr_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (fwr_size > new_buffer_write) {
		        new_file_write = new_buffer_write;
			fwr_size = new_file_write;
		    }
		    else new_file_write = fwr_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_write < new_buffer_write) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_write = 0;
		    mem_list_count = 0;
		    while (new_buffer_write < new_file_write) {
		        if(mem_list_count) {
			    if((new_buffer_write + flat_buf->blocklens[k]) >
			       new_file_write) {
			        end_bwr_size = new_file_write -
				  new_buffer_write;
				new_buffer_write = new_file_write;
				k--;
			    }
			    else {
			        new_buffer_write += flat_buf->blocklens[k];
				end_bwr_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_write = bwr_size;
			    if (bwr_size > (bufsize - size_wrote)) {
			        new_buffer_write = bufsize - size_wrote;
				bwr_size = new_buffer_write;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_write < new_file_write) */
		} /* if ((new_file_write < new_buffer_write) &&
		     (file_list_count == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_write < bufsize-size_wrote)) */

	    /*  fills the allocated readlist arrays  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
	        mem_offsets[i] = ((char*)buf + buftype_extent*
					 (buf_count/flat_buf->count) +
					 (int)flat_buf->indices[k]);
		
		if(!i) {
		    mem_lengths[0] = bwr_size;
		    mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
		}
		else {
		    if (i == (mem_list_count - 1)) {
		        mem_lengths[i] = end_bwr_size;
			if (flat_buf->blocklens[k] == end_bwr_size)
			    bwr_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
			    k--;
			    buf_count--;
			}
		    }
		    else {
		        mem_lengths[i] = flat_buf->blocklens[k];
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
	        file_offsets[i] = disp + flat_file->indices[j] + n_filetypes *
		    filetype_extent;
	        if (!i) {
		    file_lengths[0] = fwr_size;
		    file_offsets[0] += flat_file->blocklens[j] - fwr_size;
		}
		else {
		    if (i == (file_list_count - 1)) {
		        file_lengths[i] = end_fwr_size;
			if (flat_file->blocklens[j] == end_fwr_size)
			    fwr_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
			    j--;
			}
		    }
		    else file_lengths[i] = flat_file->blocklens[j];
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    pvfs_write_list(fd->fd_sys,mem_list_count, mem_offsets,
			   mem_lengths, file_list_count, file_offsets,
			   file_lengths);
	    size_wrote += new_buffer_write;
	    start_k = k;
	    start_j = j;
	} /* while (size_wrote < bufsize) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);
    }
    ADIOI_Free(file_offsets);
    ADIOI_Free(file_lengths);

    if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
    if (err_flag) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					   myname, __LINE__, MPI_ERR_IO,
					   "**io",
					   "**io %s", strerror(errno));
    }
    else *error_code = MPI_SUCCESS;

    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to 
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
Ejemplo n.º 24
0
void ADIOI_GRIDFTP_WriteDiscontig(ADIO_File fd, void *buf, int count,
				 MPI_Datatype datatype, int file_ptr_type,
				 ADIO_Offset offset, ADIO_Status *status, int
				 *error_code)
{
    char myname[]="ADIOI_GRIDFTP_WriteDiscontig";
    int myrank,nprocs;
    MPI_Aint btype_size,btype_extent;
    MPI_Aint ftype_size,ftype_extent;
    MPI_Aint etype_size;
    MPI_Aint extent;
    ADIOI_Flatlist_node *flat_file;
    int buf_contig,boff,i,nblks;
    globus_off_t start,end,goff;
    globus_size_t bytes_written;
    globus_result_t result;

    MPI_Comm_rank(fd->comm,&myrank);
    MPI_Comm_size(fd->comm,&nprocs);
    etype_size=fd->etype_size;
    MPI_Type_size(fd->filetype,&ftype_size);
    MPI_Type_extent(fd->filetype,&ftype_extent);
    /* This is arguably unnecessary, as this routine assumes that the
       buffer in memory is contiguous */
    MPI_Type_size(datatype,&btype_size);
    MPI_Type_extent(datatype,&btype_extent);
    ADIOI_Datatype_iscontig(datatype,&buf_contig);
    
    if ( ( btype_extent!=btype_size ) || ( ! buf_contig ) )
	{
	    FPRINTF(stderr,"[%d/%d] %s called with discontigous memory buffer\n",
		    myrank,nprocs,myname);
	    fflush(stderr);
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
		    myname, __LINE__, MPI_ERR_IO,
		    "**io",
		    "**io %s", globus_object_printable_to_string(globus_error_get(result)));
	    return;
	}
    /* from here we can assume btype_extent==btype_size */

    /* Flatten out fd->filetype so we know which blocks to skip */
    ADIOI_Flatten_datatype(fd->filetype);
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype && flat_file->next!=NULL)
	flat_file = flat_file->next;

    /* Figure out how big the area to write is */
    /* ASSUMPTION: ftype_size is an integer multiple of btype_size or vice versa. */
    start=(globus_off_t)(offset*etype_size);
    goff=start;
    boff=0;
    extent=0;
    nblks=0;
    while ( boff < (count*btype_size) )
	{
	    int blklen;

	    for (i=0;i<flat_file->count;i++)
		{
		    if ( (boff+flat_file->blocklens[i]) < (count*btype_size) )
			blklen=flat_file->blocklens[i];
		    else
			blklen=(count*btype_size)-boff;
		    boff+=blklen;
		    extent=MAX(extent,nblks*ftype_extent+flat_file->indices[i]+blklen);
		    if ( boff>=(count*btype_size) )
			break;
		}
	    nblks++;
	}
    if ( extent < count*btype_size )
	{
	    FPRINTF(stderr,"[%d/%d] %s error in computing extent -- extent %d is smaller than total bytes requested %d!\n",
		    myrank,nprocs,myname,extent,count*btype_size);
	    fflush(stderr);
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
		    myname, __LINE__, MPI_ERR_IO,
		    "**io",
		    "**io %s", globus_object_printable_to_string(globus_error_get(result)));
	    return;
	}
    end=start+(globus_off_t)extent;
    FPRINTF(stderr,"[%d/%d] %s writing %d bytes into extent of %d bytes starting at offset %Ld\n",
	    myrank,nprocs,myname,count*btype_size,extent,(long long)start);
    fflush(stderr);

    /* start up the globus partial write */
    globus_mutex_init(&writediscontig_ctl_lock, GLOBUS_NULL);
    globus_cond_init(&writediscontig_ctl_cond, GLOBUS_NULL);
    writediscontig_ctl_done=GLOBUS_FALSE;
    if ( (result=globus_ftp_client_partial_put(&(gridftp_fh[fd->fd_sys]),
					       fd->filename,
					       &(oattr[fd->fd_sys]),
					       GLOBUS_NULL,
					       start,
					       end,
					       writediscontig_ctl_cb,
					       GLOBUS_NULL))!=GLOBUS_SUCCESS )
	{
	    globus_err_handler("globus_ftp_client_partial_get",myname,result);
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
		    myname, __LINE__, MPI_ERR_IO,
		    "**io",
		    "**io %s", globus_object_printable_to_string(globus_error_get(result)));
	    return;
	}

    /* Do all the actual I/Os */
    boff=0;
    nblks=0;
    while ( boff < (count*btype_size) )
	{
	    int i,blklen;

	    for (i=0;i<flat_file->count;i++)
		{
		    if ( (boff+flat_file->blocklens[i]) < (count*btype_size) )
			blklen=flat_file->blocklens[i];
		    else
			blklen=(count*btype_size)-boff;
		    if ( blklen > 0 )
			{
			    goff=start+nblks*ftype_extent+((globus_off_t)flat_file->indices[i]);
			    /*
			    FPRINTF(stderr,"[%d/%d] %s writing %d bytes from boff=%d at goff=%Ld\n",myrank,nprocs,myname,blklen,boff,goff);
			    */
			    if ( (result=globus_ftp_client_register_write(&(gridftp_fh[fd->fd_sys]),
									  ((globus_byte_t *)buf)+boff,
									  (globus_size_t)blklen,
									  goff,
									  GLOBUS_TRUE,
									  writediscontig_data_cb,
									  (void *)(&bytes_written)))!=GLOBUS_SUCCESS )
				{
				    globus_err_handler("globus_ftp_client_register_write",myname,result);
				    *error_code=MPI_ERR_IO;
				    ADIOI_Error(fd,*error_code,myname);
				    return;
				}
			    boff+=blklen;
			    if ( boff>=(count*btype_size) )
				break;
			}
		}
	    nblks++;
	}

    
    /* The ctl callback won't start till the data callbacks complete, so it's
       safe to wait on just the ctl callback */
    globus_mutex_lock(&writediscontig_ctl_lock);
    while ( writediscontig_ctl_done!=GLOBUS_TRUE )
	globus_cond_wait(&writediscontig_ctl_cond,&writediscontig_ctl_lock);
    globus_mutex_unlock(&writediscontig_ctl_lock);
    globus_mutex_destroy(&writediscontig_ctl_lock);
    globus_cond_destroy(&writediscontig_ctl_cond);

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bytes_written);
#endif
    if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
    {
	fd->fp_ind += extent;
	fd->fp_sys_posn = fd->fp_ind;
    }
    else {
	fd->fp_sys_posn = offset + extent;
    }
}
Ejemplo n.º 25
0
void ADIOI_PVFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    MPI_Datatype copy_etype, copy_filetype;
    int combiner, i, j, k, filetype_is_contig, ntimes, err;
    ADIOI_Flatlist_node *flat_file;
    ADIO_Offset curr_fsize, alloc_size, size, len, done;
    ADIO_Status status;
    char *buf;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_PVFS_FCNTL";
#endif

    switch(flag) {
    case ADIO_FCNTL_SET_VIEW:
        /* free copies of old etypes and filetypes and delete flattened 
           version of filetype if necessary */

	MPI_Type_get_envelope(fd->etype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->etype));

	ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (!filetype_is_contig) ADIOI_Delete_flattened(fd->filetype);

	MPI_Type_get_envelope(fd->filetype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->filetype));

	/* set new info */
	ADIO_SetInfo(fd, fcntl_struct->info, &err);

        /* set new etypes and filetypes */

	MPI_Type_get_envelope(fcntl_struct->etype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) fd->etype = fcntl_struct->etype;
	else {
	    MPI_Type_contiguous(1, fcntl_struct->etype, &copy_etype);
	    MPI_Type_commit(&copy_etype);
	    fd->etype = copy_etype;
	}
	MPI_Type_get_envelope(fcntl_struct->filetype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) 
	    fd->filetype = fcntl_struct->filetype;
	else {
	    MPI_Type_contiguous(1, fcntl_struct->filetype, &copy_filetype);
	    MPI_Type_commit(&copy_filetype);
	    fd->filetype = copy_filetype;
	    ADIOI_Flatten_datatype(fd->filetype);
            /* this function will not flatten the filetype if it turns out
               to be all contiguous. */
	}

	MPI_Type_size(fd->etype, &(fd->etype_size));
	fd->disp = fcntl_struct->disp;

        /* reset MPI-IO file pointer to point to the first byte that can
           be accessed in this view. */

        ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (filetype_is_contig) fd->fp_ind = fcntl_struct->disp;
	else {
	    flat_file = ADIOI_Flatlist;
	    while (flat_file->type != fd->filetype) 
		flat_file = flat_file->next;
	    for (i=0; i<flat_file->count; i++) {
		if (flat_file->blocklens[i]) {
		    fd->fp_ind = fcntl_struct->disp + flat_file->indices[i];
		    break;
		}
	    }
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_GET_FSIZE:
	fcntl_struct->fsize = pvfs_lseek(fd->fd_sys, 0, SEEK_END);
	if (fd->fp_sys_posn != -1) 
	     pvfs_lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
#ifdef PRINT_ERR_MSG
	*error_code = (fcntl_struct->fsize == -1) ? MPI_ERR_UNKNOWN : MPI_SUCCESS;
#else
	if (fcntl_struct->fsize == -1) {
	    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
			      myname, "I/O Error", "%s", strerror(errno));
	    ADIOI_Error(fd, *error_code, myname);	    
	}
	else *error_code = MPI_SUCCESS;
#endif
	break;

    case ADIO_FCNTL_SET_DISKSPACE:
	/* will be called by one process only */
	/* On file systems with no preallocation function, I have to 
           explicitly write 
           to allocate space. Since there could be holes in the file, 
           I need to read up to the current file size, write it back, 
           and then write beyond that depending on how much 
           preallocation is needed.
           read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */

	curr_fsize = pvfs_lseek(fd->fd_sys, 0, SEEK_END);
	alloc_size = fcntl_struct->diskspace;

	size = ADIOI_MIN(curr_fsize, alloc_size);
	
	ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
	buf = (char *) ADIOI_Malloc(ADIOI_PREALLOC_BUFSZ);
	done = 0;

	for (i=0; i<ntimes; i++) {
	    len = ADIOI_MIN(size-done, ADIOI_PREALLOC_BUFSZ);
	    ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
			    &status, error_code);
	    if (*error_code != MPI_SUCCESS) {
#ifdef PRINT_ERR_MSG
		FPRINTF(stderr, "ADIOI_PVFS_Fcntl: To preallocate disk space, ROMIO needs to read the file and write it back, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.\n");
		MPI_Abort(MPI_COMM_WORLD, 1);
#else
		*error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_PREALLOC_PERM,
			      myname, (char *) 0, (char *) 0);
		ADIOI_Error(fd, *error_code, myname);
                return;  
#endif
	    }
	    ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
			     &status, error_code);
	    if (*error_code != MPI_SUCCESS) return;
	    done += len;
	}

	if (alloc_size > curr_fsize) {
	    memset(buf, 0, ADIOI_PREALLOC_BUFSZ); 
	    size = alloc_size - curr_fsize;
	    ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
	    for (i=0; i<ntimes; i++) {
		len = ADIOI_MIN(alloc_size-done, ADIOI_PREALLOC_BUFSZ);
		ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
				 done, &status, error_code);
		if (*error_code != MPI_SUCCESS) return;
		done += len;  
	    }
	}
	ADIOI_Free(buf);
	if (fd->fp_sys_posn != -1) 
	    pvfs_lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_IOMODE:
        /* for implementing PFS I/O modes. will not occur in MPI-IO
           implementation.*/
	if (fd->iomode != fcntl_struct->iomode) {
	    fd->iomode = fcntl_struct->iomode;
	    MPI_Barrier(MPI_COMM_WORLD);
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_ATOMICITY:
	*error_code = MPI_ERR_UNKNOWN;
	break;

    default:
	FPRINTF(stderr, "Unknown flag passed to ADIOI_PVFS_Fcntl\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
}
Ejemplo n.º 26
0
void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, l, brd_size, frd_size=0, st_index=0;
    int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent; 
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, disp, start_off;
    int flag, st_frd_size, st_n_filetypes;
    int new_brd_size, new_frd_size;

    int mem_list_count, file_list_count;
    char **mem_offsets;
    int64_t *file_offsets;
    int *mem_lengths;
    int32_t *file_lengths;
    int total_blks_to_read;

    int max_mem_list, max_file_list;

    int b_blks_read;
    int f_data_read;
    int size_read=0, n_read_lists, extra_blks;

    int end_brd_size, end_frd_size;
    int start_k, start_j, new_file_read, new_buffer_read;
    int start_mem_offset;

#define MAX_ARRAY_SIZE 1024

#ifndef PRINT_ERR_MESG
  static char myname[] = "ADIOI_PVFS_ReadStrided";
#endif

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, 0);
#endif
	*error_code = MPI_SUCCESS; 
	return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

    if (!buftype_is_contig && filetype_is_contig) {

/* noncontiguous in memory, contiguous in file. */
        int64_t file_offsets;
	int32_t file_lengths;

	ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
	    fd->disp + etype_size * offset;

	file_list_count = 1;
	file_offsets = off;
	file_lengths = 0;
	total_blks_to_read = count*flat_buf->count;
	b_blks_read = 0;

	/* allocate arrays according to max usage */
	if (total_blks_to_read > MAX_ARRAY_SIZE)
	    mem_list_count = MAX_ARRAY_SIZE;
	else mem_list_count = total_blks_to_read;
	mem_offsets = (char**)ADIOI_Malloc(mem_list_count*sizeof(char*));
	mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));

	j = 0;
	/* step through each block in memory, filling memory arrays */
	while (b_blks_read < total_blks_to_read) {
	    for (i=0; i<flat_buf->count; i++) {
		mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = 
		    (char*)((char *)buf + j*buftype_extent + flat_buf->indices[i]);
		mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = 
		    flat_buf->blocklens[i];
		file_lengths += flat_buf->blocklens[i];
		b_blks_read++;
		if (!(b_blks_read % MAX_ARRAY_SIZE) ||
		    (b_blks_read == total_blks_to_read)) {

		    /* in the case of the last read list call,
		       adjust mem_list_count */
		    if (b_blks_read == total_blks_to_read) {
		        mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
			/* in case last read list call fills max arrays */
			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
		    }

		    pvfs_read_list(fd->fd_sys ,mem_list_count, mem_offsets,
				   mem_lengths, file_list_count,
				   &file_offsets, &file_lengths);
		  
		    /* in the case of the last read list call, leave here */
		    if (b_blks_read == total_blks_to_read) break;

		    file_offsets += file_lengths;
		    file_lengths = 0;
		} 
	    } /* for (i=0; i<flat_buf->count; i++) */
	    j++;
	} /* while (b_blks_read < total_blks_to_read) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;

	fd->fp_sys_posn = -1;  /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
	MPIR_Status_set_bytes(status, datatype, bufsize);
	/* This isa temporary way of filling in status.  The right way is to
	   keep tracke of how much data was actually read adn placed in buf
	   by ADIOI_BUFFERED_READ. */
#endif
	ADIOI_Delete_flattened(datatype);

	return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* know file is noncontiguous from above */
    /* noncontiguous in file */

    /* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;

    /* for each case - ADIO_Individual pointer or explicit, find the file
       offset in bytes (offset), n_filetypes (how many filetypes into
       file to start), frd_size (remaining amount of data in present
       file block), and st_index (start point in terms of blocks in
       starting filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
	n_filetypes = -1;
	flag = 0;
	while (!flag) {
	    n_filetypes++;
	    for (i=0; i<flat_file->count; i++) {
	        if (disp + flat_file->indices[i] + 
		    (ADIO_Offset) n_filetypes*filetype_extent +
		    flat_file->blocklens[i]  >= offset) {
		    st_index = i;
		    frd_size = (int) (disp + flat_file->indices[i] + 
				      (ADIO_Offset) n_filetypes*filetype_extent
				      + flat_file->blocklens[i] - offset);
		    flag = 1;
		    break;
		}
	    }
	} /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
	n_filetypes = (int) (offset / n_etypes_in_filetype);
	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
	size_in_filetype = etype_in_filetype * etype_size;
	
	sum = 0;
	for (i=0; i<flat_file->count; i++) {
	    sum += flat_file->blocklens[i];
	    if (sum > size_in_filetype) {
	        st_index = i;
		frd_size = sum - size_in_filetype;
		abs_off_in_filetype = flat_file->indices[i] +
		    size_in_filetype - (sum - flat_file->blocklens[i]);
		break;
	    }
	}
	
	/* abs. offset in bytes in the file */
	offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
	    abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    start_off = offset;
    st_frd_size = frd_size;
    st_n_filetypes = n_filetypes;
    
    if (buftype_is_contig && !filetype_is_contig) {

/* contiguous in memory, noncontiguous in file. should be the most
   common case. */

        int mem_lengths;
	char *mem_offsets;
	
	i = 0;
	j = st_index;
	n_filetypes = st_n_filetypes;
	
	mem_list_count = 1;
	
	/* determine how many blocks in file to read */
	f_data_read = ADIOI_MIN(st_frd_size, bufsize);
	total_blks_to_read = 1;
	j++;
	while (f_data_read < bufsize) {
	    f_data_read += flat_file->blocklens[j];
	    total_blks_to_read++;
	    if (j<(flat_file->count-1)) j++;
	    else j = 0;	
	}
      
	j = st_index;
	n_filetypes = st_n_filetypes;
	n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
	extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
	
	mem_offsets = buf;
	mem_lengths = 0;
	
	/* if at least one full readlist, allocate file arrays
	   at max array size and don't free until very end */
	if (n_read_lists) {
	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int64_t));
	    file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
						  sizeof(int32_t));
	}
	/* if there's no full readlist allocate file arrays according
	   to needed size (extra_blks) */
	else {
	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
						  sizeof(int64_t));
	    file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
						  sizeof(int32_t));
	}
	
	/* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
	for (i=0; i<n_read_lists; i++) {
	    file_list_count = MAX_ARRAY_SIZE;
	    if(!i) {
	        file_offsets[0] = offset;
		file_lengths[0] = st_frd_size;
		mem_lengths = st_frd_size;
	    }
	    for (k=0; k<MAX_ARRAY_SIZE; k++) {
	        if (i || k) {
		    file_offsets[k] = disp + n_filetypes*filetype_extent
		      + flat_file->indices[j];
		    file_lengths[k] = flat_file->blocklens[j];
		    mem_lengths += file_lengths[k];
		}
		if (j<(flat_file->count - 1)) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
	    pvfs_read_list(fd->fd_sys, mem_list_count,
			   &mem_offsets, &mem_lengths,
			   file_list_count, file_offsets,
			   file_lengths);
	    mem_offsets += mem_lengths;
	    mem_lengths = 0;
	} /* for (i=0; i<n_read_lists; i++) */

	/* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
	if (extra_blks) {
	    file_list_count = extra_blks;
	    if(!i) {
	        file_offsets[0] = offset;
		file_lengths[0] = st_frd_size;
	    }
	    for (k=0; k<extra_blks; k++) {
	        if(i || k) {
		    file_offsets[k] = disp + n_filetypes*filetype_extent +
		      flat_file->indices[j];
		    if (k == (extra_blks - 1)) {
		        file_lengths[k] = bufsize - (int32_t) mem_lengths
			  - (int32_t) mem_offsets + (int32_t)  buf;
		    }
		    else file_lengths[k] = flat_file->blocklens[j];
		} /* if(i || k) */
		mem_lengths += file_lengths[k];
		if (j<(flat_file->count - 1)) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (k=0; k<extra_blks; k++) */
	    pvfs_read_list(fd->fd_sys, mem_list_count, &mem_offsets,
			   &mem_lengths, file_list_count, file_offsets,
			   file_lengths);
	}
    }
    else {
/* noncontiguous in memory as well as in file */
      
        ADIOI_Flatten_datatype(datatype);
	flat_buf = ADIOI_Flatlist;
	while (flat_buf->type != datatype) flat_buf = flat_buf->next;

	size_read = 0;
	n_filetypes = st_n_filetypes;
	frd_size = st_frd_size;
	brd_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;
	max_mem_list = 0;
	max_file_list = 0;

	/* run through and file max_file_list and max_mem_list so that you 
	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
	   if possible */

	while (size_read < bufsize) {
	    k = start_k;
	    new_buffer_read = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_read < bufsize-size_read)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_read + flat_buf->blocklens[k] + 
			size_read) > bufsize) {
		        end_brd_size = new_buffer_read + 
			    flat_buf->blocklens[k] - (bufsize - size_read);
			new_buffer_read = bufsize - size_read;
		    }
		    else {
		        new_buffer_read += flat_buf->blocklens[k];
			end_brd_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (brd_size > (bufsize - size_read)) {
		        new_buffer_read = bufsize - size_read;
			brd_size = new_buffer_read;
		    }
		    else new_buffer_read = brd_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_read < bufsize-size_read)) */
	    j = start_j;
	    new_file_read = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_read < new_buffer_read)) {
	        if(file_list_count) {
		    if((new_file_read + flat_file->blocklens[j]) > 
		       new_buffer_read) {
		        end_frd_size = new_buffer_read - new_file_read;
			new_file_read = new_buffer_read;
			j--;
		    }
		    else {
		        new_file_read += flat_file->blocklens[j];
			end_frd_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (frd_size > new_buffer_read) {
		        new_file_read = new_buffer_read;
			frd_size = new_file_read;
		    }
		    else new_file_read = frd_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_read < new_buffer_read) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_read = 0;
		    mem_list_count = 0;
		    while (new_buffer_read < new_file_read) {
		        if(mem_list_count) {
			    if((new_buffer_read + flat_buf->blocklens[k]) >
			       new_file_read) {
			        end_brd_size = new_file_read - new_buffer_read;
				new_buffer_read = new_file_read;
				k--;
			    }
			    else {
			        new_buffer_read += flat_buf->blocklens[k];
				end_brd_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_read = brd_size;
			    if (brd_size > (bufsize - size_read)) {
			        new_buffer_read = bufsize - size_read;
				brd_size = new_buffer_read;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_read < new_file_read) */
		} /* if ((new_file_read < new_buffer_read) && (file_list_count
		     == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_read < bufsize-size_read)) */

	    /*  fakes filling the readlist arrays of lengths found above  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
		if(i) {
		    if (i == (mem_list_count - 1)) {
			if (flat_buf->blocklens[k] == end_brd_size)
			    brd_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    brd_size = flat_buf->blocklens[k] - end_brd_size;
			    k--;
			    buf_count--;
			}
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
		if (i) {
		    if (i == (file_list_count - 1)) {
			if (flat_file->blocklens[j] == end_frd_size)
			    frd_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    frd_size = flat_file->blocklens[j] - end_frd_size;
			    j--;
			}
		    }
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */
	    size_read += new_buffer_read;
	    start_k = k;
	    start_j = j;
	    if (max_mem_list < mem_list_count)
	        max_mem_list = mem_list_count;
	    if (max_file_list < file_list_count)
	        max_file_list = file_list_count;
	} /* while (size_read < bufsize) */

	mem_offsets = (char **)ADIOI_Malloc(max_mem_list*sizeof(char *));
	mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
	file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
	file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
	    
	size_read = 0;
	n_filetypes = st_n_filetypes;
	frd_size = st_frd_size;
	brd_size = flat_buf->blocklens[0];
	buf_count = 0;
	start_mem_offset = 0;
	start_k = k = 0;
	start_j = st_index;

	/*  this section calculates mem_list_count and file_list_count
	    and also finds the possibly odd sized last array elements
	    in new_frd_size and new_brd_size  */
	
	while (size_read < bufsize) {
	    k = start_k;
	    new_buffer_read = 0;
	    mem_list_count = 0;
	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
		   (new_buffer_read < bufsize-size_read)) {
	        /* find mem_list_count and file_list_count such that both are
		   less than MAX_ARRAY_SIZE, the sum of their lengths are
		   equal, and the sum of all the data read and data to be
		   read in the next immediate read list is less than
		   bufsize */
	        if(mem_list_count) {
		    if((new_buffer_read + flat_buf->blocklens[k] + 
			size_read) > bufsize) {
		        end_brd_size = new_buffer_read + 
			    flat_buf->blocklens[k] - (bufsize - size_read);
			new_buffer_read = bufsize - size_read;
		    }
		    else {
		        new_buffer_read += flat_buf->blocklens[k];
			end_brd_size = flat_buf->blocklens[k];
		    }
		}
		else {
		    if (brd_size > (bufsize - size_read)) {
		        new_buffer_read = bufsize - size_read;
			brd_size = new_buffer_read;
		    }
		    else new_buffer_read = brd_size;
		}
		mem_list_count++;
		k = (k + 1)%flat_buf->count;
	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
	       (new_buffer_read < bufsize-size_read)) */
	    j = start_j;
	    new_file_read = 0;
	    file_list_count = 0;
	    while ((file_list_count < MAX_ARRAY_SIZE) && 
		   (new_file_read < new_buffer_read)) {
	        if(file_list_count) {
		    if((new_file_read + flat_file->blocklens[j]) > 
		       new_buffer_read) {
		        end_frd_size = new_buffer_read - new_file_read;
			new_file_read = new_buffer_read;
			j--;
		    }
		    else {
		        new_file_read += flat_file->blocklens[j];
			end_frd_size = flat_file->blocklens[j];
		    }
		}
		else {
		    if (frd_size > new_buffer_read) {
		        new_file_read = new_buffer_read;
			frd_size = new_file_read;
		    }
		    else new_file_read = frd_size;
		}
		file_list_count++;
		if (j < (flat_file->count - 1)) j++;
		else j = 0;
		
		k = start_k;
		if ((new_file_read < new_buffer_read) && 
		    (file_list_count == MAX_ARRAY_SIZE)) {
		    new_buffer_read = 0;
		    mem_list_count = 0;
		    while (new_buffer_read < new_file_read) {
		        if(mem_list_count) {
			    if((new_buffer_read + flat_buf->blocklens[k]) >
			       new_file_read) {
			        end_brd_size = new_file_read - new_buffer_read;
				new_buffer_read = new_file_read;
				k--;
			    }
			    else {
			        new_buffer_read += flat_buf->blocklens[k];
				end_brd_size = flat_buf->blocklens[k];
			    }
			}
			else {
			    new_buffer_read = brd_size;
			    if (brd_size > (bufsize - size_read)) {
			        new_buffer_read = bufsize - size_read;
				brd_size = new_buffer_read;
			    }
			}
			mem_list_count++;
			k = (k + 1)%flat_buf->count;
		    } /* while (new_buffer_read < new_file_read) */
		} /* if ((new_file_read < new_buffer_read) && (file_list_count
		     == MAX_ARRAY_SIZE)) */
	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
		 (new_buffer_read < bufsize-size_read)) */

	    /*  fills the allocated readlist arrays  */
	    k = start_k;
	    j = start_j;
	    for (i=0; i<mem_list_count; i++) {	     
	        mem_offsets[i] = (char*)((char *)buf + buftype_extent*
					 (buf_count/flat_buf->count) +
					 (int)flat_buf->indices[k]);
		if(!i) {
		    mem_lengths[0] = brd_size;
		    mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
		}
		else {
		    if (i == (mem_list_count - 1)) {
		        mem_lengths[i] = end_brd_size;
			if (flat_buf->blocklens[k] == end_brd_size)
			    brd_size = flat_buf->blocklens[(k+1)%
							  flat_buf->count];
			else {
			    brd_size = flat_buf->blocklens[k] - end_brd_size;
			    k--;
			    buf_count--;
			}
		    }
		    else {
		        mem_lengths[i] = flat_buf->blocklens[k];
		    }
		}
		buf_count++;
		k = (k + 1)%flat_buf->count;
	    } /* for (i=0; i<mem_list_count; i++) */
	    for (i=0; i<file_list_count; i++) {
	        file_offsets[i] = disp + flat_file->indices[j] + n_filetypes *
		    filetype_extent;
	        if (!i) {
		    file_lengths[0] = frd_size;
		    file_offsets[0] += flat_file->blocklens[j] - frd_size;
		}
		else {
		    if (i == (file_list_count - 1)) {
		        file_lengths[i] = end_frd_size;
			if (flat_file->blocklens[j] == end_frd_size)
			    frd_size = flat_file->blocklens[(j+1)%
							  flat_file->count];   
			else {
			    frd_size = flat_file->blocklens[j] - end_frd_size;
			    j--;
			}
		    }
		    else file_lengths[i] = flat_file->blocklens[j];
		}
		if (j < flat_file->count - 1) j++;
		else {
		    j = 0;
		    n_filetypes++;
		}
	    } /* for (i=0; i<file_list_count; i++) */

	    /* 
	    printf("about to call read_list in noncontig/noncontig\n");
	    printf("offsets and lengths in terms of integers\n");
	    printf("\nmem_list_count = %d\n", mem_list_count);
	    for (i=0; i<mem_list_count; i++) {
	      printf("mem_offsets[%2d] = %2d   ", i, (int)(mem_offsets[i] - (int)buf)/4);
	      printf("mem_lengths[%2d] = %2d\n", i, mem_lengths[i]/4);
	    }
	    printf("\nfile_list_count = %d\n", file_list_count);
	    for (i=0; i<file_list_count; i++) {
	      printf("file_offsets[%2d] = %2d   ", i, (int)file_offsets[i]/4);
	      printf("file_lengths[%2d] = %2d\n", i, file_lengths[i]/4);
	    }
	    printf("\n\n");
	    */
	    pvfs_read_list(fd->fd_sys,mem_list_count, mem_offsets,
			   mem_lengths, file_list_count, file_offsets,
			   file_lengths);
	    size_read += new_buffer_read;
	    start_k = k;
	    start_j = j;
	} /* while (size_read < bufsize) */
	ADIOI_Free(mem_offsets);
	ADIOI_Free(mem_lengths);
    }
    ADIOI_Free(file_offsets);
    ADIOI_Free(file_lengths);
    
    if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
    fd->fp_sys_posn = -1;   /* set it to null. */
    
#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
    /* This is a temporary way of filling in status. The right way is to 
       keep track of how much data was actually read and placed in buf 
       by ADIOI_BUFFERED_READ. */
#endif
    
    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
Ejemplo n.º 27
0
void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset,
			     ADIO_Offset max_end_offset)
{
    int nprocs_for_coll;
    int file_realm_calc_type;
    
    MPI_Datatype *file_realm_types = NULL;
    ADIO_Offset *file_realm_st_offs = NULL;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5004, 0, NULL);
#endif
#ifdef DEBUG
    printf ("ADIOI_Calc_file_realms\n");
#endif
    
    nprocs_for_coll = fd->hints->cb_nodes;    
    file_realm_calc_type = fd->hints->cb_fr_type;

    /* If PFRs are disabled we know these pointers are not allocated */
    if (fd->hints->cb_pfr != ADIOI_HINT_ENABLE) {
	fd->file_realm_st_offs = NULL;
	fd->file_realm_types = NULL;
    }

    if (nprocs_for_coll == 1) {
	/* if there's only one aggregator, we can reset the file
	 * realms every single time */
	if (fd->file_realm_st_offs == NULL)
	{
	    file_realm_st_offs = (ADIO_Offset *)
		ADIOI_Malloc (sizeof(ADIO_Offset));
	    file_realm_types   = (MPI_Datatype *)
		ADIOI_Malloc (sizeof(MPI_Datatype));
	}
	else
	{
	    file_realm_st_offs = fd->file_realm_st_offs;
	    file_realm_types   = fd->file_realm_types;
	}
	*file_realm_st_offs = min_st_offset;
	MPI_Type_contiguous ((max_end_offset - min_st_offset + 1), MPI_BYTE,
			     file_realm_types);
	MPI_Type_commit (file_realm_types);
	ADIOI_Add_contig_flattened (*file_realm_types);
    }
    else if (fd->file_realm_st_offs == NULL) {
	file_realm_st_offs = (ADIO_Offset *)
	    ADIOI_Malloc (nprocs_for_coll * sizeof(ADIO_Offset));
	file_realm_types   = (MPI_Datatype *)
	    ADIOI_Malloc (nprocs_for_coll * sizeof(MPI_Datatype));
	
	if (file_realm_calc_type == ADIOI_FR_AAR) {
	    ADIOI_Calc_file_realms_aar (fd, nprocs_for_coll,
					fd->hints->cb_pfr,
					min_st_offset, max_end_offset,
					file_realm_st_offs, file_realm_types);
	    /* flatten file realm datatype for future use - only one
	     * because all are the same*/
	    ADIOI_Flatten_datatype (file_realm_types[0]);
	}
	else if (file_realm_calc_type == ADIOI_FR_FSZ) {
	    ADIOI_Calc_file_realms_fsize (fd, nprocs_for_coll, max_end_offset,
					  file_realm_st_offs,
					  file_realm_types);
	    /* flatten file realm datatype for future use - only one
	     * because all are the same*/
	    ADIOI_Flatten_datatype (file_realm_types[0]);
	}
	else if (file_realm_calc_type == ADIOI_FR_USR_REALMS) {
	    /* copy user provided realm datatypes and realm offsets in
	     * hints to file descriptor. may also want to verify that
	     * the provided file realms are covering (for pfr at
	     * least) and non-overlapping */
	}
	else if (file_realm_calc_type > 0) {
	    ADIOI_Calc_file_realms_user_size (fd, file_realm_calc_type,
					      nprocs_for_coll,
					      file_realm_st_offs,
					      file_realm_types);
	    /* flatten file realm datatype for future use - only one
	     * because all are the same */
	    ADIOI_Flatten_datatype (file_realm_types[0]);
	}
    }
    fd->file_realm_st_offs = file_realm_st_offs;
    fd->file_realm_types   = file_realm_types;
#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5005, 0, NULL);
#endif
}
Ejemplo n.º 28
0
void ADIOI_PFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    MPI_Datatype copy_etype, copy_filetype;
    int combiner, i, j, k, filetype_is_contig, err;
    ADIOI_Flatlist_node *flat_file;
    int iomod, np_total, np_comm;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_PFS_FCNTL";
#endif

    switch(flag) {
    case ADIO_FCNTL_SET_VIEW:
        /* free copies of old etypes and filetypes and delete flattened 
           version of filetype if necessary */

	MPI_Type_get_envelope(fd->etype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->etype));

	ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (!filetype_is_contig) ADIOI_Delete_flattened(fd->filetype);

	MPI_Type_get_envelope(fd->filetype, &i, &j, &k, &combiner);
	if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->filetype));

	/* set new info */
	ADIO_SetInfo(fd, fcntl_struct->info, &err);

        /* set new etypes and filetypes */

	MPI_Type_get_envelope(fcntl_struct->etype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) fd->etype = fcntl_struct->etype;
	else {
	    MPI_Type_contiguous(1, fcntl_struct->etype, &copy_etype);
	    MPI_Type_commit(&copy_etype);
	    fd->etype = copy_etype;
	}
	MPI_Type_get_envelope(fcntl_struct->filetype, &i, &j, &k, &combiner);
	if (combiner == MPI_COMBINER_NAMED) 
	    fd->filetype = fcntl_struct->filetype;
	else {
	    MPI_Type_contiguous(1, fcntl_struct->filetype, &copy_filetype);
	    MPI_Type_commit(&copy_filetype);
	    fd->filetype = copy_filetype;
	    ADIOI_Flatten_datatype(fd->filetype);
            /* this function will not flatten the filetype if it turns out
               to be all contiguous. */
	}

	MPI_Type_size(fd->etype, &(fd->etype_size));
	fd->disp = fcntl_struct->disp;

        /* reset MPI-IO file pointer to point to the first byte that can
           be accessed in this view. */

        ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
	if (filetype_is_contig) fd->fp_ind = fcntl_struct->disp;
	else {
	    flat_file = ADIOI_Flatlist;
	    while (flat_file->type != fd->filetype) 
		flat_file = flat_file->next;
	    for (i=0; i<flat_file->count; i++) {
		if (flat_file->blocklens[i]) {
		    fd->fp_ind = fcntl_struct->disp + flat_file->indices[i];
		    break;
		}
	    }
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_GET_FSIZE:
	if (!(fd->atomicity)) {
          /* in M_ASYNC mode, all processes are not aware of changes 
             in file size (although the manual says otherwise). Therefore, 
             temporarily change to M_UNIX and then change 
             back to M_ASYNC.*/ 
	    MPI_Comm_size(MPI_COMM_WORLD, &np_total);
	    MPI_Comm_size(fd->comm, &np_comm);
	    if (np_total == np_comm) {
		err = _setiomode(fd->fd_sys, M_UNIX);
		err = _setiomode(fd->fd_sys, M_ASYNC);
	    }
            /* else it is M_UNIX anyway, so no problem */
	}
	fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);
	if (fd->fp_sys_posn != -1) 
	    lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_DISKSPACE:
	err = _lsize(fd->fd_sys, fcntl_struct->diskspace, SEEK_SET);
#ifdef PRINT_ERR_MSG
	*error_code = (err == -1) ? MPI_ERR_UNKNOWN : MPI_SUCCESS ;
#else
	if (err == -1) {
	    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
			      myname, "I/O Error", "%s", strerror(errno));
	    ADIOI_Error(fd, *error_code, myname);	    
	}
	else *error_code = MPI_SUCCESS;
#endif
	break;

    case ADIO_FCNTL_SET_IOMODE:
        /* for implementing PFS I/O modes. will not occur in MPI-IO
           implementation.*/
	if (fd->iomode != fcntl_struct->iomode) {
	    fd->iomode = fcntl_struct->iomode;
	    setiomode(fd->fd_sys, iomode);
           /* for some unknown reason, the compiler gives a warning here */
	}
	*error_code = MPI_SUCCESS;
	break;

    case ADIO_FCNTL_SET_ATOMICITY:
	MPI_Comm_size(MPI_COMM_WORLD, &np_total);
	MPI_Comm_size(fd->comm, &np_comm);
	if (np_total == np_comm) {
	    iomod = (fcntl_struct->atomicity == 0) ? M_ASYNC : M_UNIX;
	    err = _setiomode(fd->fd_sys, iomod);
	}
        /* else can't do anything because setiomode is global. but
           the file will have been opened with M_UNIX anyway, because
           gopen is also global. */

	fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
#ifdef PRINT_ERR_MSG
	*error_code = (err == -1) ? MPI_ERR_UNKNOWN : MPI_SUCCESS ;
#else
	if (err == -1) {
	    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
			      myname, "I/O Error", "%s", strerror(errno));
	    ADIOI_Error(fd, *error_code, myname);	    
	}
	else *error_code = MPI_SUCCESS;
#endif
	break;

    default:
	FPRINTF(stderr, "Unknown flag passed to ADIOI_PFS_Fcntl\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
}