Example #1
/* ADIOI_ZOIDFS_Open:
 *  one process opens (or creates) the file, then broadcasts the result to the
 *  remaining processes.
 *
 * ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before
 * that, MPI_MODE_EXCL) was set.  Because ZoidFS handles file lookup and
 * creation more scalably than traditional file systems, ADIO_Open now skips any
 * special handling when CREATE is set.  */
void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code)
{
    int rank;
    static char myname[] = "ADIOI_ZOIDFS_OPEN";
    ADIOI_ZOIDFS_object *zoidfs_obj_ptr;

    /* since one process is doing the open, that means one process is also
     * doing the error checking.  define a struct for both the object reference
     * and the error code to broadcast to all the processes */
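    /* open_status is declared elsewhere in this file; its layout is
     * presumably something like this (hypothetical sketch, not taken from
     * the actual header):
     *
     *   typedef struct open_status_s {
     *       int error;                  // open result from the root process
     *       ADIOI_ZOIDFS_object handle; // valid only when error == ZFS_OK
     *   } open_status;
     */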

    open_status o_status;
    MPI_Datatype open_status_type;
    MPI_Datatype types[2] = {MPI_INT, MPI_BYTE};
    int lens[2] = {1, sizeof(ADIOI_ZOIDFS_object)};
    MPI_Aint offsets[2];

    memset(&o_status, 0, sizeof(o_status));
    zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *)
	ADIOI_Malloc(sizeof(ADIOI_ZOIDFS_object));
    /* --BEGIN ERROR HANDLING-- */
    if (zoidfs_obj_ptr == NULL) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
					   MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_UNKNOWN,
					   "Error allocating memory", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    MPI_Comm_rank(fd->comm, &rank);

    ADIOI_ZOIDFS_Init(rank, error_code);
    if (*error_code != MPI_SUCCESS)
    {
	/* ADIOI_ZOIDFS_INIT handles creating error codes on its own */
	ADIOI_Free(zoidfs_obj_ptr);
	return;
    }

    /* one process resolves name and will later bcast to others */
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event( ADIOI_MPE_open_a, 0, NULL );
#endif
    if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) {
	    fake_an_open(fd->filename, fd->access_mode,
		    fd->hints->striping_factor,
		    fd->hints->striping_unit,
		    zoidfs_obj_ptr, &o_status);
	    /* store the object reference in fd */
	    *zoidfs_obj_ptr = o_status.handle;
	    fd->fs_ptr = zoidfs_obj_ptr;
    }
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event( ADIOI_MPE_open_b, 0, NULL );
#endif

    /* broadcast status and (possibly valid) object reference */
    MPI_Get_address(&o_status.error, &offsets[0]);
    MPI_Get_address(&o_status.handle, &offsets[1]);
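    /* the offsets captured above are absolute addresses, so the committed
     * type describes where o_status actually lives in memory and the
     * broadcast below can transfer it directly from MPI_BOTTOM */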

    MPI_Type_create_struct(2, lens, offsets, types, &open_status_type);
    MPI_Type_commit(&open_status_type);

    /* Assertion: if we hit this Bcast, then all processes collectively
     *            called this open.
     *
     * That's because deferred open never happens with this fs.
     */
    MPI_Bcast(MPI_BOTTOM, 1, open_status_type, fd->hints->ranklist[0],
	      fd->comm);
    MPI_Type_free(&open_status_type);

    /* --BEGIN ERROR HANDLING-- */
    if (o_status.error != ZFS_OK)
    {
	ADIOI_Free(zoidfs_obj_ptr);
	fd->fs_ptr = NULL;
	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
					   MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   ADIOI_ZOIDFS_error_convert(o_status.error),
					   "Unknown error", 0);
	/* TODO: FIX STRING */
	return;
    }
    /* --END ERROR HANDLING-- */

    *zoidfs_obj_ptr = o_status.handle;
    fd->fs_ptr = zoidfs_obj_ptr;

    *error_code = MPI_SUCCESS;
    return;
}
Example #2
/*
 *	mca_fs_pvfs2_file_open: same strategy as ROMIO's PVFS2 open
 *
 *	Function:	- opens a new file
 *	Accepts:	- same arguments as MPI_File_open()
 *	Returns:	- OMPI_SUCCESS if the file handle was opened
 */
int
mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm,
                        const char* filename,
                        int access_mode,
                        struct ompi_info_t *info,
                        mca_io_ompio_file_t *fh)
{
    int ret;
    mca_fs_pvfs2 *pvfs2_fs;
    PVFS_fs_id pvfs2_id;
    char pvfs2_path[OMPIO_MAX_NAME] = {0};
    char * ncache_timeout;
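    /* open_status pairs the root's error code with the object reference so
     * both travel in one broadcast; presumably something like this
     * (hypothetical sketch, not taken from the actual header):
     *
     *   typedef struct open_status_s {
     *       int error;                   // open result from the root process
     *       PVFS_object_ref object_ref;  // {handle, fs_id}, valid if error == 0
     *   } open_status;
     */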
    open_status o_status = {0, {0, 0}};
    struct ompi_datatype_t *open_status_type;
    struct ompi_datatype_t *types[2] = {&ompi_mpi_int.dt, &ompi_mpi_byte.dt};
    int lens[2] = {1, sizeof(PVFS_object_ref)};
    OPAL_PTRDIFF_TYPE offsets[2];
    char char_stripe[MPI_MAX_INFO_VAL];
    int flag;
    int fs_pvfs2_stripe_size = -1;
    int fs_pvfs2_stripe_width = -1;

    /* We are going to do what ROMIO does with one process resolving
     * the name and broadcasting to others */

    pvfs2_fs = (mca_fs_pvfs2 *) malloc(sizeof(mca_fs_pvfs2));
    if (NULL == pvfs2_fs) {
        opal_output (1, "OUT OF MEMORY\n");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if (!mca_fs_pvfs2_IS_INITIALIZED) {
        /* disable the pvfs2 ncache */
        ncache_timeout = getenv("PVFS2_NCACHE_TIMEOUT");
        if (ncache_timeout == NULL ) {
            setenv("PVFS2_NCACHE_TIMEOUT", "0", 1);
        }
        ret = PVFS_util_init_defaults();
        if (ret < 0) {
            PVFS_perror("PVFS_util_init_defaults", ret);
            free(pvfs2_fs);
            return OMPI_ERROR;
        }
        mca_fs_pvfs2_IS_INITIALIZED = 1;
    }

    memset(&(pvfs2_fs->credentials), 0, sizeof(PVFS_credentials));
    PVFS_util_gen_credentials(&(pvfs2_fs->credentials));

    /* check for stripe size and stripe depth in the info object and
       update mca_fs_pvfs2_stripe_width and mca_fs_pvfs2_stripe_size
       before calling fake_an_open() */

    ompi_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_pvfs2_stripe_size );
    }

    ompi_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_pvfs2_stripe_width );
    }

    if (fs_pvfs2_stripe_size < 0) {
        fs_pvfs2_stripe_size = mca_fs_pvfs2_stripe_size;
    }

    if (fs_pvfs2_stripe_width < 0) {
        fs_pvfs2_stripe_width = mca_fs_pvfs2_stripe_width;
    }
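
    /* these values normally arrive through the info object passed to
     * MPI_File_open; hypothetical application-side usage:
     *
     *   MPI_Info info;
     *   MPI_File fh;
     *   MPI_Info_create(&info);
     *   MPI_Info_set(info, "stripe_size", "1048576");
     *   MPI_Info_set(info, "stripe_width", "4");
     *   MPI_File_open(MPI_COMM_WORLD, "pvfs2:/mnt/pvfs2/file",
     *                 MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh);
     */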


    if (OMPIO_ROOT == fh->f_rank) {
        ret = PVFS_util_resolve(filename, &pvfs2_id, pvfs2_path, OMPIO_MAX_NAME);
        if (ret < 0 ) {
            PVFS_perror("PVFS_util_resolve", ret);
            o_status.error = -1;
        }
        else {
            fake_an_open (pvfs2_id,
                          pvfs2_path,
                          access_mode,
                          fs_pvfs2_stripe_width,
                          (PVFS_size)fs_pvfs2_stripe_size,
                          pvfs2_fs,
                          &o_status);
        }
        pvfs2_fs->object_ref = o_status.object_ref;
        fh->f_fs_ptr = pvfs2_fs;
    }

    /* broadcast status and (possibly valid) object reference */
    offsets[0] = (MPI_Aint)(&o_status.error);
    offsets[1] = (MPI_Aint)(&o_status.object_ref);

    ompi_datatype_create_struct (2, lens, offsets, types, &open_status_type);
    ompi_datatype_commit (&open_status_type);

    fh->f_comm->c_coll.coll_bcast (MPI_BOTTOM,
                                   1,
                                   open_status_type,
                                   OMPIO_ROOT,
                                   fh->f_comm,
                                   fh->f_comm->c_coll.coll_bcast_module);

    ompi_datatype_destroy (&open_status_type);

    if (o_status.error != 0) {
        /* the broadcast told every process the open failed; free the
           structure here, since f_fs_ptr is cleared and file_close will
           never see it */
        free(pvfs2_fs);
        fh->f_fs_ptr = NULL;
        return OMPI_ERROR;
    }

    pvfs2_fs->object_ref = o_status.object_ref;
    fh->f_fs_ptr = pvfs2_fs;

    /* update the internal ompio structure to store stripe size and
       stripe depth correctly.
       TODO: read the stripe size and stripe depth from the file itself */

    if (fs_pvfs2_stripe_size > 0 && fs_pvfs2_stripe_width > 0) {
        fh->f_stripe_size = fs_pvfs2_stripe_size;
        fh->f_stripe_count = fs_pvfs2_stripe_width;
    }

    return OMPI_SUCCESS;
}
Example #3
/* ADIOI_PVFS2_Open:
 *  one process opens (or creates) the file, then broadcasts the result to the
 *  remaining processes.
 *
 *  ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before
 * that, MPI_MODE_EXCL) was set.  Because PVFS2 handles file lookup and
 * creation more scalably than other file systems, ADIO_Open now skips any
 * special handling when CREATE is set.  */
void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code)
{
    int rank, ret;
    PVFS_fs_id cur_fs;
    static char myname[] = "ADIOI_PVFS2_OPEN";
    char pvfs_path[PVFS_NAME_MAX] = {0};

    ADIOI_PVFS2_fs *pvfs2_fs;

    /* since one process is doing the open, that means one process is also
     * doing the error checking.  define a struct for both the object reference
     * and the error code to broadcast to all the processes */
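    /* the broadcast payload is described below as two blocks: one MPI_INT
     * for the error field, and sizeof(PVFS_object_ref) MPI_BYTEs for the
     * opaque object reference, each at its absolute address */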

    open_status o_status = {0, {0, 0}};
    MPI_Datatype open_status_type;
    MPI_Datatype types[2] = {MPI_INT, MPI_BYTE};
    int lens[2] = {1, sizeof(PVFS_object_ref)};
    MPI_Aint offsets[2];
    
    pvfs2_fs = (ADIOI_PVFS2_fs *) ADIOI_Malloc(sizeof(ADIOI_PVFS2_fs));

    /* --BEGIN ERROR HANDLING-- */
    if (pvfs2_fs == NULL) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
					   MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_UNKNOWN,
					   "Error allocating memory", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    MPI_Comm_rank(fd->comm, &rank);

    ADIOI_PVFS2_Init(error_code);
    if (*error_code != MPI_SUCCESS)
    {
	/* ADIOI_PVFS2_INIT handles creating error codes on its own */
	ADIOI_Free(pvfs2_fs);
	return;
    }

    /* currently everyone gets their own credentials */
    ADIOI_PVFS2_makecredentials(&(pvfs2_fs->credentials));
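    /* since every process generates its own credentials locally, only the
     * error code and object reference need to be part of the broadcast
     * below */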

    /* one process resolves name and will later bcast to others */
    if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) {
	/* given the filename, figure out which pvfs filesystem it is on */
	ret = PVFS_util_resolve(fd->filename, &cur_fs, 
		pvfs_path, PVFS_NAME_MAX);
	if (ret < 0 ) {
	    PVFS_perror("PVFS_util_resolve", ret);
	    /* TODO: pick a good error for this */
	    o_status.error = -1;
	} else  {
	    fake_an_open(cur_fs, pvfs_path,
                         fd->access_mode, fd->hints->striping_factor,
                         fd->hints->striping_unit,
                         pvfs2_fs, &o_status);
	}

	/* store credentials and object reference in fd */
	pvfs2_fs->object_ref = o_status.object_ref;
	fd->fs_ptr = pvfs2_fs;
    }

    /* broadcast status and (possibly valid) object reference */
    MPI_Get_address(&o_status.error, &offsets[0]);
    MPI_Get_address(&o_status.object_ref, &offsets[1]);

    MPI_Type_create_struct(2, lens, offsets, types, &open_status_type);
    MPI_Type_commit(&open_status_type);

    /* Assertion: if we hit this Bcast, then all processes collectively
     *            called this open.
     *
     * That's because deferred open never happens with PVFS2.
     */
    MPI_Bcast(MPI_BOTTOM, 1, open_status_type, fd->hints->ranklist[0],
	      fd->comm);
    MPI_Type_free(&open_status_type);

    /* --BEGIN ERROR HANDLING-- */
    if (o_status.error != 0)
    {
	ADIOI_Free(pvfs2_fs);
	fd->fs_ptr = NULL;
	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
					   MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   ADIOI_PVFS2_error_convert(o_status.error),
					   "Error opening the file", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    pvfs2_fs->object_ref = o_status.object_ref;
    fd->fs_ptr = pvfs2_fs;

    *error_code = MPI_SUCCESS;
    return;
}