예제 #1
0
파일: info_dup.c 프로젝트: ICLDisco/ompi
/*@
    MPI_Info_dup - Returns a duplicate of the info object

Input Parameters:
. info - info object (handle)

Output Parameters:
. newinfo - duplicate of info object (handle)

.N fortran
@*/
int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo)
{
    MPI_Info src, tail, node;

    /* The head node of a valid info list carries the cookie; anything
     * else is treated as a fatal usage error. */
    if ((info <= (MPI_Info) 0) || (info->cookie != MPIR_INFO_COOKIE)) {
        FPRINTF(stderr, "MPI_Info_dup: Invalid info object\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Build the head node of the copy: it holds the cookie and no
     * key/value pair. */
    tail = (MPI_Info) ADIOI_Malloc(sizeof(struct MPIR_Info));
    *newinfo = tail;
    tail->cookie = MPIR_INFO_COOKIE;
    tail->key = 0;
    tail->value = 0;
    tail->next = 0;

    /* Append a deep copy of every key/value node that follows the head
     * of the source list, preserving order. */
    for (src = info->next; src; src = src->next) {
        node = (MPI_Info) ADIOI_Malloc(sizeof(struct MPIR_Info));
        tail->next = node;
        node->cookie = 0;  /* cookie not set on purpose */
        node->key = ADIOI_Strdup(src->key);
        node->value = ADIOI_Strdup(src->value);
        node->next = 0;
        tail = node;
    }

    return MPI_SUCCESS;
}
예제 #2
0
파일: ad_fstype.c 프로젝트: ICLDisco/ompi
/* ADIO_FileSysType_parentdir
 *
 * Stores in *dirnamep the parent directory of filename.  If filename is
 * a symbolic link whose contents can be read, the parent directory of
 * the link's contents (as returned by readlink) is used instead.
 *
 * A name without any '/' yields ".", and a name whose only '/' is the
 * leading one yields "/".
 *
 * The returned string is allocated with ADIOI_Strdup; the caller owns it
 * and must release it with the matching deallocator.
 */
static void ADIO_FileSysType_parentdir(const char *filename, char **dirnamep)
{
    int err;
    char *dir = NULL, *slash;
    struct stat statbuf;

    err = lstat(filename, &statbuf);

    if (err || (!S_ISLNK(statbuf.st_mode))) {
	/* no such file, or file is not a link; these are the "normal"
	 * cases where we can just return the parent directory.
	 */
	dir = ADIOI_Strdup(filename);
    }
    else {
	/* filename is a symlink.  we've presumably already tried
	 * to stat it and found it to be missing (dangling link),
	 * but this code doesn't care if the target is really there
	 * or not.
	 */
	ssize_t namelen;
	char *linkbuf;

	linkbuf = ADIOI_Malloc(PATH_MAX+1);
	/* Ask for at most PATH_MAX bytes: the buffer holds PATH_MAX+1, so
	 * the terminator written at linkbuf[namelen] below always stays
	 * inside the allocation, even when the link fills the request. */
	namelen = readlink(filename, linkbuf, PATH_MAX);
	if (namelen == -1) {
	    /* something strange has happened between the time that
	     * we determined that this was a link and the time that
	     * we attempted to read it; punt and use the old name.
	     */
	    dir = ADIOI_Strdup(filename);
	}
	else {
	    /* successfully read the link */
	    linkbuf[namelen] = '\0'; /* readlink doesn't null terminate */
	    dir = ADIOI_Strdup(linkbuf);
	}
	ADIOI_Free(linkbuf);
    }

    slash = strrchr(dir, '/');
    if (!slash) {
	/* No directory component, so the parent is ".".  Allocate a fresh
	 * string instead of copying two bytes into dir: for an empty name
	 * dir is only one byte long and the copy would overflow it. */
	ADIOI_Free(dir);
	dir = ADIOI_Strdup(".");
    }
    else {
	/* keep the leading '/' when it is the only one (parent is root) */
	if (slash == dir) *(dir + 1) = '\0';
	else *slash = '\0';
    }

    *dirnamep = dir;
    return;
}
예제 #3
0
/*
 * ADIOI_BEEGFS_SetInfo - process hints for a file on BeeGFS.
 *
 * fd          file whose hints are being set
 * users_info  hints supplied by the user (may be MPI_INFO_NULL)
 * error_code  set to MPI_SUCCESS here, then to whatever
 *             ADIOI_GEN_SetInfo reports
 *
 * The first time hints are set on fd (fd->info is MPI_INFO_NULL) the
 * "striping_unit" and "striping_factor" hints are read from users_info
 * and checked against rank 0's values; a mismatch aborts the job.  If
 * both are non-zero and the file is being created, rank 0 asks BeeGFS to
 * create the file with those stripe settings through an ioctl on the
 * parent directory.  Failures of that step are reported on stderr but are
 * not fatal.  All remaining hints are handled by ADIOI_GEN_SetInfo.
 */
void ADIOI_BEEGFS_SetInfo( ADIO_File fd, MPI_Info users_info, int *error_code )
{
    char *value, *pathname, *dname, *slash;
    int flag, stripe_val[2], numtargets = 0, chunksize = 0;
    struct BeegfsIoctl_MkFileWithStripeHints_Arg createFileArg;
    int err, myrank, fd_pdir, perm, old_mask;
    static char myname[] = "ADIOI_BEEGFS_SETINFO";

    /* set error code to success */
    *error_code = MPI_SUCCESS;

    value = ( char * )ADIOI_Malloc( ( MPI_MAX_INFO_VAL + 1 ) * sizeof( char ) );

    MPI_Comm_rank( fd->comm, &myrank );

    /* set hints */
    if( ( fd->info ) == MPI_INFO_NULL ) {
        MPI_Info_create( &( fd->info ) );

        ADIOI_Info_set( fd->info, "striping_unit", "0" );
        ADIOI_Info_set( fd->info, "striping_factor", "0" );

        /* set users infos */
        if( users_info != MPI_INFO_NULL ) {
            /* striping information */
            ADIOI_Info_get( users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag );
            if( flag )
                chunksize = atoi( value );

            ADIOI_Info_get( users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag );
            if( flag )
                numtargets = atoi( value );

            /* check stripe info consistency: every rank compares its own
             * values with the ones broadcast by rank 0 */
            if( myrank == 0 ) {
                stripe_val[0] = numtargets;
                stripe_val[1] = chunksize;
            }
            MPI_Bcast( stripe_val, 2, MPI_INT, 0, fd->comm );

            if( stripe_val[0] != numtargets || stripe_val[1] != chunksize ) {
                FPRINTF( stderr, "ADIOI_BEEGFS_SetInfo: All keys"
                                 "-striping_factor:striping_unit "
                                 "need to be identical across all processes\n" );
                MPI_Abort( MPI_COMM_WORLD, 1 );
            }

            /* if user has specified striping info, process 0 tries to set it */
            if( myrank == 0 && ( fd->access_mode & ADIO_CREATE ) && numtargets && chunksize ) {
                /* open the parent dir to get/set striping info */
                pathname = ADIOI_Strdup( fd->filename );
                dname = strrchr( pathname, '/' );
                if( dname != NULL ) {
                    /* cut the name at the last '/'; when that '/' is the
                     * leading one the parent is the root directory, so
                     * keep it instead of producing an empty path */
                    if( dname == pathname )
                        dname[1] = '\0';
                    else
                        *dname = '\0';
                    fd_pdir = open( pathname, O_RDONLY );
                    if( fd_pdir == -1 ) {
                        FPRINTF( stderr, "Error opening %s: %s\n", pathname, strerror( errno ) );
                    }
                }
                else {
                    /* current dir relative path */
                    fd_pdir = open( ".", O_RDONLY );
                    if( fd_pdir == -1 ) {
                        FPRINTF( stderr, "Error opening .: %s\n", strerror( errno ) );
                    }
                }
                ADIOI_Free( pathname );

                /* Without a usable parent directory descriptor there is
                 * nothing to issue the ioctl on (and nothing to close);
                 * the open failure has already been reported above. */
                if( fd_pdir != -1 ) {
                    if( fd->perm == ADIO_PERM_NULL ) {
                        /* read the current umask without changing it */
                        old_mask = umask( 022 );
                        umask( old_mask );
                        /* 0666 with the umask bits cleared; masking (not
                         * xor) keeps bits outside 0666 from leaking in */
                        perm = ~old_mask & 0666;
                    }
                    else perm = fd->perm;

                    /* the ioctl wants the file name relative to the
                     * parent directory: the part after the last '/' */
                    slash = strrchr( fd->filename, '/' );
                    if( slash != NULL )
                        slash += 1;
                    else
                        slash = fd->filename;

                    createFileArg.filename = slash;
                    createFileArg.mode = perm;
                    createFileArg.numtargets = numtargets;
                    createFileArg.chunksize = chunksize;

                    /* create the hint file */
                    err = ioctl( fd_pdir, BEEGFS_IOC_MKFILE_STRIPEHINTS, &createFileArg );
                    if( err ) {
                        /* capture errno before any further library call
                         * gets a chance to overwrite it */
                        int saved_errno = errno;

                        FPRINTF( stderr, "BEEGFS_IOC_MKFILE_STRIPEHINTS: %s. ", strerror( saved_errno ) );
                        if( saved_errno == EEXIST ) {
                            /* ignore user striping and use current file info */
                            FPRINTF( stderr, "[rank:%d] Failure to set stripe info for %s!\n", myrank, fd->filename );
                        }
                    }
                    /* close the parent dir file descriptor */
                    close( fd_pdir );
                }
            } /* End of striping parameters validation */
        }

        MPI_Barrier( fd->comm );
    }

    /* set rest of the MPI hints (including E10 hints) */
    ADIOI_GEN_SetInfo( fd, users_info, error_code );

    ADIOI_Free( value );
}
예제 #4
0
/*@
  MPI_Register_datarep - Register functions for user-defined data
                         representations

Input Parameters:
+ datarep - data representation name (string)
. read_conversion_fn - function invoked to convert from file representation to
                 native representation (function)
. write_conversion_fn - function invoked to convert from native representation to
                  file representation (function)
. dtype_file_extent_fn - function invoked to get the extent of a datatype as represented
                  in the file (function)
- extra_state - pointer to extra state that is passed to each of the
                three functions

 Notes:
 This function allows the user to provide routines to convert data from
 an external representation, used within a file, and the native representation,
 used within the CPU.  There is one predefined data representation,
 'external32'.  Please consult the MPI-2 standard for details on this
 function.

.N fortran

  @*/
int MPI_Register_datarep(ROMIO_CONST char *datarep,
                         MPI_Datarep_conversion_function *read_conversion_fn,
                         MPI_Datarep_conversion_function *write_conversion_fn,
                         MPI_Datarep_extent_function *dtype_file_extent_fn,
                         void *extra_state)
{
    static char myname[] = "MPI_REGISTER_DATAREP";
    ADIOI_Datarep *entry;
    int error_code;

    ROMIO_THREAD_CS_ENTER();

    /* --BEGIN ERROR HANDLING-- */
    /* The name must be a non-empty string no longer than
       MPI_MAX_DATAREP_STRING (strlen is used because strnlen is not
       portable). */
    if (!datarep || datarep[0] == '\0' ||
        strlen(datarep) > MPI_MAX_DATAREP_STRING) {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_ARG,
                                          "**datarepname", 0);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    /* An initialization failure is returned as-is, without going through
       the file error handler. */
    MPIR_MPIOInit(&error_code);
    if (error_code != MPI_SUCCESS)
        goto fn_exit;

    /* --BEGIN ERROR HANDLING-- */
    /* Walk the list of registered representations and refuse a name that
       is already present. */
    entry = ADIOI_Datarep_head;
    while (entry) {
        if (strncmp(datarep, entry->name, MPI_MAX_DATAREP_STRING) == 0) {
            error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                              MPIR_ERR_RECOVERABLE,
                                              myname, __LINE__,
                                              MPI_ERR_DUP_DATAREP,
                                              "**datarepused",
                                              "**datarepused %s",
                                              datarep);
            goto fn_fail;
        }
        entry = entry->next;
    }

    /* Read and write conversion functions are currently not supported,
       so both pointers have to be NULL. */
    if (read_conversion_fn != NULL || write_conversion_fn != NULL) {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__,
                                          MPI_ERR_CONVERSION,
                                          "**drconvnotsupported", 0);
        goto fn_fail;
    }

    /* The extent function is mandatory. */
    if (!dtype_file_extent_fn) {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_ARG,
                                          "**datarepextent", 0);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    /* Record the new representation and push it on the front of the
       global list. */
    entry = ADIOI_Malloc(sizeof(ADIOI_Datarep));
    entry->name = ADIOI_Strdup(datarep);
    entry->state = extra_state;
    entry->read_conv_fn = read_conversion_fn;
    entry->write_conv_fn = write_conversion_fn;
    entry->extent_fn = dtype_file_extent_fn;
    entry->next = ADIOI_Datarep_head;
    ADIOI_Datarep_head = entry;

    error_code = MPI_SUCCESS;
    goto fn_exit;

fn_fail:
    /* Every argument error is reported through the error handler
       associated with MPI_FILE_NULL. */
    error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code);

fn_exit:
    ROMIO_THREAD_CS_EXIT();

    return error_code;
}
예제 #5
0
/*
 * ADIO_Open - create an ADIO file object, process hints and open the file.
 *
 * orig_comm    passed through to build_cb_config_list
 * comm         communicator stored in the file object and used for the
 *              collective steps in this function
 * filename     name of the file; a copy is stored in the file object
 * file_system  file system identifier stored in the file object
 * ops          table of file-system specific functions
 * access_mode  access mode handed to ADIOI_OpenColl
 * disp, etype, filetype
 *              initial file view
 * info         user hints, merged with the system-wide hints
 * perm         permissions stored in the file object
 * error_code   set to MPI_SUCCESS or to an MPI error code
 *
 * Returns the new file object.  If any process of comm ends up with an
 * error, every process closes the file if it had opened it, releases
 * what it allocated and returns ADIO_FILE_NULL with *error_code set.
 */
MPI_File ADIO_Open(MPI_Comm orig_comm,
		   MPI_Comm comm, const char *filename, int file_system,
		   ADIOI_Fns *ops,
		   int access_mode, ADIO_Offset disp, MPI_Datatype etype, 
		   MPI_Datatype filetype,
		   MPI_Info info, int perm, int *error_code)
{
    MPI_File mpi_fh;
    ADIO_File fd;
    int err, rank, procs;
    static char myname[] = "ADIO_OPEN";
    int  max_error_code;
    MPI_Info dupinfo;
    int syshints_processed, can_skip;
    char *p;

    *error_code = MPI_SUCCESS;

    /* obtain MPI_File handle */
    mpi_fh = MPIO_File_create(sizeof(struct ADIOI_FileD));
    if (mpi_fh == MPI_FILE_NULL) {
        /* fd stays null; the cleanup at fn_exit must not touch it */
        fd = MPI_FILE_NULL;
        *error_code = MPIO_Err_create_code(*error_code,
                                           MPIR_ERR_RECOVERABLE,
                                           myname,
                                           __LINE__,
                                           MPI_ERR_OTHER,
                                           "**nomem2",0);
        goto fn_exit;
    }
    fd = MPIO_File_resolve(mpi_fh);

    /* Everything the cleanup code at fn_exit inspects gets a known value
     * right away, so an early exit never reads an unassigned field. */
    fd->hints = NULL;
    fd->io_buf = NULL;
    fd->info = MPI_INFO_NULL;

    fd->cookie = ADIOI_FILE_COOKIE;
    fd->fp_ind = disp;
    fd->fp_sys_posn = 0;
    fd->comm = comm;       /* dup'ed in MPI_File_open */
    fd->filename = ADIOI_Strdup(filename);
    fd->file_system = file_system;
    fd->fs_ptr = NULL;

    fd->fns = ops;

    fd->disp = disp;
    fd->split_coll_count = 0;
    fd->shared_fp_fd = ADIO_FILE_NULL;
    fd->atomicity = 0;
    fd->etype = etype;          /* MPI_BYTE by default */
    fd->filetype = filetype;    /* MPI_BYTE by default */
    fd->etype_size = 1;  /* default etype is MPI_BYTE */

    fd->file_realm_st_offs = NULL;
    fd->file_realm_types = NULL;

    fd->perm = perm;

    fd->async_count = 0;

    fd->fortran_handle = -1;

    fd->err_handler = ADIOI_DFLT_ERR_HANDLER;

    fd->io_buf_window = MPI_WIN_NULL;
    fd->io_buf_put_amounts_window = MPI_WIN_NULL;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &procs);

    /* create and initialize info object */
    fd->hints = (ADIOI_Hints *)ADIOI_Calloc(1, sizeof(struct ADIOI_Hints_struct));
    if (fd->hints == NULL) {
        *error_code = MPIO_Err_create_code(*error_code,
                                           MPIR_ERR_RECOVERABLE,
                                           myname,
                                           __LINE__,
                                           MPI_ERR_OTHER,
                                           "**nomem2",0);
        goto fn_exit;
    }
    fd->hints->cb_config_list = NULL;
    fd->hints->ranklist = NULL;
    fd->hints->initialized = 0;

    /* move system-wide hint processing *back* into open, but this time the
     * hintfile reader will do a scalable read-and-broadcast.  The global
     * ADIOI_syshints will get initialized at first open.  subsequent open
     * calls will just use result from first open.
     *
     * We have two goals here:
     * 1: avoid processing the hintfile multiple times
     * 2: have all processes participate in hintfile processing (so we can read-and-broadcast)
     *
     * a code might do an "initialize from 0", so we can only skip hint
     * processing once everyone has participated in hint processing */
    if (ADIOI_syshints == MPI_INFO_NULL)
        syshints_processed = 0;
    else
        syshints_processed = 1;

    MPI_Allreduce(&syshints_processed, &can_skip, 1, MPI_INT, MPI_MIN, fd->comm);
    if (!can_skip) {
        if (ADIOI_syshints == MPI_INFO_NULL)
            MPI_Info_create(&ADIOI_syshints);
        ADIOI_process_system_hints(fd, ADIOI_syshints);
    }

    ADIOI_incorporate_system_hints(info, ADIOI_syshints, &dupinfo);
    ADIO_SetInfo(fd, dupinfo, &err);
    if (dupinfo != MPI_INFO_NULL) {
        *error_code = MPI_Info_free(&dupinfo);
        if (*error_code != MPI_SUCCESS)
            goto fn_exit;
    }
    ADIOI_Info_set(fd->info, "romio_filesystem_type", fd->fns->fsname);

    /* Instead of repeatedly allocating this buffer in collective read/write,
     * allocating up-front might make memory management on small platforms
     * (e.g. Blue Gene) more efficient */
    fd->io_buf = ADIOI_Malloc(fd->hints->cb_buffer_size);

    /* deferred open:
     * we can only do this optimization if 'fd->hints->deferred_open' is set
     * (which means the user hinted 'no_indep_rw' and collective buffering).
     * Furthermore, we only do this if our collective read/write routines use
     * our generic function, and not an fs-specific routine (we can defer opens
     * only if we use our aggregation code). */
    if (fd->hints->deferred_open &&
        !(uses_generic_read(fd) && uses_generic_write(fd))) {
        fd->hints->deferred_open = 0;
    }
    if (ADIO_Feature(fd, ADIO_SCALABLE_OPEN)) {
        /* disable deferred open on these fs so that scalable broadcast
         * will always use the proper communicator */
        fd->hints->deferred_open = 0;
    }

    /* on BlueGene, the cb_config_list is built when hints are processed. No
     * one else does that right now */
    if (fd->hints->ranklist == NULL) {
        build_cb_config_list(fd, orig_comm, comm, rank, procs, error_code);
        if (*error_code != MPI_SUCCESS)
            goto fn_exit;
    }
    fd->is_open = 0;
    fd->my_cb_nodes_index = -2;
    fd->is_agg = is_aggregator(rank, fd);
    /* deferred open used to split the communicator to create an "aggregator
     * communicator", but we only used it as a way to indicate that deferred
     * open happened.  fd->is_open and fd->is_agg are sufficient */

    /* actual opens start here */
    /* generic open: one process opens to create the file, all others open */
    /* nfs open: everybody opens or else you'll end up with "file not found"
     * due to stupid nfs consistency semantics */
    /* scalable open: one process opens and broadcasts results to everyone */

    ADIOI_OpenColl(fd, rank, access_mode, error_code);

    /* for debugging, it can be helpful to see the hints selected. Some file
     * systems set up the hints in the open call (e.g. lustre) */
    p = getenv("ROMIO_PRINT_HINTS");
    if (rank == 0 && p != NULL ) {
        ADIOI_Info_print_keyvals(fd->info);
    }

 fn_exit:
    /* Agree on the outcome: one failing process makes the open fail
     * everywhere. */
    MPI_Allreduce(error_code, &max_error_code, 1, MPI_INT, MPI_MAX, comm);
    if (max_error_code != MPI_SUCCESS) {

        /* If the file was successfully opened, close it.  A local success
         * means the whole setup above ran, so fd and fd->hints are valid. */
        if (*error_code == MPI_SUCCESS) {

            /* in the deferred open case, only those who have actually
               opened the file should close it */
            if (fd->hints->deferred_open)  {
                if (fd->is_agg) {
                    (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
                }
            }
            else {
                (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
            }
        }

        /* Release only what was actually allocated: on the early-exit
         * paths fd itself, fd->hints or individual buffers may still be
         * null. */
        if (fd != ADIO_FILE_NULL) {
            if (fd->filename != NULL)
                ADIOI_Free(fd->filename);
            if (fd->hints != NULL) {
                if (fd->hints->ranklist != NULL)
                    ADIOI_Free(fd->hints->ranklist);
                if (fd->hints->cb_config_list != NULL)
                    ADIOI_Free(fd->hints->cb_config_list);
                ADIOI_Free(fd->hints);
            }
            if (fd->info != MPI_INFO_NULL) MPI_Info_free(&(fd->info));
            if (fd->io_buf != NULL)
                ADIOI_Free(fd->io_buf);
            ADIOI_Free(fd);
        }
        fd = ADIO_FILE_NULL;
        if (*error_code == MPI_SUCCESS)
        {
            /* this process was fine, so report that a remote one failed */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO,
                                               "**oremote_fail", 0);
        }
    }

    return fd;
}
예제 #6
0
/*
 * ADIOI_PANFS_Open - open (and, with ADIO_CREATE, first create) a file.
 *
 * fd          file object; reads fd->filename, fd->perm, fd->access_mode,
 *             fd->info and fd->comm, and sets fd->fd_sys, fd->fd_direct
 *             and (for ADIO_APPEND) fd->fp_ind / fd->fp_sys_posn
 * error_code  MPI_SUCCESS if the final open() succeeded, otherwise an MPI
 *             error code derived from errno
 *
 * Steps, in order:
 *  1. With ADIO_CREATE, read the "panfs_layout_*" hints from fd->info,
 *     validate them, and create the file either through the
 *     PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl on the parent directory
 *     (RAID0, RAID1_5_PARITY_STRIPE, RAID10) or through open().  Any
 *     problem in this step prints a message and calls MPI_Abort.
 *  2. Open the file with flags built from fd->access_mode and the
 *     "panfs_concurrent_write" hint.
 *  3. If the open succeeded, query the layout and store it in fd->info.
 */
void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
{
    char* value;
    int perm, old_mask, amode, flag;
    static char myname[] = "ADIOI_PANFS_OPEN";

    /* No explicit permissions: use 0666 with the umask bits cleared.  The
     * umask is read by setting it and immediately restoring it. */
    if (fd->perm == ADIO_PERM_NULL) {
        old_mask = umask(022);
        umask(old_mask);
        perm = ~old_mask & 0666;
    }
    else perm = fd->perm;

    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
    {
        /* layout settings; each keeps this initial value unless the
         * matching hint is present in fd->info */
        pan_fs_client_layout_agg_type_t layout_type = PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT;
        unsigned long int layout_stripe_unit = 0;
        unsigned long int layout_parity_stripe_width = 0;
        unsigned long int layout_parity_stripe_depth = 0; 
        unsigned long int layout_total_num_comps = 0;
        pan_fs_client_layout_visit_t layout_visit_policy  = PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN;
        int myrank;

        /* NOTE(review): myrank is obtained here but not used anywhere
         * else in this function. */
        MPI_Comm_rank(fd->comm, &myrank);

        *error_code = MPI_SUCCESS;
        /* Read the layout hints.  Values are parsed as base-10 numbers;
         * strtoul's end pointer is not checked. */
        value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
        ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, 
                 value, &flag);
        if (flag) {
            layout_type = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, 
                 value, &flag);
        if (flag) {
            layout_stripe_unit = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, 
                 value, &flag);
        if (flag) {
            layout_total_num_comps = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, 
                 value, &flag);
        if (flag) {
            layout_parity_stripe_width = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, 
                 value, &flag);
        if (flag) {
            layout_parity_stripe_depth = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, 
                 value, &flag);
        if (flag) {
            layout_visit_policy = strtoul(value,NULL,10);
        }
        ADIOI_Free(value);

        amode = amode | O_CREAT;
        /* Check for valid set of hints */
        if ((layout_type < PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT) ||
           (layout_type > PAN_FS_CLIENT_LAYOUT_TYPE__RAID10))
        {
            FPRINTF(stderr, "%s: panfs_layout_type is not a valid value: %u.\n", myname, layout_type);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        /* RAID0 needs a stripe unit and a component count */
        if ((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) &&
           ((layout_stripe_unit == 0) || (layout_total_num_comps == 0)))
        {
            if(layout_stripe_unit == 0)
            {
                FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID0 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
            }
            if(layout_total_num_comps == 0)
            {
                FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID0 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
            }
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        /* the parity stripe layout needs all four numeric hints and a
         * visit policy within the accepted range */
        if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
        {
            if ((layout_stripe_unit == 0) ||
               (layout_parity_stripe_width == 0) ||
               (layout_parity_stripe_depth == 0) ||
               (layout_total_num_comps == 0))
            {
                if(layout_stripe_unit == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if(layout_total_num_comps == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if(layout_parity_stripe_width == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_parity_stripe_width hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if(layout_parity_stripe_depth == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_parity_stripe_depth hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                MPI_Abort(MPI_COMM_WORLD, 1);
           }
           if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
              (layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET))
           {
                FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
                MPI_Abort(MPI_COMM_WORLD, 1);
           }
        }
        /* RAID10 needs a stripe unit, a component count and a visit
         * policy within the accepted range */
        if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)
        {
            if ((layout_stripe_unit == 0) || (layout_total_num_comps == 0))
            {
                if(layout_stripe_unit == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if(layout_total_num_comps == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
              (layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET))
            {
                FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
        /* Create the file via ioctl() or open(). ADIOI_PANFS_Open's caller 
         * already optimizes performance by only calling this function with
         * ADIO_CREATE on rank 0.  Therefore, we don't need to worry about 
         * implementing that optimization here. */
        if((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) || (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) 
                || (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
            pan_fs_client_layout_create_args_t file_create_args;    
            int fd_dir;
            char* slash;
            struct stat stat_buf;
            int err;
            char *path;

            /* Check that the file does not exist before
             * trying to create it.  The ioctl itself should
             * be able to handle this condition.  Currently,
             * the ioctl will return successfully if the file
             * has been previously created.  Filed bug 33862
             * to track the problem.
             */
            err = stat(fd->filename,&stat_buf);
            if((err == -1) && (errno != ENOENT))
            {
                FPRINTF(stderr,"%s: Unexpected I/O Error calling stat() on PanFS file: %s.\n", myname, strerror(errno));
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            else if (err == 0)
            {
                FPRINTF(stderr,"%s: Cannot create PanFS file with ioctl when file already exists.\n", myname);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            else
            {
                /* (err == -1) && (errno == ENOENT) */
                /* File does not exist */
                /* Turn a copy of the file name into its parent directory:
                 * "." when there is no '/', "/" when the only '/' is the
                 * leading one, otherwise everything before the last '/'. */
                path = ADIOI_Strdup(fd->filename);
                slash = strrchr(path, '/');
                if (!slash)
                    ADIOI_Strncpy(path, ".", 2);
                else {
                    if (slash == path) 
                        *(path + 1) = '\0';
                    else *slash = '\0';
                }

                /* create PanFS object */
                bzero(&file_create_args,sizeof(pan_fs_client_layout_create_args_t)); 
                /* open directory */
                fd_dir = open(path, O_RDONLY);
                if (fd_dir < 0) {
                    FPRINTF(stderr, "%s: I/O Error opening parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                else
                {
                    /* name of the file relative to its parent directory:
                     * the part after the last '/', or the whole name */
                    char *file_name_ptr = fd->filename;
                    slash = strrchr(fd->filename, '/');
                    if (slash)
                    {
                        file_name_ptr = slash + 1;
                    }
                    /* create file in the directory */
                    file_create_args.mode = perm;
                    file_create_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
                    file_create_args.flags = PAN_FS_CLIENT_LAYOUT_CREATE_F__NONE;
                    /* NOTE(review): the copy bound is derived from the
                     * length of the full source path, not from the size
                     * of file_create_args.filename -- confirm that the
                     * destination is always large enough. */
                    ADIOI_Strncpy(file_create_args.filename, file_name_ptr, strlen(fd->filename)+1); 
                    file_create_args.layout.agg_type = layout_type;
                    file_create_args.layout.layout_is_valid = 1;
                    /* fill in the union member that matches the layout */
                    if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
                    {
                        file_create_args.layout.u.raid1_5_parity_stripe.total_num_comps = layout_total_num_comps;
                        file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_width   = layout_parity_stripe_width;
                        file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth   = layout_parity_stripe_depth;
                        file_create_args.layout.u.raid1_5_parity_stripe.stripe_unit     = layout_stripe_unit;
                        file_create_args.layout.u.raid1_5_parity_stripe.layout_visit_policy   = layout_visit_policy;
                    }
                    else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0)
                    {
                        file_create_args.layout.u.raid0.total_num_comps = layout_total_num_comps;
                        file_create_args.layout.u.raid0.stripe_unit     = layout_stripe_unit;
                    }
                    else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)
                    {
                        file_create_args.layout.u.raid10.total_num_comps     = layout_total_num_comps;
                        file_create_args.layout.u.raid10.stripe_unit         = layout_stripe_unit;
                        file_create_args.layout.u.raid10.layout_visit_policy = layout_visit_policy;
                    }
                    err = ioctl(fd_dir, PAN_FS_CLIENT_LAYOUT_CREATE_FILE, &file_create_args);
                    if (err < 0) {
                        FPRINTF(stderr, "%s: I/O Error doing ioctl on parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
                        MPI_Abort(MPI_COMM_WORLD, 1);
                    }
                    /* the result of close() is stored but not examined */
                    err = close(fd_dir);
                }
                ADIOI_Free(path);
            }
        }
        else
        {
            /* any other layout type: create the file with a plain open();
             * at this point amode holds only O_CREAT */
            int create_fd = open(fd->filename,amode,perm);
            if(create_fd != -1)
            {
                close(create_fd);
            }
            else
            {
                FPRINTF(stderr, "%s: I/O Error creating PanFS file using open: %s.\n", myname, strerror(errno));
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
    }
    /* translate the ADIO access mode bits into open() flags; O_CREAT is
     * still set in amode if the create branch above ran */
    if (fd->access_mode & ADIO_RDONLY)
	amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
	amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
	amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
	amode = amode | O_EXCL;

	/* the "panfs_concurrent_write" hint with value 1 adds
	 * O_CONCURRENT_WRITE to the open flags */
	value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
	ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, 
		     value, &flag);
	if (flag) {
        unsigned long int concurrent_write = strtoul(value,NULL,10);
        if(concurrent_write == 1)
        {
            amode = amode | O_CONCURRENT_WRITE;
        }
	}
	ADIOI_Free(value);

    fd->fd_sys = open(fd->filename, amode, perm);
    fd->fd_direct = -1;

    /* On success, ask the file system for the file's layout and publish
     * it through the "panfs_layout_*" keys of fd->info. */
    if (fd->fd_sys != -1)
    {
        int rc;
        char temp_buffer[TEMP_BUFFER_SIZE];
        pan_fs_client_layout_query_args_t file_query_args;
        bzero(&file_query_args,sizeof(pan_fs_client_layout_query_args_t));
        file_query_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
        rc = ioctl(fd->fd_sys, PAN_FS_CLIENT_LAYOUT_QUERY_FILE, &file_query_args);
        if (rc < 0)
        {
            /* Error - set layout type to unknown */
	        ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
        }
        else 
        {
            /* the layout type is stored in numeric form; the per-layout
             * details are added only when the layout is flagged valid */
            ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.agg_type);
            ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
            if (file_query_args.layout.layout_is_valid == 1)
            {
                switch (file_query_args.layout.agg_type)
                {
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID0:
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        break;
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE:
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_width);
                        ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth);
                        ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy);
                        ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
                        break;
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10:
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.layout_visit_policy);
                        ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
                        break;
		  default:
			  break;
                }
            }
        }
    }

    /* append mode: start both file pointers at the current end of file */
    if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
	fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

    /* Map the errno left by the failed open() to an MPI error code.  The
     * layout query and lseek above are skipped when the open failed, so
     * they cannot have changed errno in between. */
    if (fd->fd_sys == -1) {
	if (errno == ENAMETOOLONG)
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_BAD_FILE,
					       "**filenamelong",
					       "**filenamelong %s %d",
					       fd->filename,
					       strlen(fd->filename));
	else if (errno == ENOENT)
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_NO_SUCH_FILE,
					       "**filenoexist",
					       "**filenoexist %s",
					       fd->filename);
	else if (errno == ENOTDIR || errno == ELOOP)
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE,
					       myname, __LINE__,
					       MPI_ERR_BAD_FILE,
					       "**filenamedir",
					       "**filenamedir %s",
					       fd->filename);
	else if (errno == EACCES) {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_ACCESS,
					       "**fileaccess",
					       "**fileaccess %s", 
					       fd->filename );
	}
	else if (errno == EROFS) {
	    /* Read only file or file system and write access requested */
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_READ_ONLY,
					       "**ioneedrd", 0 );
	}
	else {
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO, "**io",
					       "**io %s", strerror(errno));
	}
    }
    else *error_code = MPI_SUCCESS;
}