示例#1
0
/* ADIOI_cb_bcast_rank_map() - distribute the aggregator rank list
 *
 * Parameters:
 * fd - ADIO_File being updated.  Rank 0 of fd->comm must already hold the
 *      current cb_nodes count and ranklist; the other ranks receive both.
 *
 * Returns an error code created with MPIO_Err_create_code() when a non-root
 * rank cannot allocate its copy of the rank list, and 0 otherwise.
 */
int ADIOI_cb_bcast_rank_map(ADIO_File fd)
{
    static char myname[] = "ADIOI_cb_bcast_rank_map";
    char *value;
    int my_rank;

    /* step 1: every rank learns how many aggregators there are */
    MPI_Bcast(&(fd->hints->cb_nodes), 1, MPI_INT, 0, fd->comm);

    /* step 2: ship the list itself, but only if there is one */
    if (fd->hints->cb_nodes > 0) {
        MPI_Comm_rank(fd->comm, &my_rank);

        if (my_rank != 0) {
            /* receivers need storage for the incoming list */
            fd->hints->ranklist =
                ADIOI_Malloc(fd->hints->cb_nodes * sizeof(int));
            if (fd->hints->ranklist == NULL) {
                return MPIO_Err_create_code(MPI_SUCCESS,
                                            MPIR_ERR_RECOVERABLE,
                                            myname,
                                            __LINE__,
                                            MPI_ERR_OTHER,
                                            "**nomem2", 0);
            }
        }

        MPI_Bcast(fd->hints->ranklist, fd->hints->cb_nodes, MPI_INT, 0,
                  fd->comm);
    }

    /* TEMPORARY -- REMOVE WHEN NO LONGER UPDATING INFO FOR
     * FS-INDEP.  Mirror the (possibly new) cb_nodes into the info object. */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
    ADIOI_Snprintf(value, MPI_MAX_INFO_VAL + 1, "%d", fd->hints->cb_nodes);
    ADIOI_Info_set(fd->info, "cb_nodes", value);
    ADIOI_Free(value);

    return 0;
}
示例#2
0
/* Merge system-wide hints into a caller's hints.
 *
 * Parameters:
 * info     - hints supplied by the user; may be MPI_INFO_NULL.
 * sysinfo  - system hints; may be MPI_INFO_NULL.
 * new_info - output.  Receives a duplicate of 'info' (or a fresh info object
 *            when 'info' is MPI_INFO_NULL) plus every key of 'sysinfo' that
 *            'info' does not already set.  Set to MPI_INFO_NULL when there is
 *            nothing to process.  Caller must free 'new_info' later.
 */
void ADIOI_incorporate_system_hints(MPI_Info info,
	MPI_Info sysinfo,
	MPI_Info *new_info)
{
    int i, nkeys_sysinfo, flag;
    char  val[MPI_MAX_INFO_VAL], key[MPI_MAX_INFO_KEY];

    if (sysinfo == MPI_INFO_NULL)
	nkeys_sysinfo = 0;
    else
	MPI_Info_get_nkeys(sysinfo, &nkeys_sysinfo);

    /* short-circuit: return immediately if no hints to process */
    if (info == MPI_INFO_NULL && nkeys_sysinfo == 0)  {
	*new_info = MPI_INFO_NULL;
	return;
    }

    if (info == MPI_INFO_NULL)
	MPI_Info_create(new_info);
    else
	MPI_Info_dup(info, new_info);

    for (i=0; i<nkeys_sysinfo; i++) {
	MPI_Info_get_nthkey(sysinfo, i, key);

	/* Reset on every pass: when 'info' is MPI_INFO_NULL no lookup is
	 * done, and the flag must then read as "not set by the user" rather
	 * than holding an indeterminate or stale value. */
	flag = 0;
	/* don't care about the value, just want to know if hint set already*/
	if (info != MPI_INFO_NULL) ADIOI_Info_get(info, key, 1, val, &flag);
	if (flag == 1) continue;  /* skip any hints already set by user */

	ADIOI_Info_get(sysinfo, key, MPI_MAX_INFO_VAL-1, val, &flag);
	/* only install the hint if the lookup actually filled in 'val' */
	if (flag) ADIOI_Info_set(*new_info, key, val);
    }

    return;
}
/* ADIOI_cb_bcast_rank_map() - broadcast the rank array
 *
 * Parameters:
 * fd - ADIO_File for which update is occurring.  cb_nodes and ranklist
 * parameters must be up-to-date on rank 0 of the fd->comm.
 *
 * Returns an error code created with MPIO_Err_create_code() if a non-root
 * rank fails to allocate its rank list, and 0 otherwise.
 */
int ADIOI_cb_bcast_rank_map(ADIO_File fd)
{
    int my_rank;
    char *value;
    int error_code = MPI_SUCCESS;
    static char myname[] = "ADIOI_cb_bcast_rank_map";

    MPI_Bcast(&(fd->hints->cb_nodes), 1, MPI_INT, 0, fd->comm);
    if (fd->hints->cb_nodes > 0) {
	MPI_Comm_rank(fd->comm, &my_rank);
	if (my_rank != 0) {
	    fd->hints->ranklist = ADIOI_Malloc(fd->hints->cb_nodes*sizeof(int));
	    if (fd->hints->ranklist == NULL) {
		/* Do not hand a NULL receive buffer to MPI_Bcast; report
		 * the allocation failure to the caller instead.
		 * NOTE(review): this rank then skips the broadcast below
		 * that the other ranks enter -- confirm callers treat the
		 * error as fatal for the whole open. */
		error_code = MPIO_Err_create_code(error_code,
						  MPIR_ERR_RECOVERABLE,
						  myname,
						  __LINE__,
						  MPI_ERR_OTHER,
						  "**nomem2", 0);
		return error_code;
	    }
	}
	MPI_Bcast(fd->hints->ranklist, fd->hints->cb_nodes, MPI_INT, 0,
		  fd->comm);
    }
    /* TEMPORARY -- REMOVE WHEN NO LONGER UPDATING INFO FOR
     * FS-INDEP. */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", fd->hints->cb_nodes);
    ADIOI_Info_set(fd->info, "cb_nodes", value);
    ADIOI_Free(value);

    return 0;
}
示例#4
0
/* Build the collective-buffering aggregator list for an open.
 *
 * Rank 0 parses fd->hints->cb_config_list against the gathered processor
 * name array, stores the resulting ranks in fd->hints->ranklist and updates
 * fd->hints->cb_nodes; ADIOI_cb_bcast_rank_map() then shares the result.
 *
 * Parameters:
 * fd         - file whose hints are read and updated
 * orig_comm  - communicator passed to ADIOI_cb_gather_name_array
 * comm       - dup'd communicator passed to ADIOI_cb_gather_name_array
 * rank       - calling process's rank; only rank 0 parses the list
 * procs      - number of processes; sizes the temporary rank list
 * error_code - set on allocation failure, on a failed broadcast, or when no
 *              aggregator matched ("**ioagnomatch")
 *
 * Always returns 0; failures are reported through *error_code.
 */
static int build_cb_config_list(ADIO_File fd,
	MPI_Comm orig_comm, MPI_Comm comm,
	int rank, int procs, int *error_code)
{
    ADIO_cb_name_array array;
    int *tmp_ranklist;
    int rank_ct;
    int bcast_err;
    char *value;
    static char myname[] = "ADIO_OPEN cb_config_list";

    /* gather the processor name array if we don't already have it */
    /* this has to be done early in ADIO_Open so that we can cache the name
     * array in both the dup'd communicator (in case we want it later) and the
     * original communicator */
    ADIOI_cb_gather_name_array(orig_comm, comm, &array);

    /* parse the cb_config_list and create a rank map on rank 0 */
    if (rank == 0) {
	tmp_ranklist = (int *) ADIOI_Malloc(sizeof(int) * procs);
	if (tmp_ranklist == NULL) {
	    *error_code = MPIO_Err_create_code(*error_code,
					       MPIR_ERR_RECOVERABLE,
					       myname,
					       __LINE__,
					       MPI_ERR_OTHER,
					       "**nomem2",0);
	    return 0;
	}

	rank_ct = ADIOI_cb_config_list_parse(fd->hints->cb_config_list,
					     array, tmp_ranklist,
					     fd->hints->cb_nodes);

	/* store the ranklist using the minimum amount of memory */
	if (rank_ct > 0) {
	    fd->hints->ranklist = (int *) ADIOI_Malloc(sizeof(int) * rank_ct);
	    if (fd->hints->ranklist == NULL) {
		/* never memcpy into a failed allocation */
		ADIOI_Free(tmp_ranklist);
		*error_code = MPIO_Err_create_code(*error_code,
						   MPIR_ERR_RECOVERABLE,
						   myname,
						   __LINE__,
						   MPI_ERR_OTHER,
						   "**nomem2",0);
		return 0;
	    }
	    memcpy(fd->hints->ranklist, tmp_ranklist, sizeof(int) * rank_ct);
	}
	ADIOI_Free(tmp_ranklist);
	fd->hints->cb_nodes = rank_ct;
	/* TEMPORARY -- REMOVE WHEN NO LONGER UPDATING INFO FOR FS-INDEP. */
	value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
	MPL_snprintf(value, MPI_MAX_INFO_VAL+1, "%d", rank_ct);
	ADIOI_Info_set(fd->info, "cb_nodes", value);
	ADIOI_Free(value);
    }

    /* share cb_nodes and the rank list; surface any failure it reports
     * instead of silently continuing with an incomplete rank list */
    bcast_err = ADIOI_cb_bcast_rank_map(fd);
    if (bcast_err != MPI_SUCCESS) {
	*error_code = bcast_err;
	return 0;
    }

    if (fd->hints->cb_nodes <= 0) {
	/* no process matched the config list: report it.  (The previous
	 * assignment of ADIO_FILE_NULL to the by-value 'fd' parameter had no
	 * effect outside this function and has been dropped.) */
	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					   myname, __LINE__, MPI_ERR_IO,
					   "**ioagnomatch", 0);
    }
    return 0;
}
示例#5
0
/* parse the file-of-hints.  Format is zero or more lines of "<key> <value>\n".
 * A # in column zero is a comment and the line will be ignored.  Do our best
 * to ignore badly formed lines too.
 *
 * The caller provides an 'info' object.  Each key-value pair found by the
 * parser will get added to the info object.  any keys already set will be left
 * alone on the assumption that the caller knows best.
 *
 * Parameters:
 * fd   - open file descriptor of the hints file; read in one call
 * info - info object that receives the new key/value pairs
 *
 * Returns 0 on success (including an empty file) and -1 if the file cannot be
 * stat'ed, buffered or read.
 *
 * because MPI-IO hints are optional, we can get away with limited error
 * reporting.  */
static int file_to_info(int fd, MPI_Info info)
{
    char *buffer, *token, *key, *val;
    char *pos1=NULL, *pos2=NULL;
    int flag, ret;
    /* existence probe only: a value length of 1 still needs room for the
     * terminating NUL, hence two bytes */
    char dummy[2];
    struct stat statbuf;

    /* assumption: config files will be small (less than 1MB) */
    if (fstat(fd, &statbuf) < 0) return -1;
    /* add 1 to size to make room for NULL termination */
    buffer = (char *)ADIOI_Calloc(statbuf.st_size + 1, sizeof (char));
    if (buffer == NULL) return -1;

    ret = read(fd, buffer, statbuf.st_size);
    if (ret < 0) {
	/* don't leak the buffer on a failed read */
	ADIOI_Free(buffer);
	return -1;
    }

    /* Test the token before using it: an empty file (or one holding only
     * newlines) yields no token at all, and tokenizing a NULL line with a
     * NULL save pointer must be avoided. */
    for (token = strtok_r(buffer, "\n", &pos1);
	 token != NULL;
	 token = strtok_r(NULL, "\n", &pos1)) {
	if ( (key = strtok_r(token, " \t", &pos2)) == NULL)
	    /* malformed line: found no items */
	    continue;
	if (token[0] == '#')
	    /* ignore '#'-delimited comments */
	    continue;
	if ( (val = strtok_r(NULL, " \t", &pos2))  == NULL)
	    /* malformed line: found key without value */
	    continue;
	if (strtok_r(NULL, " \t", &pos2) != NULL)
	    /* malformed line: more than two items */
	    continue;

#ifdef SYSHINT_DEBUG
	printf("found: key=%s val=%s\n", key, val);
#endif
	/* don't actually care what the value is. only want to know if key
	 * exists: we leave it alone if so*/
	ADIOI_Info_get(info, key, 1, dummy, &flag);
	if (flag == 1) continue;
	ADIOI_Info_set(info, key, val);
    }
    ADIOI_Free(buffer);
    return 0;
}
示例#6
0
/* Compute evenly sized file realms, one per aggregator.
 *
 * The span [min_st_offset, max_end_offset] is divided by nprocs_for_coll,
 * the realm size and first offset are adjusted by align_fr() using
 * fd->hints->cb_fr_alignment, and every aggregator gets the same datatype
 * created by ADIOI_Create_fr_simpletype().
 *
 * Parameters:
 * fd                 - file whose hints (and, with persistent file realms
 *                      enabled, info object) are used
 * nprocs_for_coll    - number of file realms to produce
 * cb_pfr             - takes an extra romio_cb_pfr param to decide whether
 *                      file realms should start at byte 0 of the file
 * min_st_offset      - lowest offset accessed
 * max_end_offset     - highest offset accessed
 * file_realm_st_offs - output, nprocs_for_coll starting offsets
 * file_realm_types   - output, nprocs_for_coll datatypes (all the same one)
 */
void ADIOI_Calc_file_realms_aar (ADIO_File fd, int nprocs_for_coll, int cb_pfr,
				 ADIO_Offset min_st_offset,
				 ADIO_Offset max_end_offset,
				 ADIO_Offset *file_realm_st_offs,
				 MPI_Datatype *file_realm_types)
{
    int fr_size, aligned_fr_size, i;
    MPI_Datatype simpletype;
    ADIO_Offset aligned_start_off;
    /* large enough for any int in decimal, sign and NUL included; the old
     * 9-byte buffer overflowed once fr_size reached nine digits */
    char value[32];

    /* ceiling-style split of the accessed span across the aggregators */
    fr_size = (max_end_offset - min_st_offset + nprocs_for_coll) /
	nprocs_for_coll;
    align_fr(fr_size, min_st_offset, fd->hints->cb_fr_alignment,
	     &aligned_fr_size, &aligned_start_off);
    fr_size = aligned_fr_size;
    ADIOI_Create_fr_simpletype (fr_size, nprocs_for_coll, &simpletype);
    if (cb_pfr == ADIOI_HINT_ENABLE)
	file_realm_st_offs[0] = 0;
    else
	file_realm_st_offs[0] = aligned_start_off;
    file_realm_types[0]   = simpletype;

#ifdef DEBUG
    printf ("file_realm[0] = (%lld, %d)\n", (long long) file_realm_st_offs[0],
	    fr_size);
#endif
    /* each following realm starts one realm-size after the previous one */
    for (i=1; i < nprocs_for_coll; i++)
    {
	file_realm_st_offs[i] = file_realm_st_offs[i-1] + fr_size;
	file_realm_types[i]   = simpletype;
#ifdef DEBUG
	printf ("file_realm[%d] = (%lld, %d)\n", i,
		(long long) file_realm_st_offs[i], fr_size);
#endif
    }
    /* with persistent file realms enabled in the hints, record the realm
     * size in the info object */
    if (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) {
	snprintf (value, sizeof(value), "%d", fr_size);
	ADIOI_Info_set (fd->info, "romio_cb_fr_type", value);
    }
}
示例#7
0
/* Install hints for a GridFTP-backed file.
 *
 * When fd->info is not yet set, it is created empty (no user hints) or
 * duplicated from users_info.  Otherwise every key/value pair found in
 * users_info is copied into the existing fd->info.  The generic hint
 * processing in ADIOI_GEN_SetInfo() always runs afterwards and fills in
 * *error_code.
 */
void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    if (fd->info) {
        /* info object already exists: merge the user's pairs into it */
        if (users_info != MPI_INFO_NULL) {
            char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
            int idx, nkeys, valuelen, found;

            MPI_Info_get_nkeys(users_info, &nkeys);
            for (idx = 0; idx < nkeys; idx++) {
                MPI_Info_get_nthkey(users_info, idx, key);
                MPI_Info_get_valuelen(users_info, key, &valuelen, &found);
                if (!found)
                    continue;

                ADIOI_Info_get(users_info, key, valuelen, value, &found);
                if (found)
                    ADIOI_Info_set(fd->info, key, value);
            }
        }
    } else if (users_info == MPI_INFO_NULL) {
        /* This must be part of the open call. */
        MPI_Info_create(&(fd->info));
    } else {
        MPI_Info_dup(users_info, &(fd->info));
    }

    /* let the generic ROMIO and MPI-I/O stuff happen... */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);
}
示例#8
0
/* Generic collective open.
 *
 * Parameters:
 * fd          - file being opened; its access_mode, blksize, comm and
 *               is_open fields are updated here
 * rank        - calling process's rank, compared against
 *               fd->hints->ranklist[0]
 * access_mode - ADIO access mode flags requested by the caller
 * error_code  - output; MPI_SUCCESS or the error from the fs-specific
 *               open/close routines
 *
 * Outline:
 *  1. If ADIO_CREATE is requested, the process whose rank equals
 *     fd->hints->ranklist[0] opens (and, on success, closes) the file alone
 *     over MPI_COMM_SELF and broadcasts the outcome; CREATE and EXCL are
 *     then dropped for the open everyone performs.
 *  2. With deferred open, processes that are not aggregators record the
 *     original access mode, take part in the stats broadcast, publish the
 *     striping hints in fd->info and return without opening.
 *  3. Everyone else opens the file (WRONLY is widened to RDWR when the file
 *     system supports data-sieving writes, with a retry in the user's mode
 *     on error), then takes part in the stats broadcast.
 */
void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, 
	int access_mode, int *error_code)
{
    int orig_amode_excl, orig_amode_wronly;
    MPI_Comm tmp_comm;
    MPI_Datatype stats_type;  /* deferred open: some processes might not
				 open the file, so we'll exchange some
				 information with those non-aggregators */

    /* remember the caller's mode before CREATE/EXCL are stripped below */
    orig_amode_excl = access_mode;

    if (access_mode & ADIO_CREATE ){
       if(rank == fd->hints->ranklist[0]) {
	   /* remove delete_on_close flag if set */
	   if (access_mode & ADIO_DELETE_ON_CLOSE)
	       fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE;
	   else 
	       fd->access_mode = access_mode;
	       
	   /* open on this process alone: temporarily swap in MPI_COMM_SELF */
	   tmp_comm = fd->comm;
	   fd->comm = MPI_COMM_SELF;
	   (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
	   fd->comm = tmp_comm;
	   MPI_Bcast(error_code, 1, MPI_INT, \
		     fd->hints->ranklist[0], fd->comm);
	   /* if no error, close the file and reopen normally below */
	   if (*error_code == MPI_SUCCESS) 
	       (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);

	   fd->access_mode = access_mode; /* back to original */
       }
       /* everyone else just receives the creator's result */
       else MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);

       if (*error_code != MPI_SUCCESS) {
	   return;
       } 
       else {
           /* turn off CREAT (and EXCL if set) for real multi-processor open */
           access_mode ^= ADIO_CREATE; 
	   if (access_mode & ADIO_EXCL)
		   access_mode ^= ADIO_EXCL;
       }
    }
    fd->blksize = 1024*1024*4; /* this large default value should be good for
				 most file systems.  any ROMIO driver is free
				 to stat the file and find an optimal value */

    /* if we are doing deferred open, non-aggregators should return now */
    if (fd->hints->deferred_open ) {
        if (!(fd->is_agg)) {
	    char value[MPI_MAX_INFO_VAL+1];
            /* we might have turned off EXCL for the aggregators.
             * restore access_mode that non-aggregators get the right
             * value from get_amode */
            fd->access_mode = orig_amode_excl;
	    /* In file-system specific open, a driver might collect some
	     * information via stat().  Deferred open means not every process
	     * participates in fs-specific open, but they all participate in
	     * this open call.  Broadcast a bit of information in case
	     * lower-level file system driver (e.g. 'bluegene') collected it
	     * (not all do)*/
	    stats_type = make_stats_type(fd);
	    MPI_Bcast(MPI_BOTTOM, 1, stats_type, fd->hints->ranklist[0], fd->comm);
	    ADIOI_Assert(fd->blksize > 0);
	    /* some file systems (e.g. lustre) will inform the user via the
	     * info object about the file configuration.  deferred open,
	     * though, skips that step for non-aggregators.  we do the
	     * info-setting here */
	    sprintf(value, "%d", fd->hints->striping_unit);
	    ADIOI_Info_set(fd->info, "striping_unit", value);

	    sprintf(value, "%d", fd->hints->striping_factor);
	    ADIOI_Info_set(fd->info, "striping_factor", value);

	    sprintf(value, "%d", fd->hints->start_iodevice);
	    ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);

	    *error_code = MPI_SUCCESS;
	    MPI_Type_free(&stats_type);
	    return;
	}
    }

/* For writing with data sieving, a read-modify-write is needed. If 
   the file is opened for write_only, the read will fail. Therefore,
   if write_only, open the file as read_write, but record it as write_only
   in fd, so that get_amode returns the right answer. */

    /* observation from David Knaak: file systems that do not support data
     * sieving do not need to change the mode */

    /* keep the mode as it stands before WRONLY is widened to RDWR */
    orig_amode_wronly = access_mode;
    if ( (access_mode & ADIO_WRONLY) &&
	    ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) ) {
	access_mode = access_mode ^ ADIO_WRONLY;
	access_mode = access_mode | ADIO_RDWR;
    }
    fd->access_mode = access_mode;

    (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);

    /* if error, may be it was due to the change in amode above. 
       therefore, reopen with access mode provided by the user.*/ 
    fd->access_mode = orig_amode_wronly;  
    if (*error_code != MPI_SUCCESS) 
        (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);

    /* if we turned off EXCL earlier, then we should turn it back on */
    if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl;

    /* broadcast information to all processes in
     * communicator, not just those who participated in open */

    stats_type = make_stats_type(fd);
    MPI_Bcast(MPI_BOTTOM, 1, stats_type, fd->hints->ranklist[0], fd->comm);
    MPI_Type_free(&stats_type);
    /* file domain code will get terribly confused in a hard-to-debug way if
     * gpfs blocksize not sensible */
    ADIOI_Assert( fd->blksize > 0);

    /* for deferred open: this process has opened the file (because if we are
     * not an aggregator and we are doing deferred open, we returned earlier)*/
    fd->is_open = 1;

}
/* Look up one numeric PanFS hint and install it after a consistency check.
 *
 * If 'key' is present in users_info, its value is parsed with strtoul(),
 * rank 0's parsed value is broadcast over fd->comm and compared with the
 * local one (aborting the job on a mismatch), and the original string is
 * copied into fd->info.
 *
 * The comparison is done in unsigned long, the type strtoul() returns, so
 * values that do not fit in an int are no longer truncated before the check.
 *
 * Parameters:
 * fd         - file whose communicator and info object are used
 * users_info - info object supplied by the user (not MPI_INFO_NULL)
 * key        - hint name
 * value      - scratch buffer of at least MPI_MAX_INFO_VAL+1 bytes
 * parsed     - output; receives the parsed value when the hint is present
 *
 * Returns 1 if the hint was present (and installed), 0 otherwise.  Because it
 * broadcasts, every rank must reach this call with the hint either present
 * everywhere or absent everywhere -- same requirement as the code it replaces.
 */
static int ADIOI_PANFS_install_ulong_hint(ADIO_File fd, MPI_Info users_info,
                                          const char *key, char *value,
                                          unsigned long *parsed)
{
    int flag;
    unsigned long local_val, root_val;

    ADIOI_Info_get(users_info, key, MPI_MAX_INFO_VAL, value, &flag);
    if (!flag)
        return 0;

    local_val = strtoul(value, NULL, 10);
    root_val = local_val;
    MPI_Bcast(&root_val, 1, MPI_UNSIGNED_LONG, 0, fd->comm);
    if (root_val != local_val) {
        FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"%s\" must be the same on all processes\n", key);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    ADIOI_Info_set(fd->info, key, value);

    *parsed = local_val;
    return 1;
}

/* Process PanFS-specific hints, then the generic ones.
 *
 * When fd->info is still MPI_INFO_NULL, a new info object is created and
 * the panfs_* layout hints found in users_info are validated and copied
 * into it.  ADIOI_GEN_SetInfo() then handles the generic hints.
 *
 * Parameters:
 * fd         - file being configured
 * users_info - hints from the user; may be MPI_INFO_NULL
 * error_code - output; this function itself only ever sets MPI_SUCCESS, so
 *              the value reported is the one from ADIOI_GEN_SetInfo()
 */
void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    static char myname[] = "ADIOI_PANFS_SETINFO";
    char* value;
    unsigned long parsed = 0;
    pan_fs_client_layout_agg_type_t layout_type = PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT;
    int gen_error_code;

    *error_code = MPI_SUCCESS;

    if (fd->info == MPI_INFO_NULL) {
        /* This must be part of the open call. can set striping parameters
         * if necessary.
         */
        MPI_Info_create(&(fd->info));

        /* has user specified striping parameters
               and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

            /* the order of these calls is the order of the broadcasts and
             * must stay the same on every rank */
            ADIOI_PANFS_install_ulong_hint(fd, users_info,
                                           "panfs_concurrent_write",
                                           value, &parsed);

            /* the layout type decides which of the later hints apply */
            if (ADIOI_PANFS_install_ulong_hint(fd, users_info,
                                               "panfs_layout_type",
                                               value, &parsed)) {
                layout_type = (pan_fs_client_layout_agg_type_t) parsed;
            }

            ADIOI_PANFS_install_ulong_hint(fd, users_info,
                                           "panfs_layout_stripe_unit",
                                           value, &parsed);

            /* parity stripe settings only matter for RAID1/5 parity stripe */
            if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) {
                ADIOI_PANFS_install_ulong_hint(fd, users_info,
                                               "panfs_layout_parity_stripe_width",
                                               value, &parsed);
                ADIOI_PANFS_install_ulong_hint(fd, users_info,
                                               "panfs_layout_parity_stripe_depth",
                                               value, &parsed);
            }

            ADIOI_PANFS_install_ulong_hint(fd, users_info,
                                           "panfs_layout_total_num_comps",
                                           value, &parsed);

            /* the visit policy applies to RAID1/5 parity stripe and RAID10 */
            if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE ||
                layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10) {
                ADIOI_PANFS_install_ulong_hint(fd, users_info,
                                               "panfs_layout_visit_policy",
                                               value, &parsed);
            }

            ADIOI_Free(value);
        }
    }

    ADIOI_GEN_SetInfo(fd, users_info, &gen_error_code);
    /* If this function is successful, use the error code returned from ADIOI_GEN_SetInfo
     * otherwise use the error_code generated by this function
     */
    if(*error_code == MPI_SUCCESS)
    {
        *error_code = gen_error_code;
    }
}
示例#10
0
/* Generic hint processing shared by the file-system drivers.
 *
 * Parameters:
 * fd         - file whose fd->info and fd->hints are (re)populated
 * users_info - hints supplied by the user; may be MPI_INFO_NULL
 * error_code - output; MPI_SUCCESS unless an allocation fails (the
 *              check-and-install helpers also receive this pointer)
 *
 * Phases:
 *  1. On first use (fd->hints->initialized == 0) install default values in
 *     both fd->info and fd->hints.
 *  2. If the user supplied hints, let the ADIOI_Info_check_and_install_*
 *     helpers process each recognised key.
 *  3. Post-process: give cb_config_list a default, derive deferred_open,
 *     and disable write data sieving on file systems without the
 *     ADIO_DATA_SIEVING_WRITES feature.
 */
void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
/* if fd->info is null, create a new info object.
   Initialize fd->info to default values.
   Initialize fd->hints to default values.
   Examine the info object passed by the user. If it contains values that
   ROMIO understands, override the default. */

    MPI_Info info;
    char *value;
    int flag, nprocs = 0, len;
    int ok_to_override_cb_nodes = 0;
    static char myname[] = "ADIOI_GEN_SETINFO";


    /* if we've already set up default hints and the user has not asked us to
     * process any hints (MPI_INFO_NULL), then we can short-circuit hint
     * processing */
    /* NOTE(review): the test below looks at fd->info, whereas the comment
     * above talks about the user's hints (users_info) -- confirm which one
     * is intended. */
    if (fd->hints->initialized && fd->info == MPI_INFO_NULL) {
        *error_code = MPI_SUCCESS;
        return;
    }
    ad_get_env_vars();

    if (fd->info == MPI_INFO_NULL)
        MPI_Info_create(&(fd->info));
    info = fd->info;

    MPI_Comm_size(fd->comm, &nprocs);

    /* Note that fd->hints is allocated at file open time; thus it is
     * not necessary to allocate it, or check for allocation, here.
     */

    /* scratch buffer for formatting and reading hint values; freed on every
     * exit path below */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
    if (value == NULL) {
        *error_code = MPIO_Err_create_code(*error_code,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_OTHER, "**nomem2", 0);
        return;
    }

    /* initialize info and hints to default values if they haven't been
     * previously initialized
     */
    if (!fd->hints->initialized) {

        /* buffer size for collective I/O */
        ADIOI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT);
        fd->hints->cb_buffer_size = atoi(ADIOI_CB_BUFFER_SIZE_DFLT);

        /* default is to let romio automatically decide when to use
         * collective buffering
         */
        ADIOI_Info_set(info, "romio_cb_read", "automatic");
        fd->hints->cb_read = ADIOI_HINT_AUTO;
        ADIOI_Info_set(info, "romio_cb_write", "automatic");
        fd->hints->cb_write = ADIOI_HINT_AUTO;

        fd->hints->cb_config_list = NULL;

        /* number of processes that perform I/O in collective I/O */
        MPL_snprintf(value, MPI_MAX_INFO_VAL + 1, "%d", nprocs);
        ADIOI_Info_set(info, "cb_nodes", value);
        fd->hints->cb_nodes = nprocs;

        /* hint indicating that no indep. I/O will be performed on this file */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;

        /* hint instructing the use of persistent file realms */
        ADIOI_Info_set(info, "romio_cb_pfr", "disable");
        fd->hints->cb_pfr = ADIOI_HINT_DISABLE;

        /* hint guiding the assignment of persistent file realms */
        /* NOTE(review): the key set here is "romio_cb_fr_types" while the
         * user-hint lookup further down reads "romio_cb_fr_type" -- confirm
         * whether the differing spelling is intentional. */
        ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
        fd->hints->cb_fr_type = ADIOI_FR_AAR;

        /* hint to align file realms with a certain byte value */
        ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
        fd->hints->cb_fr_alignment = 1;

        /* hint to set a threshold percentage for a datatype's size/extent at
         * which data sieving should be done in collective I/O */
        ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
        fd->hints->cb_ds_threshold = 0;

        /* hint to switch between point-to-point or all-to-all for two-phase */
        ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
        fd->hints->cb_alltoall = ADIOI_HINT_AUTO;

        /* deferred_open derived from no_indep_rw and cb_{read,write} */
        fd->hints->deferred_open = 0;

        /* buffer size for data sieving in independent reads */
        ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_IND_RD_BUFFER_SIZE_DFLT);
        fd->hints->ind_rd_buffer_size = atoi(ADIOI_IND_RD_BUFFER_SIZE_DFLT);

        /* buffer size for data sieving in independent writes */
        ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_IND_WR_BUFFER_SIZE_DFLT);
        fd->hints->ind_wr_buffer_size = atoi(ADIOI_IND_WR_BUFFER_SIZE_DFLT);

        /* default is to let romio automatically decide when to use data
         * sieving
         */
        ADIOI_Info_set(info, "romio_ds_read", "automatic");
        fd->hints->ds_read = ADIOI_HINT_AUTO;
        ADIOI_Info_set(info, "romio_ds_write", "automatic");
        fd->hints->ds_write = ADIOI_HINT_AUTO;

        /* still to do: tune this a bit for a variety of file systems. there's
         * no good default value so just leave it unset */
        fd->hints->min_fdomain_size = 0;
        fd->hints->striping_unit = 0;

        fd->hints->initialized = 1;

        /* ADIO_Open sets up collective buffering arrays.  If we are in this
         * path from say set_file_view, then we don't want to adjust the
         * array: we'll get a segfault during collective i/o.  We only want to
         * look at the user's cb_nodes if it's open time  */
        ok_to_override_cb_nodes = 1;

    }

    /* add in user's info if supplied */
    if (users_info != MPI_INFO_NULL) {
        ADIOI_Info_check_and_install_int(fd, users_info, "cb_buffer_size",
                                         &(fd->hints->cb_buffer_size), myname, error_code);

        /* aligning file realms to certain sizes (e.g. stripe sizes)
         * may benefit I/O performance */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_fr_alignment",
                                         &(fd->hints->cb_fr_alignment), myname, error_code);

        /* for collective I/O, try to be smarter about when to do data sieving
         * using a specific threshold for the datatype size/extent
         * (percentage 0-100%) */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_ds_threshold",
                                         &(fd->hints->cb_ds_threshold), myname, error_code);

        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_alltoall",
                                             &(fd->hints->cb_alltoall), myname, error_code);

        /* new hints for enabling/disabling coll. buffering on
         * reads/writes
         */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_read",
                                             &(fd->hints->cb_read), myname, error_code);
        if (fd->hints->cb_read == ADIOI_HINT_DISABLE) {
            /* romio_cb_read overrides no_indep_rw */
            ADIOI_Info_set(info, "romio_no_indep_rw", "false");
            fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
        }

        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_write",
                                             &(fd->hints->cb_write), myname, error_code);
        if (fd->hints->cb_write == ADIOI_HINT_DISABLE) {
            /* romio_cb_write overrides no_indep_rw */
            ADIOI_Info_set(info, "romio_no_indep_rw", "false");
            fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
        }

        /* enable/disable persistent file realms for collective I/O */
        /* may want to check for no_indep_rdwr hint as well */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_pfr",
                                             &(fd->hints->cb_pfr), myname, error_code);


        /* file realm assignment types ADIOI_FR_AAR(0),
         * ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify
         * a regular fr size in bytes. probably not the best way... */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_fr_type",
                                         &(fd->hints->cb_fr_type), myname, error_code);

        /* Has the user indicated all I/O will be done collectively? */
        ADIOI_Info_check_and_install_true(fd, users_info, "romio_no_indep_rw",
                                          &(fd->hints->no_indep_rw), myname, error_code);
        if (fd->hints->no_indep_rw == 1) {
            /* if 'no_indep_rw' set, also hint that we will do
             * collective buffering: if we aren't doing independent io,
             * then we have to do collective  */
            ADIOI_Info_set(info, "romio_cb_write", "enable");
            ADIOI_Info_set(info, "romio_cb_read", "enable");
            fd->hints->cb_read = 1;
            fd->hints->cb_write = 1;
        }
        /* new hints for enabling/disabling data sieving on
         * reads/writes
         */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_ds_read",
                                             &(fd->hints->ds_read), myname, error_code);
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_ds_write",
                                             &(fd->hints->ds_write), myname, error_code);

        if (ok_to_override_cb_nodes) {
            /* MPI_File_open path sets up some data structures that don't
             * get resized in the MPI_File_set_view path, so ignore
             * cb_nodes in the set_view case */
            ADIOI_Info_check_and_install_int(fd, users_info, "cb_nodes",
                                             &(fd->hints->cb_nodes), myname, error_code);
            if ((fd->hints->cb_nodes <= 0) || (fd->hints->cb_nodes > nprocs)) {
                /* can't ask for more aggregators than mpi processes, though it
                 * might be interesting to think what such oversubscription
                 * might mean... someday */
                MPL_snprintf(value, MPI_MAX_INFO_VAL + 1, "%d", nprocs);
                ADIOI_Info_set(info, "cb_nodes", value);
                fd->hints->cb_nodes = nprocs;
            }
        }
        /* if (ok_to_override_cb_nodes) */
        ADIOI_Info_check_and_install_int(fd, users_info, "ind_wr_buffer_size",
                                         &(fd->hints->ind_wr_buffer_size), myname, error_code);
        ADIOI_Info_check_and_install_int(fd, users_info, "ind_rd_buffer_size",
                                         &(fd->hints->ind_rd_buffer_size), myname, error_code);

        if (fd->hints->cb_config_list == NULL) {
            /* only set cb_config_list if it isn't already set.  Note that
             * since we set it below, this ensures that the cb_config_list hint
             * will be set at file open time either by the user or to the
             * default */
            /* if it has been set already, we ignore it the second time.
             * otherwise we would get an error if someone used the same info
             * value with a cb_config_list value in it in a couple of calls,
             * which would be irritating. */
            ADIOI_Info_check_and_install_str(fd, users_info, "cb_config_list",
                                             &(fd->hints->cb_config_list), myname, error_code);

        }
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_min_fdomain_size",
                                         &(fd->hints->min_fdomain_size), myname, error_code);

        /* Now we use striping unit in common code so we should
         * process hints for it. */
        ADIOI_Info_check_and_install_int(fd, users_info, "striping_unit",
                                         &(fd->hints->striping_unit), myname, error_code);
    }

    /* Begin hint post-processing: some hints take precedence over or conflict
     * with others, or aren't supported by some file systems */

    /* handle cb_config_list default value here; avoids an extra
     * free/alloc and ensures it is always set
     */
    if (fd->hints->cb_config_list == NULL) {
        ADIOI_Info_set(info, "cb_config_list", ADIOI_CB_CONFIG_LIST_DFLT);
        len = (strlen(ADIOI_CB_CONFIG_LIST_DFLT) + 1) * sizeof(char);
        fd->hints->cb_config_list = ADIOI_Malloc(len);
        if (fd->hints->cb_config_list == NULL) {
            ADIOI_Free(value);
            *error_code = MPIO_Err_create_code(*error_code,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__, MPI_ERR_OTHER, "**nomem2", 0);
            return;
        }
        ADIOI_Strncpy(fd->hints->cb_config_list, ADIOI_CB_CONFIG_LIST_DFLT, len);
    }
    /* deferred_open won't be set by callers, but if the user doesn't
     * explicitly disable collective buffering (two-phase) and does hint that
     * io w/o independent io is going on, we'll set this internal hint as a
     * convenience */
    if (((fd->hints->cb_read != ADIOI_HINT_DISABLE)
         && (fd->hints->cb_write != ADIOI_HINT_DISABLE)
         && fd->hints->no_indep_rw)) {
        fd->hints->deferred_open = 1;
    } else {
        /* setting romio_no_indep_rw enable and romio_cb_{read,write}
         * disable at the same time doesn't make sense. honor
         * romio_cb_{read,write} and force the no_indep_rw hint to
         * 'disable' */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;
        fd->hints->deferred_open = 0;
    }

    if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
        /* disable data sieving for fs that do not
         * support file locking */
        ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            /* get rid of this value if it is set */
            ADIOI_Info_delete(info, "ind_wr_buffer_size");
        }
        /* note: leave ind_wr_buffer_size alone; used for other cases
         * as well. -- Rob Ross, 04/22/2003
         */
        ADIOI_Info_set(info, "romio_ds_write", "disable");
        fd->hints->ds_write = ADIOI_HINT_DISABLE;
    }

    ADIOI_Free(value);

    *error_code = MPI_SUCCESS;
}
/*
 * ADIOI_PVFS2_SetInfo() - install PVFS2-specific hints on a file handle.
 *
 * Parameters:
 * fd         - file whose fd->info / fd->hints are updated
 * users_info - hints supplied by the user, or MPI_INFO_NULL
 * error_code - out: set by the "info not same" macro when a hint value
 *              differs from the one broadcast by rank 0 of fd->comm;
 *              otherwise MPI_SUCCESS
 *
 * The PVFS2 hints are only processed when fd->info is still MPI_INFO_NULL
 * (i.e. as part of the open call).  Defaults are installed first, then each
 * user-provided hint is compared against rank 0's value with MPI_Bcast.
 * The generic hints are always handed to ADIOI_GEN_SetInfo() afterwards.
 *
 * The scratch buffer is released on every exit path, including the
 * "info not same" error returns.
 */
void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    char *value;
    int flag, tmp_value;
    static char myname[] = "ADIOI_PVFS_SETINFO";

    if ((fd->info) == MPI_INFO_NULL) {
        /* part of the open call */
        MPI_Info_create(&(fd->info));
        ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", "0");
        fd->hints->fs_hints.pvfs2.debugmask = 0;

        ADIOI_Info_set(fd->info, "striping_factor", "0");
        fd->hints->striping_factor = 0;

        ADIOI_Info_set(fd->info, "striping_unit", "0");
        fd->hints->striping_unit = 0;

        /* disable the aggressive strided optimizations by default */
        ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", "disable");
        ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", "disable");
        fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE;
        fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE;

        ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", "disable");
        ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", "disable");
        fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE;
        fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE;

        ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", "disable");
        ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", "disable");
        fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE;
        fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE;

        /* any user-provided hints? */
        if (users_info != MPI_INFO_NULL) {
            /* scratch buffer shared by all lookups below; every return
             * inside this branch must free it */
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));

            /* pvfs2 debugging */
            ADIOI_Info_get(users_info, "romio_pvfs2_debugmask",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                tmp_value = fd->hints->fs_hints.pvfs2.debugmask =
                    PVFS_debug_eventlog_to_mask(value);

                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                /* --BEGIN ERROR HANDLING-- */
                if (tmp_value != fd->hints->fs_hints.pvfs2.debugmask) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "romio_pvfs2_debugmask",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
                /* --END ERROR HANDLING-- */

                ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", value);
            }

            /* the striping factor */
            ADIOI_Info_get(users_info, "striping_factor",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                tmp_value = fd->hints->striping_factor = atoi(value);

                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                /* --BEGIN ERROR HANDLING-- */
                if (tmp_value != fd->hints->striping_factor) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "striping_factor",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
                /* --END ERROR HANDLING-- */

                ADIOI_Info_set(fd->info, "striping_factor", value);
            }

            /* the striping unit */
            ADIOI_Info_get(users_info, "striping_unit",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                tmp_value = fd->hints->striping_unit = atoi(value);
                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                /* --BEGIN ERROR HANDLING-- */
                if (tmp_value != fd->hints->striping_unit) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "striping_unit",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
                /* --END ERROR HANDLING-- */

                ADIOI_Info_set(fd->info, "striping_unit", value);
            }

            /* distribution name: looked up, but the result is currently
             * not acted upon */
            ADIOI_Info_get(users_info, "romio_pvfs2_distribution_name",
                           MPI_MAX_INFO_VAL, value, &flag);

            /* POSIX read */
            ADIOI_Info_get(users_info, "romio_pvfs2_posix_read",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", value);
                    fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_ENABLE;
                } else if (!strcmp(value, "disable") ||
                           !strcmp(value, "DISABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", value);
                    fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE;
                }
                /* any other string leaves the default in place; the
                 * resulting setting is still compared with rank 0 */
                tmp_value = fd->hints->fs_hints.pvfs2.posix_read;
                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                if (tmp_value != fd->hints->fs_hints.pvfs2.posix_read) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "posix_read",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
            }

            /* POSIX write */
            ADIOI_Info_get(users_info, "romio_pvfs2_posix_write",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", value);
                    fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_ENABLE;
                } else if (!strcmp(value, "disable") ||
                           !strcmp(value, "DISABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", value);
                    fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE;
                }
                tmp_value = fd->hints->fs_hints.pvfs2.posix_write;
                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                if (tmp_value != fd->hints->fs_hints.pvfs2.posix_write) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "posix_write",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
            }

            /* Datatype read */
            ADIOI_Info_get(users_info, "romio_pvfs2_dtype_read",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", value);
                    fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_ENABLE;
                } else if (!strcmp(value, "disable") ||
                           !strcmp(value, "DISABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", value);
                    fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE;
                }
                tmp_value = fd->hints->fs_hints.pvfs2.dtype_read;
                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_read) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "dtype_read",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
            }

            /* Datatype write */
            ADIOI_Info_get(users_info, "romio_pvfs2_dtype_write",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", value);
                    fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_ENABLE;
                } else if (!strcmp(value, "disable") ||
                           !strcmp(value, "DISABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", value);
                    fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE;
                }
                tmp_value = fd->hints->fs_hints.pvfs2.dtype_write;
                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_write) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "dtype_write",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
            }

            /* Listio read */
            ADIOI_Info_get(users_info, "romio_pvfs2_listio_read",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", value);
                    fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_ENABLE;
                } else if (!strcmp(value, "disable") ||
                           !strcmp(value, "DISABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", value);
                    fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE;
                }
                tmp_value = fd->hints->fs_hints.pvfs2.listio_read;
                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                if (tmp_value != fd->hints->fs_hints.pvfs2.listio_read) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "listio_read",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
            }

            /* Listio write */
            ADIOI_Info_get(users_info, "romio_pvfs2_listio_write",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", value);
                    fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_ENABLE;
                } else if (!strcmp(value, "disable") ||
                           !strcmp(value, "DISABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", value);
                    fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE;
                }
                tmp_value = fd->hints->fs_hints.pvfs2.listio_write;
                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
                if (tmp_value != fd->hints->fs_hints.pvfs2.listio_write) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "listio_write",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
            }

            ADIOI_Free(value);
        }
    }

    /* set the values for collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    /* NOTE(review): this overwrites whatever ADIOI_GEN_SetInfo stored in
     * *error_code; kept as-is for compatibility -- confirm it is intended */
    *error_code = MPI_SUCCESS;
}
示例#12
0
/*
 * ADIOI_BEEGFS_SetInfo() - process BeeGFS striping hints for a file.
 *
 * Parameters:
 * fd         - file whose fd->info is updated
 * users_info - hints supplied by the user, or MPI_INFO_NULL
 * error_code - out: initialised to MPI_SUCCESS, then handed to
 *              ADIOI_GEN_SetInfo() for the generic hints
 *
 * When fd->info is still MPI_INFO_NULL, the user's "striping_unit" and
 * "striping_factor" are read and compared against rank 0's values; any
 * mismatch aborts MPI_COMM_WORLD.  If both are non-zero and the file is
 * opened with ADIO_CREATE, rank 0 asks BeeGFS (via an ioctl on the parent
 * directory) to create the file with those stripe hints.  Failures in that
 * step are reported on stderr and otherwise ignored.
 */
void ADIOI_BEEGFS_SetInfo( ADIO_File fd, MPI_Info users_info, int *error_code )
{
    char *value, *pathname, *dname, *slash;
    int flag, stripe_val[2], numtargets = 0, chunksize = 0;
    struct BeegfsIoctl_MkFileWithStripeHints_Arg createFileArg;
    int err, myrank, fd_pdir, perm, old_mask;

    /* set error code to success */
    *error_code = MPI_SUCCESS;

    value = ( char * )ADIOI_Malloc( ( MPI_MAX_INFO_VAL + 1 ) * sizeof( char ) );

    MPI_Comm_rank( fd->comm, &myrank );

    /* set hints */
    if( ( fd->info ) == MPI_INFO_NULL ) {
        MPI_Info_create( &( fd->info ) );

        ADIOI_Info_set( fd->info, "striping_unit", "0" );
        ADIOI_Info_set( fd->info, "striping_factor", "0" );

        /* set users infos */
        if( users_info != MPI_INFO_NULL ) {
            /* striping information */
            ADIOI_Info_get( users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag );
            if( flag )
                chunksize = atoi( value );

            ADIOI_Info_get( users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag );
            if( flag )
                numtargets = atoi( value );

            /* check stripe info consistency: rank 0 publishes its values,
             * every rank compares them with its own */
            if( myrank == 0 ) {
                stripe_val[0] = numtargets;
                stripe_val[1] = chunksize;
            }
            MPI_Bcast( stripe_val, 2, MPI_INT, 0, fd->comm );

            if( stripe_val[0] != numtargets || stripe_val[1] != chunksize ) {
                FPRINTF( stderr, "ADIOI_BEEGFS_SetInfo: All keys"
                                 "-striping_factor:striping_unit "
                                 "need to be identical across all processes\n" );
                MPI_Abort( MPI_COMM_WORLD, 1 );
            }

            /* if user has specified striping info, process 0 tries to set it */
            if( myrank == 0 && ( fd->access_mode & ADIO_CREATE ) && numtargets && chunksize ) {
                /* open the parent dir to get/set striping info */
                pathname = ADIOI_Strdup( fd->filename );
                dname = strrchr( pathname, '/' );
                if( dname != NULL ) {
                    *dname = '\0'; /* cut the path at the last '/' */
                    fd_pdir = open( pathname, O_RDONLY );
                    if( fd_pdir == -1 ) {
                        FPRINTF( stderr, "Error opening %s: %s\n", pathname, strerror( errno ) );
                    }
                }
                else {
                    /* current dir relative path */
                    fd_pdir = open( ".", O_RDONLY );
                    if( fd_pdir == -1 ) {
                        FPRINTF( stderr, "Error opening .: %s\n", strerror( errno ) );
                    }
                }
                ADIOI_Free( pathname );

                /* without a valid parent directory descriptor there is
                 * nothing to issue the ioctl on (and nothing to close) */
                if( fd_pdir != -1 ) {
                    if( fd->perm == ADIO_PERM_NULL ) {
                        /* read the current umask without changing it */
                        old_mask = umask( 022 );
                        umask( old_mask );
                        perm = old_mask ^ 0666;
                    }
                    else perm = fd->perm;

                    /* the ioctl is issued on the parent directory, so pass
                     * only the final path component as the file name */
                    slash = strrchr( fd->filename, '/' );
                    if( slash != NULL )
                        slash += 1;
                    else
                        slash = fd->filename;

                    createFileArg.filename = slash;
                    createFileArg.mode = perm;
                    createFileArg.numtargets = numtargets;
                    createFileArg.chunksize = chunksize;

                    /* create the hint file */
                    err = ioctl( fd_pdir, BEEGFS_IOC_MKFILE_STRIPEHINTS, &createFileArg );
                    if( err ) {
                        /* capture errno before FPRINTF can modify it */
                        int ioctl_errno = errno;

                        FPRINTF( stderr, "BEEGFS_IOC_MKFILE_STRIPEHINTS: %s. ", strerror( ioctl_errno ) );
                        if( ioctl_errno == EEXIST ) {
                            /* ignore user striping and use current file info */
                            FPRINTF( stderr, "[rank:%d] Failure to set stripe info for %s!\n", myrank, fd->filename );
                        }
                    }
                    /* close the parent dir file descriptor */
                    close( fd_pdir );
                }
            } /* End of striping parameters validation */
        }

        MPI_Barrier( fd->comm );
    }

    /* set rest of the MPI hints (including E10 hints) */
    ADIOI_GEN_SetInfo( fd, users_info, error_code );

    ADIOI_Free( value );
}
示例#13
0
/*
 * ADIOI_XFS_SetInfo() - process XFS direct-I/O hints for a file.
 *
 * Parameters:
 * fd         - file whose fd->info, direct_read/direct_write flags and
 *              XFS chunk-size hints are updated
 * users_info - hints supplied by the user, or MPI_INFO_NULL
 * error_code - out: always MPI_SUCCESS on return
 *
 * On the first call only, the MPIO_DIRECT_{READ,WRITE}_CHUNK_SIZE
 * environment variables are parsed into the file-scope chunk-size
 * variables.  "direct_read"/"direct_write" default to "false" and are
 * switched on when the user passes the exact string "true".
 */
void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    static char xfs_initialized = 0;
    char *hint_buf;
    const char *env;
    int found;

    if (fd->info == MPI_INFO_NULL) {
        MPI_Info_create(&(fd->info));
    }

    /* direct I/O is off unless requested below */
    ADIOI_Info_set(fd->info, "direct_read", "false");
    ADIOI_Info_set(fd->info, "direct_write", "false");
    fd->direct_write = 0;
    fd->direct_read = 0;

    /* one-time parsing of the chunk-size environment variables */
    if (!xfs_initialized) {
        xfs_initialized = 1;

        env = getenv("MPIO_DIRECT_READ_CHUNK_SIZE");
        if (env == NULL) {
            xfs_direct_read_chunk_size = 0;
        } else {
            int parsed = atoi(env);

            if (parsed > 0) {
                xfs_direct_read_chunk_size = parsed;
            } else {
                fprintf(stderr,
"MPI: Ignoring an invalid setting for MPIO_DIRECT_READ_CHUNK_SIZE.\n"
"     It must be set to a positive integer value.\n");
            }
        }

        env = getenv("MPIO_DIRECT_WRITE_CHUNK_SIZE");
        if (env == NULL) {
            xfs_direct_write_chunk_size = 0;
        } else {
            int parsed = atoi(env);

            if (parsed > 0) {
                xfs_direct_write_chunk_size = parsed;
            } else {
                fprintf(stderr,
"MPI: Ignoring an invalid setting for MPIO_DIRECT_WRITE_CHUNK_SIZE.\n"
"     It must be set to a positive integer value.\n");
            }
        }
    }

    /* seed the per-file chunk sizes while the hints are uninitialized */
    if (!fd->hints->initialized) {
        fd->hints->fs_hints.xfs.read_chunk_sz = xfs_direct_read_chunk_size;
        fd->hints->fs_hints.xfs.write_chunk_sz = xfs_direct_write_chunk_size;
    }

    /* has user specified values for keys "direct_read" and "direct write"? */
    if (users_info != MPI_INFO_NULL) {
        hint_buf = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));

        ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
                       hint_buf, &found);
        if (found && strcmp(hint_buf, "true") == 0) {
            ADIOI_Info_set(fd->info, "direct_read", "true");
            fd->direct_read = 1;
        }

        ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
                       hint_buf, &found);
        if (found && strcmp(hint_buf, "true") == 0) {
            ADIOI_Info_set(fd->info, "direct_write", "true");
            fd->direct_write = 1;
        }

        ADIOI_Free(hint_buf);
    }

    /* set the values for collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    /* Environment variables override MPI_Info hints
     * (environment variables checked in ADIO_Init) */
    if (ADIOI_Direct_read) {
        fd->direct_read = 1;
    }
    if (ADIOI_Direct_write) {
        fd->direct_write = 1;
    }

    *error_code = MPI_SUCCESS;
}
示例#14
0
/*
 * ADIOI_PFS_Open() - open a file on PFS and publish its striping info.
 *
 * Parameters:
 * fd         - file to open; fd->fd_sys receives the descriptor and
 *              fd->fd_direct is set to -1
 * error_code - out: MPI_SUCCESS, or an MPI_ERR_IO code built from
 *              strerror(errno) when the open fails
 *
 * _gopen() is used when fd->comm has as many processes as MPI_COMM_WORLD,
 * plain open() otherwise.  After a successful open the "pfs_svr_buf" hint
 * is applied and the striping attributes reported by F_GETSATTR are stored
 * in fd->info.
 */
void ADIOI_PFS_Open(ADIO_File fd, int *error_code)
{
    static char myname[] = "ADIOI_PFS_OPEN";
    int perm, amode, old_mask, np_comm, np_total, err, flag;
    char *value;
    struct sattr attr;

    /* permission bits: caller's choice, or 0666 filtered by the umask */
    if (fd->perm != ADIO_PERM_NULL) {
        perm = fd->perm;
    } else {
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }

    /* translate ADIO access-mode bits into open(2) flags */
    amode = 0;
    if (fd->access_mode & ADIO_CREATE) {
        amode |= O_CREAT;
    }
    if (fd->access_mode & ADIO_RDONLY) {
        amode |= O_RDONLY;
    }
    if (fd->access_mode & ADIO_WRONLY) {
        amode |= O_WRONLY;
    }
    if (fd->access_mode & ADIO_RDWR) {
        amode |= O_RDWR;
    }
    if (fd->access_mode & ADIO_EXCL) {
        amode |= O_EXCL;
    }

    MPI_Comm_size(MPI_COMM_WORLD, &np_total);
    MPI_Comm_size(fd->comm, &np_comm);

    if (np_total != np_comm) {
        fd->fd_sys = open(fd->filename, amode, perm);
    } else {
        fd->fd_sys = _gopen(fd->filename, amode, M_ASYNC, perm);
    }
    fd->fd_direct = -1;

    if (fd->fd_sys == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(errno));
        return;
    }

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));

    /* if user has asked for pfs server buffering to be turned on,
     * it will be set to true in fd->info in the earlier call
     * to ADIOI_PFS_SetInfo. Turn it on now, since we now have a
     * valid file descriptor. */
    ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, value, &flag);
    if (flag && strcmp(value, "true") == 0) {
        err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
        if (err) {
            /* request failed: record that buffering is off */
            ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
        }
    }

    /* get file striping information and set it in info */
    err = fcntl(fd->fd_sys, F_GETSATTR, &attr);
    if (err == 0) {
        MPL_snprintf(value, MPI_MAX_INFO_VAL + 1, "%d", attr.s_sunitsize);
        ADIOI_Info_set(fd->info, "striping_unit", value);

        MPL_snprintf(value, MPI_MAX_INFO_VAL + 1, "%d", attr.s_sfactor);
        ADIOI_Info_set(fd->info, "striping_factor", value);

        MPL_snprintf(value, MPI_MAX_INFO_VAL + 1, "%d", attr.s_start_sdir);
        ADIOI_Info_set(fd->info, "start_iodevice", value);
    }
    ADIOI_Free(value);

    /* append mode: start both file pointers at end of file */
    if (fd->access_mode & ADIO_APPEND) {
        fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
    }

    *error_code = MPI_SUCCESS;
}
/*
 * ADIOI_LUSTRE_SetInfo() - process Lustre-specific hints for a file.
 *
 * Parameters:
 * fd         - file whose fd->info, direct_read/direct_write flags and
 *              Lustre hints are updated
 * users_info - hints supplied by the user, or MPI_INFO_NULL
 * error_code - out: set by the "info not same" macro when a hint differs
 *              from rank 0's value; otherwise MPI_SUCCESS
 *
 * When fd->info is still MPI_INFO_NULL (part of the open call), defaults
 * are installed, the striping hints are checked for agreement across
 * fd->comm (a mismatch aborts MPI_COMM_WORLD) and rank 0 tries to apply
 * them with LL_IOC_LOV_SETSTRIPE.  The romio_lustre_* hints are processed
 * on every call that supplies users_info.
 */
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    char *value;
    int flag, stripe_val[3], str_factor = -1, str_unit = 0, start_iodev = -1;
    struct lov_user_md lum = { 0 };
    int err, myrank, fd_sys, perm, amode, old_mask;
    int int_val, tmp_val;
    static char myname[] = "ADIOI_LUSTRE_SETINFO";

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
    if ((fd->info) == MPI_INFO_NULL) {
        /* This must be part of the open call. can set striping parameters
         * if necessary. */
        MPI_Info_create(&(fd->info));

        ADIOI_Info_set(fd->info, "direct_read", "false");
        ADIOI_Info_set(fd->info, "direct_write", "false");
        fd->direct_read = fd->direct_write = 0;
        /* initialize lustre hints */
        ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
        fd->hints->fs_hints.lustre.co_ratio = 1;
        ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
        fd->hints->fs_hints.lustre.coll_threshold = 0;
        ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");
        fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;

        /* has user specified striping or server buffering parameters
         * and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            /* striping information */
            ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag)
                str_unit = atoi(value);

            ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag)
                str_factor = atoi(value);

            ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag)
                start_iodev = atoi(value);

            /* direct read and write */
            ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
                ADIOI_Info_set(fd->info, "direct_read", "true");
                fd->direct_read = 1;
            }
            ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
                ADIOI_Info_set(fd->info, "direct_write", "true");
                fd->direct_write = 1;
            }
        }

        /* rank 0 publishes its striping values; every rank compares them
         * with its own before anything is applied */
        MPI_Comm_rank(fd->comm, &myrank);
        if (myrank == 0) {
            stripe_val[0] = str_factor;
            stripe_val[1] = str_unit;
            stripe_val[2] = start_iodev;
        }
        MPI_Bcast(stripe_val, 3, MPI_INT, 0, fd->comm);

        if (stripe_val[0] != str_factor
            || stripe_val[1] != str_unit
            || stripe_val[2] != start_iodev) {
            FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
                    "-striping_factor:striping_unit:start_iodevice "
                    "need to be identical across all processes\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
            /* if user has specified striping info, process 0 tries to set it */
            if (!myrank) {
                if (fd->perm == ADIO_PERM_NULL) {
                    /* read the current umask without changing it */
                    old_mask = umask(022);
                    umask(old_mask);
                    perm = old_mask ^ 0666;
                }
                else perm = fd->perm;

                amode = 0;
                if (fd->access_mode & ADIO_CREATE)
                    amode = amode | O_CREAT;
                if (fd->access_mode & ADIO_RDONLY)
                    amode = amode | O_RDONLY;
                if (fd->access_mode & ADIO_WRONLY)
                    amode = amode | O_WRONLY;
                if (fd->access_mode & ADIO_RDWR)
                    amode = amode | O_RDWR;
                if (fd->access_mode & ADIO_EXCL)
                    amode = amode | O_EXCL;

                /* we need to create file so ensure this is set */
                amode = amode | O_LOV_DELAY_CREATE | O_CREAT;

                fd_sys = open(fd->filename, amode, perm);
                if (fd_sys == -1) {
                    if (errno != EEXIST)
                        fprintf(stderr,
                                "Failure to open file %s %d %d\n", strerror(errno), amode, perm);
                } else {
                    lum.lmm_magic = LOV_USER_MAGIC;
                    lum.lmm_pattern = 0;
                    lum.lmm_stripe_size = str_unit;
                    lum.lmm_stripe_count = str_factor;
                    lum.lmm_stripe_offset = start_iodev;

                    err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
                    if (err == -1 && errno != EEXIST) {
                        fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
                    }
                    close(fd_sys);
                }
            } /* End of striping parameters validation */
        }
        MPI_Barrier(fd->comm);
    }

    /* get other hint */
    if (users_info != MPI_INFO_NULL) {
        /* CO: IO Clients/OST,
         * to keep the load balancing between clients and OSTs */
        ADIOI_Info_get(users_info, "romio_lustre_co_ratio", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag && (int_val = atoi(value)) > 0) {
            tmp_val = int_val;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_co_ratio",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", value);
            fd->hints->fs_hints.lustre.co_ratio = int_val;
        }

        /* coll_threshold:
         * if the req size is bigger than this, collective IO may not be performed.
         */
        ADIOI_Info_get(users_info, "romio_lustre_coll_threshold", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag && (int_val = atoi(value)) > 0) {
            tmp_val = int_val;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_coll_threshold",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", value);
            fd->hints->fs_hints.lustre.coll_threshold = int_val;
        }

        /* ds_in_coll: disable data sieving in collective IO */
        ADIOI_Info_get(users_info, "romio_lustre_ds_in_coll", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && (!strcmp(value, "disable") ||
                     !strcmp(value, "DISABLE"))) {
            tmp_val = int_val = 2;
            /* tmp_val is a single int, so the broadcast count must be 1;
             * a larger count would overrun the variable */
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_ds_in_coll",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "disable");
            fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_DISABLE;
        }
    }

    /* set the values for collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    /* global direct-I/O switches take precedence over the info hints */
    if (ADIOI_Direct_read) fd->direct_read = 1;
    if (ADIOI_Direct_write) fd->direct_write = 1;

    ADIOI_Free(value);

    /* NOTE(review): this overwrites whatever ADIOI_GEN_SetInfo stored in
     * *error_code; kept as-is for compatibility -- confirm it is intended */
    *error_code = MPI_SUCCESS;
}
示例#16
0
/*
 * ADIOI_LUSTRE_Open() - open a file on Lustre and publish its striping.
 *
 * Parameters:
 * fd         - file to open; fd->fd_sys receives the descriptor, and
 *              fd->fd_direct an O_DIRECT descriptor when direct_read or
 *              direct_write is set (otherwise -1)
 * error_code - out: MPI_SUCCESS, or an error code classified from the
 *              errno of the failing open()
 *
 * After a successful open the stripe size, count and offset reported by
 * LL_IOC_LOV_GETSTRIPE are stored in fd->hints and fd->info.  If the
 * O_DIRECT open fails, direct I/O is switched off for this file and a
 * message is printed with perror().
 *
 * The errno of the first failing open() is captured immediately so that
 * later calls (second open(), perror()) cannot change which error is
 * reported.
 */
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
{
    int perm, old_mask, amode, amode_direct;
    int lumlen;
    int open_errno = 0;
    struct lov_user_md *lum = NULL;
    char *value;

#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
    static char myname[] = "ADIOI_LUSTRE_OPEN";
#endif

    if (fd->perm == ADIO_PERM_NULL) {
        /* read the current umask without changing it */
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }
    else perm = fd->perm;

    /* translate ADIO access-mode bits into open(2) flags */
    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
        amode = amode | O_CREAT;
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;

    amode_direct = amode | O_DIRECT;

    fd->fd_sys = open(fd->filename, amode | O_CREAT, perm);
    if (fd->fd_sys == -1)
        open_errno = errno;

    if (fd->fd_sys != -1) {
        int err;

        /* get file striping information and set it in info */
        /* odd malloc here because lov_user_md contains some fixed data and
         * then a list of 'lmm_objects' representing stripe */
        lumlen = sizeof(struct lov_user_md) +
                 MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
        /* furthermore, Pascal Deveze reports that, even though we pass a
         * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
         * struct are uninitialized, the call can give an error.  calloc in case
         * there are other members that must be initialized and in case
         * lov_user_md struct changes in future */
        lum = (struct lov_user_md *) ADIOI_Calloc(1, lumlen);
        lum->lmm_magic = LOV_USER_MAGIC;
        err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) lum);
        if (!err) {
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));

            fd->hints->striping_unit = lum->lmm_stripe_size;
            sprintf(value, "%d", lum->lmm_stripe_size);
            ADIOI_Info_set(fd->info, "striping_unit", value);

            fd->hints->striping_factor = lum->lmm_stripe_count;
            sprintf(value, "%d", lum->lmm_stripe_count);
            ADIOI_Info_set(fd->info, "striping_factor", value);

            fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
            sprintf(value, "%d", lum->lmm_stripe_offset);
            ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);

            ADIOI_Free(value);
        }
        ADIOI_Free(lum);

        /* append mode: start both file pointers at end of file */
        if (fd->access_mode & ADIO_APPEND)
            fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
    }

    fd->fd_direct = -1;
    if (fd->direct_write || fd->direct_read) {
        fd->fd_direct = open(fd->filename, amode_direct, perm);
        if (fd->fd_direct != -1) {
            fd->d_mem = fd->d_miniosz = (1 << 12);
        } else {
            /* keep the earliest failure's errno for classification below */
            if (open_errno == 0)
                open_errno = errno;
            perror("cannot open file with O_Direct");
            /* fall back to buffered I/O */
            fd->direct_write = fd->direct_read = 0;
        }
    }

    /* --BEGIN ERROR HANDLING-- */
    if (fd->fd_sys == -1 || ((fd->fd_direct == -1) &&
                (fd->direct_write || fd->direct_read))) {
        if (open_errno == ENAMETOOLONG)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_BAD_FILE,
                                               "**filenamelong",
                                               "**filenamelong %s %d",
                                               fd->filename,
                                               /* %d expects int, not size_t */
                                               (int) strlen(fd->filename));
        else if (open_errno == ENOENT)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_NO_SUCH_FILE,
                                               "**filenoexist",
                                               "**filenoexist %s",
                                               fd->filename);
        else if (open_errno == ENOTDIR || open_errno == ELOOP)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_BAD_FILE,
                                               "**filenamedir",
                                               "**filenamedir %s",
                                               fd->filename);
        else if (open_errno == EACCES) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_ACCESS,
                                               "**fileaccess",
                                               "**fileaccess %s",
                                               fd->filename);
        }
        else if (open_errno == EROFS) {
            /* Read only file or file system and write access requested */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_READ_ONLY,
                                               "**ioneedrd", 0);
        }
        else {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(open_errno));
        }
    }
    /* --END ERROR HANDLING-- */
    else *error_code = MPI_SUCCESS;

}
示例#17
0
/*
 * ADIOI_PFS_SetInfo - process the PFS-specific hints, then the generic ones.
 *
 * Parameters:
 * fd         - file whose info object (fd->info) is created or updated
 * users_info - hints supplied by the caller; may be MPI_INFO_NULL
 * error_code - receives MPI_SUCCESS, or an error code when one of the
 *              striping hints differs from the value broadcast by rank 0
 *
 * If fd->info is MPI_INFO_NULL the call is treated as part of open: the
 * hints "striping_factor", "striping_unit" and "start_iodevice" are checked
 * against rank 0's values and rank 0 tries to apply them to the file, and
 * "pfs_svr_buf" is recorded in fd->info.  Otherwise only "pfs_svr_buf" can
 * be changed, through fcntl() on fd->fd_sys.
 *
 * NOTE(review): the final assignment of MPI_SUCCESS overwrites whatever
 * ADIOI_GEN_SetInfo stored in *error_code -- confirm that is intended.
 */
void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    /* needed by MPIO_ERR_CREATE_CODE_INFO_NOT_SAME below */
    static char myname[] = "ADIOI_PFS_SETINFO";
    char *value, *value_in_fd;
    int flag, tmp_val, str_factor=-1, str_unit=-1, start_iodev=-1;
    struct sattr attr;
    int err, myrank, fd_sys, perm, amode, old_mask;

    if ( (fd->info) == MPI_INFO_NULL) {
        /* This must be part of the open call. can set striping parameters
           if necessary. */
        MPI_Info_create(&(fd->info));

        /* has user specified striping or server buffering parameters
           and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

            ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag) {
                str_factor=atoi(value);
                tmp_val = str_factor;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                /* --BEGIN ERROR HANDLING-- */
                if (tmp_val != str_factor) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "striping_factor",
                                                       error_code);
                    ADIOI_Free(value);   /* do not leak on the error path */
                    return;
                }
                /* --END ERROR HANDLING-- */
            }

            ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag) {
                str_unit=atoi(value);
                tmp_val = str_unit;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                /* --BEGIN ERROR HANDLING-- */
                if (tmp_val != str_unit) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "striping_unit",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
                /* --END ERROR HANDLING-- */
            }

            ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag) {
                start_iodev=atoi(value);
                tmp_val = start_iodev;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                /* --BEGIN ERROR HANDLING-- */
                if (tmp_val != start_iodev) {
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                       "start_iodevice",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                }
                /* --END ERROR HANDLING-- */
            }

            /* if user has specified striping info, process 0 tries to set it */
            if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
                MPI_Comm_rank(fd->comm, &myrank);
                if (!myrank) {
                    if (fd->perm == ADIO_PERM_NULL) {
                        old_mask = umask(022);
                        umask(old_mask);
                        /* clear the masked bits; XOR would turn on bits
                         * for any umask bit that lies outside 0666 */
                        perm = ~old_mask & 0666;
                    }
                    else perm = fd->perm;

                    amode = 0;
                    if (fd->access_mode & ADIO_CREATE)
                        amode = amode | O_CREAT;
                    if (fd->access_mode & ADIO_RDONLY)
                        amode = amode | O_RDONLY;
                    if (fd->access_mode & ADIO_WRONLY)
                        amode = amode | O_WRONLY;
                    if (fd->access_mode & ADIO_RDWR)
                        amode = amode | O_RDWR;
                    if (fd->access_mode & ADIO_EXCL)
                        amode = amode | O_EXCL;

                    /* Setting the striping is best effort: if the file
                     * cannot be opened here, or an fcntl fails, nothing
                     * is reported and the hints are simply not applied. */
                    fd_sys = open(fd->filename, amode, perm);
                    if (fd_sys != -1) {
                        err = fcntl(fd_sys, F_GETSATTR, &attr);

                        if (!err) {
                            if (str_unit > 0) attr.s_sunitsize = str_unit;
                            if ((start_iodev >= 0) &&
                                (start_iodev < attr.s_sfactor))
                                attr.s_start_sdir = start_iodev;
                            if ((str_factor > 0) &&
                                (str_factor < attr.s_sfactor))
                                attr.s_sfactor = str_factor;

                            err = fcntl(fd_sys, F_SETSATTR, &attr);
                        }

                        close(fd_sys);
                    }
                }

                /* everyone waits until rank 0 has finished its attempt */
                MPI_Barrier(fd->comm);
            }

            /* Has user asked for pfs server buffering to be turned on?
               If so, mark it as true in fd->info and turn it on in
               ADIOI_PFS_Open after the file is opened */

            ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true")))
                ADIOI_Info_set(fd->info, "pfs_svr_buf", "true");
            else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");

            ADIOI_Free(value);
        }
        else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");

        /* set the values for collective I/O and data sieving parameters */
        ADIOI_GEN_SetInfo(fd, users_info, error_code);
    }

    else {
        /* The file has been opened previously and fd->fd_sys is a valid
           file descriptor. cannot set striping parameters now. */

        /* set the values for collective I/O and data sieving parameters */
        ADIOI_GEN_SetInfo(fd, users_info, error_code);

        /* has user specified value for pfs_svr_buf? */
        if (users_info != MPI_INFO_NULL) {
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

            ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "false"))) {
                value_in_fd = (char *)
                          ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
                /* start from an empty string so the comparison below is
                 * well defined even if the key is missing from fd->info */
                value_in_fd[0] = '\0';
                ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
                               value_in_fd, &flag);
                /* only touch the file when the requested setting differs
                 * from the recorded one; record it only if fcntl worked */
                if (strcmp(value, value_in_fd)) {
                    if (!strcmp(value, "true")) {
                        err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
                        if (!err)
                            ADIOI_Info_set(fd->info, "pfs_svr_buf", "true");
                    }
                    else {
                        err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, FALSE);
                        if (!err)
                            ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
                    }
                }
                ADIOI_Free(value_in_fd);
            }
            ADIOI_Free(value);
        }

    }

    *error_code = MPI_SUCCESS;
}
示例#18
0
/*
 * ADIO_Open - create an ADIO file object, process hints and open the file.
 *
 * Parameters:
 * orig_comm   - passed on to build_cb_config_list()
 * comm        - communicator stored in the file object and used for every
 *               collective call made here
 * filename    - name of the file; a copy is stored in the file object
 * file_system - file system identifier stored in the file object
 * ops         - table of file-system specific functions (stored as fd->fns)
 * access_mode - access mode handed to ADIOI_OpenColl()
 * disp        - initial displacement; also the initial individual file ptr
 * etype       - elementary datatype stored in the file object
 * filetype    - filetype stored in the file object
 * info        - user hints; merged with the system-wide hints
 * perm        - permission value stored in the file object
 * error_code  - receives MPI_SUCCESS or an error code
 *
 * Returns the new file object, or ADIO_FILE_NULL if the open failed on any
 * process of comm (the local error codes are combined with MPI_MAX).  In
 * the failure case everything allocated here is released again.
 */
MPI_File ADIO_Open(MPI_Comm orig_comm,
		   MPI_Comm comm, const char *filename, int file_system,
		   ADIOI_Fns *ops,
		   int access_mode, ADIO_Offset disp, MPI_Datatype etype,
		   MPI_Datatype filetype,
		   MPI_Info info, int perm, int *error_code)
{
    MPI_File mpi_fh;
    ADIO_File fd;
    int err, rank, procs;
    static char myname[] = "ADIO_OPEN";
    int  max_error_code;
    MPI_Info dupinfo;
    int syshints_processed, can_skip;
    char *p;

    *error_code = MPI_SUCCESS;

    /* obtain MPI_File handle */
    mpi_fh = MPIO_File_create(sizeof(struct ADIOI_FileD));
    if (mpi_fh == MPI_FILE_NULL) {
	/* the cleanup code at fn_exit checks for this value */
	fd = ADIO_FILE_NULL;
	*error_code = MPIO_Err_create_code(*error_code,
					   MPIR_ERR_RECOVERABLE,
					   myname,
					   __LINE__,
					   MPI_ERR_OTHER,
					   "**nomem2",0);
	goto fn_exit;

    }
    fd = MPIO_File_resolve(mpi_fh);

    /* Fields inspected by the cleanup code at fn_exit get a defined value
     * before the first "goto fn_exit" that can follow; the new object is
     * not assumed to be zero-filled. */
    fd->hints = NULL;
    fd->info = MPI_INFO_NULL;
    fd->io_buf = NULL;

    fd->cookie = ADIOI_FILE_COOKIE;
    fd->fp_ind = disp;
    fd->fp_sys_posn = 0;
    fd->comm = comm;       /* dup'ed in MPI_File_open */
    fd->filename = ADIOI_Strdup(filename);
    fd->file_system = file_system;
    fd->fs_ptr = NULL;

    fd->fns = ops;

    fd->disp = disp;
    fd->split_coll_count = 0;
    fd->shared_fp_fd = ADIO_FILE_NULL;
    fd->atomicity = 0;
    fd->etype = etype;          /* MPI_BYTE by default */
    fd->filetype = filetype;    /* MPI_BYTE by default */
    fd->etype_size = 1;  /* default etype is MPI_BYTE */

    fd->file_realm_st_offs = NULL;
    fd->file_realm_types = NULL;

    fd->perm = perm;

    fd->async_count = 0;

    fd->fortran_handle = -1;

    fd->err_handler = ADIOI_DFLT_ERR_HANDLER;

    fd->io_buf_window = MPI_WIN_NULL;
    fd->io_buf_put_amounts_window = MPI_WIN_NULL;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &procs);
    /* create and initialize info object */
    fd->hints = (ADIOI_Hints *)ADIOI_Calloc(1, sizeof(struct ADIOI_Hints_struct));
    if (fd->hints == NULL) {
	*error_code = MPIO_Err_create_code(*error_code,
					   MPIR_ERR_RECOVERABLE,
					   myname,
					   __LINE__,
					   MPI_ERR_OTHER,
					   "**nomem2",0);
	goto fn_exit;
    }
    fd->hints->cb_config_list = NULL;
    fd->hints->ranklist = NULL;
    fd->hints->initialized = 0;

    /* move system-wide hint processing *back* into open, but this time the
     * hintfile reader will do a scalable read-and-broadcast.  The global
     * ADIOI_syshints will get initialized at first open.  subsequent open
     * calls will just use result from first open.
     *
     * We have two goals here:
     * 1: avoid processing the hintfile multiple times
     * 2: have all processes participate in hintfile processing (so we can read-and-broadcast)
     *
     * a code might do an "initialize from 0", so we can only skip hint
     * processing once everyone has participated in hint processing */
    if (ADIOI_syshints == MPI_INFO_NULL)
	syshints_processed = 0;
    else
	syshints_processed = 1;

    /* MPI_MIN: a single process that has not processed the hints yet
     * forces everybody through ADIOI_process_system_hints() */
    MPI_Allreduce(&syshints_processed, &can_skip, 1, MPI_INT, MPI_MIN, fd->comm);
    if (!can_skip) {
	if (ADIOI_syshints == MPI_INFO_NULL)
	    MPI_Info_create(&ADIOI_syshints);
	ADIOI_process_system_hints(fd, ADIOI_syshints);
    }

    ADIOI_incorporate_system_hints(info, ADIOI_syshints, &dupinfo);
    ADIO_SetInfo(fd, dupinfo, &err);
    if (dupinfo != MPI_INFO_NULL) {
	*error_code = MPI_Info_free(&dupinfo);
	if (*error_code != MPI_SUCCESS)
	    goto fn_exit;
    }
    ADIOI_Info_set(fd->info, "romio_filesystem_type", fd->fns->fsname);

    /* Instead of repeatedly allocating this buffer in collective read/write,
     * allocating up-front might make memory management on small platforms
     * (e.g. Blue Gene) more efficient */

    fd->io_buf = ADIOI_Malloc(fd->hints->cb_buffer_size);
     /* deferred open:
     * we can only do this optimization if 'fd->hints->deferred_open' is set
     * (which means the user hinted 'no_indep_rw' and collective buffering).
     * Furthermore, we only do this if our collective read/write routines use
     * our generic function, and not an fs-specific routine (we can defer opens
     * only if we use our aggregation code). */
    if (fd->hints->deferred_open &&
		    !(uses_generic_read(fd)
			    && uses_generic_write(fd))) {
	    fd->hints->deferred_open = 0;
    }
    if (ADIO_Feature(fd, ADIO_SCALABLE_OPEN))
	    /* disable deferred open on these fs so that scalable broadcast
	     * will always use the proper communicator */
	    fd->hints->deferred_open = 0;


    /* on BlueGene, the cb_config_list is built when hints are processed. No
     * one else does that right now */
    if (fd->hints->ranklist == NULL) {
	build_cb_config_list(fd, orig_comm, comm, rank, procs, error_code);
	if (*error_code != MPI_SUCCESS)
	    goto fn_exit;
    }
    fd->is_open = 0;
    fd->my_cb_nodes_index = -2;
    fd->is_agg = is_aggregator(rank, fd);
    /* deferred open used to split the communicator to create an "aggregator
     * communicator", but we only used it as a way to indicate that deferred
     * open happened.  fd->is_open and fd->is_agg are sufficient */

    /* actual opens start here */
    /* generic open: one process opens to create the file, all others open */
    /* nfs open: everybody opens or else you'll end up with "file not found"
     * due to stupid nfs consistency semantics */
    /* scalable open: one process opens and broadcasts results to everyone */

    ADIOI_OpenColl(fd, rank, access_mode, error_code);

    /* for debugging, it can be helpful to see the hints selected. Some file
     * systems set up the hints in the open call (e.g. lustre) */
    p = getenv("ROMIO_PRINT_HINTS");
    if (rank == 0 && p != NULL ) {
	ADIOI_Info_print_keyvals(fd->info);
    }

 fn_exit:
    /* Uses the 'comm' argument rather than fd->comm because fd may be
     * ADIO_FILE_NULL when we arrive here. */
    MPI_Allreduce(error_code, &max_error_code, 1, MPI_INT, MPI_MAX, comm);
    if (max_error_code != MPI_SUCCESS) {

	if (fd != ADIO_FILE_NULL) {
	    /* If the file was successfully opened, close it */
	    if (*error_code == MPI_SUCCESS) {

		/* in the deferred open case, only those who have actually
		   opened the file should close it */
		if (fd->hints->deferred_open)  {
		    if (fd->is_agg) {
			(*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
		    }
		}
		else {
		    (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
		}
	    }

	    /* Release only what was actually set up: an early failure
	     * leaves some of these members NULL. */
	    if (fd->filename != NULL) ADIOI_Free(fd->filename);
	    if (fd->hints != NULL) {
		if (fd->hints->ranklist != NULL)
		    ADIOI_Free(fd->hints->ranklist);
		if (fd->hints->cb_config_list != NULL)
		    ADIOI_Free(fd->hints->cb_config_list);
		ADIOI_Free(fd->hints);
	    }
	    if (fd->info != MPI_INFO_NULL) MPI_Info_free(&(fd->info));
	    if (fd->io_buf != NULL) ADIOI_Free(fd->io_buf);
	    ADIOI_Free(fd);
	    fd = ADIO_FILE_NULL;
	}

	/* this process was fine, but some other process failed */
	if (*error_code == MPI_SUCCESS)
	{
	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
					       MPIR_ERR_RECOVERABLE, myname,
					       __LINE__, MPI_ERR_IO,
					       "**oremote_fail", 0);
	}
    }

    return fd;
}
示例#19
0
/*
 * ADIOI_BGL_SetInfo - set up fd->info and fd->hints for the bgl driver.
 *
 * if fd->info is null, create a new info object.
 * Initialize fd->info to default values.
 * Initialize fd->hints to default values.
 * Examine the info object passed by the user. If it contains values that
 * ROMIO understands, override the default.
 *
 * Parameters:
 * fd         - file whose info object and hints are updated
 * users_info - hints supplied by the caller; may be MPI_INFO_NULL
 * error_code - receives MPI_SUCCESS, or an error code when a hint value
 *              differs from the one broadcast by rank 0
 *
 * All exits go through fn_exit so that the scratch buffer is released on
 * the error paths as well.
 */
void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    MPI_Info info;
    char *value;
    int flag, intval, tmp_val, nprocs=0;
    static char myname[] = "ADIOI_BGL_SETINFO";

    /* set when the aggregator rank list has to be (re)generated */
    int did_anything = 0;

    if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
    info = fd->info;

    /* Note that fd->hints is allocated at file open time; thus it is
     * not necessary to allocate it, or check for allocation, here.
     */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    AD_BGL_assert ((value != NULL));

    /* initialize info and hints to default values if they haven't been
     * previously initialized
     */
    if (!fd->hints->initialized) {

	did_anything = 1;

	/* buffer size for collective I/O */
	ADIOI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
	fd->hints->cb_buffer_size = atoi(ADIOI_BGL_CB_BUFFER_SIZE_DFLT);

	/* default is to enable collective buffering for reads and writes */
	ADIOI_Info_set(info, "romio_cb_read", "enable");
	fd->hints->cb_read = ADIOI_HINT_ENABLE;
	ADIOI_Info_set(info, "romio_cb_write", "enable");
	fd->hints->cb_write = ADIOI_HINT_ENABLE;

	if (fd->hints->cb_config_list != NULL)
	    ADIOI_Free(fd->hints->cb_config_list);
	fd->hints->cb_config_list = NULL;

	/* number of processes that perform I/O in collective I/O */
	MPI_Comm_size(fd->comm, &nprocs);
	ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
	ADIOI_Info_set(info, "cb_nodes", value);
	fd->hints->cb_nodes = -1;

	/* hint indicating that no indep. I/O will be performed on this file */
	ADIOI_Info_set(info, "romio_no_indep_rw", "false");
	fd->hints->no_indep_rw = 0;

	/* bgl is not implementing file realms (ADIOI_IOStridedColl),
	   initialize to disabled it. 	   */
	/* hint instructing the use of persistent file realms */
	ADIOI_Info_set(info, "romio_cb_pfr", "disable");
	fd->hints->cb_pfr = ADIOI_HINT_DISABLE;

	/* hint guiding the assignment of persistent file realms */
	ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
	fd->hints->cb_fr_type = ADIOI_FR_AAR;

	/* hint to align file realms with a certain byte value */
	ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
	fd->hints->cb_fr_alignment = 1;

	/* hint to set a threshold percentage for a datatype's size/extent at
	 * which data sieving should be done in collective I/O */
	ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
	fd->hints->cb_ds_threshold = 0;

	/* hint to switch between point-to-point or all-to-all for two-phase */
	ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
	fd->hints->cb_alltoall = ADIOI_HINT_AUTO;

	/* deferred_open derived from no_indep_rw and cb_{read,write} */
	fd->hints->deferred_open = 0;

	/* buffer size for data sieving in independent reads */
	ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
	fd->hints->ind_rd_buffer_size = atoi(ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);

	/* buffer size for data sieving in independent writes */
	ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
	fd->hints->ind_wr_buffer_size = atoi(ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);

	if (fd->file_system == ADIO_UFS)
	{
	    /* default for ufs/pvfs is to disable data sieving  */
	    ADIOI_Info_set(info, "romio_ds_read", "disable");
	    fd->hints->ds_read = ADIOI_HINT_DISABLE;
	    ADIOI_Info_set(info, "romio_ds_write", "disable");
	    fd->hints->ds_write = ADIOI_HINT_DISABLE;
	}
	else
	{
	    /* default is to let romio automatically decide when to use data
	     * sieving
	     */
	    ADIOI_Info_set(info, "romio_ds_read", "automatic");
	    fd->hints->ds_read = ADIOI_HINT_AUTO;
	    ADIOI_Info_set(info, "romio_ds_write", "automatic");
	    fd->hints->ds_write = ADIOI_HINT_AUTO;
	}

	/* still to do: tune this a bit for a variety of file systems. there's
	 * no good default value so just leave it unset */
	fd->hints->min_fdomain_size = 0;
	fd->hints->striping_unit = 0;

	fd->hints->initialized = 1;
    }

    /* add in user's info if supplied */
    if (users_info != MPI_INFO_NULL) {
	ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
		     value, &flag);
	if (flag && ((intval=atoi(value)) > 0)) {
	    tmp_val = intval;

	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
	    /* --BEGIN ERROR HANDLING-- */
	    if (tmp_val != intval) {
		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
						   "cb_buffer_size",
						   error_code);
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */

	    ADIOI_Info_set(info, "cb_buffer_size", value);
	    fd->hints->cb_buffer_size = intval;

	}

	/* The hints romio_cb_fr_alignment, romio_cb_ds_threshold and
	 * romio_cb_alltoall are deliberately not read from users_info:
	 * bgl is not implementing file realms (ADIOI_IOStridedColl), so
	 * they keep the defaults assigned above. */

	/* new hints for enabling/disabling coll. buffering on
	 * reads/writes
	 */
	ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value,
		     &flag);
	if (flag) {
	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
		ADIOI_Info_set(info, "romio_cb_read", value);
		fd->hints->cb_read = ADIOI_HINT_ENABLE;
	    }
	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
		/* romio_cb_read overrides no_indep_rw */
		ADIOI_Info_set(info, "romio_cb_read", value);
		ADIOI_Info_set(info, "romio_no_indep_rw", "false");
		fd->hints->cb_read = ADIOI_HINT_DISABLE;
		/* no_indep_rw is a 0/1 flag everywhere else in this
		 * function, so store 0 for "false" rather than the
		 * tri-state constant */
		fd->hints->no_indep_rw = 0;
	    }
	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
	    {
		ADIOI_Info_set(info, "romio_cb_read", value);
		fd->hints->cb_read = ADIOI_HINT_AUTO;
	    }

	    tmp_val = fd->hints->cb_read;

	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
	    /* --BEGIN ERROR HANDLING-- */
	    if (tmp_val != fd->hints->cb_read) {
		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
						   "romio_cb_read",
						   error_code);
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}
	ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
		     &flag);
	if (flag) {
	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
		ADIOI_Info_set(info, "romio_cb_write", value);
		fd->hints->cb_write = ADIOI_HINT_ENABLE;
	    }
	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))
	    {
		/* romio_cb_write overrides no_indep_rw, too */
		ADIOI_Info_set(info, "romio_cb_write", value);
		ADIOI_Info_set(info, "romio_no_indep_rw", "false");
		fd->hints->cb_write = ADIOI_HINT_DISABLE;
		fd->hints->no_indep_rw = 0;   /* 0/1 flag, see above */
	    }
	    else if (!strcmp(value, "automatic") ||
		     !strcmp(value, "AUTOMATIC"))
	    {
		ADIOI_Info_set(info, "romio_cb_write", value);
		fd->hints->cb_write = ADIOI_HINT_AUTO;
	    }

	    tmp_val = fd->hints->cb_write;

	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
	    /* --BEGIN ERROR HANDLING-- */
	    if (tmp_val != fd->hints->cb_write) {
		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
						   "romio_cb_write",
						   error_code);
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}

	/* romio_cb_pfr and romio_cb_fr_type are not read from users_info
	 * either, for the same reason: bgl is not implementing file realms
	 * (ADIOI_IOStridedColl). */

	/* new hint for specifying no indep. read/write will be performed */
	ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
		     &flag);
	if (flag) {
	    if (!strcmp(value, "true") || !strcmp(value, "TRUE")) {
		/* if 'no_indep_rw' set, also hint that we will do
		 * collective buffering: if we aren't doing independent io,
		 * then we have to do collective  */
		ADIOI_Info_set(info, "romio_no_indep_rw", value);
		ADIOI_Info_set(info, "romio_cb_write", "enable");
		ADIOI_Info_set(info, "romio_cb_read", "enable");
		fd->hints->no_indep_rw = 1;
		/* use the named constant that matches the "enable"
		 * strings stored in the info object just above */
		fd->hints->cb_read = ADIOI_HINT_ENABLE;
		fd->hints->cb_write = ADIOI_HINT_ENABLE;
		tmp_val = 1;
	    }
	    else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) {
		ADIOI_Info_set(info, "romio_no_indep_rw", value);
		fd->hints->no_indep_rw = 0;
		tmp_val = 0;
	    }
	    else {
		/* default is above */
		tmp_val = 0;
	    }

	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
	    /* --BEGIN ERROR HANDLING-- */
	    if (tmp_val != fd->hints->no_indep_rw) {
		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
						   "romio_no_indep_rw",
						   error_code);
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}
	/* new hints for enabling/disabling data sieving on
	 * reads/writes
	 */
	ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
		     &flag);
	if (flag) {
	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
		ADIOI_Info_set(info, "romio_ds_read", value);
		fd->hints->ds_read = ADIOI_HINT_ENABLE;
	    }
	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
		ADIOI_Info_set(info, "romio_ds_read", value);
		fd->hints->ds_read = ADIOI_HINT_DISABLE;
	    }
	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
	    {
		ADIOI_Info_set(info, "romio_ds_read", value);
		fd->hints->ds_read = ADIOI_HINT_AUTO;
	    }
	    /* otherwise ignore */
	}
	ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
		     &flag);
	if (flag) {
	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
		ADIOI_Info_set(info, "romio_ds_write", value);
		fd->hints->ds_write = ADIOI_HINT_ENABLE;
	    }
	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
		ADIOI_Info_set(info, "romio_ds_write", value);
		fd->hints->ds_write = ADIOI_HINT_DISABLE;
	    }
	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
	    {
		ADIOI_Info_set(info, "romio_ds_write", value);
		fd->hints->ds_write = ADIOI_HINT_AUTO;
	    }
	    /* otherwise ignore */
	}

	ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
		     value, &flag);
	if (flag && ((intval = atoi(value)) > 0)) {
	    ADIOI_Info_set(info, "ind_wr_buffer_size", value);
	    fd->hints->ind_wr_buffer_size = intval;
	}

	ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
		     value, &flag);
	if (flag && ((intval = atoi(value)) > 0)) {
	    ADIOI_Info_set(info, "ind_rd_buffer_size", value);
	    fd->hints->ind_rd_buffer_size = intval;
	}

	memset( value, 0, MPI_MAX_INFO_VAL+1 );
	ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL,
			value, &flag);
	if ( flag && ((intval = atoi(value)) > 0) ) {
	    ADIOI_Info_set(info, "romio_min_fdomain_size", value);
	    fd->hints->min_fdomain_size = intval;
	}
	/* Now we use striping unit in common code so we should
	   process hints for it. */
	ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
			value, &flag);
	if ( flag && ((intval = atoi(value)) > 0) ) {
	    ADIOI_Info_set(info, "striping_unit", value);
	    fd->hints->striping_unit = intval;
	}

	memset( value, 0, MPI_MAX_INFO_VAL+1 );
	ADIOI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
		     value, &flag);
	if (flag && ((intval = atoi(value)) > 0)) {

	    did_anything = 1;
	    ADIOI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value);
	    fd->hints->cb_nodes = intval;
	}
    }

    /* associate CB aggregators to certain CNs in every involved PSET */
    if (did_anything) {
	ADIOI_BGL_gen_agg_ranklist(fd, fd->hints->cb_nodes);
    }
    /* ignore deferred open hints and do not enable it for bluegene: need all
     * processors in the open path so we can stat-and-broadcast the blocksize
     */
    ADIOI_Info_set(info, "romio_no_indep_rw", "false");
    fd->hints->no_indep_rw = 0;
    fd->hints->deferred_open = 0;

    /* BobC commented this out, but since hint processing runs on both bgl and
     * bglockless, we need to keep DS writes enabled on gpfs and disabled on
     * PVFS */
    if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
	/* disable data sieving for fs that do not
	   support file locking */
	ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
		     value, &flag);
	if (flag) {
	    /* get rid of this value if it is set */
	    ADIOI_Info_delete(info, "ind_wr_buffer_size");
	}
	/* note: leave ind_wr_buffer_size alone; used for other cases
	 * as well. -- Rob Ross, 04/22/2003
	 */
	ADIOI_Info_set(info, "romio_ds_write", "disable");
	fd->hints->ds_write = ADIOI_HINT_DISABLE;
    }

    *error_code = MPI_SUCCESS;

 fn_exit:
    /* reached directly on the error paths, with *error_code already set */
    ADIOI_Free(value);
}
/*
 * ADIOI_PANFS_Open - open a file on PanFS, creating it first when the access
 * mode contains ADIO_CREATE.
 *
 * Parameters:
 * fd         - ADIO file handle.  Reads fd->filename, fd->perm,
 *              fd->access_mode, fd->comm and the "panfs_*" hints in fd->info.
 *              Writes fd->fd_sys, fd->fd_direct, the "panfs_layout_*" keys of
 *              fd->info and, for ADIO_APPEND, fd->fp_ind / fd->fp_sys_posn.
 * error_code - MPI_SUCCESS if the final open() succeeded, otherwise an MPI
 *              error code selected from errno.
 *
 * Creation: the panfs_layout_* hints are parsed and validated.  For the
 * RAID0, RAID1_5_PARITY_STRIPE and RAID10 layout types the file is created
 * with the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl on the parent directory;
 * for any other layout type it is created with a plain open(O_CREAT).
 * Every failure on the creation path prints a message to stderr and calls
 * MPI_Abort(MPI_COMM_WORLD, 1) instead of returning an error code.
 *
 * After a successful open the actual layout is queried with
 * PAN_FS_CLIENT_LAYOUT_QUERY_FILE and published back into fd->info.
 */
void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
{
    char *value;
    int perm, old_mask, amode, flag;
    static char myname[] = "ADIOI_PANFS_OPEN";

    /* No explicit permissions given: derive them from the process umask.
     * umask() can only be read by setting it, so set and restore at once. */
    if (fd->perm == ADIO_PERM_NULL) {
        old_mask = umask(022);
        umask(old_mask);
        perm = ~old_mask & 0666;
    } else {
        perm = fd->perm;
    }

    amode = 0;
    if (fd->access_mode & ADIO_CREATE) {
        pan_fs_client_layout_agg_type_t layout_type = PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT;
        unsigned long int layout_stripe_unit = 0;
        unsigned long int layout_parity_stripe_width = 0;
        unsigned long int layout_parity_stripe_depth = 0;
        unsigned long int layout_total_num_comps = 0;
        pan_fs_client_layout_visit_t layout_visit_policy = PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN;
        int myrank;

        /* NOTE(review): myrank is not referenced again in this function. */
        MPI_Comm_rank(fd->comm, &myrank);

        *error_code = MPI_SUCCESS;

        /* Collect the layout hints; a hint that is absent keeps the default
         * assigned in the declarations above. */
        value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
        ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_type = strtoul(value, NULL, 10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_stripe_unit = strtoul(value, NULL, 10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_total_num_comps = strtoul(value, NULL, 10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_parity_stripe_width = strtoul(value, NULL, 10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_parity_stripe_depth = strtoul(value, NULL, 10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_visit_policy = strtoul(value, NULL, 10);
        }
        ADIOI_Free(value);

        amode = amode | O_CREAT;

        /* Check for valid set of hints */
        if ((layout_type < PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT) ||
            (layout_type > PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
            FPRINTF(stderr, "%s: panfs_layout_type is not a valid value: %u.\n", myname, layout_type);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        if ((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) &&
            ((layout_stripe_unit == 0) || (layout_total_num_comps == 0))) {
            if (layout_stripe_unit == 0) {
                FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID0 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
            }
            if (layout_total_num_comps == 0) {
                FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID0 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
            }
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) {
            if ((layout_stripe_unit == 0) ||
                (layout_parity_stripe_width == 0) ||
                (layout_parity_stripe_depth == 0) ||
                (layout_total_num_comps == 0)) {
                if (layout_stripe_unit == 0) {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if (layout_total_num_comps == 0) {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if (layout_parity_stripe_width == 0) {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_parity_stripe_width hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if (layout_parity_stripe_depth == 0) {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_parity_stripe_depth hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
                (layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET)) {
                FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
        if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10) {
            if ((layout_stripe_unit == 0) || (layout_total_num_comps == 0)) {
                if (layout_stripe_unit == 0) {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if (layout_total_num_comps == 0) {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
                (layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET)) {
                FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }

        /* Create the file via ioctl() or open(). ADIOI_PANFS_Open's caller
         * already optimizes performance by only calling this function with
         * ADIO_CREATE on rank 0.  Therefore, we don't need to worry about
         * implementing that optimization here. */
        if ((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) ||
            (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) ||
            (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
            pan_fs_client_layout_create_args_t file_create_args;
            int fd_dir;
            char *slash;
            struct stat stat_buf;
            int err;
            char *path;

            /* Check that the file does not exist before
             * trying to create it.  The ioctl itself should
             * be able to handle this condition.  Currently,
             * the ioctl will return successfully if the file
             * has been previously created.  Filed bug 33862
             * to track the problem.
             */
            err = stat(fd->filename, &stat_buf);
            if ((err == -1) && (errno != ENOENT)) {
                FPRINTF(stderr, "%s: Unexpected I/O Error calling stat() on PanFS file: %s.\n", myname, strerror(errno));
                MPI_Abort(MPI_COMM_WORLD, 1);
            } else if (err == 0) {
                FPRINTF(stderr, "%s: Cannot create PanFS file with ioctl when file already exists.\n", myname);
                MPI_Abort(MPI_COMM_WORLD, 1);
            } else {
                /* (err == -1) && (errno == ENOENT) */
                /* File does not exist */

                /* Reduce a private copy of the file name to its parent
                 * directory: "." when there is no slash, "/" when the only
                 * slash is the leading one, otherwise everything before the
                 * last slash. */
                path = ADIOI_Strdup(fd->filename);
                slash = strrchr(path, '/');
                if (!slash) {
                    ADIOI_Strncpy(path, ".", 2);
                } else if (slash == path) {
                    *(path + 1) = '\0';
                } else {
                    *slash = '\0';
                }

                /* create PanFS object */
                bzero(&file_create_args, sizeof(pan_fs_client_layout_create_args_t));
                /* open directory */
                fd_dir = open(path, O_RDONLY);
                if (fd_dir < 0) {
                    FPRINTF(stderr, "%s: I/O Error opening parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
                    MPI_Abort(MPI_COMM_WORLD, 1);
                } else {
                    /* The ioctl takes the name relative to the directory. */
                    char *file_name_ptr = fd->filename;
                    slash = strrchr(fd->filename, '/');
                    if (slash) {
                        file_name_ptr = slash + 1;
                    }

                    /* The name is copied into storage embedded in the ioctl
                     * argument, so it must be bounded by that storage rather
                     * than by the length of the source path.  Refuse names
                     * that do not fit (including the terminating NUL) instead
                     * of overrunning the structure. */
                    if (strlen(file_name_ptr) >= sizeof(file_create_args.filename)) {
                        FPRINTF(stderr, "%s: File name is too long to create PanFS file using ioctl: %s.\n", myname, file_name_ptr);
                        MPI_Abort(MPI_COMM_WORLD, 1);
                    }

                    /* create file in the directory */
                    file_create_args.mode = perm;
                    file_create_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
                    file_create_args.flags = PAN_FS_CLIENT_LAYOUT_CREATE_F__NONE;
                    ADIOI_Strncpy(file_create_args.filename, file_name_ptr,
                                  sizeof(file_create_args.filename));
                    file_create_args.layout.agg_type = layout_type;
                    file_create_args.layout.layout_is_valid = 1;
                    if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) {
                        file_create_args.layout.u.raid1_5_parity_stripe.total_num_comps = layout_total_num_comps;
                        file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_width = layout_parity_stripe_width;
                        file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth = layout_parity_stripe_depth;
                        file_create_args.layout.u.raid1_5_parity_stripe.stripe_unit = layout_stripe_unit;
                        file_create_args.layout.u.raid1_5_parity_stripe.layout_visit_policy = layout_visit_policy;
                    } else if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) {
                        file_create_args.layout.u.raid0.total_num_comps = layout_total_num_comps;
                        file_create_args.layout.u.raid0.stripe_unit = layout_stripe_unit;
                    } else if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10) {
                        file_create_args.layout.u.raid10.total_num_comps = layout_total_num_comps;
                        file_create_args.layout.u.raid10.stripe_unit = layout_stripe_unit;
                        file_create_args.layout.u.raid10.layout_visit_policy = layout_visit_policy;
                    }
                    err = ioctl(fd_dir, PAN_FS_CLIENT_LAYOUT_CREATE_FILE, &file_create_args);
                    if (err < 0) {
                        FPRINTF(stderr, "%s: I/O Error doing ioctl on parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
                        MPI_Abort(MPI_COMM_WORLD, 1);
                    }
                    err = close(fd_dir);
                }
                ADIOI_Free(path);
            }
        } else {
            /* No explicit layout requested: create with open() and close the
             * descriptor again; the real open happens below. */
            int create_fd = open(fd->filename, amode, perm);
            if (create_fd != -1) {
                close(create_fd);
            } else {
                FPRINTF(stderr, "%s: I/O Error creating PanFS file using open: %s.\n", myname, strerror(errno));
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
    }

    /* Translate the remaining ADIO access-mode bits into open(2) flags. */
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;

    /* The hint value "1" (and only that value) adds O_CONCURRENT_WRITE. */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
    ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, value, &flag);
    if (flag) {
        unsigned long int concurrent_write = strtoul(value, NULL, 10);
        if (concurrent_write == 1) {
            amode = amode | O_CONCURRENT_WRITE;
        }
    }
    ADIOI_Free(value);

    /* Nothing between this open() and the errno inspection at the end of the
     * function runs when the open fails, so errno is still the one set here. */
    fd->fd_sys = open(fd->filename, amode, perm);
    fd->fd_direct = -1;

    if (fd->fd_sys != -1) {
        /* Report the layout the file actually has back through fd->info. */
        int rc;
        char temp_buffer[TEMP_BUFFER_SIZE];
        pan_fs_client_layout_query_args_t file_query_args;

        bzero(&file_query_args, sizeof(pan_fs_client_layout_query_args_t));
        file_query_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
        rc = ioctl(fd->fd_sys, PAN_FS_CLIENT_LAYOUT_QUERY_FILE, &file_query_args);
        if (rc < 0) {
            /* Error - set layout type to unknown */
            ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
        } else {
            ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.agg_type);
            ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
            if (file_query_args.layout.layout_is_valid == 1) {
                switch (file_query_args.layout.agg_type) {
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID0:
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid0.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid0.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        break;
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE:
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid1_5_parity_stripe.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_width);
                        ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth);
                        ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid1_5_parity_stripe.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy);
                        ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
                        break;
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10:
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid10.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid10.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        ADIOI_Snprintf(temp_buffer, TEMP_BUFFER_SIZE, "%u", file_query_args.layout.u.raid10.layout_visit_policy);
                        ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
                        break;
                    default:
                        break;
                }
            }
        }
    }

    if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
        fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

    /* Map the errno of a failed open() onto an MPI error class. */
    if (fd->fd_sys == -1) {
        if (errno == ENAMETOOLONG) {
            /* "%d" consumes an int, so the size_t length is converted. */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_BAD_FILE,
                                               "**filenamelong",
                                               "**filenamelong %s %d",
                                               fd->filename,
                                               (int) strlen(fd->filename));
        } else if (errno == ENOENT) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_NO_SUCH_FILE,
                                               "**filenoexist",
                                               "**filenoexist %s",
                                               fd->filename);
        } else if (errno == ENOTDIR || errno == ELOOP) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_BAD_FILE,
                                               "**filenamedir",
                                               "**filenamedir %s",
                                               fd->filename);
        } else if (errno == EACCES) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_ACCESS,
                                               "**fileaccess",
                                               "**fileaccess %s",
                                               fd->filename);
        } else if (errno == EROFS) {
            /* Read only file or file system and write access requested */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_READ_ONLY,
                                               "**ioneedrd", 0);
        } else {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        }
    } else {
        *error_code = MPI_SUCCESS;
    }
}
示例#21
0
/*
 * ADIOI_GPFS_SetInfo - install default and user-supplied hints for a file.
 *
 * Parameters:
 * fd         - ADIO file handle.  fd->info is created if it is
 *              MPI_INFO_NULL; fd->info and fd->hints are updated.
 * users_info - hints passed by the user, or MPI_INFO_NULL.
 * error_code - always set to MPI_SUCCESS on return.  It is also handed to
 *              the ADIOI_Info_check_and_install_* helpers, but whatever they
 *              store is overwritten at the end of this function.
 *
 * If fd->info is null, create a new info object.
 * Initialize fd->info to default values.
 * Initialize fd->hints to default values.
 * Examine the info object passed by the user. If it contains values that
 * ROMIO understands, override the default.
 */
void ADIOI_GPFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    MPI_Info info;
    char *value;
    int flag, intval, nprocs = 0;
    static char myname[] = "ADIOI_GPFS_SETINFO";

    /* Set when defaults were installed or the aggregator count hint changed;
     * triggers regeneration of the aggregator rank list below. */
    int did_anything = 0;

    if (fd->info == MPI_INFO_NULL)
        MPI_Info_create(&(fd->info));
    info = fd->info;

    /* Note that fd->hints is allocated at file open time; thus it is
     * not necessary to allocate it, or check for allocation, here.
     */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
    ADIOI_Assert((value != NULL));

    /* initialize info and hints to default values if they haven't been
     * previously initialized
     */
    if (!fd->hints->initialized) {

        ad_get_env_vars();
        ad_gpfs_get_env_vars();
        did_anything = 1;

        /* buffer size for collective I/O */
        ADIOI_Info_set(info, "cb_buffer_size", ADIOI_GPFS_CB_BUFFER_SIZE_DFLT);
        fd->hints->cb_buffer_size = atoi(ADIOI_GPFS_CB_BUFFER_SIZE_DFLT);

        /* default is to let romio automatically decide when to use
         * collective buffering
         */
        ADIOI_Info_set(info, "romio_cb_read", "enable");
        fd->hints->cb_read = ADIOI_HINT_ENABLE;
        ADIOI_Info_set(info, "romio_cb_write", "enable");
        fd->hints->cb_write = ADIOI_HINT_ENABLE;

        if (fd->hints->cb_config_list != NULL)
            ADIOI_Free(fd->hints->cb_config_list);
        fd->hints->cb_config_list = NULL;

        /* number of processes that perform I/O in collective I/O: the info
         * object advertises the communicator size, while the cached hint is
         * left at -1 here */
        MPI_Comm_size(fd->comm, &nprocs);
        MPL_snprintf(value, MPI_MAX_INFO_VAL + 1, "%d", nprocs);
        ADIOI_Info_set(info, "cb_nodes", value);
        fd->hints->cb_nodes = -1;

        /* hint indicating that no indep. I/O will be performed on this file */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;

        /* gpfs is not implementing file realms (ADIOI_IOStridedColl),
         * initialize to disabled it. */
        /* hint instructing the use of persistent file realms */
        ADIOI_Info_set(info, "romio_cb_pfr", "disable");
        fd->hints->cb_pfr = ADIOI_HINT_DISABLE;

        /* hint guiding the assignment of persistent file realms */
        ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
        fd->hints->cb_fr_type = ADIOI_FR_AAR;

        /* hint to align file realms with a certain byte value */
        ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
        fd->hints->cb_fr_alignment = 1;

        /* hint to set a threshold percentage for a datatype's size/extent at
         * which data sieving should be done in collective I/O */
        ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
        fd->hints->cb_ds_threshold = 0;

        /* hint to switch between point-to-point or all-to-all for two-phase */
        ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
        fd->hints->cb_alltoall = ADIOI_HINT_AUTO;

        /* deferred_open derived from no_indep_rw and cb_{read,write} */
        fd->hints->deferred_open = 0;

        /* buffer size for data sieving in independent reads */
        ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_GPFS_IND_RD_BUFFER_SIZE_DFLT);
        fd->hints->ind_rd_buffer_size = atoi(ADIOI_GPFS_IND_RD_BUFFER_SIZE_DFLT);

        /* buffer size for data sieving in independent writes */
        ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_GPFS_IND_WR_BUFFER_SIZE_DFLT);
        fd->hints->ind_wr_buffer_size = atoi(ADIOI_GPFS_IND_WR_BUFFER_SIZE_DFLT);

        ADIOI_Info_set(info, "romio_ds_read", "automatic");
        fd->hints->ds_read = ADIOI_HINT_AUTO;
        ADIOI_Info_set(info, "romio_ds_write", "automatic");
        fd->hints->ds_write = ADIOI_HINT_AUTO;

        /* still to do: tune this a bit for a variety of file systems. there's
         * no good default value so just leave it unset */
        fd->hints->min_fdomain_size = 0;
        fd->hints->striping_unit = 0;

        fd->hints->initialized = 1;
    }

    /* add in user's info if supplied */
    if (users_info != MPI_INFO_NULL) {
        ADIOI_Info_check_and_install_int(fd, users_info, "cb_buffer_size",
                                         &(fd->hints->cb_buffer_size), myname, error_code);
        /* new hints for enabling/disabling coll. buffering on
         * reads/writes
         */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_read",
                                             &(fd->hints->cb_read), myname, error_code);
        if (fd->hints->cb_read == ADIOI_HINT_DISABLE) {
            /* romio_cb_read overrides no_indep_rw.  no_indep_rw is a plain
             * flag (it is compared with 1 and tested for truth below), so the
             * cached value that matches the "false" string is 0, not a
             * tri-state ADIOI_HINT_* constant. */
            ADIOI_Info_set(info, "romio_no_indep_rw", "false");
            fd->hints->no_indep_rw = 0;
        }
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_write",
                                             &(fd->hints->cb_write), myname, error_code);
        if (fd->hints->cb_write == ADIOI_HINT_DISABLE) {
            /* romio_cb_write overrides no_indep_rw (see above for the 0) */
            ADIOI_Info_set(info, "romio_no_indep_rw", "false");
            fd->hints->no_indep_rw = 0;
        }
        /* Has the user indicated all I/O will be done collectively? */
        ADIOI_Info_check_and_install_true(fd, users_info, "romio_no_indep_rw",
                                          &(fd->hints->no_indep_rw), myname, error_code);
        if (fd->hints->no_indep_rw == 1) {
            /* if 'no_indep_rw' set, also hint that we will do
             * collective buffering: if we aren't doing independent io,
             * then we have to do collective  */
            ADIOI_Info_set(info, "romio_cb_write", "enable");
            ADIOI_Info_set(info, "romio_cb_read", "enable");
            fd->hints->cb_read = 1;
            fd->hints->cb_write = 1;
        }

        /* new hints for enabling/disabling data sieving on
         * reads/writes
         */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_ds_read",
                                             &(fd->hints->ds_read), myname, error_code);
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_ds_write",
                                             &(fd->hints->ds_write), myname, error_code);

        ADIOI_Info_check_and_install_int(fd, users_info, "ind_wr_buffer_size",
                                         &(fd->hints->ind_wr_buffer_size), myname, error_code);
        ADIOI_Info_check_and_install_int(fd, users_info, "ind_rd_buffer_size",
                                         &(fd->hints->ind_rd_buffer_size), myname, error_code);

        /* Only a strictly positive value is accepted for this hint. */
        memset(value, 0, MPI_MAX_INFO_VAL + 1);
        ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL, value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {
            ADIOI_Info_set(info, "romio_min_fdomain_size", value);
            fd->hints->min_fdomain_size = intval;
        }
        /* Now we use striping unit in common code so we should
         * process hints for it. */
        ADIOI_Info_check_and_install_int(fd, users_info, "striping_unit",
                                         &(fd->hints->striping_unit), myname, error_code);

#ifdef BGQPLATFORM
        /* A positive aggregators-per-pset hint replaces cb_nodes and forces
         * the aggregator rank list to be rebuilt. */
        memset(value, 0, MPI_MAX_INFO_VAL + 1);
        ADIOI_Info_get(users_info, ADIOI_BG_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {

            did_anything = 1;
            ADIOI_Info_set(info, ADIOI_BG_NAGG_IN_PSET_HINT_NAME, value);
            fd->hints->cb_nodes = intval;
        }
#endif
    }

    /* special CB aggregator assignment */
    if (did_anything) {
#ifdef BGQPLATFORM
        ADIOI_BG_gen_agg_ranklist(fd, fd->hints->cb_nodes);
#elif PEPLATFORM
        ADIOI_PE_gen_agg_ranklist(fd);
#endif
    }

    /* deferred_open won't be set by callers, but if the user doesn't
     * explicitly disable collective buffering (two-phase) and does hint that
     * io w/o independent io is going on, we'll set this internal hint as a
     * convenience */
    if ((fd->hints->cb_read != ADIOI_HINT_DISABLE)
        && (fd->hints->cb_write != ADIOI_HINT_DISABLE)
        && fd->hints->no_indep_rw) {
        fd->hints->deferred_open = 1;
    } else {
        /* setting romio_no_indep_rw enable and romio_cb_{read,write}
         * disable at the same time doesn't make sense. honor
         * romio_cb_{read,write} and force the no_indep_rw hint to
         * 'disable' */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;
        fd->hints->deferred_open = 0;
    }

    /* BobC commented this out, but since hint processing runs on both bg and
     * bglockless, we need to keep DS writes enabled on gpfs and disabled on
     * PVFS */
    if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
        /* disable data sieving for fs that do not
         * support file locking */
        ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            /* get rid of this value if it is set */
            ADIOI_Info_delete(info, "ind_wr_buffer_size");
        }
        /* note: leave ind_wr_buffer_size alone; used for other cases
         * as well. -- Rob Ross, 04/22/2003
         */
        ADIOI_Info_set(info, "romio_ds_write", "disable");
        fd->hints->ds_write = ADIOI_HINT_DISABLE;
    }

    ADIOI_Free(value);

    *error_code = MPI_SUCCESS;
}
示例#22
0
/*
 * ADIOI_LUSTRE_Open - open a file on Lustre and, for a newly created file,
 * apply the striping hints found in fd->info.
 *
 * Parameters:
 * fd         - ADIO file handle.  Reads fd->filename, fd->perm,
 *              fd->access_mode, fd->comm, fd->info, fd->hints->ranklist and
 *              fd->direct_read / fd->direct_write.  Writes fd->fd_sys,
 *              fd->fd_direct, the striping fields of fd->hints, the matching
 *              keys of fd->info and, for ADIO_APPEND, fd->fp_ind /
 *              fd->fp_sys_posn.
 * error_code - MPI_SUCCESS on success, otherwise the code built by
 *              ADIOI_Err_create_code() from the errno of the failed open().
 *
 * A failure to set the stripe layout is reported on stderr but is not
 * treated as an error.  A failure to open the O_DIRECT descriptor is
 * reported with perror() and direct I/O is switched off for the file.
 */
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
{
    int perm, old_mask, amode, amode_direct;
    int lumlen, myrank, flag, set_layout = 0, err;
    int open_errno = 0;         /* errno captured at the failing open() */
    struct lov_user_md *lum = NULL;
    char *value;
    ADIO_Offset str_factor = -1, str_unit = 0, start_iodev = -1;
    size_t value_sz = (MPI_MAX_INFO_VAL + 1) * sizeof(char);

    /* NOTE(review): myname is referenced unconditionally in the error path
     * at the end of this function, so this guard only works when the
     * condition holds. */
#if defined(MPICH) || !defined(PRINT_ERR_MSG)
    static char myname[] = "ADIOI_LUSTRE_OPEN";
#endif

    MPI_Comm_rank(fd->comm, &myrank);

    if (fd->perm == ADIO_PERM_NULL) {
        /* umask() can only be read by setting it: set, then restore. */
        old_mask = umask(022);
        umask(old_mask);
        /* Clear the masked bits from 0666.  XOR is not a substitute: it
         * turns ON every mask bit that is not part of 0666, e.g. a umask of
         * 077 would yield 0611 instead of 0600. */
        perm = ~old_mask & 0666;
    } else {
        perm = fd->perm;
    }

    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
        amode = amode | O_CREAT;
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;

    amode_direct = amode | O_DIRECT;

    /* odd length here because lov_user_md contains some fixed data and
     * then a list of 'lmm_objects' representing stripe */
    lumlen = sizeof(struct lov_user_md) +
        MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
    lum = (struct lov_user_md *) ADIOI_Calloc(1, lumlen);

    value = (char *) ADIOI_Malloc(value_sz);
    /* we already validated in LUSTRE_SetInfo that these are going to be the same */
    if (fd->info != MPI_INFO_NULL) {
        /* striping information */
        ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag);
        if (flag)
            str_unit = atoll(value);

        ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag);
        if (flag)
            str_factor = atoll(value);

        ADIOI_Info_get(fd->info, "romio_lustre_start_iodevice",
                       MPI_MAX_INFO_VAL, value, &flag);
        if (flag)
            start_iodev = atoll(value);
    }
    if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0))
        set_layout = 1;

    /* if hints were set, we need to delay creation of any lustre objects.
     * However, if we open the file with O_LOV_DELAY_CREATE and don't call the
     * follow-up ioctl, subsequent writes will fail */
    if (myrank == 0 && set_layout)
        amode = amode | O_LOV_DELAY_CREATE;

    /* Give fd_direct a defined value before any early exit, so a failed
     * open never leaves it unset by this function. */
    fd->fd_direct = -1;

    fd->fd_sys = open(fd->filename, amode, perm);
    if (fd->fd_sys == -1) {
        /* Capture errno now; the cleanup calls below may overwrite it. */
        open_errno = errno;
        goto fn_exit;
    }

    /* we can only set these hints on new files */
    /* It was strange and buggy to open the file in the hint path.  Instead,
     * we'll apply the file tunings at open time */
    if ((amode & O_CREAT) && set_layout) {
        /* if user has specified striping info, first aggregator tries to set
         * it */
        if (myrank == fd->hints->ranklist[0] || fd->comm == MPI_COMM_SELF) {
            lum->lmm_magic = LOV_USER_MAGIC;
            lum->lmm_pattern = 0;
            /* crude check for overflow of lustre internal datatypes.
             * Silently cap to large value if user provides a value
             * larger than lustre supports */
            if (str_unit > UINT_MAX)
                lum->lmm_stripe_size = UINT_MAX;
            else
                lum->lmm_stripe_size = str_unit;

            if (str_factor > USHRT_MAX)
                lum->lmm_stripe_count = USHRT_MAX;
            else
                lum->lmm_stripe_count = str_factor;

            if (start_iodev > USHRT_MAX)
                lum->lmm_stripe_offset = USHRT_MAX;
            else
                lum->lmm_stripe_offset = start_iodev;

            err = ioctl(fd->fd_sys, LL_IOC_LOV_SETSTRIPE, lum);
            if (err == -1 && errno != EEXIST) {
                fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
                /* not a fatal error, but user might care to know */
            }
        }       /* End of striping parameters validation */
    }

    /* Pascal Deveze reports that, even though we pass a
     * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
     * struct are uninitialized, the call can give an error.  zero it out in
     * case there are other members that must be initialized and in case
     * lov_user_md struct changes in future */
    memset(lum, 0, lumlen);
    lum->lmm_magic = LOV_USER_MAGIC;
    err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) lum);
    if (!err) {
        /* Publish the layout the file really has, in both the cached hints
         * and the info object. */
        fd->hints->striping_unit = lum->lmm_stripe_size;
        MPL_snprintf(value, value_sz, "%d", lum->lmm_stripe_size);
        ADIOI_Info_set(fd->info, "striping_unit", value);

        fd->hints->striping_factor = lum->lmm_stripe_count;
        MPL_snprintf(value, value_sz, "%d", lum->lmm_stripe_count);
        ADIOI_Info_set(fd->info, "striping_factor", value);

        fd->hints->start_iodevice = lum->lmm_stripe_offset;
        MPL_snprintf(value, value_sz, "%d", lum->lmm_stripe_offset);
        ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
    }

    if (fd->access_mode & ADIO_APPEND)
        fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

    if (fd->direct_write || fd->direct_read) {
        fd->fd_direct = open(fd->filename, amode_direct, perm);
        if (fd->fd_direct != -1) {
            fd->d_mem = fd->d_miniosz = (1 << 12);
        } else {
            /* Fall back to buffered I/O rather than failing the open. */
            open_errno = errno;
            perror("cannot open file with O_Direct");
            fd->direct_write = fd->direct_read = 0;
        }
    }

  fn_exit:
    ADIOI_Free(lum);
    ADIOI_Free(value);
    /* --BEGIN ERROR HANDLING-- */
    if (fd->fd_sys == -1 || ((fd->fd_direct == -1) &&
                             (fd->direct_write || fd->direct_read))) {
        *error_code = ADIOI_Err_create_code(myname, fd->filename, open_errno);
    }
    /* --END ERROR HANDLING-- */
    else
        *error_code = MPI_SUCCESS;

}
示例#23
0
/*
 * ADIOI_LUSTRE_SetInfo - record Lustre-related hints on a file.
 *
 * Parameters:
 * fd         - file whose fd->info, fd->hints and direct_read/direct_write
 *              fields are updated
 * users_info - hints supplied by the caller; may be MPI_INFO_NULL
 * error_code - output.  Set through MPIO_ERR_CREATE_CODE_INFO_NOT_SAME when
 *              the striping request differs from the one broadcast by rank 0
 *              of fd->comm (the function then returns early).  On every
 *              other path it is set to MPI_SUCCESS at the very end, which
 *              overwrites anything the hint helpers stored in it.
 *
 * When fd->info is still MPI_INFO_NULL the info object is created and
 * populated with defaults before the user's hints are looked at.  No file
 * is opened here; the striping values are only stored in fd->info.
 */
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    static char myname[] = "ADIOI_LUSTRE_SETINFO";
    char *buf;
    int found;
    int rank;
    ADIO_Offset want_factor = -1;
    ADIO_Offset want_unit = 0;
    ADIO_Offset want_start = -1;
    ADIO_Offset root_stripe[3];

#ifdef HAVE_LUSTRE_LOCKAHEAD
    /* Lock-ahead defaults; user hints may replace them further down. */
    fd->hints->fs_hints.lustre.lock_ahead_read = 0;
    fd->hints->fs_hints.lustre.lock_ahead_write = 0;
    fd->hints->fs_hints.lustre.lock_ahead_num_extents = 500;
    fd->hints->fs_hints.lustre.lock_ahead_flags = 0;
#endif

    /* Scratch buffer for reading and formatting hint values. */
    buf = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));

    if (fd->info == MPI_INFO_NULL) {
        /* No info object yet, so this call is part of opening the file:
         * create the object and fill in the defaults. */
        MPI_Info_create(&(fd->info));

        ADIOI_Info_set(fd->info, "direct_read", "false");
        ADIOI_Info_set(fd->info, "direct_write", "false");
        fd->direct_write = 0;
        fd->direct_read = 0;

        /* Lustre-specific defaults. */
        fd->hints->fs_hints.lustre.co_ratio = 1;
        ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
        fd->hints->fs_hints.lustre.coll_threshold = 0;
        ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
        fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;
        ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");

        if (users_info != MPI_INFO_NULL) {
            /* Striping request: copy each hint that is present into
             * fd->info and remember its numeric value for the
             * cross-process comparison below. */
            ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, buf, &found);
            if (found) {
                ADIOI_Info_set(fd->info, "striping_unit", buf);
                want_unit = atoll(buf);
            }

            ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, buf, &found);
            if (found) {
                ADIOI_Info_set(fd->info, "striping_factor", buf);
                want_factor = atoll(buf);
            }

            ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
                           MPI_MAX_INFO_VAL, buf, &found);
            if (found) {
                ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", buf);
                want_start = atoll(buf);
            }

            /* Direct I/O is switched on only by the exact strings
             * "true" or "TRUE". */
            ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, buf, &found);
            if (found && (strcmp(buf, "true") == 0 || strcmp(buf, "TRUE") == 0)) {
                ADIOI_Info_set(fd->info, "direct_read", "true");
                fd->direct_read = 1;
            }

            ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, buf, &found);
            if (found && (strcmp(buf, "true") == 0 || strcmp(buf, "TRUE") == 0)) {
                ADIOI_Info_set(fd->info, "direct_write", "true");
                fd->direct_write = 1;
            }

#ifdef HAVE_LUSTRE_LOCKAHEAD
            /* Lock-ahead on/off hints. */
            ADIOI_Info_check_and_install_int(fd, users_info,
                                             "romio_lustre_cb_lock_ahead_write",
                                             &(fd->hints->fs_hints.lustre.lock_ahead_write),
                                             myname, error_code);
            ADIOI_Info_check_and_install_int(fd, users_info,
                                             "romio_lustre_cb_lock_ahead_read",
                                             &(fd->hints->fs_hints.lustre.lock_ahead_read),
                                             myname, error_code);

            /* The extent count and the flags are only processed when
             * lock-ahead is enabled for reads or for writes. */
            if (fd->hints->fs_hints.lustre.lock_ahead_read ||
                fd->hints->fs_hints.lustre.lock_ahead_write) {
                ADIOI_Info_check_and_install_int(fd, users_info,
                                                 "romio_lustre_cb_lock_ahead_num_extents",
                                                 &(fd->hints->fs_hints.lustre.
                                                   lock_ahead_num_extents),
                                                 myname, error_code);

                /* Publish the extent count actually in effect (default or
                 * user-supplied) in fd->info. */
                MPL_snprintf(buf, MPI_MAX_INFO_VAL + 1, "%d",
                             fd->hints->fs_hints.lustre.lock_ahead_num_extents);
                ADIOI_Info_set(fd->info, "romio_lustre_cb_lock_ahead_num_extents", buf);

                ADIOI_Info_check_and_install_int(fd, users_info,
                                                 "romio_lustre_cb_lock_ahead_flags",
                                                 &(fd->hints->fs_hints.lustre.lock_ahead_flags),
                                                 myname, error_code);
            }
#endif
        }

        /* Rank 0 publishes its striping request; every rank then compares
         * the broadcast values with its own. */
        MPI_Comm_rank(fd->comm, &rank);
        if (rank == 0) {
            root_stripe[0] = want_factor;
            root_stripe[1] = want_unit;
            root_stripe[2] = want_start;
        }
        MPI_Bcast(root_stripe, 3, MPI_OFFSET, 0, fd->comm);

        /* The file is deliberately not opened during hint processing (the
         * open routines deal with the EXCL flag); only the "same value on
         * all processes" condition is enforced here. */
        if (!(root_stripe[0] == want_factor
              && root_stripe[1] == want_unit && root_stripe[2] == want_start)) {
            MPIO_ERR_CREATE_CODE_INFO_NOT_SAME("ADIOI_LUSTRE_SetInfo",
                                               "str_factor or str_unit or start_iodev", error_code);
            ADIOI_Free(buf);
            return;
        }
    }

    /* Hints that are processed on every call, not just at open time. */
    if (users_info != MPI_INFO_NULL) {
        /* co_ratio: I/O clients per OST, used for load balancing between
         * clients and OSTs. */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_lustre_co_ratio",
                                         &(fd->hints->fs_hints.lustre.co_ratio), myname,
                                         error_code);

        /* coll_threshold: request size above which collective I/O may not
         * be performed. */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_lustre_coll_threshold",
                                         &(fd->hints->fs_hints.lustre.coll_threshold), myname,
                                         error_code);

        /* ds_in_coll: controls data sieving inside collective I/O. */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_lustre_ds_in_coll",
                                             &(fd->hints->fs_hints.lustre.ds_in_coll), myname,
                                             error_code);
    }

    /* Generic collective I/O and data sieving hints. */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    /* The generic hint pass might have stepped on striping_unit, so install
     * the user's value again afterwards. */
    if (users_info != MPI_INFO_NULL) {
        ADIOI_Info_check_and_install_int(fd, users_info, "striping_unit", NULL, myname, error_code);
    }

    /* ADIOI_Direct_read / ADIOI_Direct_write are defined outside this
     * function; a nonzero value turns direct I/O on regardless of hints. */
    if (ADIOI_Direct_read) {
        fd->direct_read = 1;
    }
    if (ADIOI_Direct_write) {
        fd->direct_write = 1;
    }

    ADIOI_Free(buf);

    *error_code = MPI_SUCCESS;
}
示例#24
0
/*
 * ADIOI_PVFS_SetInfo - record PVFS-related hints on a file.
 *
 * Parameters:
 * fd         - file whose fd->info and fd->hints are updated
 * users_info - hints supplied by the caller; may be MPI_INFO_NULL
 * error_code - output.  Set through MPIO_ERR_CREATE_CODE_INFO_NOT_SAME when
 *              a hint value differs from the one broadcast by rank 0 of
 *              fd->comm (the function then returns early, without calling
 *              ADIOI_GEN_SetInfo).  Otherwise set to MPI_SUCCESS at the
 *              end, overwriting whatever ADIOI_GEN_SetInfo stored in it.
 *
 * When fd->info is MPI_INFO_NULL the info object is created, the list I/O
 * hints default to "disable", and the striping and list I/O hints found in
 * users_info are copied into fd->info after being checked against rank 0.
 *
 * The scratch buffer used to read hint values is released on every exit
 * path, including the early error returns.
 */
void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    char *value;
    int flag, tmp_val, str_factor = -1, str_unit = -1, start_iodev = -1;
    static char myname[] = "ADIOI_PVFS_SETINFO";

    if ((fd->info) == MPI_INFO_NULL) {
        /* This must be part of the open call. can set striping parameters
         * if necessary. */
        MPI_Info_create(&(fd->info));
        ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", "disable");
        ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", "disable");
        fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE;
        fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE;

        /* has user specified any pvfs-specific hints (striping params, listio)
         * and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            /* Allocated only on this path, so it is freed only on this
             * path: at the end of the block and before each early return. */
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));

            /* NOTE(review): each MPI_Bcast below is reached only on ranks
             * where the hint is present in users_info; this presumably
             * relies on all ranks passing the same set of keys -- confirm. */
            ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                str_factor = atoi(value);
                tmp_val = str_factor;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != str_factor) {
                    /* --BEGIN ERROR HANDLING-- */
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, "striping_factor", error_code);
                    ADIOI_Free(value);
                    return;
                    /* --END ERROR HANDLING-- */
                }
                ADIOI_Info_set(fd->info, "striping_factor", value);
            }

            ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                str_unit = atoi(value);
                tmp_val = str_unit;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != str_unit) {
                    /* --BEGIN ERROR HANDLING-- */
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, "striping_unit", error_code);
                    ADIOI_Free(value);
                    return;
                    /* --END ERROR HANDLING-- */
                }
                ADIOI_Info_set(fd->info, "striping_unit", value);
            }

            ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                start_iodev = atoi(value);
                tmp_val = start_iodev;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != start_iodev) {
                    /* --BEGIN ERROR HANDLING-- */
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, "start_iodevice", error_code);
                    ADIOI_Free(value);
                    return;
                    /* --END ERROR HANDLING-- */
                }
                ADIOI_Info_set(fd->info, "start_iodevice", value);
            }

            /* List I/O for reads: only the exact lower- or upper-case
             * spellings are recognized; any other string leaves the
             * default in place. */
            ADIOI_Info_get(users_info, "romio_pvfs_listio_read", MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
                    fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_ENABLE;
                } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
                    fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE;
                } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
                    fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_AUTO;
                }
                tmp_val = fd->hints->fs_hints.pvfs.listio_read;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != fd->hints->fs_hints.pvfs.listio_read) {
                    /* --BEGIN ERROR HANDLING-- */
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, "romio_pvfs_listio_read",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                    /* --END ERROR HANDLING-- */
                }
            }

            /* List I/O for writes: same rules as for reads. */
            ADIOI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
                    fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_ENABLE;
                } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
                    fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE;
                } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) {
                    ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
                    fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_AUTO;
                }
                tmp_val = fd->hints->fs_hints.pvfs.listio_write;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != fd->hints->fs_hints.pvfs.listio_write) {
                    /* --BEGIN ERROR HANDLING-- */
                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, "romio_pvfs_listio_write",
                                                       error_code);
                    ADIOI_Free(value);
                    return;
                    /* --END ERROR HANDLING-- */
                }
            }

            ADIOI_Free(value);
        }
    }

    /* set the values for collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    *error_code = MPI_SUCCESS;
}