/*@
    MPI_Info_dup - Returns a duplicate of the info object

Input Parameters:
. info - info object (handle)

Output Parameters:
. newinfo - duplicate of info object (handle)

.N fortran
@*/
int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo)
{
    MPI_Info src, head, tail, node;

    /* Reject handles that are null/negative or that do not carry the
     * info cookie; this is fatal for the whole job. */
    if ((info <= (MPI_Info) 0) || (info->cookie != MPIR_INFO_COOKIE)) {
        FPRINTF(stderr, "MPI_Info_dup: Invalid info object\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* The list starts with a head node that carries the cookie and no
     * key/value pair of its own. */
    head = (MPI_Info) ADIOI_Malloc(sizeof(struct MPIR_Info));
    *newinfo = head;
    head->cookie = MPIR_INFO_COOKIE;
    head->key = 0;
    head->value = 0;
    head->next = 0;

    /* Walk the source entries (everything after its head node) and
     * append a deep copy of each one to the tail of the new list. */
    tail = head;
    for (src = info->next; src; src = src->next) {
        node = (MPI_Info) ADIOI_Malloc(sizeof(struct MPIR_Info));
        tail->next = node;

        node->cookie = 0;       /* cookie not set on purpose */
        node->key = ADIOI_Strdup(src->key);
        node->value = ADIOI_Strdup(src->value);
        node->next = 0;

        tail = node;
    }

    return MPI_SUCCESS;
}
/* ADIO_FileSysType_parentdir * * Returns pointer to string in dirnamep; that string is allocated with * strdup and must be free()'d. */ static void ADIO_FileSysType_parentdir(const char *filename, char **dirnamep) { int err; char *dir = NULL, *slash; struct stat statbuf; err = lstat(filename, &statbuf); if (err || (!S_ISLNK(statbuf.st_mode))) { /* no such file, or file is not a link; these are the "normal" * cases where we can just return the parent directory. */ dir = ADIOI_Strdup(filename); } else { /* filename is a symlink. we've presumably already tried * to stat it and found it to be missing (dangling link), * but this code doesn't care if the target is really there * or not. */ ssize_t namelen; char *linkbuf; linkbuf = ADIOI_Malloc(PATH_MAX+1); namelen = readlink(filename, linkbuf, PATH_MAX+1); if (namelen == -1) { /* something strange has happened between the time that * we determined that this was a link and the time that * we attempted to read it; punt and use the old name. */ dir = ADIOI_Strdup(filename); } else { /* successfully read the link */ linkbuf[namelen] = '\0'; /* readlink doesn't null terminate */ dir = ADIOI_Strdup(linkbuf); } ADIOI_Free(linkbuf); } slash = strrchr(dir, '/'); if (!slash) ADIOI_Strncpy(dir, ".", 2); else { if (slash == dir) *(dir + 1) = '\0'; else *slash = '\0'; } *dirnamep = dir; return; }
/* ADIOI_BEEGFS_SetInfo
 *
 * Processes the hints for a file on BeeGFS.
 *
 * On the first call for a file (fd->info is still MPI_INFO_NULL) an info
 * object is created and the "striping_unit" / "striping_factor" hints are
 * read from users_info.  All ranks must pass identical values; otherwise
 * the job is aborted.  If both values are non-zero and the file is opened
 * with ADIO_CREATE, rank 0 asks the file system to create the file with
 * those striping parameters via an ioctl on the parent directory.  A
 * failure of that step is reported on stderr but is not fatal.
 *
 * The remaining hints are always handed to ADIOI_GEN_SetInfo, which also
 * determines the final value of *error_code.
 *
 * fd         - file whose hints are being set
 * users_info - hints supplied by the user, may be MPI_INFO_NULL
 * error_code - out: MPI_SUCCESS or the code set by ADIOI_GEN_SetInfo
 */
void ADIOI_BEEGFS_SetInfo( ADIO_File fd, MPI_Info users_info, int *error_code ) {
    char *value, *pathname, *dname, *slash;
    int flag, stripe_val[2], numtargets = 0, chunksize = 0;
    struct BeegfsIoctl_MkFileWithStripeHints_Arg createFileArg;
    int err, myrank, fd_pdir, perm, old_mask;
    static char myname[] = "ADIOI_BEEGFS_SETINFO";

    /* set error code to success */
    *error_code = MPI_SUCCESS;

    value = ( char * )ADIOI_Malloc( ( MPI_MAX_INFO_VAL + 1 ) * sizeof( char ) );

    MPI_Comm_rank( fd->comm, &myrank );

    /* set hints */
    if( ( fd->info ) == MPI_INFO_NULL ) {
        MPI_Info_create( &( fd->info ) );

        ADIOI_Info_set( fd->info, "striping_unit", "0" );
        ADIOI_Info_set( fd->info, "striping_factor", "0" );

        /* set users infos */
        if( users_info != MPI_INFO_NULL ) {
            /* striping information */
            ADIOI_Info_get( users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag );
            if( flag )
                chunksize = atoi( value );

            ADIOI_Info_get( users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag );
            if( flag )
                numtargets = atoi( value );

            /* check stripe info consistency: everybody compares its own
             * values against the ones broadcast by rank 0 */
            if( myrank == 0 ) {
                stripe_val[0] = numtargets;
                stripe_val[1] = chunksize;
            }
            MPI_Bcast( stripe_val, 2, MPI_INT, 0, fd->comm );

            if( stripe_val[0] != numtargets || stripe_val[1] != chunksize ) {
                FPRINTF( stderr, "ADIOI_BEEGFS_SetInfo: All keys"
                         "-striping_factor:striping_unit "
                         "need to be identical across all processes\n" );
                MPI_Abort( MPI_COMM_WORLD, 1 );
            }

            /* if user has specified striping info, process 0 tries to set it */
            if( myrank == 0 && ( fd->access_mode & ADIO_CREATE ) && numtargets && chunksize ) {
                /* open the parent dir to get/set striping info */
                pathname = ADIOI_Strdup( fd->filename );
                dname = strrchr( pathname, '/' );
                if( dname != NULL ) {
                    if( dname == pathname ) {
                        /* the file lives directly under "/": keep the
                         * leading slash so that we open the root directory
                         * instead of an empty path */
                        *( dname + 1 ) = '\0';
                    } else {
                        *dname = '\0'; /* replace / with nul-character */
                    }
                    fd_pdir = open( pathname, O_RDONLY );
                    if( fd_pdir == -1 ) {
                        FPRINTF( stderr, "Error opening %s: %s\n", pathname, strerror( errno ) );
                    }
                } else {
                    /* current dir relative path */
                    fd_pdir = open( ".", O_RDONLY );
                    if( fd_pdir == -1 ) {
                        FPRINTF( stderr, "Error opening .: %s\n", strerror( errno ) );
                    }
                }
                ADIOI_Free( pathname );

                /* Without a usable directory descriptor there is nothing to
                 * issue the ioctl on; the failure was reported above and
                 * the file keeps the file system's default striping. */
                if( fd_pdir != -1 ) {
                    if( fd->perm == ADIO_PERM_NULL ) {
                        /* read the current umask (and restore it) */
                        old_mask = umask( 022 );
                        umask( old_mask );
                        /* clear the masked bits from rw-rw-rw-; an XOR
                         * would instead turn on bits for masks outside
                         * 0666, e.g. umask 077 */
                        perm = ~old_mask & 0666;
                    } else
                        perm = fd->perm;

                    /* the ioctl expects the file name relative to the
                     * directory it is issued on */
                    slash = strrchr( fd->filename, '/' );
                    if( slash != NULL )
                        slash += 1;
                    else
                        slash = fd->filename;

                    createFileArg.filename = slash;
                    createFileArg.mode = perm;
                    createFileArg.numtargets = numtargets;
                    createFileArg.chunksize = chunksize;

                    /* create the hint file */
                    err = ioctl( fd_pdir, BEEGFS_IOC_MKFILE_STRIPEHINTS, &createFileArg );
                    if( err ) {
                        /* remember errno: the print below may change it */
                        int ioctl_errno = errno;

                        FPRINTF( stderr, "BEEGFS_IOC_MKFILE_STRIPEHINTS: %s. ", strerror( ioctl_errno ) );
                        if( ioctl_errno == EEXIST ) {
                            /* ignore user striping and use current file info */
                            FPRINTF( stderr, "[rank:%d] Failure to set stripe info for %s!\n", myrank, fd->filename );
                        }
                    }

                    /* close the parent dir file descriptor */
                    close( fd_pdir );
                }
            } /* End of striping parameters validation */
        }
        MPI_Barrier( fd->comm );
    }

    /* set rest of the MPI hints (including E10 hints) */
    ADIOI_GEN_SetInfo( fd, users_info, error_code );

    ADIOI_Free( value );
}
/*@ MPI_Register_datarep - Register functions for user-defined data representations Input Parameters: + datarep - data representation name (string) . read_conversion_fn - function invoked to convert from file representation to native representation (function) . write_conversion_fn - function invoked to convert from native representation to file representation (function) . dtype_file_extent_fn - function invoked to get the exted of a datatype as represented in the file (function) - extra_state - pointer to extra state that is passed to each of the three functions Notes: This function allows the user to provide routines to convert data from an external representation, used within a file, and the native representation, used within the CPU. There is one predefined data representation, 'external32'. Please consult the MPI-2 standard for details on this function. .N fortran @*/ int MPI_Register_datarep(ROMIO_CONST char *datarep, MPI_Datarep_conversion_function *read_conversion_fn, MPI_Datarep_conversion_function *write_conversion_fn, MPI_Datarep_extent_function *dtype_file_extent_fn, void *extra_state) { int error_code; ADIOI_Datarep *adio_datarep; static char myname[] = "MPI_REGISTER_DATAREP"; ROMIO_THREAD_CS_ENTER(); /* --BEGIN ERROR HANDLING-- */ /* check datarep name (use strlen instead of strnlen because strnlen is not portable) */ if (datarep == NULL || strlen(datarep) < 1 || strlen(datarep) > MPI_MAX_DATAREP_STRING) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_ARG, "**datarepname", 0); error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code); goto fn_exit; } /* --END ERROR HANDLING-- */ MPIR_MPIOInit(&error_code); if (error_code != MPI_SUCCESS) goto fn_exit; /* --BEGIN ERROR HANDLING-- */ /* check datarep isn't already registered */ for (adio_datarep = ADIOI_Datarep_head; adio_datarep; adio_datarep = adio_datarep->next) { if (!strncmp(datarep, adio_datarep->name, MPI_MAX_DATAREP_STRING)) { error_code = 
MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_DUP_DATAREP, "**datarepused", "**datarepused %s", datarep); error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code); goto fn_exit; } } /* Check Non-NULL Read and Write conversion function pointer */ /* Read and Write conversions are currently not supported. */ if ( (read_conversion_fn != NULL) || (write_conversion_fn != NULL) ) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_CONVERSION, "**drconvnotsupported", 0); error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code); goto fn_exit; } /* check extent function pointer */ if (dtype_file_extent_fn == NULL) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_ARG, "**datarepextent", 0); error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code); goto fn_exit; } /* --END ERROR HANDLING-- */ adio_datarep = ADIOI_Malloc(sizeof(ADIOI_Datarep)); adio_datarep->name = ADIOI_Strdup(datarep); adio_datarep->state = extra_state; adio_datarep->read_conv_fn = read_conversion_fn; adio_datarep->write_conv_fn = write_conversion_fn; adio_datarep->extent_fn = dtype_file_extent_fn; adio_datarep->next = ADIOI_Datarep_head; ADIOI_Datarep_head = adio_datarep; error_code = MPI_SUCCESS; fn_exit: ROMIO_THREAD_CS_EXIT(); return error_code; }
MPI_File ADIO_Open(MPI_Comm orig_comm, MPI_Comm comm, const char *filename, int file_system, ADIOI_Fns *ops, int access_mode, ADIO_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, MPI_Info info, int perm, int *error_code) { MPI_File mpi_fh; ADIO_File fd; int err, rank, procs; static char myname[] = "ADIO_OPEN"; int max_error_code; MPI_Info dupinfo; int syshints_processed, can_skip; char *p; *error_code = MPI_SUCCESS; /* obtain MPI_File handle */ mpi_fh = MPIO_File_create(sizeof(struct ADIOI_FileD)); if (mpi_fh == MPI_FILE_NULL) { fd = MPI_FILE_NULL; *error_code = MPIO_Err_create_code(*error_code, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_OTHER, "**nomem2",0); goto fn_exit; } fd = MPIO_File_resolve(mpi_fh); fd->cookie = ADIOI_FILE_COOKIE; fd->fp_ind = disp; fd->fp_sys_posn = 0; fd->comm = comm; /* dup'ed in MPI_File_open */ fd->filename = ADIOI_Strdup(filename); fd->file_system = file_system; fd->fs_ptr = NULL; fd->fns = ops; fd->disp = disp; fd->split_coll_count = 0; fd->shared_fp_fd = ADIO_FILE_NULL; fd->atomicity = 0; fd->etype = etype; /* MPI_BYTE by default */ fd->filetype = filetype; /* MPI_BYTE by default */ fd->etype_size = 1; /* default etype is MPI_BYTE */ fd->file_realm_st_offs = NULL; fd->file_realm_types = NULL; fd->perm = perm; fd->async_count = 0; fd->fortran_handle = -1; fd->err_handler = ADIOI_DFLT_ERR_HANDLER; fd->io_buf_window = MPI_WIN_NULL; fd->io_buf_put_amounts_window = MPI_WIN_NULL; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &procs); /* create and initialize info object */ fd->hints = (ADIOI_Hints *)ADIOI_Calloc(1, sizeof(struct ADIOI_Hints_struct)); if (fd->hints == NULL) { *error_code = MPIO_Err_create_code(*error_code, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_OTHER, "**nomem2",0); goto fn_exit; } fd->hints->cb_config_list = NULL; fd->hints->ranklist = NULL; fd->hints->initialized = 0; fd->info = MPI_INFO_NULL; /* move system-wide hint processing *back* into open, but this time the * hintfile reader will do a 
scalable read-and-broadcast. The global * ADIOI_syshints will get initialized at first open. subsequent open * calls will just use result from first open. * * We have two goals here: * 1: avoid processing the hintfile multiple times * 2: have all processes participate in hintfile processing (so we can read-and-broadcast) * * a code might do an "initialize from 0", so we can only skip hint * processing once everyone has particpiated in hint processing */ if (ADIOI_syshints == MPI_INFO_NULL) syshints_processed = 0; else syshints_processed = 1; MPI_Allreduce(&syshints_processed, &can_skip, 1, MPI_INT, MPI_MIN, fd->comm); if (!can_skip) { if (ADIOI_syshints == MPI_INFO_NULL) MPI_Info_create(&ADIOI_syshints); ADIOI_process_system_hints(fd, ADIOI_syshints); } ADIOI_incorporate_system_hints(info, ADIOI_syshints, &dupinfo); ADIO_SetInfo(fd, dupinfo, &err); if (dupinfo != MPI_INFO_NULL) { *error_code = MPI_Info_free(&dupinfo); if (*error_code != MPI_SUCCESS) goto fn_exit; } ADIOI_Info_set(fd->info, "romio_filesystem_type", fd->fns->fsname); /* Instead of repeatedly allocating this buffer in collective read/write, * allocating up-front might make memory management on small platforms * (e.g. Blue Gene) more efficent */ fd->io_buf = ADIOI_Malloc(fd->hints->cb_buffer_size); /* deferred open: * we can only do this optimization if 'fd->hints->deferred_open' is set * (which means the user hinted 'no_indep_rw' and collective buffering). * Furthermore, we only do this if our collective read/write routines use * our generic function, and not an fs-specific routine (we can defer opens * only if we use our aggreagation code). 
*/ if (fd->hints->deferred_open && !(uses_generic_read(fd) \ && uses_generic_write(fd))) { fd->hints->deferred_open = 0; } if (ADIO_Feature(fd, ADIO_SCALABLE_OPEN)) /* disable deferred open on these fs so that scalable broadcast * will always use the propper communicator */ fd->hints->deferred_open = 0; /* on BlueGene, the cb_config_list is built when hints are processed. No * one else does that right now */ if (fd->hints->ranklist == NULL) { build_cb_config_list(fd, orig_comm, comm, rank, procs, error_code); if (*error_code != MPI_SUCCESS) goto fn_exit; } fd->is_open = 0; fd->my_cb_nodes_index = -2; fd->is_agg = is_aggregator(rank, fd); /* deferred open used to split the communicator to create an "aggregator * communicator", but we only used it as a way to indicate that deferred * open happened. fd->is_open and fd->is_agg are sufficient */ /* actual opens start here */ /* generic open: one process opens to create the file, all others open */ /* nfs open: everybody opens or else you'll end up with "file not found" * due to stupid nfs consistency semantics */ /* scalable open: one process opens and broadcasts results to everyone */ ADIOI_OpenColl(fd, rank, access_mode, error_code); /* for debugging, it can be helpful to see the hints selected. Some file * systes set up the hints in the open call (e.g. 
lustre) */ p = getenv("ROMIO_PRINT_HINTS"); if (rank == 0 && p != NULL ) { ADIOI_Info_print_keyvals(fd->info); } fn_exit: MPI_Allreduce(error_code, &max_error_code, 1, MPI_INT, MPI_MAX, comm); if (max_error_code != MPI_SUCCESS) { /* If the file was successfully opened, close it */ if (*error_code == MPI_SUCCESS) { /* in the deferred open case, only those who have actually opened the file should close it */ if (fd->hints->deferred_open) { if (fd->is_agg) { (*(fd->fns->ADIOI_xxx_Close))(fd, error_code); } } else { (*(fd->fns->ADIOI_xxx_Close))(fd, error_code); } } ADIOI_Free(fd->filename); ADIOI_Free(fd->hints->ranklist); ADIOI_Free(fd->hints->cb_config_list); ADIOI_Free(fd->hints); if (fd->info != MPI_INFO_NULL) MPI_Info_free(&(fd->info)); ADIOI_Free(fd->io_buf); ADIOI_Free(fd); fd = ADIO_FILE_NULL; if (*error_code == MPI_SUCCESS) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**oremote_fail", 0); } } return fd; }
/* ADIOI_PANFS_Open
 *
 * Opens fd->filename on PanFS and stores the descriptor in fd->fd_sys
 * (fd->fd_direct is set to -1).
 *
 * When the access mode contains ADIO_CREATE, the file is created first:
 * the "panfs_layout_*" hints are read from fd->info and validated, and the
 * file is then created either through the PAN_FS_CLIENT_LAYOUT_CREATE_FILE
 * ioctl on the parent directory (layout types RAID0, RAID1_5_PARITY_STRIPE
 * and RAID10) or through a plain open() with O_CREAT (any other layout
 * type).  Every failure in this creation phase aborts the job.
 *
 * After a successful open the actual layout is queried from the file
 * system and written back into fd->info.  With ADIO_APPEND the file
 * pointers are positioned at the end of the file.
 *
 * fd         - file to open
 * error_code - out: MPI_SUCCESS, or an MPI error code derived from errno
 *              if the final open() failed
 */
void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
{
    char* value;
    int perm, old_mask, amode, flag;
    static char myname[] = "ADIOI_PANFS_OPEN";

    if (fd->perm == ADIO_PERM_NULL) {
        /* read the current umask (and restore it) to derive the default
         * permissions */
        old_mask = umask(022);
        umask(old_mask);
        perm = ~old_mask & 0666;
    }
    else perm = fd->perm;

    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
    {
        pan_fs_client_layout_agg_type_t layout_type = PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT;
        unsigned long int layout_stripe_unit = 0;
        unsigned long int layout_parity_stripe_width = 0;
        unsigned long int layout_parity_stripe_depth = 0;
        unsigned long int layout_total_num_comps = 0;
        pan_fs_client_layout_visit_t layout_visit_policy = PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN;
        int myrank;

        MPI_Comm_rank(fd->comm, &myrank);

        *error_code = MPI_SUCCESS;

        /* collect the layout hints; hints that are not set keep the
         * defaults assigned above */
        value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
        ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_type = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_stripe_unit = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_total_num_comps = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_parity_stripe_width = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_parity_stripe_depth = strtoul(value,NULL,10);
        }
        ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            layout_visit_policy = strtoul(value,NULL,10);
        }
        ADIOI_Free(value);

        amode = amode | O_CREAT;
        /* Check for valid set of hints */
        if ((layout_type < PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT) ||
            (layout_type > PAN_FS_CLIENT_LAYOUT_TYPE__RAID10))
        {
            FPRINTF(stderr, "%s: panfs_layout_type is not a valid value: %u.\n", myname, layout_type);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        if ((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) &&
            ((layout_stripe_unit == 0) || (layout_total_num_comps == 0)))
        {
            if(layout_stripe_unit == 0)
            {
                FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID0 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
            }
            if(layout_total_num_comps == 0)
            {
                FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID0 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
            }
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
        {
            if ((layout_stripe_unit == 0) ||
                (layout_parity_stripe_width == 0) ||
                (layout_parity_stripe_depth == 0) ||
                (layout_total_num_comps == 0))
            {
                if(layout_stripe_unit == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if(layout_total_num_comps == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if(layout_parity_stripe_width == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_parity_stripe_width hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if(layout_parity_stripe_depth == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_parity_stripe_depth hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
                (layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET))
            {
                FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
        if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)
        {
            if ((layout_stripe_unit == 0) || (layout_total_num_comps == 0))
            {
                if(layout_stripe_unit == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                if(layout_total_num_comps == 0)
                {
                    FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
                }
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
                (layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET))
            {
                FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
        /* Create the file via ioctl() or open(). ADIOI_PANFS_Open's caller
         * already optimizes performance by only calling this function with
         * ADIO_CREATE on rank 0.  Therefore, we don't need to worry about
         * implementing that optimization here. */
        if((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) ||
           (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) ||
           (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
            pan_fs_client_layout_create_args_t file_create_args;
            int fd_dir;
            char* slash;
            struct stat stat_buf;
            int err;
            char *path;

            /* Check that the file does not exist before
             * trying to create it.  The ioctl itself should
             * be able to handle this condition.  Currently,
             * the ioctl will return successfully if the file
             * has been previously created.  Filed bug 33862
             * to track the problem.
             */
            err = stat(fd->filename,&stat_buf);
            if((err == -1) && (errno != ENOENT))
            {
                FPRINTF(stderr,"%s: Unexpected I/O Error calling stat() on PanFS file: %s.\n", myname, strerror(errno));
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            else if (err == 0)
            {
                FPRINTF(stderr,"%s: Cannot create PanFS file with ioctl when file already exists.\n", myname);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            else
            {
                /* (err == -1) && (errno == ENOENT) */
                /* File does not exist */

                /* derive the parent directory: "." when there is no
                 * slash, "/" when the only slash is the leading one */
                path = ADIOI_Strdup(fd->filename);
                slash = strrchr(path, '/');
                if (!slash)
                    ADIOI_Strncpy(path, ".", 2);
                else {
                    if (slash == path)
                        *(path + 1) = '\0';
                    else *slash = '\0';
                }

                /* create PanFS object */
                bzero(&file_create_args,sizeof(pan_fs_client_layout_create_args_t));
                /* open directory */
                fd_dir = open(path, O_RDONLY);
                if (fd_dir < 0) {
                    FPRINTF(stderr, "%s: I/O Error opening parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                else
                {
                    /* the ioctl takes the name relative to the directory */
                    char *file_name_ptr = fd->filename;
                    slash = strrchr(fd->filename, '/');
                    if (slash)
                    {
                        file_name_ptr = slash + 1;
                    }
                    /* create file in the directory */
                    file_create_args.mode = perm;
                    file_create_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
                    file_create_args.flags = PAN_FS_CLIENT_LAYOUT_CREATE_F__NONE;

                    /* Bound the copy by the size of the destination, not
                     * by the length of the source path, and refuse names
                     * that would not fit together with their terminator.
                     * (Assumes 'filename' is an array member of the
                     * argument structure -- confirm against the PanFS
                     * SDK header.) */
                    if (strlen(file_name_ptr) >= sizeof(file_create_args.filename)) {
                        FPRINTF(stderr, "%s: File name is too long to create PanFS file using ioctl.\n", myname);
                        MPI_Abort(MPI_COMM_WORLD, 1);
                    }
                    ADIOI_Strncpy(file_create_args.filename, file_name_ptr, sizeof(file_create_args.filename));

                    file_create_args.layout.agg_type = layout_type;
                    file_create_args.layout.layout_is_valid = 1;
                    if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
                    {
                        file_create_args.layout.u.raid1_5_parity_stripe.total_num_comps = layout_total_num_comps;
                        file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_width = layout_parity_stripe_width;
                        file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth = layout_parity_stripe_depth;
                        file_create_args.layout.u.raid1_5_parity_stripe.stripe_unit = layout_stripe_unit;
                        file_create_args.layout.u.raid1_5_parity_stripe.layout_visit_policy = layout_visit_policy;
                    }
                    else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0)
                    {
                        file_create_args.layout.u.raid0.total_num_comps = layout_total_num_comps;
                        file_create_args.layout.u.raid0.stripe_unit = layout_stripe_unit;
                    }
                    else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)
                    {
                        file_create_args.layout.u.raid10.total_num_comps = layout_total_num_comps;
                        file_create_args.layout.u.raid10.stripe_unit = layout_stripe_unit;
                        file_create_args.layout.u.raid10.layout_visit_policy = layout_visit_policy;
                    }
                    err = ioctl(fd_dir, PAN_FS_CLIENT_LAYOUT_CREATE_FILE, &file_create_args);
                    if (err < 0) {
                        FPRINTF(stderr, "%s: I/O Error doing ioctl on parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
                        MPI_Abort(MPI_COMM_WORLD, 1);
                    }
                    err = close(fd_dir);
                }
                ADIOI_Free(path);
            }
        }
        else
        {
            /* no explicit layout requested: create the file the plain way
             * and close it again; it is reopened below */
            int create_fd = open(fd->filename,amode,perm);
            if(create_fd != -1)
            {
                close(create_fd);
            }
            else
            {
                FPRINTF(stderr, "%s: I/O Error creating PanFS file using open: %s.\n", myname, strerror(errno));
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
    }
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    /* NOTE(review): when ADIO_CREATE was handled above, amode still
     * contains O_CREAT and the file already exists at this point, so
     * adding O_EXCL looks like it would make the open below fail with
     * EEXIST -- confirm the intended behaviour for ADIO_CREATE|ADIO_EXCL. */
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, value, &flag);
    if (flag) {
        unsigned long int concurrent_write = strtoul(value,NULL,10);
        if(concurrent_write == 1)
        {
            amode = amode | O_CONCURRENT_WRITE;
        }
    }
    ADIOI_Free(value);

    fd->fd_sys = open(fd->filename, amode, perm);
    fd->fd_direct = -1;

    if (fd->fd_sys != -1)
    {
        /* publish the layout the file actually has in the info object */
        int rc;
        char temp_buffer[TEMP_BUFFER_SIZE];
        pan_fs_client_layout_query_args_t file_query_args;
        bzero(&file_query_args,sizeof(pan_fs_client_layout_query_args_t));
        file_query_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
        rc = ioctl(fd->fd_sys, PAN_FS_CLIENT_LAYOUT_QUERY_FILE, &file_query_args);
        if (rc < 0)
        {
            /* Error - set layout type to unknown */
            ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
        }
        else
        {
            ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.agg_type);
            ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
            if (file_query_args.layout.layout_is_valid == 1)
            {
                switch (file_query_args.layout.agg_type)
                {
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID0:
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        break;
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE:
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_width);
                        ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth);
                        ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy);
                        ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
                        break;
                    case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10:
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.stripe_unit);
                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.total_num_comps);
                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                        ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.layout_visit_policy);
                        ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
                        break;
                    default:
                        break;
                }
            }
        }
    }

    if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
        fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

    if (fd->fd_sys == -1) {
        /* translate the errno of the failed open into an MPI error */
        if (errno == ENAMETOOLONG)
            /* the message expects an int for %d; strlen yields a size_t */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_BAD_FILE, "**filenamelong", "**filenamelong %s %d", fd->filename, (int) strlen(fd->filename));
        else if (errno == ENOENT)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_NO_SUCH_FILE, "**filenoexist", "**filenoexist %s", fd->filename);
        else if (errno == ENOTDIR || errno == ELOOP)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_BAD_FILE, "**filenamedir", "**filenamedir %s", fd->filename);
        else if (errno == EACCES) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_ACCESS, "**fileaccess", "**fileaccess %s", fd->filename );
        }
        else if (errno == EROFS) {
            /* Read only file or file system and write access requested */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_READ_ONLY, "**ioneedrd", 0 );
        }
        else {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno));
        }
    }
    else *error_code = MPI_SUCCESS;
}