/*
 * Query function for the "individual" shared file pointer component.
 *
 * Sets *priority so the sharedfp framework can choose among components and
 * returns this component's module when it is able to service the file.
 * The component only handles files opened for writing (MPI_MODE_WRONLY or
 * MPI_MODE_RDWR).  Its priority rises above the floor value only when the
 * user also set the OMPIO_SHAREDFP_RELAXED_ORDERING info key.
 *
 * @param fh        open OMPIO file handle being queried
 * @param priority  [out] selection priority for this component
 * @return          &individual when the file is writable, NULL otherwise
 */
struct mca_sharedfp_base_module_1_0_0_t *
mca_sharedfp_individual_component_file_query (mca_io_ompio_file_t *fh, int *priority)
{
    int amode;
    bool wronly_flag = false;
    bool relaxed_order_flag = false;
    MPI_Info info;
    int flag;
    int valuelen;
    char value[MPI_MAX_INFO_VAL+1];

    *priority = 0; /* test, and update priority below */

    /*---------------------------------------------------------*/
    /* 1. Is the file writable? check amode for MPI_MODE_WRONLY/RDWR */
    amode = fh->f_amode;
    if ( amode & MPI_MODE_WRONLY || amode & MPI_MODE_RDWR ) {
        wronly_flag = true;
        if ( mca_sharedfp_individual_verbose ) {
            opal_output(ompi_sharedfp_base_framework.framework_output,
                        "mca_sharedfp_individual_component_file_query: "
                        "MPI_MODE_WRONLY[true=%d,false=%d]=%d\n", true, false, wronly_flag);
        }
    }
    else {
        wronly_flag = false;
        if ( mca_sharedfp_individual_verbose ) {
            opal_output(ompi_sharedfp_base_framework.framework_output,
                        "mca_sharedfp_individual_component_file_query: Can not run!, "
                        "MPI_MODE_WRONLY[true=%d,false=%d]=%d\n", true, false, wronly_flag);
        }
    }

    /*---------------------------------------------------------*/
    /* 2. Did the user specify the MPI_INFO relaxed ordering flag? */
    info = fh->f_info;
    if ( info != MPI_INFO_NULL ) {
        valuelen = MPI_MAX_INFO_VAL;
        opal_info_get ( info, "OMPIO_SHAREDFP_RELAXED_ORDERING",
                        valuelen, value, &flag);
        if ( flag ) {
            if ( mca_sharedfp_individual_verbose ) {
                opal_output(ompi_sharedfp_base_framework.framework_output,
                            "mca_sharedfp_individual_component_file_query: "
                            "OMPIO_SHAREDFP_RELAXED_ORDERING=%s\n", value);
            }
            /* flag - Returns true if key defined, false if not (boolean). */
            relaxed_order_flag = true;
        }
        else {
            if ( mca_sharedfp_individual_verbose ) {
                /* NOTE: this string literal was broken by a stray raw newline
                 * in the original source; rejoined into valid adjacent literals. */
                opal_output(ompi_sharedfp_base_framework.framework_output,
                            "mca_sharedfp_individual_component_file_query: "
                            "OMPIO_SHAREDFP_RELAXED_ORDERING MPI_Info key not set. "
                            "Set this key in order to increase this component's priority value.\n");
            }
        }
    }
    else {
        if ( mca_sharedfp_individual_verbose ) {
            opal_output(ompi_sharedfp_base_framework.framework_output,
                        "mca_sharedfp_individual_component_file_query: "
                        "OMPIO_SHAREDFP_RELAXED_ORDERING MPI_Info key not set, "
                        "got MPI_INFO_NULL. Set this key in order to increase "
                        "this component's priority value.\n");
        }
    }

    /* For now, this algorithm will not run if the file is not opened writable.
     * Setting OMPIO_SHAREDFP_RELAXED_ORDERING gives this module a higher
     * priority; otherwise it gets the floor priority of 1, meaning this
     * module runs only if no other module can run. */
    if ( wronly_flag && relaxed_order_flag ) {
        *priority = mca_sharedfp_individual_priority;
    }
    else {
        *priority = 1;
    }

    if ( wronly_flag ) {
        return &individual;
    }

    return NULL;
}
/* Info keys:
 *
 * - crs:
 *     none    = (Default) No CRS Service
 *     default = Whatever CRS service MPI chooses
 *     blcr    = BLCR
 *     self    = app level callbacks
 *
 * - cmdline:
 *     Command line to restart the process with.
 *     If empty, the user must manually enter it
 *
 * - target:
 *     Absolute path to the target directory.
 *
 * - handle:
 *     first = Earliest checkpoint directory available
 *     last  = Most recent checkpoint directory available
 *     [global:local] = handle provided by the MPI library
 *
 * - restarting:
 *     0 = not restarting
 *     1 = restarting
 *
 * - checkpointing:
 *     0 = No need to prepare for checkpointing
 *     1 = MPI should prepare for checkpointing
 *
 * - inflight:
 *     default = message
 *     message = Drain inflight messages at the message level
 *     network = Drain inflight messages at the network level (if possible)
 *
 * - user_space_mem:
 *     0 = Memory does not need to be managed
 *     1 = Memory must be in user space (i.e., not on network card
 *
 */
/*
 * Copy the relevant keys from an opal_info_t object into the quiesce
 * datum.  String values are strdup'ed into the datum (datum owns the
 * copies); missing boolean keys default to false.
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE on allocation failure.
 */
static int extract_info_into_datum(opal_info_t *info, orte_snapc_base_quiesce_t *datum)
{
    int info_flag = false;
    int max_crs_len = 32;
    bool info_bool = false;
    char *info_char = NULL;

    /* Scratch buffer reused for every string-valued key lookup */
    info_char = (char *) malloc(sizeof(char) * (OPAL_PATH_MAX+1));
    if (NULL == info_char) {
        /* Original code dereferenced the buffer unchecked; fail cleanly instead. */
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /*
     * Key: crs
     */
    opal_info_get(info, "crs", max_crs_len, info_char, &info_flag);
    if( info_flag) {
        datum->crs_name = strdup(info_char);
    }

    /*
     * Key: cmdline
     */
    opal_info_get(info, "cmdline", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->cmdline = strdup(info_char);
    }

    /*
     * Key: handle
     */
    opal_info_get(info, "handle", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->handle = strdup(info_char);
    }

    /*
     * Key: target
     */
    opal_info_get(info, "target", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->target_dir = strdup(info_char);
    }

    /*
     * Key: restarting
     */
    opal_info_get_bool(info, "restarting", &info_bool, &info_flag);
    if( info_flag ) {
        datum->restarting = info_bool;
    } else {
        datum->restarting = false;
    }

    /*
     * Key: checkpointing
     */
    opal_info_get_bool(info, "checkpointing", &info_bool, &info_flag);
    if( info_flag ) {
        datum->checkpointing = info_bool;
    } else {
        datum->checkpointing = false;
    }

    /*
     * Display all values
     */
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('crs' = '%s')",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                         (NULL == datum->crs_name ? "Default (none)" : datum->crs_name)));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('cmdline' = '%s')",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                         (NULL == datum->cmdline ? "Default ()" : datum->cmdline)));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('checkpointing' = '%c')",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                         (datum->checkpointing ? 'T' : 'F')));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('restarting' = '%c')",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                         (datum->restarting ? 'T' : 'F')));

    /* free(NULL) is a no-op, no guard needed */
    free(info_char);
    info_char = NULL;

    return OMPI_SUCCESS;
}
/*
 * file_open_pvfs2: This is the same strategy as ROMIO's pvfs2 open
 *
 * Function:    - opens a new file
 * Accepts:     - same arguments as MPI_File_open()
 * Returns:     - Success if new file handle
 */
int
mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm,
                        const char* filename,
                        int access_mode,
                        struct opal_info_t *info,
                        mca_io_ompio_file_t *fh)
{
    int ret;
    mca_fs_pvfs2 *pvfs2_fs;
    PVFS_fs_id pvfs2_id;
    char pvfs2_path[OMPIO_MAX_NAME] = {0};
    char *ncache_timeout;
    open_status o_status = {0, {0, 0}};
    struct ompi_datatype_t *open_status_type;
    struct ompi_datatype_t *types[2] = {&ompi_mpi_int.dt, &ompi_mpi_byte.dt};
    int lens[2] = {1, sizeof(PVFS_object_ref)};
    OPAL_PTRDIFF_TYPE offsets[2];
    char char_stripe[MPI_MAX_INFO_KEY];
    int flag;
    int fs_pvfs2_stripe_size = -1;
    int fs_pvfs2_stripe_width = -1;

    /* We are going to do what ROMIO does with one process resolving
     * the name and broadcasting to others */

    pvfs2_fs = (mca_fs_pvfs2 *) malloc(sizeof(mca_fs_pvfs2));
    if (NULL == pvfs2_fs) {
        opal_output (1, "OUT OF MEMORY\n");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if (!mca_fs_pvfs2_IS_INITIALIZED) {
        /* disable the pvfs2 ncache */
        ncache_timeout = getenv("PVFS2_NCACHE_TIMEOUT");
        if (ncache_timeout == NULL ) {
            setenv("PVFS2_NCACHE_TIMEOUT", "0", 1);
        }
        ret = PVFS_util_init_defaults();
        if (ret < 0) {
            PVFS_perror("PVFS_util_init_defaults", ret);
            /* Fix: don't leak the file-system struct on the early error
             * return — file_close is never reached in this path. */
            free(pvfs2_fs);
            return OMPI_ERROR;
        }
        mca_fs_pvfs2_IS_INITIALIZED = 1;
    }

    memset(&(pvfs2_fs->credentials), 0, sizeof(PVFS_credentials));
    PVFS_util_gen_credentials(&(pvfs2_fs->credentials));

    /* check for stripe size and stripe depth in the info object and
       update mca_fs_pvfs2_stripe_width and mca_fs_pvfs2_stripe_size
       before calling fake_an_open() */
    opal_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_pvfs2_stripe_size );
    }

    opal_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_pvfs2_stripe_width );
    }

    if (fs_pvfs2_stripe_size < 0) {
        fs_pvfs2_stripe_size = mca_fs_pvfs2_stripe_size;
    }

    if (fs_pvfs2_stripe_width < 0) {
        fs_pvfs2_stripe_width = mca_fs_pvfs2_stripe_width;
    }

    /* Only the root resolves and opens the file; the result is broadcast. */
    if (OMPIO_ROOT == fh->f_rank) {
        ret = PVFS_util_resolve(filename, &pvfs2_id, pvfs2_path, OMPIO_MAX_NAME);
        if (ret < 0 ) {
            PVFS_perror("PVFS_util_resolve", ret);
            o_status.error = -1;
        }
        else {
            fake_an_open (pvfs2_id,
                          pvfs2_path,
                          access_mode,
                          fs_pvfs2_stripe_width,
                          (PVFS_size)fs_pvfs2_stripe_size,
                          pvfs2_fs,
                          &o_status);
        }
        pvfs2_fs->object_ref = o_status.object_ref;
        fh->f_fs_ptr = pvfs2_fs;
    }

    /* broadcast status and (possibly valid) object reference */
    offsets[0] = (MPI_Aint)(&o_status.error);
    offsets[1] = (MPI_Aint)(&o_status.object_ref);

    ompi_datatype_create_struct (2, lens, offsets, types, &open_status_type);
    ompi_datatype_commit (&open_status_type);

    fh->f_comm->c_coll.coll_bcast (MPI_BOTTOM,
                                   1,
                                   open_status_type,
                                   OMPIO_ROOT,
                                   fh->f_comm,
                                   fh->f_comm->c_coll.coll_bcast_module);

    ompi_datatype_destroy (&open_status_type);

    if (o_status.error != 0) {
        /* No need to free the pvfs2_fs structure, since it will
           be deallocated in file_close in case of an error */
        fh->f_fs_ptr = NULL;
        return OMPI_ERROR;
    }

    pvfs2_fs->object_ref = o_status.object_ref;
    fh->f_fs_ptr = pvfs2_fs;

    /* update the internal ompio structure to store stripe
       size and stripe depth correctly.
       Hadi(to be done): For this read the stripe size and stripe depth from
       the file itself */
    if (fs_pvfs2_stripe_size > 0 && fs_pvfs2_stripe_width > 0) {
        fh->f_stripe_size = fs_pvfs2_stripe_size;
    }

    return OMPI_SUCCESS;
}
/*
 * MPI_Lookup_name - look up a port name previously published under
 * service_name via the underlying PMIx runtime.
 *
 * The optional "range" info key ("nspace" or "session") restricts the
 * scope of the lookup; any other value is an error.  On success the
 * resolved port is copied (NUL-terminated) into port_name, which the
 * caller must size to at least MPI_MAX_PORT_NAME characters.
 */
int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
{
    char range[OPAL_MAX_INFO_VAL];
    int flag=0, ret;
    opal_value_t *rng;
    opal_list_t results, pinfo;
    opal_pmix_pdata_t *pdat;

    if ( MPI_PARAM_CHECK ) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        if ( NULL == port_name ) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
        }
        if ( NULL == service_name ) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
        }
        if (NULL == info || ompi_info_is_freed(info)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO, FUNC_NAME);
        }
    }

    if (NULL == opal_pmix.lookup) {
        opal_show_help("help-mpi-api.txt",
                       "MPI function not supported",
                       true,
                       FUNC_NAME,
                       "Underlying runtime environment does not support name lookup functionality");
        return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, OMPI_ERR_NOT_SUPPORTED, FUNC_NAME);
    }

    OPAL_CR_ENTER_LIBRARY();

    OBJ_CONSTRUCT(&pinfo, opal_list_t);

    /* OMPI supports info keys to pass the range to
     * be searched for the given key */
    if (MPI_INFO_NULL != info) {
        opal_info_get (info, "range", sizeof(range) - 1, range, &flag);
        if (flag) {
            if (0 == strcmp(range, "nspace")) {
                rng = OBJ_NEW(opal_value_t);
                rng->key = strdup(OPAL_PMIX_RANGE);
                rng->type = OPAL_INT;
                rng->data.integer = OPAL_PMIX_NAMESPACE;  // share only with procs in same nspace
                opal_list_append(&pinfo, &rng->super);
            } else if (0 == strcmp(range, "session")) {
                rng = OBJ_NEW(opal_value_t);
                rng->key = strdup(OPAL_PMIX_RANGE);
                rng->type = OPAL_INT;
                rng->data.integer = OPAL_PMIX_SESSION;  // share only with procs in same session
                opal_list_append(&pinfo, &rng->super);
            } else {
                /* unrecognized scope */
                OPAL_LIST_DESTRUCT(&pinfo);
                OPAL_CR_EXIT_LIBRARY();
                return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
            }
        }
    }

    /* collect the findings */
    OBJ_CONSTRUCT(&results, opal_list_t);
    pdat = OBJ_NEW(opal_pmix_pdata_t);
    pdat->value.key = strdup(service_name);
    opal_list_append(&results, &pdat->super);

    ret = opal_pmix.lookup(&results, &pinfo);
    OPAL_LIST_DESTRUCT(&pinfo);
    if (OPAL_SUCCESS != ret ||
        OPAL_STRING != pdat->value.type ||
        NULL == pdat->value.data.string) {
        if (OPAL_ERR_NOT_SUPPORTED == ret) {
            ret = OMPI_ERR_NOT_SUPPORTED;
            opal_show_help("help-mpi-api.txt",
                           "MPI function not supported",
                           true,
                           FUNC_NAME,
                           "Underlying runtime environment does not support name lookup functionality");
        } else {
            ret = MPI_ERR_NAME;
        }
        /* Fix: release the results list (and pdat) on the error path too —
         * the original only destructed it on success, leaking pdat. */
        OPAL_LIST_DESTRUCT(&results);
        OPAL_CR_EXIT_LIBRARY();
        return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME);
    }

    /* Fix: strncpy does not guarantee NUL-termination when the source is
     * as long as the bound (CERT STR32-C); terminate explicitly. */
    strncpy ( port_name, pdat->value.data.string, MPI_MAX_PORT_NAME - 1 );
    port_name[MPI_MAX_PORT_NAME - 1] = '\0';
    OPAL_LIST_DESTRUCT(&results);

    OPAL_CR_EXIT_LIBRARY();
    return MPI_SUCCESS;
}
/*
 * Open a file on a Lustre file system, honoring optional "stripe_size"
 * and "stripe_width" info keys.  When striping hints are given together
 * with MPI_MODE_CREATE|MPI_MODE_RDWR, rank 0 pre-creates the file with
 * llapi_file_create() and everyone synchronizes before opening.  On
 * success fh->fd is the open descriptor and fh->f_stripe_size records
 * the effective stripe size.
 */
int
mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
                         const char* filename,
                         int access_mode,
                         struct opal_info_t *info,
                         mca_io_ompio_file_t *fh)
{
    int amode;
    int old_mask, perm;
    int rc;
    int flag;
    int fs_lustre_stripe_size = -1;
    int fs_lustre_stripe_width = -1;
    char char_stripe[MPI_MAX_INFO_KEY];
    struct lov_user_md *lump = NULL;

    if (fh->f_perm == OMPIO_PERM_NULL) {
        old_mask = umask(022);
        umask(old_mask);
        /* Fix: the original used (old_mask ^ 0666), which sets spurious
         * bits (e.g. execute) for umasks with bits outside 0666 such as
         * the common 077.  Masking is the correct operation. */
        perm = 0666 & ~old_mask;
    }
    else {
        perm = fh->f_perm;
    }

    /* Translate the MPI access mode into POSIX open(2) flags. */
    amode = 0;
    if (access_mode & MPI_MODE_CREATE)
        amode = amode | O_CREAT;
    if (access_mode & MPI_MODE_RDONLY)
        amode = amode | O_RDONLY;
    if (access_mode & MPI_MODE_WRONLY)
        amode = amode | O_WRONLY;
    if (access_mode & MPI_MODE_RDWR)
        amode = amode | O_RDWR;
    if (access_mode & MPI_MODE_EXCL)
        amode = amode | O_EXCL;

    opal_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_lustre_stripe_size );
    }

    opal_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_lustre_stripe_width );
    }

    if (fs_lustre_stripe_size < 0) {
        fs_lustre_stripe_size = mca_fs_lustre_stripe_size;
    }

    if (fs_lustre_stripe_width < 0) {
        fs_lustre_stripe_width = mca_fs_lustre_stripe_width;
    }

    if ( (fs_lustre_stripe_size>0 || fs_lustre_stripe_width>0) &&
         (amode&O_CREAT) && (amode&O_RDWR)) {
        if (0 == fh->f_rank) {
            llapi_file_create(filename,
                              fs_lustre_stripe_size,
                              -1, /* MSC need to change that */
                              fs_lustre_stripe_width,
                              0); /* MSC need to change that */

            fh->fd = open(filename, O_CREAT | O_RDWR | O_LOV_DELAY_CREATE, perm);
            if (fh->fd < 0) {
                fprintf(stderr, "Can't open %s file: %d (%s)\n",
                        filename, errno, strerror(errno));
                return OMPI_ERROR;
            }
            close (fh->fd);
        }
        fh->f_comm->c_coll.coll_barrier (fh->f_comm,
                                         fh->f_comm->c_coll.coll_barrier_module);
    }

    fh->fd = open (filename, amode, perm);
    if (fh->fd < 0) {
        opal_output(1, "error opening file %s\n", filename);
        return OMPI_ERROR;
    }

    if (mca_fs_lustre_stripe_size > 0) {
        fh->f_stripe_size = mca_fs_lustre_stripe_size;
    }
    else {
        /* No explicit stripe size: read it back from the file itself. */
        lump = alloc_lum();
        if (NULL == lump ) {
            fprintf(stderr,"Cannot allocate memory for extracting stripe size\n");
            return OMPI_ERROR;
        }
        rc = llapi_file_get_stripe(filename, lump);
        if (rc != 0) {
            opal_output(1, "get_stripe failed: %d (%s)\n", errno, strerror(errno));
            free(lump);  /* Fix: was leaked on the error path */
            return OMPI_ERROR;
        }
        fh->f_stripe_size = lump->lmm_stripe_size;
        /* Fix: the free was commented out in the original, leaking lump on
         * every open.  NOTE(review): assumes alloc_lum() allocates with
         * malloc, as the commented-out free() implied — confirm. */
        free(lump);
    }
    return OMPI_SUCCESS;
}