/*
 * Similar to ompi_info_get(), but cast the result into a boolean
 * using some well-defined rules.
 *
 * The value stored under 'key' is fetched with ompi_info_get(), which also
 * sets *flag.  If *flag is zero, *value is left untouched.  Otherwise the
 * string is stripped of leading and trailing whitespace and converted:
 *
 *   - starts with a digit:                 *value = (bool) atoi(string)
 *   - "yes" or "true" (case-insensitive):  *value = true
 *   - anything else ("no", "false", an empty string, or an
 *     unrecognized word):                  *value = false
 *
 * Always returns MPI_SUCCESS.
 */
int ompi_info_get_bool(ompi_info_t *info, char *key, bool *value, int *flag)
{
    char str[256];
    char *ptr;
    char *end;

    /* Guarantee termination even if the lookup fills the whole buffer. */
    str[sizeof(str) - 1] = '\0';
    ompi_info_get(info, key, sizeof(str) - 1, str, flag);
    if (!*flag) {
        return MPI_SUCCESS;
    }

    *value = false;

    /* Trim trailing whitespace.  The scan must start from the end of the
     * *string*, not the end of the buffer: the last byte of the buffer is
     * always '\0', which is not whitespace, so starting there would never
     * trim anything. */
    end = str;
    while ('\0' != *end) {
        ++end;
    }
    while (end > str && isspace((unsigned char) end[-1])) {
        --end;
    }
    *end = '\0';

    /* Skip leading whitespace; the terminating '\0' stops the loop. */
    ptr = str;
    while (isspace((unsigned char) *ptr)) {
        ++ptr;
    }

    if ('\0' == *ptr) {
        /* empty (or all-whitespace) value: stays false */
        return MPI_SUCCESS;
    }

    if (isdigit((unsigned char) *ptr)) {
        *value = (bool) atoi(ptr);
    } else if (0 == strcasecmp(ptr, "yes") ||
               0 == strcasecmp(ptr, "true")) {
        *value = true;
    } else if (0 != strcasecmp(ptr, "no") &&
               0 != strcasecmp(ptr, "false")) {
        /* RHC unrecognized value -- print a warning? */
    }

    return MPI_SUCCESS;
}
/**
 * MPI_Info_get - Get a (key, value) pair from an 'MPI_Info' object
 *
 * @param info      info object (handle)
 * @param key       null-terminated character string of the index key
 * @param valuelen  maximum length of 'value' (integer)
 * @param value     null-terminated character string of the value
 * @param flag      true (1) if 'key' defined on 'info', false (0) if not
 *                  (logical)
 *
 * @retval MPI_SUCCESS
 * @retval MPI_ERR_ARG
 * @retval MPI_ERR_INFO
 * @retval MPI_ERR_INFO_KEY
 * @retval MPI_ERR_INFO_VALUE
 *
 * In C and C++, 'valuelen' should be one less than the allocated space
 * to allow for the null terminator.
 */
int MPI_Info_get(MPI_Info info, const char *key, int valuelen,
                 char *value, int *flag)
{
    int rc;

    /* Argument validation; each failure is reported through the
     * MPI_COMM_WORLD error handler, in this fixed order. */
    if (MPI_PARAM_CHECK) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);

        /* the info handle must be a live, non-null object */
        if (NULL == info || MPI_INFO_NULL == info ||
            ompi_info_is_freed(info)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO,
                                          FUNC_NAME);
        }

        /* a negative buffer length makes no sense */
        if (valuelen < 0) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                          FUNC_NAME);
        }

        /* the key must be present, non-empty, and shorter than
         * MPI_MAX_INFO_KEY */
        if (NULL == key || '\0' == key[0] ||
            MPI_MAX_INFO_KEY <= (int) strlen(key)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO_KEY,
                                          FUNC_NAME);
        }

        /* output locations must be provided */
        if (NULL == value) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO_VALUE,
                                          FUNC_NAME);
        }
        if (NULL == flag) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                          FUNC_NAME);
        }
    }

    OPAL_CR_ENTER_LIBRARY();

    /* The real work: look the key up and fill in value/flag. */
    rc = ompi_info_get(info, key, valuelen, value, flag);
    OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME);
}
/*
 * Report whether the info key 'key' on 'info' is set to exactly the string
 * 'value'.
 *
 * Returns true only when the key is found on the info object and its value
 * compares equal (strcmp) to 'value'.  When the key is missing, the lookup
 * fails, or memory cannot be allocated, the function falls back to looking
 * up the "ompi"/"osc"/"portals4" MCA variable named 'key' and returns false.
 */
static bool check_config_value_equal(char *key, ompi_info_t *info, char *value)
{
    char *value_string = NULL;
    int value_len, ret, flag, param;
    const bool *flag_value;
    bool result;

    ret = ompi_info_get_valuelen(info, key, &value_len, &flag);
    if (OMPI_SUCCESS != ret || 0 == flag) {
        goto info_not_found;
    }
    value_len++;

    /* Should malloc 1 char for NUL-termination */
    value_string = (char*) malloc(sizeof(char) * value_len + 1);
    if (NULL == value_string) {
        goto info_not_found;
    }

    ret = ompi_info_get(info, key, value_len, value_string, &flag);
    if (OMPI_SUCCESS != ret) {
        free(value_string);
        goto info_not_found;
    }
    assert(flag != 0);

    result = (0 == strcmp(value_string, value));
    free(value_string);

    return result;

 info_not_found:
    /* Every path that reaches this label has either never allocated
     * value_string or has already freed it, so it must not be read here. */
    param = mca_base_var_find("ompi", "osc", "portals4", key);
    if (0 > param) {
        return false;
    }

    ret = mca_base_var_get_value(param, &flag_value, NULL, NULL);
    if (OMPI_SUCCESS != ret) {
        return false;
    }

    /* NOTE(review): the MCA variable's storage type is not visible from
     * this function, so its contents cannot safely be compared against the
     * string 'value'.  Report "not equal" rather than comparing against a
     * dangling pointer -- confirm the intended fallback semantics. */
    return false;
}
/*
 *	file_open_pvfs2: This is the same strategy as ROMIO's pvfs2 open
 *
 *	Function:	- opens a new file
 *	Accepts:	- same arguments as MPI_File_open()
 *	Returns:	- Success if new file handle
 *
 *	The root rank (OMPIO_ROOT) resolves the file name and performs the
 *	open; the resulting status and object reference are then broadcast
 *	over fh->f_comm.  The info keys "stripe_size" and "stripe_width"
 *	override the component defaults when they parse to a non-negative
 *	integer.
 *
 *	Returns OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE if the per-file
 *	structure cannot be allocated, or OMPI_ERROR if PVFS initialization
 *	or the open itself fails.
 */
int
mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm,
                        const char* filename,
                        int access_mode,
                        struct ompi_info_t *info,
                        mca_io_ompio_file_t *fh)
{
    int ret;
    mca_fs_pvfs2 *pvfs2_fs;
    PVFS_fs_id pvfs2_id;
    char pvfs2_path[OMPIO_MAX_NAME] = {0};
    char * ncache_timeout;
    open_status o_status = {0, {0, 0}};
    struct ompi_datatype_t *open_status_type;
    struct ompi_datatype_t *types[2] = {&ompi_mpi_int.dt, &ompi_mpi_byte.dt};
    int lens[2] = {1, sizeof(PVFS_object_ref)};
    OPAL_PTRDIFF_TYPE offsets[2];
    /* Sized for the MPI_MAX_INFO_VAL characters requested below plus the
     * terminating NUL. */
    char char_stripe[MPI_MAX_INFO_VAL + 1];
    int flag = 0;
    int fs_pvfs2_stripe_size = -1;
    int fs_pvfs2_stripe_width = -1;

    /* We are going to do what ROMIO does with one process resolving
     * the name and broadcasting to others */

    pvfs2_fs = (mca_fs_pvfs2 *) malloc(sizeof(mca_fs_pvfs2));
    if (NULL == pvfs2_fs) {
        opal_output (1, "OUT OF MEMORY\n");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if (!mca_fs_pvfs2_IS_INITIALIZED) {
        /* disable the pvfs2 ncache */
        ncache_timeout = getenv("PVFS2_NCACHE_TIMEOUT");
        if (ncache_timeout == NULL ) {
            setenv("PVFS2_NCACHE_TIMEOUT", "0", 1);
        }
        ret = PVFS_util_init_defaults();
        if (ret < 0) {
            PVFS_perror("PVFS_util_init_defaults", ret);
            /* nothing else references the structure yet */
            free(pvfs2_fs);
            return OMPI_ERROR;
        }
        mca_fs_pvfs2_IS_INITIALIZED = 1;
    }

    memset(&(pvfs2_fs->credentials), 0, sizeof(PVFS_credentials));
    PVFS_util_gen_credentials(&(pvfs2_fs->credentials));

    /* check for stripe size and stripe depth in the info object and
       update mca_fs_pvfs2_stripe_width and mca_fs_pvfs2_stripe_size
       before calling fake_an_open() */
    ompi_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_pvfs2_stripe_size );
    }

    ompi_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_pvfs2_stripe_width );
    }

    /* a missing or unparsable key leaves -1 behind: use the defaults */
    if (fs_pvfs2_stripe_size < 0) {
        fs_pvfs2_stripe_size = mca_fs_pvfs2_stripe_size;
    }

    if (fs_pvfs2_stripe_width < 0) {
        fs_pvfs2_stripe_width = mca_fs_pvfs2_stripe_width;
    }

    if (OMPIO_ROOT == fh->f_rank) {
        ret = PVFS_util_resolve(filename, &pvfs2_id, pvfs2_path, OMPIO_MAX_NAME);
        if (ret < 0 ) {
            PVFS_perror("PVFS_util_resolve", ret);
            o_status.error = -1;
        }
        else {
            fake_an_open (pvfs2_id,
                          pvfs2_path,
                          access_mode,
                          fs_pvfs2_stripe_width,
                          (PVFS_size)fs_pvfs2_stripe_size,
                          pvfs2_fs,
                          &o_status);
        }
        pvfs2_fs->object_ref = o_status.object_ref;
        fh->f_fs_ptr = pvfs2_fs;
    }

    /* broadcast status and (possibly valid) object reference */
    offsets[0] = (MPI_Aint)(&o_status.error);
    offsets[1] = (MPI_Aint)(&o_status.object_ref);

    ompi_datatype_create_struct (2, lens, offsets, types, &open_status_type);
    ompi_datatype_commit (&open_status_type);

    fh->f_comm->c_coll.coll_bcast (MPI_BOTTOM,
                                   1,
                                   open_status_type,
                                   OMPIO_ROOT,
                                   fh->f_comm,
                                   fh->f_comm->c_coll.coll_bcast_module);

    ompi_datatype_destroy (&open_status_type);

    if (o_status.error != 0) {
        /* The file handle stops referring to the structure here, so a
           later file_close cannot release it; free it now. */
        fh->f_fs_ptr = NULL;
        free(pvfs2_fs);
        return OMPI_ERROR;
    }

    pvfs2_fs->object_ref = o_status.object_ref;
    fh->f_fs_ptr = pvfs2_fs;

    /* update the internal ompio structure to store stripe
       size and stripe depth correctly.
       Hadi(to be done): For this read the stripe size and stripe depth from
       the file itself
    */
    if (fs_pvfs2_stripe_size > 0 && fs_pvfs2_stripe_width > 0) {
        fh->f_stripe_size = fs_pvfs2_stripe_size;
        fh->f_stripe_count = fs_pvfs2_stripe_width;
    }

    return OMPI_SUCCESS;
}
/*
 * MPI_Unpublish_name - remove a previously published service name.
 *
 * service_name  name of the service to unpublish
 * info          info object; the OMPI-specific key "range" may be set to
 *               "nspace" or "session" to select the range passed to the
 *               unpublish call.  Any other value is an error.
 * port_name     only checked for NULL by this function
 *
 * Returns MPI_SUCCESS on success.  Failures are reported through the
 * MPI_COMM_WORLD error handler: MPI_ERR_ARG (NULL argument or unrecognized
 * range), MPI_ERR_INFO (bad info handle), MPI_ERR_SERVICE (service not
 * found), MPI_ERR_ACCESS (service owned by another process), or
 * MPI_ERR_INTERN (any other failure).
 */
int MPI_Unpublish_name(const char *service_name, MPI_Info info,
                       const char *port_name)
{
    int rc;
    char range[OPAL_MAX_INFO_VAL];
    int flag=0;
    opal_list_t pinfo;
    opal_value_t *rng;
    char **keys = NULL;

    if ( MPI_PARAM_CHECK ) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);

        if ( NULL == port_name ) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                          FUNC_NAME);
        }
        if ( NULL == service_name ) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                          FUNC_NAME);
        }
        if (NULL == info || ompi_info_is_freed(info)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO,
                                          FUNC_NAME);
        }
    }

    OPAL_CR_ENTER_LIBRARY();
    OBJ_CONSTRUCT(&pinfo, opal_list_t);

    /* OMPI supports info keys to pass the range to
     * be searched for the given key */
    if (MPI_INFO_NULL != info) {
        ompi_info_get (info, "range", sizeof(range) - 1, range, &flag);
        if (flag) {
            if (0 == strcmp(range, "nspace")) {
                rng = OBJ_NEW(opal_value_t);
                rng->key = strdup(OPAL_PMIX_RANGE);
                rng->type = OPAL_INT;
                /* share only with procs in same nspace */
                rng->data.integer = OPAL_PMIX_NAMESPACE;
                opal_list_append(&pinfo, &rng->super);
            } else if (0 == strcmp(range, "session")) {
                rng = OBJ_NEW(opal_value_t);
                rng->key = strdup(OPAL_PMIX_RANGE);
                rng->type = OPAL_INT;
                /* share only with procs in same session */
                rng->data.integer = OPAL_PMIX_SESSION;
                opal_list_append(&pinfo, &rng->super);
            } else {
                /* unrecognized scope -- leave the library the same way
                 * every other error exit below does */
                OPAL_LIST_DESTRUCT(&pinfo);
                OPAL_CR_EXIT_LIBRARY();
                return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                              FUNC_NAME);
            }
        }
    }

    /* unpublish the service_name */
    opal_argv_append_nosize(&keys, service_name);

    rc = opal_pmix.unpublish(keys, &pinfo);
    opal_argv_free(keys);
    OPAL_LIST_DESTRUCT(&pinfo);

    if ( OPAL_SUCCESS != rc ) {
        if (OPAL_ERR_NOT_FOUND == rc) {
            /* service couldn't be found */
            OPAL_CR_EXIT_LIBRARY();
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE,
                                          FUNC_NAME);
        }
        if (OPAL_ERR_PERM == rc) {
            /* this process didn't own the specified service */
            OPAL_CR_EXIT_LIBRARY();
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ACCESS,
                                          FUNC_NAME);
        }

        /* none of the MPI-specific errors occurred - must be some
         * kind of internal error */
        OPAL_CR_EXIT_LIBRARY();
        return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN,
                                      FUNC_NAME);
    }

    OPAL_CR_EXIT_LIBRARY();
    return MPI_SUCCESS;
}
/*
 * MPI_Lookup_name - look up the port published under a service name.
 *
 * service_name  name of the service to look up
 * info          info object; the OMPI-specific key "range" may be set to
 *               "nspace" or "session" to select the range passed to the
 *               lookup call.  Any other value is an error.
 * port_name     output buffer; up to MPI_MAX_PORT_NAME bytes are written
 *               and the result is always NUL-terminated
 *
 * Returns MPI_SUCCESS on success.  Failures are reported through the
 * MPI_COMM_WORLD error handler: MPI_ERR_ARG (NULL argument or unrecognized
 * range), MPI_ERR_INFO (bad info handle), or MPI_ERR_NAME (lookup failed or
 * did not return a string).
 */
int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
{
    char range[OPAL_MAX_INFO_VAL];
    int flag=0, ret;
    opal_value_t *rng;
    opal_list_t results, pinfo;
    opal_pmix_pdata_t *pdat;

    if ( MPI_PARAM_CHECK ) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);

        if ( NULL == port_name ) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                          FUNC_NAME);
        }
        if ( NULL == service_name ) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                          FUNC_NAME);
        }
        if (NULL == info || ompi_info_is_freed(info)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO,
                                          FUNC_NAME);
        }
    }

    OPAL_CR_ENTER_LIBRARY();

    OBJ_CONSTRUCT(&pinfo, opal_list_t);

    /* OMPI supports info keys to pass the range to
     * be searched for the given key */
    if (MPI_INFO_NULL != info) {
        ompi_info_get (info, "range", sizeof(range) - 1, range, &flag);
        if (flag) {
            if (0 == strcmp(range, "nspace")) {
                rng = OBJ_NEW(opal_value_t);
                rng->key = strdup(OPAL_PMIX_RANGE);
                rng->type = OPAL_INT;
                /* share only with procs in same nspace */
                rng->data.integer = OPAL_PMIX_NAMESPACE;
                opal_list_append(&pinfo, &rng->super);
            } else if (0 == strcmp(range, "session")) {
                rng = OBJ_NEW(opal_value_t);
                rng->key = strdup(OPAL_PMIX_RANGE);
                rng->type = OPAL_INT;
                /* share only with procs in same session */
                rng->data.integer = OPAL_PMIX_SESSION;
                opal_list_append(&pinfo, &rng->super);
            } else {
                /* unrecognized scope */
                OPAL_LIST_DESTRUCT(&pinfo);
                OPAL_CR_EXIT_LIBRARY();
                return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                              FUNC_NAME);
            }
        }
    }

    /* collect the findings */
    OBJ_CONSTRUCT(&results, opal_list_t);
    pdat = OBJ_NEW(opal_pmix_pdata_t);
    pdat->value.key = strdup(service_name);
    opal_list_append(&results, &pdat->super);

    ret = opal_pmix.lookup(&results, &pinfo);
    OPAL_LIST_DESTRUCT(&pinfo);

    if (OPAL_SUCCESS != ret ||
        OPAL_STRING != pdat->value.type ||
        NULL == pdat->value.data.string) {
        /* pdat lives on the results list, so the list has to be torn down
         * on the failure path as well */
        OPAL_LIST_DESTRUCT(&results);
        OPAL_CR_EXIT_LIBRARY();
        return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NAME,
                                      FUNC_NAME);
    }

    /* strncpy() does not terminate the destination when the source is
     * MPI_MAX_PORT_NAME characters or longer, so terminate explicitly. */
    strncpy ( port_name, pdat->value.data.string, MPI_MAX_PORT_NAME );
    port_name[MPI_MAX_PORT_NAME - 1] = '\0';
    OPAL_LIST_DESTRUCT(&results);

    OPAL_CR_EXIT_LIBRARY();
    return MPI_SUCCESS;
}
/*
 * Look up the port name published under 'service_name'.
 *
 * The scopes to query, and their order, come from the "ompi_lookup_order"
 * info key (see the comment in the body).  Without that key the global
 * server is used if it was found, otherwise the local HNP.
 *
 * Returns the port name unpacked from the server's reply (the caller
 * receives that pointer as-is), or NULL if the name was not found or an
 * error occurred.
 */
static char* lookup ( const char *service_name, ompi_info_t *info )
{
    orte_process_name_t *info_host;
    opal_buffer_t *buf;
    orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_LOOKUP;
    orte_std_cntr_t cnt=0;
    char *port_name=NULL;
    int ret, rc, flag, i;
    char value[256], **tokens, *ptr;
    int lookup[2] = { GLOBAL, LOCAL };
    size_t num_tokens;
    orte_rml_recv_cb_t xfer;

    /* Look in the MPI_Info (ompi_info_t*) for the key
     * "ompi_lookup_order".  Acceptable values are:
     *
     * - "local" -- only check the local scope
     * - "global" -- only check the global scope
     * - "local,global" -- check the local scope first, then check the
     *   global scope
     * - "global,local" -- check the global scope first, then check the
     *   local scope
     *
     * Give a little leeway in terms of whitespace in the value.
     *
     * The lookup[2] array will contain the results: lookup[0] is the
     * first scope to check, lookup[1] is the 2nd.  Either value may
     * be NONE, LOCAL, or GLOBAL.  If both are NONE, clearly that's an
     * error.  :-)
     */
    ompi_info_get(info, "ompi_lookup_order", sizeof(value) - 1, value, &flag);
    if (flag) {
        /* skip leading whitespace; check the bound before dereferencing */
        ptr = &value[0];
        while ((ptr - value) < (int)sizeof(value) &&
               isspace((unsigned char) *ptr)) {
            ++ptr;
        }
        if (ptr - value < (int)sizeof(value)) {
            tokens = opal_argv_split(ptr, ',');
            if (NULL != tokens) {
                if ((num_tokens = opal_argv_count(tokens)) > 2) {
                    /* too many values in the comma-delimited list */
                    opal_show_help("help-ompi-pubsub-orte.txt",
                                   "pubsub-orte:too-many-orders",
                                   true, (long)ORTE_PROC_MY_NAME->vpid,
                                   (long)num_tokens);
                    opal_argv_free(tokens);
                    return NULL;
                }
                for (i = 0; i < 2; ++i) {
                    /* only index entries the token array really has */
                    if ((size_t) i < num_tokens && NULL != tokens[i]) {
                        if (0 == strcasecmp(tokens[i], "local")) {
                            lookup[i] = LOCAL;
                        } else if (0 == strcasecmp(tokens[i], "global")) {
                            lookup[i] = GLOBAL;
                        } else {
                            /* unrecognized value -- that's an error */
                            opal_show_help("help-ompi-pubsub-orte.txt",
                                           "pubsub-orte:unknown-order",
                                           true, (long)ORTE_PROC_MY_NAME->vpid);
                            opal_argv_free(tokens);
                            return NULL;
                        }
                    } else {
                        lookup[i] = NONE;
                    }
                }
                opal_argv_free(tokens);
            }
        }

        if (NONE == lookup[0]) {
            /* if the user provided an info key, then we at least must
             * be given one place to look */
            opal_show_help("help-ompi-pubsub-orte.txt",
                           "pubsub-orte:unknown-order",
                           true, (long)ORTE_PROC_MY_NAME->vpid);
            return NULL;
        }
    } else {
        /* if no info key was provided, then we default to the global
         * server IF it is active */
        if (!server_setup) {
            setup_server();
        }
        lookup[1] = NONE;
        if (mca_pubsub_orte_component.server_found) {
            lookup[0] = GLOBAL;
        } else {
            /* global server was not found - just look local */
            lookup[0] = LOCAL;
        }
    }

    OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output,
                         "%s pubsub:orte: lookup service %s scope %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         service_name, lookup[0]));

    /* go find the value */
    for (i=0; i < 2; i++) {
        if (LOCAL == lookup[i]) {
            /* if the scope is local, then lookup the value on the HNP */
            info_host = ORTE_PROC_MY_HNP;
        } else if (GLOBAL == lookup[i]) {
            /* has the server been setup yet? */
            if (!server_setup) {
                setup_server();
            }
            /* lookup the value on the global ompi_server, but error
             * if that server wasn't contacted */
            if (!mca_pubsub_orte_component.server_found) {
                opal_show_help("help-ompi-pubsub-orte.txt",
                               "pubsub-orte:no-server",
                               true, (long)ORTE_PROC_MY_NAME->vpid,
                               "lookup from");
                return NULL;
            }
            info_host = &mca_pubsub_orte_component.server;
        } else if (NONE == lookup[i]) {
            continue;
        } else {
            /* unknown host! */
            opal_show_help("help-ompi-pubsub-orte.txt",
                           "pubsub-orte:unknown-order",
                           true, (long)ORTE_PROC_MY_NAME->vpid);
            return NULL;
        }

        /* go look it up */
        /* construct the buffer */
        buf = OBJ_NEW(opal_buffer_t);

        /* pack the lookup command */
        if (OPAL_SUCCESS != (ret = opal_dss.pack(buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buf);
            goto CLEANUP;
        }

        /* pack the service name */
        if (OPAL_SUCCESS != (ret = opal_dss.pack(buf, &service_name, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buf);
            goto CLEANUP;
        }

        /* send the cmd */
        if (0 > (ret = orte_rml.send_buffer_nb(info_host, buf,
                                               ORTE_RML_TAG_DATA_SERVER,
                                               orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buf);
            goto CLEANUP;
        }

        /* get the answer */
        OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t);
        xfer.active = true;
        orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT,
                                ORTE_RML_NON_PERSISTENT,
                                orte_rml_recv_callback, &xfer);
        OMPI_WAIT_FOR_COMPLETION(xfer.active);

        /* unpack the return code */
        cnt = 1;
        if (OPAL_SUCCESS != (ret = opal_dss.unpack(&xfer.data, &rc, &cnt, OPAL_INT))) {
            ORTE_ERROR_LOG(ret);
            /* xfer was constructed above and must not be leaked */
            OBJ_DESTRUCT(&xfer);
            goto CLEANUP;
        }

        OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output,
                             "%s pubsub:orte: lookup returned status %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc));

        if (ORTE_SUCCESS == rc) {
            /* the server was able to lookup the port - unpack the port name */
            cnt=1;
            if (OPAL_SUCCESS != (ret = opal_dss.unpack(&xfer.data, &port_name, &cnt, OPAL_STRING))) {
                ORTE_ERROR_LOG(ret);
                OBJ_DESTRUCT(&xfer);
                goto CLEANUP;
            }

            OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output,
                                 "%s pubsub:orte: lookup returned port %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (NULL == port_name) ? "NULL" : port_name));

            if (NULL != port_name) {
                /* got an answer - return it */
                OBJ_DESTRUCT(&xfer);
                return port_name;
            }
        }

        /* if we didn't get a port_name, then continue */
        OBJ_DESTRUCT(&xfer);
    }

    /* only get here if we tried both options and failed - since the
     * buffer will already have been cleaned up, just return */
 CLEANUP:
    return NULL;
}
/*
 * Build one orte_app_context_t per command and ask the RTE to spawn them.
 *
 * count              number of commands
 * array_of_commands  executable name for each command
 * array_of_argv      per-command argument vectors, or MPI_ARGVS_NULL
 * array_of_maxprocs  number of processes to start for each command
 * array_of_info      per-command info objects (may be NULL); the keys
 *                    "wdir", "host" and "ompi_prefix" are honored
 * port_name          exported to the children as OMPI_PARENT_PORT
 *
 * Returns OMPI_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE on allocation failure, the
 * error from get_app_context, or MPI_ERR_SPAWN if the RTE refuses an
 * attribute or the spawn itself.
 */
int ompi_comm_start_processes(int count, char **array_of_commands,
                              char ***array_of_argv,
                              int *array_of_maxprocs,
                              MPI_Info *array_of_info,
                              char *port_name)
{
    int rc, i, j, counter;
    int have_wdir=0;
    bool have_prefix;
    int flag=0;
    char cwd[OMPI_PATH_MAX];
    char host[OMPI_PATH_MAX];  /*** should define OMPI_HOST_MAX ***/
    char prefix[OMPI_PATH_MAX];
    char *base_prefix;

    orte_std_cntr_t num_apps, ai;
    orte_jobid_t new_jobid=ORTE_JOBID_INVALID;
    orte_app_context_t **apps=NULL;

    opal_list_t attributes;
    opal_list_item_t *item;

    bool timing = false;
    struct timeval ompistart, ompistop;
    int value = 0;

    /* parse the info object */
    /* check potentially for:
       - "host": desired host where to spawn the processes
       - "prefix": the path to the root of the directory tree where ompi
                   executables and libraries can be found
       - "arch": desired architecture
       - "wdir": directory, where executable can be found
       - "path": list of directories where to look for the executable
       - "file": filename, where additional information is provided.
       - "soft": see page 92 of MPI-2.
    */

    /* make sure the progress engine properly trips the event library */
    opal_progress_event_increment();

    /* check to see if we want timing information */
    (void) mca_base_param_reg_int_name("ompi", "timing",
                                       "Request that critical timing loops be measured",
                                       false, false, 0, &value);
    if (value != 0) {
        timing = true;
        if (0 != gettimeofday(&ompistart, NULL)) {
            opal_output(0, "ompi_comm_start_procs: could not obtain start time");
            ompistart.tv_sec = 0;
            ompistart.tv_usec = 0;
        }
    }

    /* setup to record the attributes */
    OBJ_CONSTRUCT(&attributes, opal_list_t);

    /* we want to be able to default the prefix to the one used for this job
     * so that the ompi executables and libraries can be found. the user can
     * later override this value by providing an MPI_Info value. for now, though,
     * let's get the default value off the registry
     */
    if (ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(orte_process_info.my_name->jobid, &apps, &num_apps))) {
        ORTE_ERROR_LOG(rc);
        /* undo the setup above, as the later error exits do */
        OBJ_DESTRUCT(&attributes);
        opal_progress_event_decrement();
        return rc;
    }

    /* we'll just use the prefix from the first member of the app_context array.
     * this shouldn't matter as they all should be the same. it could be NULL, of
     * course (user might not have specified it), so we need to protect against that.
     *
     * It's possible that no app_contexts are returned (e.g., during a comm_spawn
     * from a singleton), so check first
     */
    if (NULL != apps && num_apps > 0 && NULL != apps[0]->prefix_dir) {
        base_prefix = strdup(apps[0]->prefix_dir);
    } else {
        base_prefix = NULL;
    }

    /* cleanup the memory we used */
    for (ai = 0; ai < num_apps; ai++) {
        OBJ_RELEASE(apps[ai]);
    }
    if (NULL != apps) free(apps);

    /* Convert the list of commands to an array of orte_app_context_t
       pointers */
    apps = (orte_app_context_t**)malloc(count * sizeof(orte_app_context_t *));
    if (NULL == apps) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        if (NULL != base_prefix) free(base_prefix);
        OBJ_DESTRUCT(&attributes);
        opal_progress_event_decrement();
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    for (i = 0; i < count; ++i) {
        apps[i] = OBJ_NEW(orte_app_context_t);
        if (NULL == apps[i]) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* rollback what was already done */
            for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
            opal_progress_event_decrement();
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* copy over the name of the executable */
        apps[i]->app = strdup(array_of_commands[i]);
        if (NULL == apps[i]->app) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* rollback what was already done */
            for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
            opal_progress_event_decrement();
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* record the number of procs to be generated */
        apps[i]->num_procs = array_of_maxprocs[i];

        /* copy over the argv array */
        counter = 1;

        if (MPI_ARGVS_NULL != array_of_argv &&
            MPI_ARGV_NULL != array_of_argv[i]) {
            /* first need to find out how many entries there are */
            j=0;
            while (NULL != array_of_argv[i][j]) {
                j++;
            }
            counter += j;
        }

        /* now copy them over, ensuring to NULL terminate the array */
        apps[i]->argv = (char**)malloc((1 + counter) * sizeof(char*));
        if (NULL == apps[i]->argv) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* rollback what was already done */
            for (j=0; j < i; j++) {
                OBJ_RELEASE(apps[j]);
            }
            opal_progress_event_decrement();
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        apps[i]->argv[0] = strdup(array_of_commands[i]);
        for (j=1; j < counter; j++) {
            apps[i]->argv[j] = strdup(array_of_argv[i][j-1]);
        }
        apps[i]->argv[counter] = NULL;

        /* the environment gets set by the launcher
         * all we need to do is add the specific values
         * needed for comm_spawn
         */
        /* Add environment variable with the contact information for the
           child processes.
        */
        counter = 1;
        apps[i]->env = (char**)malloc((1+counter) * sizeof(char*));
        if (NULL == apps[i]->env) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* rollback what was already done */
            for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
            opal_progress_event_decrement();
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        asprintf(&(apps[i]->env[0]), "OMPI_PARENT_PORT=%s", port_name);
        apps[i]->env[1] = NULL;
        for (j = 0; NULL != environ[j]; ++j) {
            if (0 == strncmp("OMPI_", environ[j], 5)) {
                opal_argv_append_nosize(&apps[i]->env, environ[j]);
            }
        }

        /* Check for well-known info keys.  Each lookup asks for one
         * character less than the buffer holds, leaving room for the
         * terminating NUL. */
        have_wdir = 0;
        have_prefix = false;
        if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) {

            /* check for 'wdir' */
            ompi_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag);
            if ( flag ) {
                /* the app context outlives this function's stack frame,
                 * so it needs its own copy (as in the default case below) */
                apps[i]->cwd = strdup(cwd);
                have_wdir = 1;
            }

            /* check for 'host' */
            ompi_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag);
            if ( flag ) {
                apps[i]->num_map = 1;
                apps[i]->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *));
                apps[i]->map_data[0] = OBJ_NEW(orte_app_context_map_t);
                apps[i]->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_HOSTNAME;
                apps[i]->map_data[0]->map_data = strdup(host);
            }

            /* 'path', 'arch', 'file', 'soft'  -- to be implemented */

            /* check for 'ompi_prefix' (OMPI-specific -- to effect the same
             * behavior as --prefix option to orterun)
             */
            ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag);
            if ( flag ) {
                apps[i]->prefix_dir = strdup(prefix);
                have_prefix = true;
            }
        }

        /* default value: If the user did not tell us where to look for the
           executable, we assume the current working directory */
        if ( !have_wdir ) {
            getcwd(cwd, OMPI_PATH_MAX);
            apps[i]->cwd = strdup(cwd);
        }

        /* if the user told us a new prefix, then we leave it alone. otherwise, if
         * a prefix had been provided before, copy that one into the new app_context
         * for use by the spawned children
         */
        if ( !have_prefix && NULL != base_prefix) {
            apps[i]->prefix_dir = strdup(base_prefix);
        }

        /* leave the map info alone - the launcher will
         * decide where to put things
         */
    } /* for (i = 0 ; i < count ; ++i) */

    /* cleanup */
    if (NULL != base_prefix) free(base_prefix);

    /* tell the RTE that we want to the children to run inside of our allocation -
     * don't go get one just for them
     */
    if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RAS_USE_PARENT_ALLOCATION,
                                                      ORTE_JOBID, &(orte_process_info.my_name->jobid),
                                                      ORTE_RMGR_ATTR_OVERRIDE))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&attributes);
        opal_progress_event_decrement();
        return MPI_ERR_SPAWN;
    }

    /* tell the RTE that we want the children mapped the same way as their parent */
    if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMAPS_USE_PARENT_PLAN,
                                                      ORTE_JOBID, &(orte_process_info.my_name->jobid),
                                                      ORTE_RMGR_ATTR_OVERRIDE))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&attributes);
        opal_progress_event_decrement();
        return MPI_ERR_SPAWN;
    }

    /* check for timing request - get stop time and report elapsed time if so */
    if (timing) {
        if (0 != gettimeofday(&ompistop, NULL)) {
            opal_output(0, "ompi_comm_start_procs: could not obtain stop time");
        } else {
            opal_output(0, "ompi_comm_start_procs: time from start to prepare to spawn %ld usec",
                        (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
                                   (ompistop.tv_usec - ompistart.tv_usec)));
            if (0 != gettimeofday(&ompistart, NULL)) {
                opal_output(0, "ompi_comm_start_procs: could not obtain new start time");
                ompistart.tv_sec = ompistop.tv_sec;
                ompistart.tv_usec = ompistop.tv_usec;
            }
        }
    }

    /* spawn procs */
    if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, 0, NULL, NULL,
                                                  ORTE_PROC_STATE_NONE, &attributes))) {
        ORTE_ERROR_LOG(rc);
        opal_progress_event_decrement();
        return MPI_ERR_SPAWN;
    }

    /* check for timing request - get stop time and report elapsed time if so */
    if (timing) {
        if (0 != gettimeofday(&ompistop, NULL)) {
            opal_output(0, "ompi_comm_start_procs: could not obtain stop time");
        } else {
            opal_output(0, "ompi_comm_start_procs: time to spawn %ld usec",
                        (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
                                   (ompistop.tv_usec - ompistart.tv_usec)));
        }
    }

    /* clean up */
    opal_progress_event_decrement();
    while (NULL != (item = opal_list_remove_first(&attributes))) OBJ_RELEASE(item);
    OBJ_DESTRUCT(&attributes);
    for ( i=0; i<count; i++) {
        OBJ_RELEASE(apps[i]);
    }
    free (apps);

    return OMPI_SUCCESS;
}
/* Info keys:
 *
 * - crs:
 *   none    = (Default) No CRS Service
 *   default = Whatever CRS service MPI chooses
 *   blcr    = BLCR
 *   self    = app level callbacks
 *
 * - cmdline:
 *   Command line to restart the process with.
 *   If empty, the user must manually enter it
 *
 * - target:
 *   Absolute path to the target directory.
 *
 * - handle:
 *   first = Earliest checkpoint directory available
 *   last  = Most recent checkpoint directory available
 *   [global:local] = handle provided by the MPI library
 *
 * - restarting:
 *   0 = not restarting
 *   1 = restarting
 *
 * - checkpointing:
 *   0 = No need to prepare for checkpointing
 *   1 = MPI should prepare for checkpointing
 *
 * - inflight:
 *   default = message
 *   message = Drain inflight messages at the message level
 *   network = Drain inflight messages at the network level (if possible)
 *
 * - user_space_mem:
 *   0 = Memory does not need to be managed
 *   1 = Memory must be in user space (i.e., not on network card)
 *
 * This function copies the "crs", "cmdline", "handle", "target",
 * "restarting" and "checkpointing" keys from 'info' into 'datum'; the
 * "inflight" and "user_space_mem" keys are not read here.  String fields of
 * 'datum' are only assigned (as strdup'ed copies) when the key is present;
 * the two boolean fields default to false when the key is absent.
 *
 * Always returns ORTE_SUCCESS.
 */
static int extract_info_into_datum(ompi_info_t *info, orte_snapc_base_quiesce_t *datum)
{
    int info_flag = false;
    int max_crs_len = 32;
    bool info_bool = false;
    /* Scratch buffer for the string lookups: room for OPAL_PATH_MAX
     * characters plus the terminating NUL.  A fixed-size local buffer
     * cannot fail to allocate. */
    char info_char[OPAL_PATH_MAX + 1];

    /*
     * Key: crs
     */
    ompi_info_get(info, "crs", max_crs_len, info_char, &info_flag);
    if( info_flag) {
        datum->crs_name = strdup(info_char);
    }

    /*
     * Key: cmdline
     */
    ompi_info_get(info, "cmdline", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->cmdline = strdup(info_char);
    }

    /*
     * Key: handle
     */
    ompi_info_get(info, "handle", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->handle = strdup(info_char);
    }

    /*
     * Key: target
     */
    ompi_info_get(info, "target", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->target_dir = strdup(info_char);
    }

    /*
     * Key: restarting
     */
    ompi_info_get_bool(info, "restarting", &info_bool, &info_flag);
    if( info_flag ) {
        datum->restarting = info_bool;
    } else {
        datum->restarting = false;
    }

    /*
     * Key: checkpointing
     */
    ompi_info_get_bool(info, "checkpointing", &info_bool, &info_flag);
    if( info_flag ) {
        datum->checkpointing = info_bool;
    } else {
        datum->checkpointing = false;
    }

    /*
     * Display all values
     */
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('crs' = '%s')",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == datum->crs_name ? "Default (none)" : datum->crs_name)));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('cmdline' = '%s')",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == datum->cmdline ? "Default ()" : datum->cmdline)));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('checkpointing' = '%c')",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (datum->checkpointing ? 'T' : 'F')));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('restarting' = '%c')",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (datum->restarting ? 'T' : 'F')));

    return ORTE_SUCCESS;
}
/**
 * Function to allocate special memory according to what the user requests in
 * the info object.
 *
 * If the user passes in a valid info structure then the function will
 * try to allocate the memory and register it with every mpool that there is a
 * key for it in the info struct. If it fails at registering the memory with
 * one of the requested mpools, an error will be returned. Also, if there is a
 * key in info that does not match any mpool, an error will be returned.
 *
 * If the info parameter is MPI_INFO_NULL, then this function will try to allocate
 * the memory and register it with as many mpools as possible. However,
 * if any of the registrations fail the mpool will simply be ignored.
 *
 * Note: the info-driven selection is only compiled when TODO_BTL_GB is
 * defined.  Without it, 'info' is not consulted and every mpool flagged
 * MCA_MPOOL_FLAGS_MPI_ALLOC_MEM is tried.
 *
 * @param size the size of the memory area to allocate
 * @param info an info object which tells us what kind of memory to allocate
 *
 * @retval pointer to the allocated memory
 * @retval NULL on failure
 */
void *mca_mpool_base_alloc(size_t size, opal_info_t *info)
{
    opal_list_item_t * item;
    int num_modules = opal_list_get_size(&mca_mpool_base_modules);
    int reg_module_num = 0, i;
    mca_mpool_base_selected_module_t * current;
    mca_mpool_base_selected_module_t * no_reg_function = NULL;
    mca_mpool_base_selected_module_t ** has_reg_function = NULL;
    mca_mpool_base_registration_t * registration;
    mca_mpool_base_tree_item_t* mpool_tree_item = NULL;
    mca_mpool_base_module_t *mpool;
    void * mem = NULL;
#if defined(TODO_BTL_GB)
    int flag = 0;
    bool match_found = false;
#endif  /* defined(TODO_BTL_GB) */
    bool mpool_requested = false;

    if(num_modules > 0) {
        /* one slot per selected module; size the elements from the array's
         * own type */
        has_reg_function = (mca_mpool_base_selected_module_t **)
            malloc(num_modules * sizeof(*has_reg_function));
        if(!has_reg_function)
            goto out;
    }

    mpool_tree_item = mca_mpool_base_tree_item_get();

    if(!mpool_tree_item)
        goto out;

    mpool_tree_item->num_bytes = size;
    mpool_tree_item->count = 0;

#if defined(TODO_BTL_GB)
    if(&ompi_mpi_info_null.info == info)
#endif  /* defined(TODO_BTL_GB) */
    {
        /* No specific mpool requested: collect every module usable for
         * MPI_Alloc_mem, split by whether it can register memory. */
        for(item = opal_list_get_first(&mca_mpool_base_modules);
            item != opal_list_get_end(&mca_mpool_base_modules);
            item = opal_list_get_next(item)) {
            current = ((mca_mpool_base_selected_module_t *) item);
            if(current->mpool_module->flags & MCA_MPOOL_FLAGS_MPI_ALLOC_MEM) {
                if(NULL == current->mpool_module->mpool_register){
                    no_reg_function = current;
                }
                else {
                    has_reg_function[reg_module_num++] = current;
                }
            }
        }
    }
#if defined(TODO_BTL_GB)
    else
    {
        int num_keys;
        char key[MPI_MAX_INFO_KEY + 1];
        char value[MPI_MAX_INFO_VAL + 1];

        ompi_info_get_nkeys(info, &num_keys);
        for(i = 0; i < num_keys; i++)
        {
            ompi_info_get_nthkey(info, i, key);
            if ( 0 != strcmp(key, "mpool") ) {
                continue;
            }
            mpool_requested = true;
            ompi_info_get(info, key, MPI_MAX_INFO_VAL, value, &flag);
            if ( !flag ) {
                continue;
            }

            match_found = false;
            for(item = opal_list_get_first(&mca_mpool_base_modules);
                item != opal_list_get_end(&mca_mpool_base_modules);
                item = opal_list_get_next(item))
            {
                current = ((mca_mpool_base_selected_module_t *)item);
                if(0 == strcmp(value,
                       current->mpool_module->mpool_component->mpool_version.mca_component_name))
                {
                    match_found = true;
                    if(NULL == current->mpool_module->mpool_register)
                    {
                        if(NULL != no_reg_function)
                        {
                           /* there was more than one requested mpool that lacks
                            * a registration function, so return failure */
                            goto out;
                        }
                        no_reg_function = current;
                    }
                    else
                    {
                        has_reg_function[reg_module_num++] = current;
                    }
                }
            }
            if(!match_found)
            {
                /* one of the keys given to us by the user did not match any
                 * mpools, so return an error */
                goto out;
            }
        }
    }
#endif  /* defined(TODO_BTL_GB) */

    if(NULL == no_reg_function && 0 == reg_module_num)
    {
        if(!mpool_requested)
        {
            /* if the info argument was NULL and there were no useable mpools
             * or there user provided info object but did not specifiy a "mpool" key,
             * just malloc the memory and return it */
            mem = malloc(size);
            goto out;
        }

        /* the user passed info but we were not able to use any of the mpools
         * specified */
        goto out;
    }

    /* Index -1 stands for the module without a registration function (if
     * any); it is visited first.  Whichever module is visited while mem is
     * still NULL performs the allocation, the others register that memory. */
    for(i = -1; i < reg_module_num; i++) {
        if(-1 == i) {
            if(NULL != no_reg_function)
                mpool = no_reg_function->mpool_module;
            else
                continue;
        } else {
            mpool = has_reg_function[i]->mpool_module;
        }

        if(NULL == mem) {
            mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST,
                    &registration);
            if(NULL == mem) {
                if(mpool_requested)
                    goto out;
                continue;
            }
            mpool_tree_item->key = mem;
            mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
            mpool_tree_item->regs[mpool_tree_item->count++] = registration;
        } else {
            if(mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST,
                        &registration) != OPAL_SUCCESS) {
                if(mpool_requested) {
                    unregister_tree_item(mpool_tree_item);
                    goto out;
                }
                continue;
            }
            mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
            mpool_tree_item->regs[mpool_tree_item->count++] = registration;
        }
    }

    if(NULL == mem) {
        /* no mpool could provide the memory: fall back to plain malloc */
        mem = malloc(size);
        goto out;
    }

    mca_mpool_base_tree_insert(mpool_tree_item);
    mpool_tree_item = NULL; /* prevent it to be deleted below */
out:
    if(mpool_tree_item)
        mca_mpool_base_tree_item_put(mpool_tree_item);

    if(has_reg_function)
        free(has_reg_function);

    return mem;
}
/*
 * Query routine of the "individual" sharedfp component.
 *
 * Decides whether this component can serve the file described by fh and
 * reports how eager it is to do so.
 *
 * @param fh        file handle; its f_amode and f_info fields are inspected
 * @param priority  (OUT) set to mca_sharedfp_individual_priority when the
 *                  file is writable AND the info object carries the
 *                  OMPIO_SHAREDFP_RELAXED_ORDERING key, and to 1 otherwise
 *
 * @return pointer to the component's module when the access mode contains
 *         MPI_MODE_WRONLY or MPI_MODE_RDWR, NULL otherwise
 *
 * Only the presence of the info key matters; its value is merely echoed
 * in verbose mode.
 */
struct mca_sharedfp_base_module_1_0_0_t *
mca_sharedfp_individual_component_file_query (mca_io_ompio_file_t *fh, int *priority)
{
    char info_val[MPI_MAX_INFO_VAL+1];
    int key_found;
    bool can_write;
    bool relaxed = false;
    MPI_Info file_info;
    int mode;

    *priority = 0;

    /* Step 1: the component only operates on files opened for writing. */
    mode = fh->f_amode;
    can_write = (0 != (mode & (MPI_MODE_WRONLY | MPI_MODE_RDWR)));
    if ( mca_sharedfp_individual_verbose ) {
        if ( can_write ) {
            printf("mca_sharedfp_individual_component_file_query: "
                   "MPI_MODE_WRONLY[true=%d,false=%d]=%d\n",
                   true, false, can_write);
        } else {
            printf("mca_sharedfp_individual_component_file_query: Can not run!, "
                   "MPI_MODE_WRONLY[true=%d,false=%d]=%d\n",
                   true, false, can_write);
        }
    }

    /* Step 2: look for the relaxed-ordering hint on the file's info object. */
    file_info = fh->f_info;
    if ( MPI_INFO_NULL == file_info ) {
        if ( mca_sharedfp_individual_verbose ) {
            printf("mca_sharedfp_individual_component_file_query: "
                   "OMPIO_SHAREDFP_RELAXED_ORDERING MPI_Info key not set, "
                   "got MPI_INFO_NULL. Set this key in order to increase "
                   "this component's priority value.\n");
        }
    } else {
        ompi_info_get ( file_info, "OMPIO_SHAREDFP_RELAXED_ORDERING",
                        MPI_MAX_INFO_VAL, info_val, &key_found);
        /* key_found is non-zero iff the key is defined on the info object */
        relaxed = (0 != key_found);
        if ( mca_sharedfp_individual_verbose ) {
            if ( relaxed ) {
                printf("mca_sharedfp_individual_component_file_query: "
                       "OMPIO_SHAREDFP_RELAXED_ORDERING=%s\n", info_val);
            } else {
                printf("mca_sharedfp_individual_component_file_query: "
                       "OMPIO_SHAREDFP_RELAXED_ORDERING MPI_Info key not set. "
                       "Set this key in order to increase this component's priority value.\n");
            }
        }
    }

    /* Full priority needs both conditions; otherwise fall back to 1. */
    *priority = ( can_write && relaxed ) ? mca_sharedfp_individual_priority : 1;

    /* Without write access the component cannot be selected at all. */
    return can_write ? &individual : NULL;
}
/*
 * Open a file through the Lustre fs component.
 *
 * Translates the MPI access mode into open(2) flags, applies the
 * "stripe_size" / "stripe_width" info hints (falling back to the
 * component-level mca_fs_lustre_stripe_size / mca_fs_lustre_stripe_width
 * values when a hint is absent or negative), opens the file and records
 * the stripe size in fh->f_stripe_size.
 *
 * @param comm         communicator (not referenced here; fh->f_comm is used)
 * @param filename     path of the file to open
 * @param access_mode  MPI_MODE_* bit mask
 * @param info         info object queried for the striping hints
 * @param fh           file handle; fd and f_stripe_size are filled in
 *
 * @retval OMPI_SUCCESS on success
 * @retval OMPI_ERROR   if the file cannot be opened or the stripe
 *                      information cannot be obtained
 */
int
mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
                         char* filename,
                         int access_mode,
                         struct ompi_info_t *info,
                         mca_io_ompio_file_t *fh)
{
    int amode;
    int old_mask, perm;
    int rc;
    int flag;
    int fs_lustre_stripe_size = -1;
    int fs_lustre_stripe_width = -1;
    /* Must hold the MPI_MAX_INFO_VAL characters requested from
     * ompi_info_get() below plus the terminating NUL; sizing it with
     * MPI_MAX_INFO_KEY allowed a long hint value to overrun the buffer. */
    char char_stripe[MPI_MAX_INFO_VAL + 1];
    struct lov_user_md *lump=NULL;

    if (fh->f_perm == OMPIO_PERM_NULL) {
        /* No explicit permissions: read the current umask (and restore it
         * right away) to derive the permission bits. */
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }
    else {
        perm = fh->f_perm;
    }

    /* Map the MPI access mode bits onto the open(2) flags. */
    amode = 0;
    if (access_mode & MPI_MODE_CREATE)
        amode = amode | O_CREAT;
    if (access_mode & MPI_MODE_RDONLY)
        amode = amode | O_RDONLY;
    if (access_mode & MPI_MODE_WRONLY)
        amode = amode | O_WRONLY;
    if (access_mode & MPI_MODE_RDWR)
        amode = amode | O_RDWR;
    if (access_mode & MPI_MODE_EXCL)
        amode = amode | O_EXCL;

    /* Striping hints supplied by the user; a hint that is missing or does
     * not parse leaves the corresponding value at -1. */
    ompi_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_lustre_stripe_size );
    }

    ompi_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {
        sscanf ( char_stripe, "%d", &fs_lustre_stripe_width );
    }

    /* Fall back to the component-level settings. */
    if (fs_lustre_stripe_size < 0) {
        fs_lustre_stripe_size = mca_fs_lustre_stripe_size;
    }

    if (fs_lustre_stripe_width < 0) {
        fs_lustre_stripe_width = mca_fs_lustre_stripe_width;
    }

    if ( (fs_lustre_stripe_size>0 || fs_lustre_stripe_width>0) &&
         (amode&O_CREAT) && (amode&O_RDWR)) {
        /* Rank 0 creates the file with the requested layout; every rank
         * then waits at the barrier before opening it. */
        if (0 == fh->f_rank) {
            llapi_file_create(filename,
                              fs_lustre_stripe_size,
                              -1, /* MSC need to change that */
                              fs_lustre_stripe_width,
                              0); /* MSC need to change that */

            fh->fd = open(filename, O_CREAT | O_RDWR | O_LOV_DELAY_CREATE, perm);
            if (fh->fd < 0) {
                fprintf(stderr, "Can't open %s file: %d (%s)\n",
                        filename, errno, strerror(errno));
                return OMPI_ERROR;
            }
            close (fh->fd);
        }
        fh->f_comm->c_coll.coll_barrier (fh->f_comm,
                                         fh->f_comm->c_coll.coll_barrier_module);
    }

    fh->fd = open (filename, amode, perm);
    if (fh->fd < 0) {
        opal_output(1, "error opening file %s\n", filename);
        return OMPI_ERROR;
    }

    if (mca_fs_lustre_stripe_size > 0) {
        fh->f_stripe_size = mca_fs_lustre_stripe_size;
    }
    else {
        /* No component-level stripe size: ask Lustre for the file's layout. */
        lump = alloc_lum();
        if (NULL == lump ){
            fprintf(stderr,"Cannot allocate memory for extracting stripe size\n");
            return OMPI_ERROR;
        }
        rc = llapi_file_get_stripe(filename, lump);
        if (rc != 0) {
            opal_output(1, "get_stripe failed: %d (%s)\n", errno, strerror(errno));
            return OMPI_ERROR;
        }
        fh->f_stripe_size = lump->lmm_stripe_size;

        /* NOTE(review): lump is not released on any path through this
         * branch; a free() call here had been disabled.  Confirm how
         * alloc_lum() allocates before re-enabling it. */
    }
    return OMPI_SUCCESS;
}