Esempio n. 1
0
orte_node_rank_t orte_get_proc_node_rank(orte_process_name_t *proc)
{
    orte_proc_t *proct;
    orte_node_rank_t *noderank, nd;
    int rc;

    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        /* look it up on our arrays */
        if (NULL == (proct = orte_get_proc_object(proc))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_NODE_RANK_INVALID;
        }
        return proct->node_rank;
    }

    /* if we are an app, get the value from the modex db */
    noderank = &nd;
    OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_NODE_RANK,
                          (opal_process_name_t*)proc,
                          &noderank, ORTE_NODE_RANK);
    if (OPAL_SUCCESS != rc) {
        nd = ORTE_NODE_RANK_INVALID;
    }
    return nd;
}
Esempio n. 2
0
char* orte_get_proc_hostname(orte_process_name_t *proc)
{
    orte_proc_t *proct;
    char *hostname = NULL;
    int rc;

    /* if we are a tool, then we have no way of obtaining
     * this info */
    if (ORTE_PROC_IS_TOOL) {
        return NULL;
    }

    /* don't bother error logging any not-found situations
     * as the layer above us will have something to say
     * about it */
    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        /* look it up on our arrays */
        if (NULL == (proct = orte_get_proc_object(proc))) {
            return NULL;
        }
        if (NULL == proct->node || NULL == proct->node->name) {
            return NULL;
        }
        return proct->node->name;
    }

    /* if we are an app, get the data from the modex db */
    OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_HOSTNAME,
                          (opal_process_name_t*)proc,
                          &hostname, OPAL_STRING);

    /* user is responsible for releasing the data */
    return hostname;
}
Esempio n. 3
0
char* opal_get_proc_hostname(const opal_proc_t *proc)
{
    int ret;

    /* if the proc is NULL, then we can't know */
    if (NULL == proc) {
        return "unknown";
    }

    /* if it is my own hostname we are after, then just hand back
     * the value in opal_process_info */
    if (proc == opal_proc_my_name) {
        return opal_process_info.nodename;
    }

    /* see if we already have the data - if so, pass it back */
    if (NULL != proc->proc_hostname) {
        return proc->proc_hostname;
    }

    /* if we don't already have it, then try to get it */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, proc,
                          (char**)&(proc->proc_hostname), OPAL_STRING);
    if (OPAL_SUCCESS != ret) {
        OPAL_ERROR_LOG(ret);
        return "unknown";  // return something so the caller doesn't segfault
    }

    /* user is not allowed to release the data */
    return proc->proc_hostname;
}
Esempio n. 4
0
static int rte_init(void)
{
    int rc, ret;
    char *error = NULL;
    char *envar, *ev1, *ev2;
    uint64_t unique_key[2];
    char *string_key;
    opal_value_t *kv;
    char *val;
    int u32, *u32ptr;
    uint16_t u16, *u16ptr;
    orte_process_name_t name;

    /* run the prolog */
    if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    u32ptr = &u32;
    u16ptr = &u16;

    if (NULL != mca_ess_singleton_component.server_uri) {
        /* we are going to connect to a server HNP */
        if (0 == strncmp(mca_ess_singleton_component.server_uri, "file", strlen("file")) ||
            0 == strncmp(mca_ess_singleton_component.server_uri, "FILE", strlen("FILE"))) {
            char input[1024], *filename;
            FILE *fp;

            /* it is a file - get the filename */
            filename = strchr(mca_ess_singleton_component.server_uri, ':');
            if (NULL == filename) {
                /* filename is not correctly formatted */
                orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
                               "singleton", mca_ess_singleton_component.server_uri);
                return ORTE_ERROR;
            }
            ++filename; /* space past the : */

            if (0 >= strlen(filename)) {
                /* they forgot to give us the name! */
                orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
                               "singleton", mca_ess_singleton_component.server_uri);
                return ORTE_ERROR;
            }

            /* open the file and extract the uri */
            fp = fopen(filename, "r");
            if (NULL == fp) { /* can't find or read file! */
                orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
                               "singleton", mca_ess_singleton_component.server_uri);
                return ORTE_ERROR;
            }
            memset(input, 0, 1024);  // initialize the array to ensure a NULL termination
            if (NULL == fgets(input, 1023, fp)) {
                /* something malformed about file */
                fclose(fp);
                orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
                               "singleton", mca_ess_singleton_component.server_uri, "singleton");
                return ORTE_ERROR;
            }
            fclose(fp);
            input[strlen(input)-1] = '\0';  /* remove newline */
            orte_process_info.my_hnp_uri = strdup(input);
        } else {
            orte_process_info.my_hnp_uri = strdup(mca_ess_singleton_component.server_uri);
        }
        /* save the daemon uri - we will process it later */
        orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri);
        /* construct our name - we are in their job family, so we know that
         * much. However, we cannot know how many other singletons and jobs
         * this HNP is running. Oh well - if someone really wants to use this
         * option, they can try to figure it out. For now, we'll just assume
         * we are the only ones */
        ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_HNP->jobid, 1);
        /* obviously, we are vpid=0 for this job */
        ORTE_PROC_MY_NAME->vpid = 0;

        /* for convenience, push the pubsub version of this param into the environ */
        opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ);
    } else if (NULL != getenv("SINGULARITY_CONTAINER") ||
               mca_ess_singleton_component.isolated) {
        /* ensure we use the isolated pmix component */
        opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
    } else {
        /* spawn our very own HNP to support us */
        if (ORTE_SUCCESS != (rc = fork_hnp())) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        /* our name was given to us by the HNP */
        opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ);
    }

    /* get an async event base - we use the opal_async one so
     * we don't startup extra threads if not needed */
    orte_event_base = opal_progress_thread_init(NULL);
    progress_thread_running = true;

    /* open and setup pmix */
    if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
        error = "opening pmix";
        goto error;
    }
    if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
        error = "select pmix";
        goto error;
    }
    /* set the event base */
    opal_pmix_base_set_evbase(orte_event_base);
    /* initialize the selected module */
    if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }

    /* pmix.init set our process name down in the OPAL layer,
     * so carry it forward here */
    ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
    ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
    name.jobid = OPAL_PROC_MY_NAME.jobid;
    name.vpid = ORTE_VPID_WILDCARD;

    /* get our local rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting local rank";
        goto error;
    }
    orte_process_info.my_local_rank = u16;

    /* get our node rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting node rank";
        goto error;
    }
    orte_process_info.my_node_rank = u16;

    /* get max procs */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS,
                          &name, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting max procs";
        goto error;
    }
    orte_process_info.max_procs = u32;

    /* we are a singleton, so there is only one proc in the job */
    orte_process_info.num_procs = 1;
    /* push into the environ for pickup in MPI layer for
     * MPI-3 required info key
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) {
        asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs);
        putenv(ev1);
        added_num_procs = true;
    }
    if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
        asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs);
        putenv(ev2);
        added_app_ctx = true;
    }


    /* get our app number from PMI - ok if not found */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_APPNUM,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.app_num = u32;
    } else {
        orte_process_info.app_num = 0;
    }
    /* set some other standard values */
    orte_process_info.num_local_peers = 0;

    /* setup transport keys in case the MPI layer needs them -
     * we can use the jobfam and stepid as unique keys
     * because they are unique values assigned by the RM
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
        unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
        unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid);
        if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key);
        putenv(envar);
        added_transport_keys = true;
        /* cannot free the envar as that messes up our environ */
        free(string_key);
    }

    /* retrieve our topology */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO,
                          &name, &val, OPAL_STRING);
    if (OPAL_SUCCESS == ret && NULL != val) {
        /* load the topology */
        if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            free(val);
            error = "setting topology";
            goto error;
        }
        if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) {
            ret = OPAL_ERROR;
            free(val);
            hwloc_topology_destroy(opal_hwloc_topology);
            error = "setting topology";
            goto error;
        }
        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
                                         (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                                          HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                          HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        /* now load the topology */
        if (0 != hwloc_topology_load(opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        free(val);
    } else {
        /* it wasn't passed down to us, so go get it */
        if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
            error = "topology discovery";
            goto error;
        }
        /* push it into the PMIx database in case someone
         * tries to retrieve it so we avoid an attempt to
         * get it again */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
        kv->type = OPAL_STRING;
        if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) {
            error = "topology export";
            goto error;
        }
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) {
            error = "topology store";
            goto error;
        }
        OBJ_RELEASE(kv);
    }

    /* use the std app init to complete the procedure */
    if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* push our hostname so others can find us, if they need to */
    OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        error = "db store hostname";
        goto error;
    }

    return ORTE_SUCCESS;

 error:
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    return ret;
}
Esempio n. 5
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char *envar, *ev1, *ev2;
    uint64_t unique_key[2];
    char *string_key;
    char *rmluri;
    opal_value_t *kv;
    char *val;
    int u32, *u32ptr;
    uint16_t u16, *u16ptr;
    char **peers=NULL, *mycpuset, **cpusets=NULL;
    opal_process_name_t name;
    size_t i;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }

    /* get an async event base - we use the opal_async one so
     * we don't startup extra threads if not needed */
    orte_event_base = opal_progress_thread_init(NULL);
    progress_thread_running = true;

    /* open and setup pmix */
    if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    /* set the event base */
    opal_pmix_base_set_evbase(orte_event_base);
    /* initialize the selected module */
    if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    u32ptr = &u32;
    u16ptr = &u16;

    /****   THE FOLLOWING ARE REQUIRED VALUES   ***/
    /* pmix.init set our process name down in the OPAL layer,
     * so carry it forward here */
    ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
    ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;

    /* get our local rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting local rank";
        goto error;
    }
    orte_process_info.my_local_rank = u16;

    /* get our node rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting node rank";
        goto error;
    }
    orte_process_info.my_node_rank = u16;

    /* get max procs */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting max procs";
        goto error;
    }
    orte_process_info.max_procs = u32;

    /* get job size */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_JOB_SIZE,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting job size";
        goto error;
    }
    orte_process_info.num_procs = u32;

    /* push into the environ for pickup in MPI layer for
     * MPI-3 required info key
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) {
        asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs);
        putenv(ev1);
        added_num_procs = true;
    }
    if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
        asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs);
        putenv(ev2);
        added_app_ctx = true;
    }


    /* get our app number from PMI - ok if not found */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM,
                                   ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.app_num = u32;
    } else {
        orte_process_info.app_num = 0;
    }

    /* get the number of local peers - required for wireup of
     * shared memory BTL */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_SIZE,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.num_local_peers = u32 - 1;  // want number besides ourselves
    } else {
        orte_process_info.num_local_peers = 0;
    }

    /* setup transport keys in case the MPI layer needs them -
     * we can use the jobfam and stepid as unique keys
     * because they are unique values assigned by the RM
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
        unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
        unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid);
        if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        opal_output_verbose(2, orte_ess_base_framework.framework_output,
                            "%s transport key %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), string_key);
        asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key);
        putenv(envar);
        added_transport_keys = true;
        /* cannot free the envar as that messes up our environ */
        free(string_key);
    }

    /* retrieve our topology */
    val = NULL;
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO,
                                   ORTE_PROC_MY_NAME, &val, OPAL_STRING);
    if (OPAL_SUCCESS == ret && NULL != val) {
        /* load the topology */
        if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            free(val);
            error = "setting topology";
            goto error;
        }
        if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) {
            ret = OPAL_ERROR;
            free(val);
            hwloc_topology_destroy(opal_hwloc_topology);
            error = "setting topology";
            goto error;
        }
        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
                                          (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                                           HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                           HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        /* now load the topology */
        if (0 != hwloc_topology_load(opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        free(val);
        /* filter the cpus thru any default cpu set */
        if (OPAL_SUCCESS != (ret = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) {
            error = "filtering topology";
            goto error;
        }
    } else {
        /* it wasn't passed down to us, so go get it */
        if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
            error = "topology discovery";
            goto error;
        }
        /* push it into the PMIx database in case someone
         * tries to retrieve it so we avoid an attempt to
         * get it again */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
        kv->type = OPAL_STRING;
        if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) {
            error = "topology export";
            goto error;
        }
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) {
            error = "topology store";
            goto error;
        }
        OBJ_RELEASE(kv);
    }

    /* get our local peers */
    if (0 < orte_process_info.num_local_peers) {
        /* if my local rank if too high, then that's an error */
        if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) {
            ret = ORTE_ERR_BAD_PARAM;
            error = "num local peers";
            goto error;
        }
        /* retrieve the local peers */
        OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS,
                              ORTE_PROC_MY_NAME, &val, OPAL_STRING);
        if (OPAL_SUCCESS == ret && NULL != val) {
            peers = opal_argv_split(val, ',');
            free(val);
            /* and their cpusets, if available */
            OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS, ORTE_PROC_MY_NAME, &val, OPAL_STRING);
            if (OPAL_SUCCESS == ret && NULL != val) {
                cpusets = opal_argv_split(val, ':');
                free(val);
            } else {
                cpusets = NULL;
            }
        } else {
            peers = NULL;
            cpusets = NULL;
        }
    } else {
        peers = NULL;
        cpusets = NULL;
    }

    /* set the locality */
    if (NULL != peers) {
        /* indentify our cpuset */
        if (NULL != cpusets) {
            mycpuset = cpusets[orte_process_info.my_local_rank];
        } else {
            mycpuset = NULL;
        }
        name.jobid = ORTE_PROC_MY_NAME->jobid;
        for (i=0; NULL != peers[i]; i++) {
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCALITY);
            kv->type = OPAL_UINT16;
            name.vpid = strtoul(peers[i], NULL, 10);
            if (name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* we are fully local to ourselves */
                u16 = OPAL_PROC_ALL_LOCAL;
            } else if (NULL == mycpuset || NULL == cpusets[i] ||
                       0 == strcmp(cpusets[i], "UNBOUND")) {
                /* all we can say is that it shares our node */
                u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
            } else {
                /* we have it, so compute the locality */
                u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]);
            }
            OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
                                 "%s ess:pmi:locality: proc %s locality %x",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&name), u16));
            kv->data.uint16 = u16;
            ret = opal_pmix.store_local(&name, kv);
            if (OPAL_SUCCESS != ret) {
                error = "local store of locality";
                opal_argv_free(peers);
                opal_argv_free(cpusets);
                goto error;
            }
            OBJ_RELEASE(kv);
        }
        opal_argv_free(peers);
        opal_argv_free(cpusets);
    }

    /* now that we have all required info, complete the setup */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_app_setup";
        goto error;
    }

    /* setup process binding */
    if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
        error = "proc_binding";
        goto error;
    }

    /* this needs to be set to enable debugger use when direct launched */
    if (NULL == orte_process_info.my_daemon_uri) {
        orte_standalone_operation = true;
    }

    /* set max procs */
    if (orte_process_info.max_procs < orte_process_info.num_procs) {
        orte_process_info.max_procs = orte_process_info.num_procs;
    }

    /***  PUSH DATA FOR OTHERS TO FIND   ***/

    /* push our RML URI in case others need to talk directly to us */
    rmluri = orte_rml.get_contact_info();
    /* push it out for others to use */
    OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        error = "pmix put uri";
        goto error;
    }
    free(rmluri);

    /* push our hostname so others can find us, if they need to */
    OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        error = "db store hostname";
        goto error;
    }

    /* if we are an ORTE app - and not an MPI app - then
     * we need to exchange our connection info here.
     * MPI_Init has its own modex, so we don't need to do
     * two of them. However, if we don't do a modex at all,
     * then processes have no way to communicate
     *
     * NOTE: only do this when the process originally launches.
     * Cannot do this on a restart as the rest of the processes
     * in the job won't be executing this step, so we would hang
     */
    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
        opal_pmix.fence(NULL, 0);
    }

    return ORTE_SUCCESS;

error:
    if (!progress_thread_running) {
        /* can't send the help message, so ensure it
         * comes out locally
         */
        orte_show_help_finalize();
    }
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    return ret;
}
Esempio n. 6
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char *envar, *ev1, *ev2;
    uint64_t unique_key[2];
    char *string_key;
    opal_value_t *kv;
    char *val;
    int u32, *u32ptr;
    uint16_t u16, *u16ptr;
    char **peers=NULL, *mycpuset;
    opal_process_name_t wildcard_rank, pname;
    bool bool_val, *bool_ptr = &bool_val, tdir_mca_override = false;
    size_t i;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }

    /* get an async event base - we use the opal_async one so
     * we don't startup extra threads if not needed */
    orte_event_base = opal_progress_thread_init(NULL);
    progress_thread_running = true;

    /* open and setup pmix */
    if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    /* set the event base */
    opal_pmix_base_set_evbase(orte_event_base);
    /* initialize the selected module */
    if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) {
        /* we cannot run - this could be due to being direct launched
         * without the required PMI support being built. Try to detect
         * that scenario and warn the user */
        if (ORTE_SCHIZO_DIRECT_LAUNCHED == orte_schizo.check_launch_environment() &&
            NULL != (envar = getenv("ORTE_SCHIZO_DETECTION"))) {
            if (0 == strcmp(envar, "SLURM")) {
                /* yes to both - so emit a hopefully helpful
                 * error message and abort */
                orte_show_help_finalize();
                orte_show_help("help-ess-base.txt", "slurm-error", true);
                return ORTE_ERR_SILENT;
            } else if (0 == strcmp(envar, "ALPS")) {
                /* we were direct launched by ALPS */
                orte_show_help_finalize();
                orte_show_help("help-ess-base.txt", "alps-error", true);
                return ORTE_ERR_SILENT;
            }
        }
        error = "pmix init";
        goto error;
    }
    u32ptr = &u32;
    u16ptr = &u16;

    /****   THE FOLLOWING ARE REQUIRED VALUES   ***/
    /* pmix.init set our process name down in the OPAL layer,
     * so carry it forward here */
    ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
    ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;

    /* setup a name for retrieving data associated with the job */
    wildcard_rank.jobid = ORTE_PROC_MY_NAME->jobid;
    wildcard_rank.vpid = ORTE_NAME_WILDCARD->vpid;

    /* setup a name for retrieving proc-specific data */
    pname.jobid = ORTE_PROC_MY_NAME->jobid;
    pname.vpid = 0;

    /* get our local rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting local rank";
        goto error;
    }
    orte_process_info.my_local_rank = u16;

    /* get our node rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting node rank";
        goto error;
    }
    orte_process_info.my_node_rank = u16;

    /* get max procs for this application */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS,
                          &wildcard_rank, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting max procs";
        goto error;
    }
    orte_process_info.max_procs = u32;

    /* get job size */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_JOB_SIZE,
                          &wildcard_rank, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting job size";
        goto error;
    }
    orte_process_info.num_procs = u32;

    /* push into the environ for pickup in MPI layer for
     * MPI-3 required info key
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) {
        asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs);
        putenv(ev1);
        added_num_procs = true;
    }
    if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
        asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs);
        putenv(ev2);
        added_app_ctx = true;
    }


    /* get our app number from PMI - ok if not found */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM,
                                   ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.app_num = u32;
    } else {
        orte_process_info.app_num = 0;
    }

    /* get the number of local peers - required for wireup of
     * shared memory BTL */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_SIZE,
                          &wildcard_rank, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.num_local_peers = u32 - 1;  // want number besides ourselves
    } else {
        orte_process_info.num_local_peers = 0;
    }

    /* get number of nodes in the job */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NUM_NODES,
                                   &wildcard_rank, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.num_nodes = u32;
    }

    /* setup transport keys in case the MPI layer needs them -
     * we can use the jobfam and stepid as unique keys
     * because they are unique values assigned by the RM
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
        unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
        unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid);
        if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        opal_output_verbose(2, orte_ess_base_framework.framework_output,
                            "%s transport key %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), string_key);
        asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key);
        putenv(envar);
        added_transport_keys = true;
        /* cannot free the envar as that messes up our environ */
        free(string_key);
    }

    /* retrieve temp directories info */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
    if (OPAL_SUCCESS == ret && NULL != val) {
        /* We want to provide user with ability
         * to override RM settings at his own risk
         */
        if( NULL == orte_process_info.top_session_dir ){
            orte_process_info.top_session_dir = val;
        } else {
            /* keep the MCA setting */
            tdir_mca_override = true;
            free(val);
        }
        val = NULL;
    }

    if( !tdir_mca_override ){
        OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
        if (OPAL_SUCCESS == ret && NULL != val) {
            /* We want to provide user with ability
             * to override RM settings at his own risk
             */
            if( NULL == orte_process_info.job_session_dir ){
                orte_process_info.job_session_dir = val;
            } else {
                /* keep the MCA setting */
                free(val);
                tdir_mca_override = true;
            }
            val = NULL;
        }
    }

    if( !tdir_mca_override ){
        OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
        if (OPAL_SUCCESS == ret && NULL != val) {
            /* We want to provide user with ability
             * to override RM settings at his own risk
             */
            if( NULL == orte_process_info.proc_session_dir ){
                orte_process_info.proc_session_dir = val;
            } else {
                /* keep the MCA setting */
                tdir_mca_override = true;
                free(val);
            }
            val = NULL;
        }
    }

    if( !tdir_mca_override ){
        OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TDIR_RMCLEAN, &wildcard_rank, &bool_ptr, OPAL_BOOL);
        if (OPAL_SUCCESS == ret ) {
            orte_process_info.rm_session_dirs = bool_val;
        }
    }

    /* get our local peers */
    if (0 < orte_process_info.num_local_peers) {
        /* if my local rank if too high, then that's an error */
        if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) {
            ret = ORTE_ERR_BAD_PARAM;
            error = "num local peers";
            goto error;
        }
        /* retrieve the local peers */
        OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS,
                              &wildcard_rank, &val, OPAL_STRING);
        if (OPAL_SUCCESS == ret && NULL != val) {
            peers = opal_argv_split(val, ',');
            free(val);
        } else {
            peers = NULL;
        }
    } else {
        peers = NULL;
    }

    /* set the locality */
    if (NULL != peers) {
        /* identify our location */
        val = NULL;
        OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
                                       ORTE_PROC_MY_NAME, &val, OPAL_STRING);
        if (OPAL_SUCCESS == ret && NULL != val) {
            mycpuset = val;
        } else {
            mycpuset = NULL;
        }
        pname.jobid = ORTE_PROC_MY_NAME->jobid;
        for (i=0; NULL != peers[i]; i++) {
            pname.vpid = strtoul(peers[i], NULL, 10);
            if (pname.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* we are fully local to ourselves */
                u16 = OPAL_PROC_ALL_LOCAL;
            } else {
                val = NULL;
                OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
                                               &pname, &val, OPAL_STRING);
                if (OPAL_SUCCESS == ret && NULL != val) {
                    u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
                    free(val);
                } else {
                    /* all we can say is that it shares our node */
                    u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
                }
            }
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCALITY);
            kv->type = OPAL_UINT16;
            OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
                                 "%s ess:pmi:locality: proc %s locality %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&pname), opal_hwloc_base_print_locality(u16)));
            kv->data.uint16 = u16;
            ret = opal_pmix.store_local(&pname, kv);
            if (OPAL_SUCCESS != ret) {
                error = "local store of locality";
                opal_argv_free(peers);
                if (NULL != mycpuset) {
                    free(mycpuset);
                }
                goto error;
            }
            OBJ_RELEASE(kv);
        }
        opal_argv_free(peers);
        if (NULL != mycpuset) {
            free(mycpuset);
        }
    }

    /* now that we have all required info, complete the setup */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_app_setup";
        goto error;
    }

    /* setup process binding */
    if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
        error = "proc_binding";
        goto error;
    }

    /* this needs to be set to enable debugger use when direct launched */
    if (NULL == orte_process_info.my_daemon_uri) {
        orte_standalone_operation = true;
    }

    /* set max procs */
    if (orte_process_info.max_procs < orte_process_info.num_procs) {
        orte_process_info.max_procs = orte_process_info.num_procs;
    }

    /* push our hostname so others can find us, if they need to - the
     * native PMIx component will ignore this request as the hostname
     * is provided by the system */
    OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        error = "db store hostname";
        goto error;
    }

    /* if we are an ORTE app - and not an MPI app - then
     * we need to exchange our connection info here.
     * MPI_Init has its own modex, so we don't need to do
     * two of them. However, if we don't do a modex at all,
     * then processes have no way to communicate
     *
     * NOTE: only do this when the process originally launches.
     * Cannot do this on a restart as the rest of the processes
     * in the job won't be executing this step, so we would hang
     */
    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
        /* need to commit the data before we fence */
        opal_pmix.commit();
        opal_pmix.fence(NULL, 0);
    }

    return ORTE_SUCCESS;

 error:
    if (!progress_thread_running) {
        /* can't send the help message, so ensure it
         * comes out locally
         */
        orte_show_help_finalize();
    }
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    return ret;
}