/*
 * Duplicate an hwloc topology.
 *
 * The source topology is exported to an in-memory XML buffer and then
 * re-imported into a freshly initialized topology stored in *dest.
 * The "support" flags are not part of the XML form, so they are copied
 * over separately once the new topology has been loaded.
 *
 * dest  [out] receives the newly created topology on success
 * src   [in]  topology to duplicate
 * type  unused
 *
 * Returns OPAL_SUCCESS, or OPAL_ERROR if any hwloc call fails.  When the
 * XML import or the load fails, the half-built topology is destroyed
 * before returning.
 */
int opal_hwloc_copy(hwloc_topology_t *dest, hwloc_topology_t src, opal_data_type_t type)
{
    char *xml = NULL;
    int len = 0;
    int rc = OPAL_ERROR;
    const struct hwloc_topology_support *support, *destsupport;

    (void)type;

    if (0 != hwloc_topology_export_xmlbuffer(src, &xml, &len)) {
        return OPAL_ERROR;
    }
    if (0 != hwloc_topology_init(dest)) {
        goto release_xml;
    }
    if (0 != hwloc_topology_set_xmlbuffer(*dest, xml, len) ||
        0 != hwloc_topology_load(*dest)) {
        hwloc_topology_destroy(*dest);
        goto release_xml;
    }

    /* get the available support - hwloc unfortunately does
     * not include this info in its xml support!
     *
     * The support struct only holds pointers to the three per-area
     * structs.  Copy the pointed-to contents rather than the pointers
     * themselves, otherwise the new topology would alias (and later
     * free) memory that belongs to the source topology.
     */
    support = hwloc_topology_get_support(src);
    destsupport = hwloc_topology_get_support(*dest);
    *destsupport->discovery = *support->discovery;
    *destsupport->cpubind = *support->cpubind;
    *destsupport->membind = *support->membind;

    rc = OPAL_SUCCESS;

 release_xml:
    /* buffers returned by the hwloc exporter must go back through hwloc */
    hwloc_free_xmlbuffer(src, xml);
    return rc;
}
Beispiel #2
0
/*
 * Build an hwloc topology from the XML text in buf (len bytes), derive
 * its signature, and record it for the given rank in machs.  When r_mode
 * is RSREADER_HWLOC, resources are additionally generated from the
 * topology, keyed by the signature's digest.
 *
 * The temporary topology is always destroyed before returning.
 * Returns 0 on success and -1 on any failure (including a NULL machs).
 */
int rsreader_hwloc_load (resrc_api_ctx_t *rsapi, const char *buf, size_t len,
        uint32_t rank, rsreader_t r_mode, machs_t *machs, char **err_str)
{
    hwloc_topology_t topo;
    rssig_t *sig = NULL;
    int ok;

    if (machs == NULL)
        return -1;
    if (hwloc_topology_init (&topo) != 0)
        return -1;

    /* each step runs only if every previous one succeeded */
    ok = hwloc_topology_set_xmlbuffer (topo, buf, len) == 0
         && hwloc_topology_load (topo) == 0
         && rs2rank_set_signature ((char*)buf, len, topo, &sig) == 0
         && rs2rank_tab_update (machs, get_hn (topo), sig, rank) == 0;

    if (ok && r_mode == RSREADER_HWLOC) {
        const char *digest = rs2rank_get_digest (sig);
        if (!resrc_generate_hwloc_resources (rsapi, topo, digest, err_str))
            ok = 0;
    }

    hwloc_topology_destroy (topo);
    return ok ? 0 : -1;
}
Beispiel #3
0
/*
 * Unpack up to *num_vals hwloc topologies from buffer into dest.
 *
 * Each topology travels as an XML string followed by three raw byte
 * blobs holding the discovery, cpubind and membind support structs
 * (the XML form does not include them).
 *
 * buffer    [in]     buffer to unpack from
 * dest      [out]    array of hwloc_topology_t receiving the results
 * num_vals  [in/out] number requested on entry, number actually
 *                    unpacked on return
 * type      unused
 *
 * Returns OPAL_SUCCESS, the error code from the failing unpack call, or
 * OPAL_ERROR when an hwloc call fails.  A topology that could not be
 * completed is destroyed and is not counted in *num_vals.
 */
int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest,
                      int32_t *num_vals,
                      opal_data_type_t type)
{
    /* NOTE: hwloc defines topology_t as a pointer to a struct! */
    hwloc_topology_t t, *tarray  = (hwloc_topology_t*)dest;
    int rc=OPAL_SUCCESS, i, cnt, j;
    char *xmlbuffer;
    struct hwloc_topology_support *support;

    (void)type;

    for (i=0, j=0; i < *num_vals; i++) {
        /* unpack the xml string */
        xmlbuffer = NULL;
        cnt=1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &xmlbuffer, &cnt, OPAL_STRING))) {
            goto cleanup;
        }
        if (NULL == xmlbuffer) {
            /* nothing to parse - don't hand NULL to strlen */
            rc = OPAL_ERROR;
            goto cleanup;
        }

        /* convert the xml */
        if (0 != hwloc_topology_init(&t)) {
            rc = OPAL_ERROR;
            free(xmlbuffer);
            goto cleanup;
        }
        /* the length given to hwloc has to cover the terminating NUL */
        if (0 != hwloc_topology_set_xmlbuffer(t, xmlbuffer, (int)strlen(xmlbuffer)+1)) {
            rc = OPAL_ERROR;
            free(xmlbuffer);
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        free(xmlbuffer);
        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(t, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) {
            rc = OPAL_ERROR;
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        /* now load the topology */
        if (0 != hwloc_topology_load(t)) {
            rc = OPAL_ERROR;
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* get the available support - hwloc unfortunately does
         * not include this info in its xml import!
         *
         * From here on the topology is fully loaded but not yet handed
         * to the caller, so every failure path must destroy it.
         */
        support = (struct hwloc_topology_support*)hwloc_topology_get_support(t);
        cnt = sizeof(struct hwloc_topology_discovery_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->discovery, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        cnt = sizeof(struct hwloc_topology_cpubind_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->cpubind, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        cnt = sizeof(struct hwloc_topology_membind_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->membind, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* pass it back */
        tarray[i] = t;

        /* track the number added */
        j++;
    }

 cleanup:
    *num_vals = j;
    return rc;
}
Beispiel #4
0
/*
 * Test driver for switching between hwloc discovery backends.
 *
 * The real topology is first exported to an XML buffer and to a temporary
 * XML file.  A second topology is then repeatedly re-initialized and
 * pointed at the XML file, XML buffer, custom, synthetic and fsroot
 * backends, first without loading and then with a load, a consistency
 * check and a check of the "is this system" flag.
 *
 * Calls that must happen regardless of the build mode are kept outside
 * assert() so that compiling with NDEBUG does not silently skip them.
 */
int main(void)
{
  hwloc_topology_t topology1, topology2;
  char *xmlbuf;
  int xmlbuflen;
  char xmlfile[] = "hwloc_backends.tmpxml.XXXXXX";
  int xmlbufok = 0, xmlfileok = 0, xmlfilefd;
  hwloc_obj_t sw;
  int err;

  printf("trying to export topology to XML buffer and file for later...\n");
  hwloc_topology_init(&topology1);
  hwloc_topology_load(topology1);
  assert(hwloc_topology_is_thissystem(topology1));
  if (hwloc_topology_export_xmlbuffer(topology1, &xmlbuf, &xmlbuflen) < 0)
    printf("XML buffer export failed (%s), ignoring\n", strerror(errno));
  else
    xmlbufok = 1;
  /* mkstemp creates the file; the descriptor is kept open until the end */
  xmlfilefd = mkstemp(xmlfile);
  if (xmlfilefd < 0 || hwloc_topology_export_xml(topology1, xmlfile) < 0)
    printf("XML file export failed (%s), ignoring\n", strerror(errno));
  else
    xmlfileok = 1;


  printf("init...\n");
  hwloc_topology_init(&topology2);
  if (xmlfileok) {
    printf("switching to xml...\n");
    err = hwloc_topology_set_xml(topology2, xmlfile);
    assert(!err);
  }
  if (xmlbufok) {
    printf("switching to xmlbuffer...\n");
    err = hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen);
    assert(!err);
  }
  printf("switching to custom...\n");
  hwloc_topology_set_custom(topology2);
  printf("switching to synthetic...\n");
  hwloc_topology_set_synthetic(topology2, "machine:2 node:3 cache:2 pu:4");
  printf("switching sysfs fsroot to // ...\n");
  hwloc_topology_set_fsroot(topology2, "//"); /* valid path that won't be recognized as '/' */
  printf("switching sysfs fsroot to / ...\n");
  hwloc_topology_set_fsroot(topology2, "/");
  hwloc_topology_destroy(topology2);

  if (xmlfileok) {
    printf("switching to xml and loading...\n");
    hwloc_topology_init(&topology2);
    err = hwloc_topology_set_xml(topology2, xmlfile);
    assert(!err);
    hwloc_topology_load(topology2);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    hwloc_topology_destroy(topology2);
  }

  if (xmlbufok) {
    printf("switching to xmlbuffer and loading...\n");
    hwloc_topology_init(&topology2);
    err = hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen);
    assert(!err);
    hwloc_topology_load(topology2);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    hwloc_topology_destroy(topology2);
  }

  printf("switching to custom and loading...\n");
  hwloc_topology_init(&topology2);
  hwloc_topology_set_custom(topology2);
  sw = hwloc_custom_insert_group_object_by_parent(topology2, hwloc_get_root_obj(topology2), 0);
  assert(sw);
  hwloc_custom_insert_topology(topology2, sw, topology1, NULL);
  hwloc_topology_load(topology2);
  hwloc_topology_check(topology2);
  assert(!hwloc_topology_is_thissystem(topology2));
  hwloc_topology_destroy(topology2);

  printf("switching to synthetic and loading...\n");
  hwloc_topology_init(&topology2);
  hwloc_topology_set_synthetic(topology2, "machine:2 node:3 cache:2 pu:4");
  hwloc_topology_load(topology2);
  hwloc_topology_check(topology2);
  assert(!hwloc_topology_is_thissystem(topology2));
  hwloc_topology_destroy(topology2);

  printf("switching sysfs fsroot to // and loading...\n");
  hwloc_topology_init(&topology2);
  err = hwloc_topology_set_fsroot(topology2, "//"); /* '//' isn't recognized as the normal fsroot on Linux, and it fails and falls back to normal topology on !Linux */
  hwloc_topology_load(topology2);
  hwloc_topology_check(topology2);
  assert(!hwloc_topology_is_thissystem(topology2) == !err);
  hwloc_topology_destroy(topology2);

  printf("switching sysfs fsroot to / and loading...\n");
  hwloc_topology_init(&topology2);
  err = hwloc_topology_set_fsroot(topology2, "/");
  hwloc_topology_load(topology2);
  hwloc_topology_check(topology2);
  assert(hwloc_topology_is_thissystem(topology2)); /* '/' is recognized as the normal fsroot on Linux, and it fails and falls back to normal topology on !Linux */
  hwloc_topology_destroy(topology2);

  printf("switching to synthetic...\n");
  hwloc_topology_init(&topology2);
  hwloc_topology_set_synthetic(topology2, "machine:2 node:3 cache:2 pu:4");
  hwloc_topology_destroy(topology2);


  if (xmlbufok)
    hwloc_free_xmlbuffer(topology1, xmlbuf);
  if (xmlfilefd >= 0) {
    unlink(xmlfile);
    close(xmlfilefd);
  }
  hwloc_topology_destroy(topology1);

  (void)err; /* only read inside assert(); keeps NDEBUG builds warning-free */
  return 0;
}
Beispiel #5
0
/*
 * Initialize the runtime for a singleton process.
 *
 * Steps, in order:
 *   1. run the standard prolog
 *   2. decide how to reach an HNP: use the configured server URI (given
 *      directly or via a "file:<name>" indirection), run isolated, or
 *      fork a private HNP
 *   3. start the progress thread and bring up PMIx
 *   4. pull local rank, node rank, max procs and app number from PMIx
 *   5. export the proc-count and transport-key environment variables if
 *      they are not already set
 *   6. obtain the hwloc topology, either from PMIx (as XML) or by local
 *      discovery, in which case it is stored back into PMIx
 *   7. finish with the standard app setup and publish our hostname
 *
 * Returns ORTE_SUCCESS on success.  Failures before PMIx is opened return
 * the error directly; later failures go through the "error" label, which
 * prints a help message (unless the error is silent) and returns ret.
 */
static int rte_init(void)
{
    int rc, ret;
    char *error = NULL;
    char *envar, *ev1, *ev2;
    uint64_t unique_key[2];
    char *string_key;
    opal_value_t *kv;
    char *val;
    int u32, *u32ptr;
    uint16_t u16, *u16ptr;
    orte_process_name_t name;

    /* run the prolog */
    if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    u32ptr = &u32;
    u16ptr = &u16;

    if (NULL != mca_ess_singleton_component.server_uri) {
        /* we are going to connect to a server HNP */
        if (0 == strncmp(mca_ess_singleton_component.server_uri, "file", strlen("file")) ||
            0 == strncmp(mca_ess_singleton_component.server_uri, "FILE", strlen("FILE"))) {
            char input[1024], *filename;
            FILE *fp;

            /* it is a file - get the filename */
            filename = strchr(mca_ess_singleton_component.server_uri, ':');
            if (NULL == filename) {
                /* filename is not correctly formatted */
                orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
                               "singleton", mca_ess_singleton_component.server_uri);
                return ORTE_ERROR;
            }
            ++filename; /* space past the : */

            if (0 >= strlen(filename)) {
                /* they forgot to give us the name! */
                orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
                               "singleton", mca_ess_singleton_component.server_uri);
                return ORTE_ERROR;
            }

            /* open the file and extract the uri */
            fp = fopen(filename, "r");
            if (NULL == fp) { /* can't find or read file! */
                orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
                               "singleton", mca_ess_singleton_component.server_uri);
                return ORTE_ERROR;
            }
            memset(input, 0, 1024);  // initialize the array to ensure a NULL termination
            if (NULL == fgets(input, 1023, fp)) {
                /* something malformed about file */
                fclose(fp);
                orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
                               "singleton", mca_ess_singleton_component.server_uri, "singleton");
                return ORTE_ERROR;
            }
            fclose(fp);
            /* remove the newline, if there is one - a file lacking a
             * trailing newline must not lose the last character of the
             * uri, and an empty line must not index before the array */
            input[strcspn(input, "\n")] = '\0';
            orte_process_info.my_hnp_uri = strdup(input);
        } else {
            orte_process_info.my_hnp_uri = strdup(mca_ess_singleton_component.server_uri);
        }
        /* save the daemon uri - we will process it later */
        orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri);
        /* construct our name - we are in their job family, so we know that
         * much. However, we cannot know how many other singletons and jobs
         * this HNP is running. Oh well - if someone really wants to use this
         * option, they can try to figure it out. For now, we'll just assume
         * we are the only ones */
        ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_HNP->jobid, 1);
        /* obviously, we are vpid=0 for this job */
        ORTE_PROC_MY_NAME->vpid = 0;

        /* for convenience, push the pubsub version of this param into the environ */
        opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ);
    } else if (NULL != getenv("SINGULARITY_CONTAINER") ||
               mca_ess_singleton_component.isolated) {
        /* ensure we use the isolated pmix component */
        opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
    } else {
        /* spawn our very own HNP to support us */
        if (ORTE_SUCCESS != (rc = fork_hnp())) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        /* our name was given to us by the HNP */
        opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ);
    }

    /* get an async event base - we use the opal_async one so
     * we don't startup extra threads if not needed */
    orte_event_base = opal_progress_thread_init(NULL);
    progress_thread_running = true;

    /* open and setup pmix */
    if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
        error = "opening pmix";
        goto error;
    }
    if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
        error = "select pmix";
        goto error;
    }
    /* set the event base */
    opal_pmix_base_set_evbase(orte_event_base);
    /* initialize the selected module */
    if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }

    /* pmix.init set our process name down in the OPAL layer,
     * so carry it forward here */
    ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
    ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
    name.jobid = OPAL_PROC_MY_NAME.jobid;
    name.vpid = ORTE_VPID_WILDCARD;

    /* get our local rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting local rank";
        goto error;
    }
    orte_process_info.my_local_rank = u16;

    /* get our node rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting node rank";
        goto error;
    }
    orte_process_info.my_node_rank = u16;

    /* get max procs */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS,
                          &name, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting max procs";
        goto error;
    }
    orte_process_info.max_procs = u32;

    /* we are a singleton, so there is only one proc in the job */
    orte_process_info.num_procs = 1;
    /* push into the environ for pickup in MPI layer for
     * MPI-3 required info key
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) {
        /* on failure asprintf leaves ev1 undefined - never hand that to putenv */
        if (0 > asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs)) {
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            error = "setting num procs envar";
            goto error;
        }
        putenv(ev1);
        added_num_procs = true;
    }
    if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
        if (0 > asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs)) {
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            error = "setting app ctx num procs envar";
            goto error;
        }
        putenv(ev2);
        added_app_ctx = true;
    }


    /* get our app number from PMI - ok if not found */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_APPNUM,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.app_num = u32;
    } else {
        orte_process_info.app_num = 0;
    }
    /* set some other standard values */
    orte_process_info.num_local_peers = 0;

    /* setup transport keys in case the MPI layer needs them -
     * we can use the jobfam and stepid as unique keys
     * because they are unique values assigned by the RM
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
        unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
        unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid);
        if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        if (0 > asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key)) {
            free(string_key);
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        putenv(envar);
        added_transport_keys = true;
        /* cannot free the envar as that messes up our environ */
        free(string_key);
    }

    /* retrieve our topology */
    val = NULL;  /* so the NULL test below is meaningful if nothing is returned */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO,
                          &name, &val, OPAL_STRING);
    if (OPAL_SUCCESS == ret && NULL != val) {
        /* load the topology */
        if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            free(val);
            error = "setting topology";
            goto error;
        }
        /* the length given to hwloc has to cover the terminating NUL */
        if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, (int)strlen(val)+1)) {
            ret = OPAL_ERROR;
            free(val);
            hwloc_topology_destroy(opal_hwloc_topology);
            error = "setting topology";
            goto error;
        }
        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
                                         (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                                          HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                          HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        /* now load the topology */
        if (0 != hwloc_topology_load(opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        free(val);
    } else {
        /* it wasn't passed down to us, so go get it */
        if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
            error = "topology discovery";
            goto error;
        }
        /* push it into the PMIx database in case someone
         * tries to retrieve it so we avoid an attempt to
         * get it again */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
        kv->type = OPAL_STRING;
        if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) {
            error = "topology export";
            goto error;
        }
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) {
            /* drop our reference exactly as the success path does */
            OBJ_RELEASE(kv);
            error = "topology store";
            goto error;
        }
        OBJ_RELEASE(kv);
    }

    /* use the std app init to complete the procedure */
    if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* push our hostname so others can find us, if they need to */
    OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        error = "db store hostname";
        goto error;
    }

    return ORTE_SUCCESS;

 error:
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    return ret;
}
/*
 * Test driver for userdata handling across XML export/import.
 *
 * Checks the real topology and a synthetic one, attaches userdata to
 * three objects of the synthetic topology, and then round-trips it
 * through an XML buffer twice: once without callbacks (the reimported
 * topology goes through check()) and once with the export/import
 * callbacks installed (the three objects must carry the values 0x4, 0x5
 * and 0x6 after reimport).
 *
 * Every exported XML buffer is handed back to hwloc once the topology
 * reimported from it has been destroyed.
 */
int main(void)
{
  hwloc_topology_t topology, reimport;
  hwloc_obj_t obj1, obj2, obj3;
  char *xmlbuf;
  int xmlbuflen;
  int err;

  randomstring = malloc(RANDOMSTRINGLENGTH);
  assert(randomstring);
  /* keep it uninitialized, we want binary data */

  /* check the real topology */
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  check(topology);
  assert(hwloc_topology_get_userdata(topology) == NULL);
  hwloc_topology_destroy(topology);

  /* check a synthetic topology */
  hwloc_topology_init(&topology);
  hwloc_topology_set_userdata(topology, (void *)(uintptr_t)0x987654);
  hwloc_topology_set_synthetic(topology, "6 5 4 3 2");
  hwloc_topology_load(topology);
  check(topology);

  /* now place some userdata and see if importing/exporting works well */
  obj1 = hwloc_get_root_obj(topology);
  assert(obj1);
  obj1->userdata = (void *)(uintptr_t) 0x1;
  obj2 = hwloc_get_obj_by_depth(topology, 3, 13);
  assert(obj2);
  obj2->userdata = (void *)(uintptr_t) 0x2;
  obj3 = hwloc_get_obj_by_depth(topology, 5, 2*3*4*5*6-1);
  assert(obj3);
  obj3->userdata = (void *)(uintptr_t) 0x3;

  /* export/import without callback, we get nothing */
  err = hwloc_topology_export_xmlbuffer(topology, &xmlbuf, &xmlbuflen);
  assert(err >= 0);

  hwloc_topology_init(&reimport);
  hwloc_topology_set_xmlbuffer(reimport, xmlbuf, xmlbuflen);
  hwloc_topology_load(reimport);
  check(reimport); /* there should be no userdata */
  hwloc_topology_destroy(reimport);
  /* release the first buffer before xmlbuf is overwritten below */
  hwloc_free_xmlbuffer(topology, xmlbuf);

  /* export/import with callback, we should get three userdata */
  hwloc_topology_set_userdata_export_callback(topology, export_cb);
  err = hwloc_topology_export_xmlbuffer(topology, &xmlbuf, &xmlbuflen);
  assert(err >= 0);

  hwloc_topology_init(&reimport);
  hwloc_topology_set_userdata_import_callback(reimport, import_cb);
  hwloc_topology_set_xmlbuffer(reimport, xmlbuf, xmlbuflen);
  hwloc_topology_load(reimport);
  obj1 = hwloc_get_root_obj(reimport);
  assert(obj1);
  assert(obj1->userdata == (void *)(uintptr_t) 0x4);
  obj2 = hwloc_get_obj_by_depth(reimport, 3, 13);
  assert(obj2);
  assert(obj2->userdata == (void *)(uintptr_t) 0x5);
  obj3 = hwloc_get_obj_by_depth(reimport, 5, 2*3*4*5*6-1);
  assert(obj3);
  assert(obj3->userdata == (void *)(uintptr_t) 0x6);
  hwloc_topology_destroy(reimport);
  hwloc_free_xmlbuffer(topology, xmlbuf);

  assert(hwloc_topology_get_userdata(topology) == (void *)(uintptr_t)0x987654);
  hwloc_topology_destroy(topology);

  free(randomstring);
  (void)err; /* only read inside assert(); keeps NDEBUG builds warning-free */
  return 0;
}
Beispiel #7
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char *envar, *ev1, *ev2;
    uint64_t unique_key[2];
    char *string_key;
    char *rmluri;
    opal_value_t *kv;
    char *val;
    int u32, *u32ptr;
    uint16_t u16, *u16ptr;
    char **peers=NULL, *mycpuset, **cpusets=NULL;
    opal_process_name_t name;
    size_t i;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }

    /* get an async event base - we use the opal_async one so
     * we don't startup extra threads if not needed */
    orte_event_base = opal_progress_thread_init(NULL);
    progress_thread_running = true;

    /* open and setup pmix */
    if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    /* set the event base */
    opal_pmix_base_set_evbase(orte_event_base);
    /* initialize the selected module */
    if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    u32ptr = &u32;
    u16ptr = &u16;

    /****   THE FOLLOWING ARE REQUIRED VALUES   ***/
    /* pmix.init set our process name down in the OPAL layer,
     * so carry it forward here */
    ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
    ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;

    /* get our local rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting local rank";
        goto error;
    }
    orte_process_info.my_local_rank = u16;

    /* get our node rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting node rank";
        goto error;
    }
    orte_process_info.my_node_rank = u16;

    /* get max procs */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting max procs";
        goto error;
    }
    orte_process_info.max_procs = u32;

    /* get job size */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_JOB_SIZE,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting job size";
        goto error;
    }
    orte_process_info.num_procs = u32;

    /* push into the environ for pickup in MPI layer for
     * MPI-3 required info key
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) {
        asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs);
        putenv(ev1);
        added_num_procs = true;
    }
    if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
        asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs);
        putenv(ev2);
        added_app_ctx = true;
    }


    /* get our app number from PMI - ok if not found */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM,
                                   ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.app_num = u32;
    } else {
        orte_process_info.app_num = 0;
    }

    /* get the number of local peers - required for wireup of
     * shared memory BTL */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_SIZE,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.num_local_peers = u32 - 1;  // want number besides ourselves
    } else {
        orte_process_info.num_local_peers = 0;
    }

    /* setup transport keys in case the MPI layer needs them -
     * we can use the jobfam and stepid as unique keys
     * because they are unique values assigned by the RM
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
        unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
        unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid);
        if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        opal_output_verbose(2, orte_ess_base_framework.framework_output,
                            "%s transport key %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), string_key);
        asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key);
        putenv(envar);
        added_transport_keys = true;
        /* cannot free the envar as that messes up our environ */
        free(string_key);
    }

    /* retrieve our topology */
    val = NULL;
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO,
                                   ORTE_PROC_MY_NAME, &val, OPAL_STRING);
    if (OPAL_SUCCESS == ret && NULL != val) {
        /* load the topology */
        if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            free(val);
            error = "setting topology";
            goto error;
        }
        if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) {
            ret = OPAL_ERROR;
            free(val);
            hwloc_topology_destroy(opal_hwloc_topology);
            error = "setting topology";
            goto error;
        }
        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
                                          (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                                           HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                           HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        /* now load the topology */
        if (0 != hwloc_topology_load(opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        free(val);
        /* filter the cpus thru any default cpu set */
        if (OPAL_SUCCESS != (ret = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) {
            error = "filtering topology";
            goto error;
        }
    } else {
        /* it wasn't passed down to us, so go get it */
        if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
            error = "topology discovery";
            goto error;
        }
        /* push it into the PMIx database in case someone
         * tries to retrieve it so we avoid an attempt to
         * get it again */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
        kv->type = OPAL_STRING;
        if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) {
            error = "topology export";
            goto error;
        }
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) {
            error = "topology store";
            goto error;
        }
        OBJ_RELEASE(kv);
    }

    /* get our local peers */
    if (0 < orte_process_info.num_local_peers) {
        /* if my local rank if too high, then that's an error */
        if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) {
            ret = ORTE_ERR_BAD_PARAM;
            error = "num local peers";
            goto error;
        }
        /* retrieve the local peers */
        OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS,
                              ORTE_PROC_MY_NAME, &val, OPAL_STRING);
        if (OPAL_SUCCESS == ret && NULL != val) {
            peers = opal_argv_split(val, ',');
            free(val);
            /* and their cpusets, if available */
            OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS, ORTE_PROC_MY_NAME, &val, OPAL_STRING);
            if (OPAL_SUCCESS == ret && NULL != val) {
                cpusets = opal_argv_split(val, ':');
                free(val);
            } else {
                cpusets = NULL;
            }
        } else {
            peers = NULL;
            cpusets = NULL;
        }
    } else {
        peers = NULL;
        cpusets = NULL;
    }

    /* set the locality */
    if (NULL != peers) {
        /* indentify our cpuset */
        if (NULL != cpusets) {
            mycpuset = cpusets[orte_process_info.my_local_rank];
        } else {
            mycpuset = NULL;
        }
        name.jobid = ORTE_PROC_MY_NAME->jobid;
        for (i=0; NULL != peers[i]; i++) {
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCALITY);
            kv->type = OPAL_UINT16;
            name.vpid = strtoul(peers[i], NULL, 10);
            if (name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* we are fully local to ourselves */
                u16 = OPAL_PROC_ALL_LOCAL;
            } else if (NULL == mycpuset || NULL == cpusets[i] ||
                       0 == strcmp(cpusets[i], "UNBOUND")) {
                /* all we can say is that it shares our node */
                u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
            } else {
                /* we have it, so compute the locality */
                u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]);
            }
            OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
                                 "%s ess:pmi:locality: proc %s locality %x",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&name), u16));
            kv->data.uint16 = u16;
            ret = opal_pmix.store_local(&name, kv);
            if (OPAL_SUCCESS != ret) {
                error = "local store of locality";
                opal_argv_free(peers);
                opal_argv_free(cpusets);
                goto error;
            }
            OBJ_RELEASE(kv);
        }
        opal_argv_free(peers);
        opal_argv_free(cpusets);
    }

    /* now that we have all required info, complete the setup */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_app_setup";
        goto error;
    }

    /* setup process binding */
    if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
        error = "proc_binding";
        goto error;
    }

    /* this needs to be set to enable debugger use when direct launched */
    if (NULL == orte_process_info.my_daemon_uri) {
        orte_standalone_operation = true;
    }

    /* set max procs */
    if (orte_process_info.max_procs < orte_process_info.num_procs) {
        orte_process_info.max_procs = orte_process_info.num_procs;
    }

    /***  PUSH DATA FOR OTHERS TO FIND   ***/

    /* push our RML URI in case others need to talk directly to us */
    rmluri = orte_rml.get_contact_info();
    /* push it out for others to use */
    OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        error = "pmix put uri";
        goto error;
    }
    free(rmluri);

    /* push our hostname so others can find us, if they need to */
    OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        error = "db store hostname";
        goto error;
    }

    /* if we are an ORTE app - and not an MPI app - then
     * we need to exchange our connection info here.
     * MPI_Init has its own modex, so we don't need to do
     * two of them. However, if we don't do a modex at all,
     * then processes have no way to communicate
     *
     * NOTE: only do this when the process originally launches.
     * Cannot do this on a restart as the rest of the processes
     * in the job won't be executing this step, so we would hang
     */
    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
        opal_pmix.fence(NULL, 0);
    }

    return ORTE_SUCCESS;

error:
    if (!progress_thread_running) {
        /* can't send the help message, so ensure it
         * comes out locally
         */
        orte_show_help_finalize();
    }
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    return ret;
}
Beispiel #8
0
/*
 * Backend-switching test.
 *
 * Exports the current machine's topology to an XML buffer and to a temporary
 * XML file, then repeatedly switches a second topology between the XML,
 * XML-buffer, custom, synthetic and fsroot backends: first without loading,
 * then loading and checking the topology after each switch.
 *
 * Returns 0; any failed expectation aborts through assert().
 */
int main(void)
{
    hwloc_topology_t topology1, topology2;
    char *xmlbuf;
    int xmlbuflen;
    char xmlfile[] = "hwloc_backends.tmpxml.XXXXXX";
    int xmlbufok = 0, xmlfileok = 0, xmlfilefd;
    hwloc_obj_t sw;
    int err;

    printf("trying to export topology to XML buffer and file for later...\n");
    hwloc_topology_init(&topology1);
    hwloc_topology_load(topology1);
    assert(hwloc_topology_is_thissystem(topology1));
    if (hwloc_topology_export_xmlbuffer(topology1, &xmlbuf, &xmlbuflen) < 0)
        printf("XML buffer export failed (%s), ignoring\n", strerror(errno));
    else
        xmlbufok = 1;
    /* mkstemp() creates the file atomically, unlike mktemp() which only
     * generates a name and leaves a window for another process to create it.
     * The descriptor is kept open until the final cleanup. */
    xmlfilefd = mkstemp(xmlfile);
    if (xmlfilefd < 0 || hwloc_topology_export_xml(topology1, xmlfile) < 0)
        printf("XML file export failed (%s), ignoring\n", strerror(errno));
    else
        xmlfileok = 1;


    /* switch between backends without ever loading */
    printf("init...\n");
    hwloc_topology_init(&topology2);
    if (xmlfileok) {
        printf("switching to xml...\n");
        assert(!hwloc_topology_set_xml(topology2, xmlfile));
    }
    if (xmlbufok) {
        printf("switching to xmlbuffer...\n");
        assert(!hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen));
    }
    printf("switching to custom...\n");
    hwloc_topology_set_custom(topology2);
    printf("switching to synthetic...\n");
    hwloc_topology_set_synthetic(topology2, "machine:2 node:3 cache:2 pu:4");
    printf("switching sysfs fsroot to // ...\n");
    hwloc_topology_set_fsroot(topology2, "//"); /* valid path that won't be recognized as '/' */
    printf("switching sysfs fsroot to / ...\n");
    hwloc_topology_set_fsroot(topology2, "/");

    /* same switches again, this time loading and checking after each one */
    if (xmlfileok) {
        printf("switching to xml and loading...\n");
        assert(!hwloc_topology_set_xml(topology2, xmlfile));
        hwloc_topology_load(topology2);
        hwloc_topology_check(topology2);
        assert(!hwloc_topology_is_thissystem(topology2));
    }
    if (xmlbufok) {
        printf("switching to xmlbuffer and loading...\n");
        assert(!hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen));
        hwloc_topology_load(topology2);
        hwloc_topology_check(topology2);
        assert(!hwloc_topology_is_thissystem(topology2));
    }
    printf("switching to custom and loading...\n");
    hwloc_topology_set_custom(topology2);
    sw = hwloc_custom_insert_group_object_by_parent(topology2, hwloc_get_root_obj(topology2), 0);
    assert(sw);
    hwloc_custom_insert_topology(topology2, sw, topology1, NULL);
    hwloc_topology_load(topology2);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    /* don't try fsroot here because it fails on !linux, we would revert back to custom, which requires some insert to make the topology valid */
    printf("switching to synthetic and loading...\n");
    hwloc_topology_set_synthetic(topology2, "machine:2 node:3 cache:2 pu:4");
    hwloc_topology_load(topology2);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    printf("switching sysfs fsroot to // and loading...\n");
    hwloc_topology_set_fsroot(topology2, "//"); /* valid path that won't be recognized as '/' */
    hwloc_topology_load(topology2);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2)); /* earlier fsroot worked, or we're still synthetic */
    printf("switching sysfs fsroot to / and loading...\n");
    err = hwloc_topology_set_fsroot(topology2, "/");
    hwloc_topology_load(topology2);
    hwloc_topology_check(topology2);
    assert(hwloc_topology_is_thissystem(topology2) == !err); /* on Linux, '/' is recognized as thissystem. on !Linux, set_fsroot() failed and we went back to synthetic */

    printf("switching to synthetic...\n");
    hwloc_topology_set_synthetic(topology2, "machine:2 node:3 cache:2 pu:4");

    hwloc_topology_destroy(topology2);


    if (xmlbufok)
        hwloc_free_xmlbuffer(topology1, xmlbuf);
    /* mkstemp() created the file even if the export into it failed, so clean
     * up whenever we hold a descriptor rather than only when xmlfileok. */
    if (xmlfilefd >= 0) {
        unlink(xmlfile);
        close(xmlfilefd);
    }
    hwloc_topology_destroy(topology1);

    return 0;
}
Beispiel #9
0
/*
 * Backend-selection test.
 *
 * Exports the current machine's topology (with a "Foo"="Bar" info attached to
 * the root object) to an XML buffer and a temporary XML file, then builds a
 * fresh topology for each backend selection method in turn: explicit XML
 * file, explicit XML buffer, explicit synthetic description, and finally the
 * HWLOC_XMLFILE, HWLOC_SYNTHETIC and HWLOC_COMPONENTS environment variables.
 * The environment variables are set cumulatively, so the order of the
 * sections below matters.
 *
 * Returns 0; any failed expectation aborts through assert().
 */
int main(void)
{
  hwloc_topology_t topology1, topology2;
  char *xmlbuf;
  int xmlbuflen;
  char xmlfile[] = "hwloc_backends.tmpxml.XXXXXX";
  /* putenv() keeps the pointer it is given instead of copying the string, so
   * this buffer must outlive main(): with automatic storage the environment
   * would hold a dangling pointer for anything that runs at exit. */
  static char env[64];
  int xmlbufok = 0, xmlfileok = 0, xmlfilefd;
  const char *orig_backend_name;

  putenv("HWLOC_LIBXML_CLEANUP=1");

  printf("trying to export topology to XML buffer and file for later...\n");
  hwloc_topology_init(&topology1);
  hwloc_topology_load(topology1);
  orig_backend_name = get_backend_name(topology1);
  hwloc_obj_add_info(hwloc_get_root_obj(topology1), "Foo", "Bar");
  assert(hwloc_topology_is_thissystem(topology1));
  if (hwloc_topology_export_xmlbuffer(topology1, &xmlbuf, &xmlbuflen) < 0)
    printf("XML buffer export failed (%s), ignoring\n", strerror(errno));
  else
    xmlbufok = 1;
  xmlfilefd = mkstemp(xmlfile);
  if (xmlfilefd < 0 || hwloc_topology_export_xml(topology1, xmlfile) < 0)
    printf("XML file export failed (%s), ignoring\n", strerror(errno));
  else
    xmlfileok = 1;


  /* init+config+destroy without loading */
  printf("init...\n");
  hwloc_topology_init(&topology2);
  if (xmlfileok) {
    printf("switching to xml...\n");
    assert(!hwloc_topology_set_xml(topology2, xmlfile));
  }
  if (xmlbufok) {
    printf("switching to xmlbuffer...\n");
    assert(!hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen));
  }
  printf("switching to synthetic...\n");
  hwloc_topology_set_synthetic(topology2, "machine:2 node:3 l1:2 pu:4");
  hwloc_topology_destroy(topology2);

  /* init+xml+load+destroy */
  if (xmlfileok) {
    printf("switching to xml and loading...\n");
    hwloc_topology_init(&topology2);
    assert(!hwloc_topology_set_xml(topology2, xmlfile));
    hwloc_topology_load(topology2);
    assert_backend_name(topology2, orig_backend_name);
    assert_foo_bar(topology2, 1);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    hwloc_topology_destroy(topology2);
  }

  /* init+xmlbuf+load+destroy */
  if (xmlbufok) {
    printf("switching to xmlbuffer and loading...\n");
    hwloc_topology_init(&topology2);
    assert(!hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen));
    hwloc_topology_load(topology2);
    assert_backend_name(topology2, orig_backend_name);
    assert_foo_bar(topology2, 1);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    hwloc_topology_destroy(topology2);
  }

  /* init+synthetic+load+destroy */
  printf("switching to synthetic and loading...\n");
  hwloc_topology_init(&topology2);
  hwloc_topology_set_synthetic(topology2, "machine:2 node:3 l3i:2 pu:4");
  hwloc_topology_load(topology2);
  assert_backend_name(topology2, "Synthetic");
  assert_foo_bar(topology2, 0);
  assert(hwloc_get_nbobjs_by_type(topology2, HWLOC_OBJ_PU) == 2*3*2*4);
  hwloc_topology_check(topology2);
  assert(!hwloc_topology_is_thissystem(topology2));
  hwloc_topology_destroy(topology2);

  /* xmlenv+init+load+destroy */
  if (xmlfileok) {
    printf("switching to xml by env and loading...\n");
    snprintf(env, sizeof(env), "HWLOC_XMLFILE=%s", xmlfile);
    putenv(env);
    hwloc_topology_init(&topology2);
    hwloc_topology_load(topology2);
    assert_backend_name(topology2, orig_backend_name);
    assert_foo_bar(topology2, 1);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    hwloc_topology_destroy(topology2);
  }

  /* syntheticenv+init+load+destroy, synthetic env overrides xml */
  printf("switching to synthetic by env and loading...\n");
  putenv("HWLOC_SYNTHETIC=node:3 pu:3");
  hwloc_topology_init(&topology2);
  hwloc_topology_load(topology2);
  assert_backend_name(topology2, "Synthetic");
  assert_foo_bar(topology2, 0);
  assert(hwloc_get_nbobjs_by_type(topology2, HWLOC_OBJ_PU) == 3*3);
  hwloc_topology_check(topology2);
  assert(!hwloc_topology_is_thissystem(topology2));
  hwloc_topology_destroy(topology2);

  /* componentsenv+init+load+destroy for testing defaults, overrides synthetic/xml/fsroot envs */
  printf("switching to default components by env and loading...\n");
  putenv("HWLOC_COMPONENTS=,"); /* don't set to empty since it means 'unset' on windows */
  hwloc_topology_init(&topology2);
  hwloc_topology_load(topology2);
  assert_backend_name(topology2, orig_backend_name);
  assert_foo_bar(topology2, 0);
  hwloc_topology_check(topology2);
  assert(hwloc_topology_is_thissystem(topology2));
  hwloc_topology_destroy(topology2);

  if (xmlbufok)
    hwloc_free_xmlbuffer(topology1, xmlbuf);
  /* the temporary file exists whenever mkstemp() succeeded, even if the
   * export into it failed, hence the test on the descriptor */
  if (xmlfilefd >= 0) {
    unlink(xmlfile);
    close(xmlfilefd);
  }
  hwloc_topology_destroy(topology1);

  return 0;
}