Example #1
0
int
shmem_runtime_init(void)
{
    int spawned, appnum, my_node;
    int initialized;

    if (PMI2_SUCCESS != PMI2_Initialized()) {
        return 1;
    }

    if (!initialized) {
        if (PMI2_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
            return 2;
        }
    }

    max_name_len = PMI2_MAX_VALLEN;
    kvs_name = (char*) malloc(max_name_len);
    if (NULL == kvs_name) return 4;

    max_key_len = PMI2_MAX_KEYLEN;
    kvs_key = (char*) malloc(max_key_len);
    if (NULL == kvs_key) return 6;

    max_val_len = PMI2_MAX_VALLEN;
    kvs_value = (char*) malloc(max_val_len);
    if (NULL == kvs_value) return 8;

    if (PMI2_SUCCESS != PMI2_Job_GetId(kvs_name, max_name_len)) {
        return 7;
    }

    return 0;
}
static bool pmi_startup(void)
{
#if WANT_CRAY_PMI2_EXT
    int spawned, size, rank, appnum;

    if (PMI2_Initialized()) {
        /* already initialized */
        return true;
    }
    /* if we can't startup PMI, we can't be used */
    if (PMI_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
        return false;
    }
    /* flag that we started PMI */
    started_by_me = true;
    /* ignore the info - we'll pick it up elsewhere */
    return true;
#else
    PMI_BOOL initialized;

    if (PMI_SUCCESS != PMI_Initialized(&initialized)) {
        return false;
    }
    if (PMI_TRUE != initialized) {
        if (PMI_SUCCESS != PMI_Init(&initialized)) {
            return false;
        }
        /* flag that we started PMI */
        started_by_me = true;
    }
    return true;
#endif
}
Example #3
0
static int test_item2(void)
{
    int rc = 0;

    log_assert(PMI2_Initialized(), "");

    return rc;
}
Example #4
0
void chpl_comm_ofi_oob_init(void) {
  int spawned, size, rank, appnum;

  if (PMI2_Initialized() != PMI_TRUE) {
    PMI_CHK(PMI2_Init(&spawned, &size, &rank, &appnum));
    assert(spawned == 0);
    chpl_nodeID = (c_nodeid_t) rank;
    chpl_numNodes = (int32_t) size;
  }

  DBG_PRINTF(DBG_OOB, "OOB init: node %" PRI_c_nodeid_t " of %" PRId32,
             chpl_nodeID, chpl_numNodes);
}
int orte_grpcomm_pmi_close(void)
{
#if WANT_CRAY_PMI2_EXT
    if (started_by_me && PMI2_Initialized()) {
        PMI2_Finalize();
    }
#else
    PMI_BOOL initialized;

    /* if we weren't selected, cleanup */
    if (started_by_me &&
        PMI_SUCCESS == PMI_Initialized(&initialized) &&
        PMI_TRUE == initialized) {
        PMI_Finalize();
    }
#endif

    return ORTE_SUCCESS;
}
Example #6
0
static int cray_init(void)
{
    int i, spawned, size, rank, appnum, my_node;
    int rc, ret = OPAL_ERROR;
    char *pmapping = NULL;
    char buf[PMI2_MAX_ATTRVALUE];
    int found;
    uint32_t jobfam;

    ++pmix_init_count;

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    pmix_size = size;
    pmix_rank = rank;
    pmix_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;

    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    pmix_usize = atoi(buf);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
         PMI2_Finalize();
         ret = OPAL_ERR_OUT_OF_RESOURCE;
         goto err_exit;
    }
    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam);
    if (rc != 1) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        rc = OPAL_ERROR;
        goto err_exit;
    }

    pmix_jobid = jobfam << 16;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    pmix_pname.jid = pmix_jobid;
    pmix_pname.vid = pmix_rank;
    opal_proc_set_name((opal_process_name_t*)&pmix_pname);
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s",
                        OPAL_NAME_PRINT(*(opal_process_name_t*)&pmix_pname),pmix_pname.jid,pmix_pname.vid,pmix_kvs_name);

    pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    if( !found || PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        return OPAL_ERROR;
    }

    pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks);
    if (NULL == pmix_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    free(pmapping);

    /* find ourselves */
    for (i=0; i < pmix_nlranks; i++) {
        if (pmix_rank == pmix_lranks[i]) {
            pmix_lrank = i;
            pmix_nrank = my_node;
            break;
        }
    }

    return OPAL_SUCCESS;
err_exit:
    PMI2_Finalize();
    return ret;
}
Example #7
0
void chpl_comm_ofi_oob_fini(void) {
  if (PMI2_Initialized() == PMI_TRUE) {
    DBG_PRINTF(DBG_OOB, "OOB finalize");
    PMI_CHK(PMI2_Finalize());
  }
}
Example #8
0
static int cray_init(void)
{
    int i, spawned, size, rank, appnum, my_node;
    int rc, ret = OPAL_ERROR;
    char *pmapping = NULL;
    char buf[PMI2_MAX_ATTRVALUE];
    int found;
    int major, minor, revision;
    uint32_t jobfam;
    opal_value_t kv;
    opal_process_name_t ldr;
    char nmtmp[64];
    char *str, **localranks = NULL;

    ++pmix_init_count;

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray: pmi already initialized",
                        OPAL_NAME_PRINT(pmix_pname));
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    pmix_size = size;
    pmix_rank = rank;
    pmix_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;
    pmix_vallen_threshold = PMI2_MAX_VALLEN * 3;
    pmix_vallen_threshold >>= 2;

    /*
     * get the version info
     */

    if (PMI_SUCCESS != PMI_Get_version_info(&major,&minor,&revision)) {
        return OPAL_ERROR;
    }

    snprintf(cray_pmi_version, sizeof(cray_pmi_version),
             "%d.%d.%d", major, minor, revision);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
         PMI2_Finalize();
         ret = OPAL_ERR_OUT_OF_RESOURCE;
         goto err_exit;
    }

    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam);
    if (rc != 1) {
        opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                           "%s pmix:cray: pmix_kvs_name %s",
                            OPAL_NAME_PRINT(pmix_pname), pmix_kvs_name);
        rc = OPAL_ERROR;
        goto err_exit;
    }

    pmix_jobid = jobfam << 16;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    pmix_pname.jobid = pmix_jobid;
    pmix_pname.vpid = pmix_rank;
    opal_proc_set_name(&pmix_pname);
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s",
                        OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid,pmix_kvs_name);

    pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    if( !found || PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        return OPAL_ERROR;
    }

    pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks);
    if (NULL == pmix_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    free(pmapping);

    // setup hash table
    opal_pmix_base_hash_init();

    /* save the job size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = pmix_size;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the appnum */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = pmix_appnum;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }

    pmix_usize = atoi(buf);

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = pmix_usize;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = pmix_jobid;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = pmix_nlranks;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    ldr.vpid = pmix_lranks[0];
    ldr.jobid = pmix_pname.jobid;

    /* find ourselves and build up a string for local peer info */
    memset(nmtmp, 0, 64);
    for (i=0; i < pmix_nlranks; i++) {
        ret = snprintf(nmtmp, 64, "%d", pmix_lranks[i]);
        opal_argv_append_nosize(&localranks, nmtmp);
        if (pmix_rank == pmix_lranks[i]) {
            pmix_lrank = i;
            pmix_nrank = i;
        }
    }

    str = opal_argv_join(localranks, ',');
    opal_argv_free(localranks);

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
    kv.type = OPAL_STRING;
    kv.data.string = str;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = *(uint64_t*)&ldr;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }

    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = pmix_lrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }

    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = pmix_nrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    return OPAL_SUCCESS;
err_exit:
    PMI2_Finalize();
    return ret;
}
Example #9
0
static int s2_init(void)
{
    int spawned, size, rank, appnum;
    int rc, ret = OPAL_ERROR;
    char buf[16];
    int found;
    int my_node;
    char *tmp;
    uint32_t jobfam, stepid;
    int i;

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    s2_jsize = size;
    s2_rank = rank;
    s2_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;
    pmix_vallen_threshold = PMI2_MAX_VALLEN * 3;
    pmix_vallen_threshold >>= 2;

    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    s2_usize = atoi(buf);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
        PMI2_Finalize();
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation. So we translate
     * this as the overall job number equating to our job family, and
     * the individual number equating to our local jobid
     */

    jobfam = strtoul(pmix_kvs_name, &tmp, 10);
    if (NULL == tmp) {
        /* hmmm - no '.', so let's just use zero */
        stepid = 0;
    } else {
        tmp++; /* step over the '.' */
        stepid = strtoul(tmp, NULL, 10);
    }
    /* now build the jobid */
    s2_jobid = (jobfam << 16) | stepid;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    s2_pname.jobid = s2_jobid;
    s2_pname.vpid = s2_rank;
    opal_proc_set_name(&s2_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s2: assigned tmp name",
                        OPAL_NAME_PRINT(s2_pname));

    char *pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    if( !found || PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        return OPAL_ERROR;
    }

    s2_lranks = mca_common_pmi2_parse_pmap(pmapping, s2_pname.vpid, &my_node, &s2_nlranks);
    if (NULL == s2_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    free(pmapping);

    /* find ourselves */
    for (i=0; i < s2_nlranks; i++) {
        if (s2_rank == s2_lranks[i]) {
            s2_lrank = i;
            s2_nrank = i;
            break;
        }
    }

    /* increment the init count */
    ++pmix_init_count;

    return OPAL_SUCCESS;
 err_exit:
    PMI2_Finalize();
    return ret;
}
Example #10
0
bool mca_common_pmi_init (void) {
    if (0 < mca_common_pmi_init_count++) {
        return true;
    }

#if WANT_PMI2_SUPPORT
    {
        int spawned, size, rank, appnum;
        int rc;

        size = -1;
        rank = -1;
        appnum = -1;

        /* deal with a Slurm bug by first checking if we were
         * even launched by a PMI server before attempting
         * to use PMI */
        if (NULL == getenv("PMI_FD")) {
            mca_common_pmi_init_count--;
            return false;
        }

        /* if we can't startup PMI, we can't be used */
        if (PMI2_Initialized ()) {
            return true;
        }

        if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
            opal_show_help("help-common-pmi.txt", "pmi2-init-failed", true, rc);
            mca_common_pmi_init_count--;
            return false;
        }
        if (size < 0 || rank < 0 ) {
            opal_show_help("help-common-pmi.txt", "pmi2-init-returned-bad-values", true);
            mca_common_pmi_init_count--;
            return false;
        }
        mca_common_pmi_init_size = size;
        mca_common_pmi_init_rank = rank;
        mca_common_pmi_init_count--;
        return true;
    }
#else
    {
        PMI_BOOL initialized;

        /* deal with a Slurm bug by first checking if we were
         * even launched by a PMI server before attempting
         * to use PMI */
        if (NULL == getenv("PMI_FD")) {
            mca_common_pmi_init_count--;
            return false;
        }

        if (PMI_SUCCESS != PMI_Initialized(&initialized)) {
            mca_common_pmi_init_count--;
            return false;
        }

        if (PMI_TRUE != initialized && PMI_SUCCESS != PMI_Init(&initialized)) {
            mca_common_pmi_init_count--;
            return false;
        }
    }
#endif

    return true;
}
Example #11
0
static int mca_initialize_pmi_v2(void)
{
    int spawned, size, rank, appnum;
    int rc, ret = OPAL_ERROR;

    /* deal with a Slurm bug by first checking if we were
     * even launched by a PMI server before attempting
     * to use PMI */
    if (NULL == getenv("PMI_FD")) {
        return OPAL_ERROR;
    }

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-common-pmi.txt", "pmi2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_output(0, "SIZE %d RANK %d", size, rank);
        opal_show_help("help-common-pmi.txt", "pmi2-init-returned-bad-values", true);
        goto err_exit;
    }


    pmi_size = size;
    pmi_rank = rank;
    pmi_appnum = appnum;

    pmi_vallen_max = PMI2_MAX_VALLEN;
    pmi_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmi_keylen_max = PMI2_MAX_KEYLEN;


    char buf[16];
    int found;
    
    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    pmi_usize = atoi(buf);

    pmi_kvs_name = (char*)malloc(pmi_kvslen_max);
    if( pmi_kvs_name == NULL ){
         PMI2_Finalize();
         ret = OPAL_ERR_OUT_OF_RESOURCE;
         goto err_exit;
    }
    rc = PMI2_Job_GetId(pmi_kvs_name, pmi_kvslen_max);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    return OPAL_SUCCESS;
err_exit:
    PMI2_Finalize();
    return ret;
}
Example #12
0
static int s2_init(void)
{
    int spawned, size, rank, appnum;
    int rc, ret = OPAL_ERROR;
    char buf[16];
    int found;
    int my_node;
    uint32_t stepid;
    int i;
    opal_process_name_t ldr;
    opal_value_t kv;
    char **localranks;
    char *str;
    char nmtmp[64];
    opal_process_name_t wildcard_rank;

    /* if we can't startup PMI, we can't be used */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    // setup hash table so we always can finalize it
    opal_pmix_base_hash_init();

    if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    s2_jsize = size;
    s2_rank = rank;
    s2_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;
    pmix_vallen_threshold = PMI2_MAX_VALLEN * 3;
    pmix_vallen_threshold >>= 2;

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
        PMI2_Finalize();
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        free(pmix_kvs_name);
        goto err_exit;
    }

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    s2_pname.jobid = strtoul(pmix_kvs_name, &str, 10);
    s2_pname.jobid = (s2_pname.jobid << 16) & 0xffff0000;
    if (NULL != str) {
        stepid = strtoul(str, NULL, 10);
        s2_pname.jobid |= (stepid & 0x0000ffff);
    }
    s2_pname.vpid = s2_rank;
    opal_proc_set_name(&s2_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s2: assigned tmp name",
                        OPAL_NAME_PRINT(s2_pname));

    /* setup wildcard rank*/
    wildcard_rank = OPAL_PROC_MY_NAME;
    wildcard_rank.vpid = OPAL_VPID_WILDCARD;

    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation.
     */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s2_pname.jobid;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);  // frees pmix_kvs_name

    /* save the job size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = size;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the appnum */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = appnum;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    /* save it */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = atoi(buf);
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* push this into the dstore for subsequent fetches */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_MAX_PROCS);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = atoi(buf);
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    char *pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    if( !found || PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        return OPAL_ERROR;
    }

    s2_lranks = mca_common_pmi2_parse_pmap(pmapping, s2_pname.vpid, &my_node, &s2_nlranks);
    if (NULL == s2_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    free(pmapping);

    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s2_nlranks;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    s2_lrank = 0;
    s2_nrank = 0;
    ldr.vpid = rank;
    localranks = NULL;
    if (0 < s2_nlranks && NULL != s2_lranks) {
        /* note the local ldr */
        ldr.vpid = s2_lranks[0];
        /* find ourselves */
        ldr.jobid = s2_pname.jobid;
        ldr.vpid = s2_pname.vpid;
        memset(nmtmp, 0, 64);
        for (i=0; i < s2_nlranks; i++) {
            (void)snprintf(nmtmp, 64, "%d", s2_lranks[i]);
            opal_argv_append_nosize(&localranks, nmtmp);
            if (s2_rank == s2_lranks[i]) {
                s2_lrank = i;
                s2_nrank = i;
            }
        }
        str = opal_argv_join(localranks, ',');
        opal_argv_free(localranks);
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
        kv.type = OPAL_STRING;
        kv.data.string = str;
        if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
            OPAL_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            goto err_exit;
        }
        OBJ_DESTRUCT(&kv);
    }

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = *(uint64_t*)&ldr;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = s2_lrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = s2_nrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* increment the init count */
    ++pmix_init_count;

    return OPAL_SUCCESS;
err_exit:
    PMI2_Finalize();
    return ret;
}