Esempio n. 1
0
/*
 * Initialize the PMI2-based "s2" component.
 *
 * Starts PMI2, derives a temporary process name from the PMI2 job id,
 * and publishes through opal_pmix_base_store() the job id, job size,
 * appnum, universe size, max procs, local size, local peers, local
 * leader, local rank and node rank.
 *
 * Returns OPAL_SUCCESS if PMI2 was already initialized or if every step
 * succeeds.  If PMI2_Init() itself fails, OPAL_ERROR is returned directly.
 * Any later failure goes through err_exit, which frees the process-mapping
 * buffer and pmix_kvs_name (when allocated here), calls PMI2_Finalize()
 * exactly once, and returns the error code of the step that failed.
 */
static int s2_init(void)
{
    int spawned, size, rank, appnum;
    int rc, ret = OPAL_ERROR;
    char buf[16];
    int found;
    int my_node;
    uint32_t stepid;
    int i;
    int kvs_allocated = 0;   /* non-zero once pmix_kvs_name was malloc'ed here */
    opal_process_name_t ldr;
    opal_value_t kv;
    char **localranks;
    char *str;
    char *pmapping = NULL;
    char nmtmp[64];
    opal_process_name_t wildcard_rank;

    /* PMI2 is already up: nothing left to do */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    // setup hash table so we always can finalize it
    opal_pmix_base_hash_init();

    if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    s2_jsize = size;
    s2_rank = rank;
    s2_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;
    /* threshold is 3/4 of the maximum value length */
    pmix_vallen_threshold = PMI2_MAX_VALLEN * 3;
    pmix_vallen_threshold >>= 2;

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
        /* err_exit finalizes PMI2; do not finalize twice */
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    kvs_allocated = 1;
    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm
     *
     * Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation.
     *
     * NOTE(review): strtoul() leaves str pointing at the first unparsed
     * character (the '.'), so the second strtoul() sees no digits and
     * stepid is always 0.  Left unchanged because fixing it alters the
     * generated jobid - confirm against the other components that derive
     * the same name before skipping the separator.
     */
    s2_pname.jobid = strtoul(pmix_kvs_name, &str, 10);
    s2_pname.jobid = (s2_pname.jobid << 16) & 0xffff0000;
    if (NULL != str) {
        stepid = strtoul(str, NULL, 10);
        s2_pname.jobid |= (stepid & 0x0000ffff);
    }
    s2_pname.vpid = s2_rank;
    opal_proc_set_name(&s2_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s2: assigned tmp name",
                        OPAL_NAME_PRINT(s2_pname));

    /* setup wildcard rank*/
    wildcard_rank = OPAL_PROC_MY_NAME;
    wildcard_rank.vpid = OPAL_VPID_WILDCARD;

    /* save the job id */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s2_pname.jobid;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the job size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = size;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the appnum */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = appnum;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* start from an empty string so atoi() below yields 0 rather than
     * reading uninitialized bytes if the call leaves buf untouched */
    buf[0] = '\0';
    rc = PMI2_Info_GetJobAttr("universeSize", buf, sizeof(buf), &found);
    if( PMI2_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        /* ret holds OPAL_SUCCESS from the previous store */
        ret = OPAL_ERROR;
        goto err_exit;
    }
    /* save it */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = atoi(buf);
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* push this into the dstore for subsequent fetches */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_MAX_PROCS);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = atoi(buf);
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(ret);
        goto err_exit;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    /* test rc first: found is only examined after a successful call */
    if( PMI2_SUCCESS != rc || !found ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        ret = OPAL_ERROR;
        goto err_exit;
    }

    s2_lranks = mca_common_pmi2_parse_pmap(pmapping, s2_pname.vpid, &my_node, &s2_nlranks);
    if (NULL == s2_lranks) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(ret);
        goto err_exit;
    }

    free(pmapping);
    pmapping = NULL;

    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s2_nlranks;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    s2_lrank = 0;
    s2_nrank = 0;
    /* Fill in both fields up front so the value stored below is fully
     * initialized even when there are no local ranks.
     * NOTE(review): as in the original code, the "local leader" that gets
     * stored is this process' own name, not s2_lranks[0] - confirm intent. */
    ldr.jobid = s2_pname.jobid;
    ldr.vpid = s2_pname.vpid;
    localranks = NULL;
    if (0 < s2_nlranks && NULL != s2_lranks) {
        /* build the comma-separated list of local ranks and find ourselves */
        memset(nmtmp, 0, 64);
        for (i=0; i < s2_nlranks; i++) {
            (void)snprintf(nmtmp, 64, "%d", s2_lranks[i]);
            opal_argv_append_nosize(&localranks, nmtmp);
            if (s2_rank == s2_lranks[i]) {
                s2_lrank = i;
                s2_nrank = i;
            }
        }
        str = opal_argv_join(localranks, ',');
        opal_argv_free(localranks);
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
        kv.type = OPAL_STRING;
        /* kv takes over str; OBJ_DESTRUCT below disposes of it */
        kv.data.string = str;
        if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) {
            OPAL_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            goto err_exit;
        }
        OBJ_DESTRUCT(&kv);
    }

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    /* copy the name bytes instead of reading them through a uint64_t* cast */
    memcpy(&kv.data.uint64, &ldr, sizeof(kv.data.uint64));
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = s2_lrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = s2_nrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* increment the init count */
    ++pmix_init_count;

    return OPAL_SUCCESS;
err_exit:
    free(pmapping);
    if (kvs_allocated) {
        /* do not leave a dangling pointer in the file-level variable */
        free(pmix_kvs_name);
        pmix_kvs_name = NULL;
    }
    PMI2_Finalize();
    return ret;
}
Esempio n. 2
0
/*
 * Look up `key` for the process identified by `uid` in the hash store and
 * return, through `data`, a pointer to the stored value itself (no copy).
 *
 * For OPAL_STRING the stored string pointer is returned; for the other
 * supported types (OPAL_UINT32, OPAL_UINT16, OPAL_INT, OPAL_UINT,
 * OPAL_BYTE_OBJECT) the address of the matching field is returned.
 *
 * Returns:
 *   OPAL_SUCCESS               value located, *data set
 *   OPAL_ERR_BAD_PARAM         key is NULL
 *   OPAL_ERR_TAKE_NEXT_OPTION  no data for this proc, or key not present
 *   OPAL_ERR_TYPE_MISMATCH     stored value has a different type
 *   OPAL_ERR_NOT_SUPPORTED     requested type is not handled here
 */
static int fetch_pointer(const opal_identifier_t *uid,
                         const char *key,
                         void **data, opal_data_type_t type)
{
    opal_identifier_t proc_id;
    proc_data_t *pdata;
    opal_value_t *val;

    /* the caller's identifier may be unaligned, so work on a local copy */
    memcpy(&proc_id, uid, sizeof(opal_identifier_t));

    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:fetch_pointer: searching for key %s on proc %" PRIu64 "",
                         (NULL == key) ? "NULL" : key, proc_id));

    /* a key is mandatory */
    if (NULL == key) {
        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
        return OPAL_ERR_BAD_PARAM;
    }

    /* nothing recorded for this proc: let another component try */
    pdata = lookup_opal_proc(&hash_data, proc_id);
    if (NULL == pdata) {
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    /* key not recorded for this proc: let another component try */
    val = lookup_keyval(pdata, key);
    if (NULL == val) {
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    /* first make sure the requested type is one we handle and that it
     * agrees with what was stored */
    switch (type) {
    case OPAL_STRING:
    case OPAL_UINT32:
    case OPAL_UINT16:
    case OPAL_INT:
    case OPAL_UINT:
    case OPAL_BYTE_OBJECT:
        if (type != val->type) {
            return OPAL_ERR_TYPE_MISMATCH;
        }
        break;
    default:
        OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
        return OPAL_ERR_NOT_SUPPORTED;
    }

    /* then hand back the location of the stored value */
    if (OPAL_STRING == type) {
        *data = val->data.string;
    } else if (OPAL_UINT32 == type) {
        *data = &val->data.uint32;
    } else if (OPAL_UINT16 == type) {
        *data = &val->data.uint16;
    } else if (OPAL_INT == type) {
        *data = &val->data.integer;
    } else if (OPAL_UINT == type) {
        *data = &val->data.uint;
    } else {
        /* only OPAL_BYTE_OBJECT remains after the validation above */
        *data = &val->data.bo;
    }

    return OPAL_SUCCESS;
}
Esempio n. 3
0
/*
 * Copy every value stored for the process `uid` whose key matches `key`
 * and append the copies to the list `kvs`.
 *
 *   - key == NULL: every stored value is copied.
 *   - key contains '*': the text before the first '*' is used as a
 *     prefix and every key starting with it is copied.
 *   - otherwise: only exact key matches are copied.
 *
 * NOTE(review): a '*' in the very first position gives an empty prefix,
 * which (as in the original code) is treated as "no wildcard", i.e. an
 * exact comparison against the literal key - confirm this is intended.
 *
 * Returns OPAL_SUCCESS (also when nothing matched),
 * OPAL_ERR_TAKE_NEXT_OPTION if there is no data for this proc, or the
 * error code of opal_dss.copy().  Copies appended before a copy failure
 * remain on `kvs`.
 */
static int fetch_multiple(const opal_identifier_t *uid,
                          const char *key,
                          opal_list_t *kvs)
{
    proc_data_t *proc_data;
    opal_value_t *kv, *kvnew;
    int rc;
    const char *wild;
    size_t len = 0;
    opal_identifier_t id;

    /* to protect alignment, copy the data across */
    memcpy(&id, uid, sizeof(opal_identifier_t));

    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:fetch_multiple: searching for key %s on proc %" PRIu64 "",
                         (NULL == key) ? "NULL" : key, id));

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
        /* look elsewhere */
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    /* if the key is NULL, then return all the values */
    if (NULL == key) {
        for (kv = (opal_value_t*) opal_list_get_first(&proc_data->data);
             kv != (opal_value_t*) opal_list_get_end(&proc_data->data);
             kv = (opal_value_t*) opal_list_get_next(kv)) {
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kvnew, kv, OPAL_VALUE))) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(kvs, &kvnew->super);
        }
        return OPAL_SUCCESS;
    }

    /* see if the key includes a wildcard; the prefix length is measured
     * directly on the caller's key, so no temporary copy has to be
     * allocated (and none can leak on the error path below) */
    wild = strchr(key, '*');
    if (NULL != wild) {
        len = (size_t)(wild - key);
    }

    /* otherwise, find all matching keys and return them */
    for (kv = (opal_value_t*) opal_list_get_first(&proc_data->data);
         kv != (opal_value_t*) opal_list_get_end(&proc_data->data);
         kv = (opal_value_t*) opal_list_get_next(kv)) {
        if ((0 < len && 0 == strncmp(key, kv->key, len)) ||
            (0 == len && 0 == strcmp(key, kv->key))) {
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kvnew, kv, OPAL_VALUE))) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(kvs, &kvnew->super);
        }
    }
    return OPAL_SUCCESS;
}
Esempio n. 4
0
/*
 * Store (or replace) the value `data` of type `type` under `key` for the
 * process identified by `uid` in the hash store.  `locality` is only
 * reported in the debug output.
 *
 * The new entry is built completely before the list is touched, so a
 * failure leaves any previously stored value for `key` in place and never
 * leaves a half-initialized entry on the list.
 *
 * Supported types: OPAL_STRING (data may be NULL), OPAL_UINT32,
 * OPAL_UINT16, OPAL_INT, OPAL_UINT (data must not be NULL) and
 * OPAL_BYTE_OBJECT (NULL or empty object stores an empty byte object).
 * Strings and byte objects are duplicated.
 *
 * Returns OPAL_SUCCESS, OPAL_ERR_BAD_PARAM (NULL key, or NULL data for a
 * numeric type), OPAL_ERR_NOT_SUPPORTED (unhandled type) or
 * OPAL_ERR_OUT_OF_RESOURCE (lookup or allocation failure).
 */
static int store(const opal_identifier_t *uid,
                 opal_db_locality_t locality,
                 const char *key, const void *data,
                 opal_data_type_t type)
{
    proc_data_t *proc_data;
    opal_value_t *kv, *old;
    const opal_byte_object_t *boptr;
    opal_identifier_t id;
    int rc;

    /* to protect alignment, copy the data across */
    memcpy(&id, uid, sizeof(opal_identifier_t));

    /* we are at the bottom of the store priorities, so
     * if this fell to us, we store it
     */
    opal_output_verbose(1, opal_db_base_framework.framework_output,
                        "db:hash:store storing data for proc %" PRIu64 " at locality %d",
                        id, (int)locality);

    /* a key is mandatory, as it is for the fetch functions */
    if (NULL == key) {
        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
        return OPAL_ERR_BAD_PARAM;
    }

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
        /* unrecoverable error */
        OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                             "db:hash:store: storing key %s[%s] for proc %" PRIu64 " unrecoverably failed",
                             key, opal_dss.lookup_data_type(type), id));
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* see if we already have this key in the data - means we are updating
     * a pre-existing value
     */
    old = lookup_keyval(proc_data, key);
    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:store: %s key %s[%s] for proc %" PRIu64 "",
                         (NULL == old ? "storing" : "updating"),
                         key, opal_dss.lookup_data_type(type), id));

    /* build the replacement off-list first */
    kv = OBJ_NEW(opal_value_t);
    if (NULL == kv) {
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    kv->key = strdup(key);
    if (NULL == kv->key) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        goto fail;
    }

    /* record the value using the type tag the caller supplied; only the
     * types listed below can be represented here
     */
    switch (type) {
    case OPAL_STRING:
        kv->type = OPAL_STRING;
        if (NULL != data) {
            kv->data.string = strdup( (const char *) data);
            if (NULL == kv->data.string) {
                rc = OPAL_ERR_OUT_OF_RESOURCE;
                goto fail;
            }
        } else {
            kv->data.string = NULL;
        }
        break;
    case OPAL_UINT32:
        if (NULL == data) {
            rc = OPAL_ERR_BAD_PARAM;
            goto fail;
        }
        kv->type = OPAL_UINT32;
        kv->data.uint32 = *(const uint32_t*)data;
        break;
    case OPAL_UINT16:
        if (NULL == data) {
            rc = OPAL_ERR_BAD_PARAM;
            goto fail;
        }
        kv->type = OPAL_UINT16;
        kv->data.uint16 = *(const uint16_t*)(data);
        break;
    case OPAL_INT:
        if (NULL == data) {
            rc = OPAL_ERR_BAD_PARAM;
            goto fail;
        }
        kv->type = OPAL_INT;
        kv->data.integer = *(const int*)(data);
        break;
    case OPAL_UINT:
        if (NULL == data) {
            rc = OPAL_ERR_BAD_PARAM;
            goto fail;
        }
        kv->type = OPAL_UINT;
        kv->data.uint = *(const unsigned int*)(data);
        break;
    case OPAL_BYTE_OBJECT:
        kv->type = OPAL_BYTE_OBJECT;
        kv->data.bo.bytes = NULL;
        kv->data.bo.size = 0;
        boptr = (const opal_byte_object_t*)data;
        if (NULL != boptr && NULL != boptr->bytes && 0 < boptr->size) {
            kv->data.bo.bytes = (uint8_t *) malloc(boptr->size);
            if (NULL == kv->data.bo.bytes) {
                rc = OPAL_ERR_OUT_OF_RESOURCE;
                goto fail;
            }
            memcpy(kv->data.bo.bytes, boptr->bytes, boptr->size);
            kv->data.bo.size = boptr->size;
        }
        break;
    default:
        rc = OPAL_ERR_NOT_SUPPORTED;
        goto fail;
    }

    /* the new entry is complete: drop the old one (if any) and publish */
    if (NULL != old) {
        opal_list_remove_item(&proc_data->data, &old->super);
        OBJ_RELEASE(old);
    }
    opal_list_append(&proc_data->data, &kv->super);

    return OPAL_SUCCESS;

fail:
    OPAL_ERROR_LOG(rc);
    /* kv never reached the list, so releasing it is all that is needed */
    OBJ_RELEASE(kv);
    return rc;
}
Esempio n. 5
0
/*
 * Fetch a copy of the value stored under `key` for the process `uid`
 * from the hash store.
 *
 * How `data` is used depends on `type`:
 *   - OPAL_STRING: *data receives a newly strdup'ed string (or NULL if a
 *     NULL string was stored).
 *   - OPAL_UINT32 / OPAL_UINT16 / OPAL_INT / OPAL_UINT: *data must already
 *     point at storage of that type; the value is copied into it.
 *   - OPAL_BYTE_OBJECT: *data receives a newly malloc'ed
 *     opal_byte_object_t whose bytes (if any) are a malloc'ed copy.
 *
 * Returns OPAL_SUCCESS, OPAL_ERR_BAD_PARAM (NULL key, NULL data, or NULL
 * destination for a numeric type), OPAL_ERR_TAKE_NEXT_OPTION (proc or key
 * not present here), OPAL_ERR_TYPE_MISMATCH, OPAL_ERR_NOT_SUPPORTED or
 * OPAL_ERR_OUT_OF_RESOURCE (allocation failure).
 */
static int fetch(const opal_identifier_t *uid,
                 const char *key, void **data, opal_data_type_t type)
{
    proc_data_t *proc_data;
    opal_value_t *kv;
    opal_byte_object_t *boptr;
    opal_identifier_t id;
    char *strcopy;

    /* to protect alignment, copy the data across */
    memcpy(&id, uid, sizeof(opal_identifier_t));

    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:fetch: searching for key %s[%s] on proc %" PRIu64 "",
                         (NULL == key) ? "NULL" : key,
                         opal_dss.lookup_data_type(type), id));

    /* a NULL key or a NULL result location is an error */
    if (NULL == key || NULL == data) {
        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
        return OPAL_ERR_BAD_PARAM;
    }

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
        /* maybe they can find it elsewhere */
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    /* find the value */
    if (NULL == (kv = lookup_keyval(proc_data, key))) {
        /* maybe they can find it elsewhere */
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    /* do the copy and check the type */
    switch (type) {
    case OPAL_STRING:
        if (OPAL_STRING != kv->type) {
            return OPAL_ERR_TYPE_MISMATCH;
        }
        if (NULL != kv->data.string) {
            strcopy = strdup(kv->data.string);
            if (NULL == strcopy) {
                /* do not report success with a NULL that looks like a
                 * stored NULL string */
                OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
                return OPAL_ERR_OUT_OF_RESOURCE;
            }
            *data = strcopy;
        } else {
            *data = NULL;
        }
        break;
    case OPAL_UINT32:
        if (OPAL_UINT32 != kv->type) {
            return OPAL_ERR_TYPE_MISMATCH;
        }
        if (NULL == *data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        memcpy(*data, &kv->data.uint32, sizeof(uint32_t));
        break;
    case OPAL_UINT16:
        if (OPAL_UINT16 != kv->type) {
            return OPAL_ERR_TYPE_MISMATCH;
        }
        if (NULL == *data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        memcpy(*data, &kv->data.uint16, sizeof(uint16_t));
        break;
    case OPAL_INT:
        if (OPAL_INT != kv->type) {
            return OPAL_ERR_TYPE_MISMATCH;
        }
        if (NULL == *data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        memcpy(*data, &kv->data.integer, sizeof(int));
        break;
    case OPAL_UINT:
        if (OPAL_UINT != kv->type) {
            return OPAL_ERR_TYPE_MISMATCH;
        }
        if (NULL == *data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        memcpy(*data, &kv->data.uint, sizeof(unsigned int));
        break;
    case OPAL_BYTE_OBJECT:
        if (OPAL_BYTE_OBJECT != kv->type) {
            return OPAL_ERR_TYPE_MISMATCH;
        }
        boptr = (opal_byte_object_t*)malloc(sizeof(opal_byte_object_t));
        if (NULL == boptr) {
            OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
        if (NULL != kv->data.bo.bytes && 0 < kv->data.bo.size) {
            boptr->bytes = (uint8_t *) malloc(kv->data.bo.size);
            if (NULL == boptr->bytes) {
                free(boptr);
                OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
                return OPAL_ERR_OUT_OF_RESOURCE;
            }
            memcpy(boptr->bytes, kv->data.bo.bytes, kv->data.bo.size);
            boptr->size = kv->data.bo.size;
        } else {
            boptr->bytes = NULL;
            boptr->size = 0;
        }
        *data = boptr;
        break;
    default:
        OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
        return OPAL_ERR_NOT_SUPPORTED;
    }

    return OPAL_SUCCESS;
}
Esempio n. 6
0
/*
 * Fence for the Cray PMI2 component.
 *
 * Commits any partially filled packed data with
 * opal_pmix_base_commit_packed(), runs PMI2_KVS_Fence(), and - the first
 * time it is called - fetches the cpuset key of every local rank and
 * stores a locality value for that rank in the internal dstore.
 *
 * `procs` and `nprocs` are not used by this implementation.
 *
 * Returns OPAL_SUCCESS, OPAL_ERROR if PMI2_KVS_Fence() fails, or the error
 * code from opal_pmix_base_cache_keys_locally().
 */
static int cray_fence(opal_process_name_t *procs, size_t nprocs)
{
    int rc;
    int32_t i;
    opal_value_t *kp, kvn;
    opal_hwloc_locality_t locality;

    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray called fence",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* check if there is partially filled meta key and put them */
    if (0 != pmix_packed_data_offset && NULL != pmix_packed_data) {
        opal_pmix_base_commit_packed(pmix_packed_data, pmix_packed_data_offset, pmix_vallen_max, &pmix_pack_key, kvs_put);
        pmix_packed_data_offset = 0;
        free(pmix_packed_data);
        pmix_packed_data = NULL;
    }

    if (PMI_SUCCESS != (rc = PMI2_KVS_Fence())) {
        OPAL_PMI_ERROR(rc, "PMI2_KVS_Fence");
        return OPAL_ERROR;
    }

    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray kvs_fence complete",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* get the modex data from each local process and set the
     * localities to avoid having the MPI layer fetch data
     * for every process in the job */
    if (!pmix_got_modex_data) {
        pmix_got_modex_data = true;
        /* we only need to set locality for each local rank as "not found"
         * equates to "non-local"
         *
         * NOTE(review): the loop overwrites pmix_pname.vid and never
         * restores it, so after the first fence pmix_pname names the last
         * local rank - confirm that no later code relies on pmix_pname
         * still being this process' own name. */
        for (i=0; i < pmix_nlranks; i++) {
            pmix_pname.vid = pmix_lranks[i];
            /* start from NULL so the checks below are well defined even if
             * the call does not assign a value */
            kp = NULL;
            rc = opal_pmix_base_cache_keys_locally((opal_identifier_t*)&pmix_pname, OPAL_DSTORE_CPUSET,
                                                   &kp, pmix_kvs_name, pmix_vallen_max, kvs_get);
            if (OPAL_SUCCESS != rc) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
#if OPAL_HAVE_HWLOC
            if (NULL == kp || NULL == kp->data.string) {
                /* if we share a node, but we don't know anything more, then
                 * mark us as on the node as this is all we know
                 */
                locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
            } else {
                /* determine relative location on our node */
                locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
                                                                 opal_process_info.cpuset,
                                                                 kp->data.string);
            }
#else
            /* all we know is we share a node */
            locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
#endif
            /* release the fetched value in both build flavours; it used to
             * be released only when hwloc support was compiled in */
            if (NULL != kp) {
                OBJ_RELEASE(kp);
            }
            OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
                                 "%s pmix:cray proc %s locality %s",
                                 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                 OPAL_NAME_PRINT(*(opal_identifier_t*)&pmix_pname),
                                 opal_hwloc_base_print_locality(locality)));

            OBJ_CONSTRUCT(&kvn, opal_value_t);
            kvn.key = strdup(OPAL_DSTORE_LOCALITY);
            kvn.type = OPAL_UINT16;
            kvn.data.uint16 = locality;
            /* best effort: the result of the store is deliberately ignored */
            (void)opal_dstore.store(opal_dstore_internal, (opal_identifier_t*)&pmix_pname, &kvn);
            OBJ_DESTRUCT(&kvn);
        }
    }

    return OPAL_SUCCESS;
}
Esempio n. 7
0
/*
 * Initialize the Cray PMI2 component.
 *
 * Starts PMI2, records size/rank/appnum and the universe size, obtains
 * the job id string into pmix_kvs_name, derives pmix_jobid from its
 * "kvs_<n>" form, sets a temporary process name, and parses the PMI
 * process mapping to find the local ranks and this process' local and
 * node rank.
 *
 * Returns OPAL_SUCCESS if PMI2 was already initialized or everything
 * succeeds.  If PMI2_Init() itself fails, OPAL_ERROR is returned directly.
 * Any later failure goes through err_exit, which frees the mapping buffer
 * and pmix_kvs_name (when allocated here), calls PMI2_Finalize() exactly
 * once, and returns an OPAL error code.
 *
 * NOTE(review): pmix_init_count is incremented before anything can fail
 * and is not decremented on the failure paths - confirm that the matching
 * finalize accounts for this.
 */
static int cray_init(void)
{
    int i, spawned, size, rank, appnum, my_node;
    int rc, ret = OPAL_ERROR;
    int kvs_allocated = 0;   /* non-zero once pmix_kvs_name was malloc'ed here */
    char *pmapping = NULL;
    char buf[PMI2_MAX_ATTRVALUE];
    int found;
    uint32_t jobfam;

    ++pmix_init_count;

    /* PMI2 is already up: nothing left to do */
    if ( PMI2_Initialized () ) {
        return OPAL_SUCCESS;
    }
    size = -1;
    rank = -1;
    appnum = -1;
    if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
        opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
        return OPAL_ERROR;
    }
    if( size < 0 || rank < 0 ){
        opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
        goto err_exit;
    }

    pmix_size = size;
    pmix_rank = rank;
    pmix_appnum = appnum;

    pmix_vallen_max = PMI2_MAX_VALLEN;
    pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
    pmix_keylen_max = PMI2_MAX_KEYLEN;

    /* start from an empty string so atoi() below yields 0 rather than
     * reading uninitialized bytes if the call leaves buf untouched */
    buf[0] = '\0';
    rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }
    pmix_usize = atoi(buf);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
         /* err_exit finalizes PMI2; do not finalize twice */
         ret = OPAL_ERR_OUT_OF_RESOURCE;
         goto err_exit;
    }
    kvs_allocated = 1;
    rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    /* the job id string is expected to look like "kvs_<number>" */
    rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam);
    if (rc != 1) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        ret = OPAL_ERROR;
        goto err_exit;
    }

    pmix_jobid = jobfam << 16;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    pmix_pname.jid = pmix_jobid;
    pmix_pname.vid = pmix_rank;
    opal_proc_set_name((opal_process_name_t*)&pmix_pname);
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s",
                        OPAL_NAME_PRINT(*(opal_process_name_t*)&pmix_pname),pmix_pname.jid,pmix_pname.vid,pmix_kvs_name);

    pmapping = (char*)malloc(PMI2_MAX_VALLEN);
    if( pmapping == NULL ){
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(ret);
        goto err_exit;
    }

    rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
    /* test rc first: found is only examined after a successful call */
    if( PMI_SUCCESS != rc || !found ) {
        OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
        ret = OPAL_ERROR;
        goto err_exit;
    }

    pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks);
    if (NULL == pmix_lranks) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(ret);
        goto err_exit;
    }

    free(pmapping);
    pmapping = NULL;

    /* find ourselves */
    for (i=0; i < pmix_nlranks; i++) {
        if (pmix_rank == pmix_lranks[i]) {
            pmix_lrank = i;
            pmix_nrank = my_node;
            break;
        }
    }

    return OPAL_SUCCESS;
err_exit:
    free(pmapping);
    if (kvs_allocated) {
        /* do not leave a dangling pointer in the file-level variable */
        free(pmix_kvs_name);
        pmix_kvs_name = NULL;
    }
    PMI2_Finalize();
    return ret;
}
Esempio n. 8
0
/*
 * Fetch the value published under `key` for the process `uid` from the
 * PMI key-value space and convert it to the requested `type`.
 *
 *   - OPAL_STRING: *data receives the string assembled by fetch_string().
 *     OPAL_SUCCESS is returned even when fetch_string() yields NULL, so
 *     callers must check *data.
 *   - OPAL_UINT32 / OPAL_UINT16 / OPAL_INT / OPAL_UINT: *data must already
 *     point at storage of that type; the decimal text is parsed and the
 *     value copied into it.
 *   - OPAL_BYTE_OBJECT: *data receives a newly malloc'ed
 *     opal_byte_object_t holding the result of pmi_decode().
 *
 * Returns OPAL_SUCCESS, OPAL_ERR_BAD_PARAM (NULL key/data, or key setup
 * failed), OPAL_ERR_NOT_FOUND (kvs_get failed), OPAL_ERR_NOT_SUPPORTED
 * (unhandled type) or OPAL_ERR_OUT_OF_RESOURCE (allocation failure).
 */
static int fetch(const opal_identifier_t *uid,
                 const char *key, void **data, opal_data_type_t type)
{
    opal_byte_object_t *boptr;
    uint16_t ui16;
    uint32_t ui32;
    int ival;
    unsigned int uival;
    char *pmikey;
    char *tmp_val;
    size_t sval;
    opal_identifier_t proc;
    int rc = OPAL_SUCCESS;

    /* to protect alignment, copy the data across */
    memcpy(&proc, uid, sizeof(opal_identifier_t));

    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:pmi:fetch: searching for key %s[%s] on proc %" PRIu64 "",
                         (NULL == key) ? "NULL" : key,
                         opal_dss.lookup_data_type(type), proc));

    /* a NULL key or a NULL result location is an error */
    if (NULL == key || NULL == data) {
        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
        return OPAL_ERR_BAD_PARAM;
    }

    /* setup the key - done once; building it a second time for the
     * non-string path used to leak the first copy */
    if (NULL == (pmikey = setup_key(proc, key))) {
        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
        return OPAL_ERR_BAD_PARAM;
    }

    /* check to see if they are looking for a string */
    if (OPAL_STRING == type) {
        /* might have been passed in multiple sections */
        *data = fetch_string(pmikey);
        free(pmikey);
        return OPAL_SUCCESS;
    }

    /* otherwise, retrieve the pmi keyval; kvs_get is told it may write up
     * to pmi_vallen_max bytes, so the buffer must be that large */
    tmp_val = (char*)malloc((size_t)pmi_vallen_max);
    if (NULL == tmp_val) {
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        free(pmikey);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    if (PMI_SUCCESS != kvs_get(pmikey, tmp_val, pmi_vallen_max)) {
        OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
        free(pmikey);
        free(tmp_val);
        return OPAL_ERR_NOT_FOUND;
    }
    free(pmikey);

    /* return the value according to the provided type */
    switch (type) {
    case OPAL_UINT32:
        ui32 = (uint32_t)strtoul(tmp_val, NULL, 10);
        memcpy(*data, &ui32, sizeof(uint32_t));
        break;
    case OPAL_UINT16:
        ui16 = (uint16_t)strtoul(tmp_val, NULL, 10);
        memcpy(*data, &ui16, sizeof(uint16_t));
        break;
    case OPAL_INT:
        ival = (int)strtol(tmp_val, NULL, 10);
        memcpy(*data, &ival, sizeof(int));
        break;
    case OPAL_UINT:
        uival = (unsigned int)strtoul(tmp_val, NULL, 10);
        memcpy(*data, &uival, sizeof(unsigned int));
        break;
    case OPAL_BYTE_OBJECT:
        sval = 0;
        boptr = (opal_byte_object_t*)malloc(sizeof(opal_byte_object_t));
        if (NULL == boptr) {
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            OPAL_ERROR_LOG(rc);
            break;
        }
        boptr->bytes = (uint8_t*)pmi_decode(tmp_val, &sval);
        boptr->size = sval;
        *data = boptr;
        break;
    default:
        rc = OPAL_ERR_NOT_SUPPORTED;
        OPAL_ERROR_LOG(rc);
        break;
    }

    free(tmp_val);
    return rc;
}
Esempio n. 9
0
static char* fetch_string(const char *key)
{
    char *tmp_val, *ptr, *tmpkey;
    int i, nsections;
    char *data;

    /* create our sandbox */
    tmp_val = (char*)malloc(pmi_vallen_max * sizeof(char));

    /* the first section of the string has the original key, so fetch it */
    if (PMI_SUCCESS != kvs_get(key, tmp_val, pmi_vallen_max)) {
        OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
        free(tmp_val);
        return NULL;
    }

    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:pmi:fetch_string: received key %s DATA %s",
                         key, tmp_val));

    /* the data in this section was prepended with the number of sections
     * required to hold the entire string - get it
     */
    ptr = strchr(tmp_val, ':');
    *ptr = '\0';
    nsections = strtol(tmp_val, NULL, 10);
    /* save the actual data */
    ptr++;
    data = strdup(ptr);

    /* get any remaining sections */
    for (i=1; i < nsections; i++) {
        /* create the key */
        asprintf(&tmpkey, "%s:%d", key, i);
        /* fetch it */
        if (PMI_SUCCESS != kvs_get(tmpkey, tmp_val, pmi_vallen_max)) {
            OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
            free(tmp_val);
            free(tmpkey);
            free(data);
            return NULL;
        }
        OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                             "db:pmi:fetch_string: received key %s DATA %s",
                             tmpkey, tmp_val));

        /* add it to our data */
        asprintf(&ptr, "%s%s", data, tmp_val);
        free(data);
        data = ptr;
        /* cleanup */
        free(tmpkey);
    }

    /* cleanup */
    free(tmp_val);

#if WANT_PMI2_SUPPORT
        {
            /* the blasted Cray PMI implementation marked a number of common
             * ASCII characters as "illegal", so if we are on one of those
             * machines, then replaced those characters with something
             * else - now recover them
             */
            size_t n, k;
            char *tmp;
            char conv[2];

            /* first, count how many characters were replaced - since Cray
             * is the source of the trouble, we only make this slow for them!
             */
            ptr = data;
            i=0;
            while (NULL != (tmp = strchr(ptr, escape_char))) {
                i++;
                ptr = tmp;
                ptr++;
            }
            /* shrink the string */
            ptr = (char*)malloc(sizeof(char) * (1 + strlen(data) - i));
            /* now construct it */
            k=0;
            conv[1] = '\0';
            for (n=0; n < strlen(data); n++) {
                if (escape_char == data[n]) {
                    /* the next character tells us which character
                     * was subbed out
                     */
                    n++;
                    conv[0] = data[n];
                    i = strtol(conv, NULL, 10);
                    ptr[k++] = illegal[i];
                } else {
                    ptr[k++] = data[n];
                }
            }
            /* pass the result */
            free(data);
            data = ptr;
        }
#endif

    return data;
}
Esempio n. 10
0
/**
 * Publish one value into the PMI key-value space.
 *
 * The PMI key is built by setup_key() from the process identifier and
 * the caller's key.  Strings are (when built with PMI2 support) escaped,
 * then split into segments of pmi_vallen_max - OPAL_PMI_PAD characters:
 * the first segment is stored under the base key as "<count>:<segment>",
 * every following segment under "<base key>:<index>".  Integer types are
 * stored as decimal text, byte objects via pmi_encode().
 *
 * @param uid       identifier of the process the data belongs to
 *                  (copied, so it need not be aligned)
 * @param locality  OPAL_DB_INTERNAL data is not handled here
 * @param key       name of the value
 * @param data      pointer to the value; interpreted according to type
 * @param type      OPAL data type of *data
 *
 * @return OPAL_SUCCESS on success,
 *         OPAL_ERR_TAKE_NEXT_OPTION if locality is OPAL_DB_INTERNAL,
 *         OPAL_ERR_BAD_PARAM if no key could be built or a string cannot
 *         be segmented because pmi_vallen_max does not exceed OPAL_PMI_PAD,
 *         OPAL_ERR_NOT_SUPPORTED for an unhandled type,
 *         the pmi_encode() error code if encoding fails,
 *         OPAL_ERROR if a put or an allocation fails.
 *
 * All temporaries are released on every exit path.
 */
static int store(const opal_identifier_t *uid,
                 opal_db_locality_t locality,
                 const char *key, const void *data, opal_data_type_t type)
{
    int i, rc, seglen;
    int ret = OPAL_ERROR;
    char *pmidata = NULL, *str, *localdata = NULL;
    int64_t i64;
    uint64_t ui64;
    opal_byte_object_t *bo;
    char *pmikey = NULL, *tmpkey = NULL, *tmp, sav;
    char **strdata = NULL;
    opal_identifier_t proc;

    /* to protect alignment, copy the data across */
    memcpy(&proc, uid, sizeof(opal_identifier_t));

    /* pass internal stores down to someone else */
    if (OPAL_DB_INTERNAL == locality) {
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:pmi:store: storing key %s[%s] for proc %" PRIu64 "",
                         key, opal_dss.lookup_data_type(type), proc));

    if (NULL == (pmikey = setup_key(proc, key))) {
        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
        return OPAL_ERR_BAD_PARAM;
    }

    switch (type) {
    case OPAL_STRING:
#if WANT_PMI2_SUPPORT
        {
            /* the blasted Cray PMI implementation marked a number of common
             * ASCII characters as "illegal", so if we are on one of those
             * machines, then we have to replace those characters with
             * something else
             */
            const char *src = (const char*)data;
            const char *hit;
            size_t n, k, len, nsub;
            char *ptr;

            /* count the characters that need escaping in a single pass
             * over the input, so no occurrence can be missed regardless
             * of the order in which the illegal characters appear
             */
            len = strlen(src);
            nsub = 0;
            for (n = 0; n < len; n++) {
                if (NULL != strchr(illegal, src[n])) {
                    nsub++;
                }
            }
            /* each escaped character grows from one to two characters */
            ptr = (char*)malloc(len + nsub + 1);
            if (NULL == ptr) {
                OPAL_ERROR_LOG(OPAL_ERROR);
                goto cleanup;
            }
            /* now construct it */
            k = 0;
            for (n = 0; n < len; n++) {
                hit = strchr(illegal, src[n]);
                if (NULL != hit) {
                    /* escape the character: the substitute is found at
                     * the same index in sub as the match in illegal
                     */
                    ptr[k++] = escape_char;
                    ptr[k++] = sub[hit - illegal];
                } else {
                    ptr[k++] = src[n];
                }
            }
            /* terminate - the code below runs strlen() on the result */
            ptr[k] = '\0';
            /* pass the result */
            localdata = ptr;
        }
#else
        localdata = strdup((const char*)data);
        if (NULL == localdata) {
            OPAL_ERROR_LOG(OPAL_ERROR);
            goto cleanup;
        }
#endif
        seglen = pmi_vallen_max - OPAL_PMI_PAD;
        str = localdata;
        while (pmi_vallen_max < (int)(OPAL_PMI_PAD + strlen(str))) {
            /* the string is too long, so we need to break it into
             * multiple sections - which is only possible if a section
             * can hold at least one character
             */
            if (seglen <= 0) {
                ret = OPAL_ERR_BAD_PARAM;
                OPAL_ERROR_LOG(ret);
                goto cleanup;
            }
            /* temporarily terminate the string at the section boundary */
            tmp = str + seglen;
            sav = *tmp;
            *tmp = '\0';
            rc = opal_argv_append_nosize(&strdata, str);
            *tmp = sav;
            if (OPAL_SUCCESS != rc) {
                OPAL_ERROR_LOG(rc);
                ret = rc;
                goto cleanup;
            }
            str = tmp;
        }
        /* put whatever remains on the stack */
        if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(&strdata, str))) {
            OPAL_ERROR_LOG(rc);
            ret = rc;
            goto cleanup;
        }
        /* the first value we put uses the original key, but
         * the data is prepended with the number of sections
         * required to hold the entire string
         */
        if (0 > asprintf(&pmidata, "%d:%s", opal_argv_count(strdata), strdata[0])) {
            /* the pointer is undefined after a failed asprintf */
            pmidata = NULL;
            OPAL_ERROR_LOG(OPAL_ERROR);
            goto cleanup;
        }
        OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                             "db:pmi:store: storing key %s data %s",
                             pmikey, pmidata));

        if (PMI_SUCCESS != (rc = kvs_put(pmikey, pmidata))) {
            OPAL_PMI_ERROR(rc, "PMI_KVS_Put");
            goto cleanup;
        }
        /* for each remaining segment, augment the key with the index */
        for (i=1; NULL != strdata[i]; i++) {
            if (0 > asprintf(&tmpkey, "%s:%d", pmikey, i)) {
                tmpkey = NULL;
                OPAL_ERROR_LOG(OPAL_ERROR);
                goto cleanup;
            }
            OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                                 "db:pmi:store: storing key %s data %s",
                                 tmpkey, strdata[i]));

            if (PMI_SUCCESS != (rc = kvs_put(tmpkey, strdata[i]))) {
                OPAL_PMI_ERROR(rc, "PMI_KVS_Put");
                goto cleanup;
            }
            free(tmpkey);
            tmpkey = NULL;
        }
        ret = OPAL_SUCCESS;
        goto cleanup;

    case OPAL_INT:
        i64 = (int64_t)(*((const int*)data));
        if (0 > asprintf(&pmidata, "%" PRId64, i64)) {
            pmidata = NULL;
        }
        break;

    case OPAL_INT32:
        i64 = (int64_t)(*((const int32_t*)data));
        if (0 > asprintf(&pmidata, "%" PRId64, i64)) {
            pmidata = NULL;
        }
        break;

    case OPAL_INT64:
        /* read the full 64-bit value, not just an int's worth of it */
        i64 = *((const int64_t*)data);
        if (0 > asprintf(&pmidata, "%" PRId64, i64)) {
            pmidata = NULL;
        }
        break;

    case OPAL_UINT64:
        ui64 = *((const uint64_t*)data);
        if (0 > asprintf(&pmidata, "%" PRIu64, ui64)) {
            pmidata = NULL;
        }
        break;

    case OPAL_UINT32:
        ui64 = (uint64_t)(*((const uint32_t*)data));
        if (0 > asprintf(&pmidata, "%" PRIu64, ui64)) {
            pmidata = NULL;
        }
        break;

    case OPAL_UINT16:
        ui64 = (uint64_t)(*((const uint16_t*)data));
        if (0 > asprintf(&pmidata, "%" PRIu64, ui64)) {
            pmidata = NULL;
        }
        break;

    case OPAL_BYTE_OBJECT:
        bo = (opal_byte_object_t*)data;
        pmidata = (char*)malloc(pmi_vallen_max*sizeof(char));
        if (NULL != pmidata &&
            OPAL_SUCCESS != (rc = pmi_encode(pmidata, bo->bytes, bo->size))) {
            OPAL_ERROR_LOG(rc);
            ret = rc;
            goto cleanup;
        }
        break;

    default:
        OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
        ret = OPAL_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    /* every non-string case leaves pmidata NULL on allocation failure */
    if (NULL == pmidata) {
        OPAL_ERROR_LOG(OPAL_ERROR);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((10, opal_db_base_framework.framework_output,
                         "PUTTING KEY %s DATA %s",
                         pmikey, pmidata));

    rc = kvs_put(pmikey, pmidata);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Put");
        goto cleanup;
    }
    ret = OPAL_SUCCESS;

cleanup:
    /* single exit point: release everything that may have been acquired */
    free(tmpkey);
    free(pmidata);
    free(localdata);
    free(pmikey);
    if (NULL != strdata) {
        opal_argv_free(strdata);
    }
    return ret;
}