Exemplo n.º 1
0
static int get_addr(MPIDI_VC_t * vc, struct scif_portID *addr)
{
    int mpi_errno = MPI_SUCCESS;
    char *bc;
    int pmi_errno;
    int val_max_sz;
    MPIU_CHKLMEM_DECL(1);

    /* Allocate space for the business card */
    pmi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
    MPIU_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail",
                         "**fail %d", pmi_errno);
    MPIU_CHKLMEM_MALLOC(bc, char *, val_max_sz, mpi_errno, "bc");

    mpi_errno = vc->pg->getConnInfo(vc->pg_rank, bc, val_max_sz, vc->pg);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    mpi_errno = scif_addr_from_bc(bc, &addr->node, &addr->port);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Exemplo n.º 2
0
static int test_item3(void)
{
    int rc = 0;
    int val = 0;

    val = random_value(10, 100);
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&val))) {
        log_fatal("PMI_KVS_Get_key_length_max failed: %d\n", rc);
        return rc;
    }
    log_info("PMI_KVS_Get_key_length_max=%d\n", val);
    if (!_legacy) {
        log_assert(511 == val, "Check PMIX_MAX_KEYLEN value in pmix_common.h");
    }

    val = random_value(10, 100);
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_value_length_max(&val))) {
        log_fatal("PMI_KVS_Get_value_length_max failed: %d\n", rc);
        return rc;
    }
    log_info("PMI_KVS_Get_value_length_max=%d\n", val);
    if (!_legacy) {
        log_assert(4096 == val, "Check limitation for a value");
    }

    return rc;
}
Exemplo n.º 3
0
int MPID_nem_ptl_init_id(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    char *bc;
    int pmi_errno;
    int val_max_sz;
    MPIR_CHKLMEM_DECL(1);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_INIT_ID);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_INIT_ID);

    pmi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
    MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
    MPIR_CHKLMEM_MALLOC(bc, char *, val_max_sz, mpi_errno, "bc");

    mpi_errno = vc->pg->getConnInfo(vc->pg_rank, bc, val_max_sz, vc->pg);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_nem_ptl_get_id_from_bc(bc, &vc_ptl->id, &vc_ptl->pt, &vc_ptl->ptg, &vc_ptl->ptc, &vc_ptl->ptr, &vc_ptl->ptrg, &vc_ptl->ptrc);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    vc_ptl->id_initialized = TRUE;

    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
    
 fn_exit:
    MPIR_CHKLMEM_FREEALL();
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_INIT_ID);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Exemplo n.º 4
0
static int pmi_set_proc_attr(const char* attr_name, 
                             const void *buffer, size_t size)
{
    char *attr, *attrval;
    int rc;

     if (NULL == pmi_kvs_name) {
        int max_length;

        rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
        if (PMI_SUCCESS != rc) {
            ORTE_PMI_ERROR(rc, "PMI_Get_value_length_max");
            return ORTE_ERROR;
        }

        if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) {
            ORTE_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
            return ORTE_ERROR;
        }
        pmi_kvs_name = malloc(max_length);
        if (NULL == pmi_kvs_name) {
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
        if (PMI_SUCCESS != rc) {
            ORTE_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
            return ORTE_ERROR;
        }
    }

    if (0 > asprintf(&attr, "%s-%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name)) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:pmi: set attr %s of size %lu in KVS %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name,
                         (unsigned long)size, pmi_kvs_name));
    
    attrval = pmi_encode(buffer, size);
    if (NULL == attrval) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (strlen(attrval) > (size_t)pmi_vallen_max) {
        opal_output(0, "pmi_proc_set_attr: attribute length is too long\n");
        return ORTE_ERROR;
    }

    rc = PMI_KVS_Put(pmi_kvs_name, attr, attrval);
    if (PMI_SUCCESS != rc) {
        ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
        return ORTE_ERROR;
    }

    free(attr);
    free(attrval);

    return ORTE_SUCCESS;
}
Exemplo n.º 5
0
int MPID_nem_mxm_vc_init(MPIDI_VC_t * vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vc_ch = &vc->ch;
    MPID_nem_mxm_vc_area *vc_area = VC_BASE(vc);

    MPIDI_STATE_DECL(MPID_STATE_MXM_VC_INIT);
    MPIDI_FUNC_ENTER(MPID_STATE_MXM_VC_INIT);

    /* local connection is used for any source communication */
    MPIU_Assert(MPID_nem_mem_region.rank != vc->lpid);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE,
                     (MPIU_DBG_FDEST,
                      "[%i]=== connecting  to  %i  \n", MPID_nem_mem_region.rank, vc->lpid));
    {
        char *business_card;
        int val_max_sz;
#ifdef USE_PMI2_API
        val_max_sz = PMI2_MAX_VALLEN;
#else
        mpi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);
#endif

        business_card = (char *) MPIU_Malloc(val_max_sz);
        mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card, val_max_sz, vc->pg);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);

        vc_area->ctx = vc;
        vc_area->mxm_ep = &_mxm_obj.endpoint[vc->pg_rank];
        mpi_errno = _mxm_connect(&_mxm_obj.endpoint[vc->pg_rank], business_card, vc_area);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);

        MPIU_Free(business_card);
    }

    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);

    vc_area->pending_sends = 0;

    vc->rndvSend_fn = NULL;
    vc->rndvRecv_fn = NULL;
    vc->sendNoncontig_fn = MPID_nem_mxm_SendNoncontig;
    vc->comm_ops = &comm_ops;

    vc_ch->iStartContigMsg = MPID_nem_mxm_iStartContigMsg;
    vc_ch->iSendContig = MPID_nem_mxm_iSendContig;

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MXM_VC_INIT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Exemplo n.º 6
0
int
MPID_nem_mx_vc_init (MPIDI_VC_t *vc)
{
   uint32_t threshold;
   MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
   int mpi_errno = MPI_SUCCESS;

   /* first make sure that our private fields in the vc fit into the area provided  */
   MPIU_Assert(sizeof(MPID_nem_mx_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);

#ifdef ONDEMAND
   VC_FIELD(vc, local_connected)  = 0;
   VC_FIELD(vc, remote_connected) = 0;
#else
   {
       char *business_card;
       int   val_max_sz;
       int   ret;
#ifdef USE_PMI2_API
       val_max_sz = PMI2_MAX_VALLEN;
#else
       mpi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
#endif 
       business_card = (char *)MPIU_Malloc(val_max_sz); 
       mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card,val_max_sz, vc->pg);
       if (mpi_errno) MPIU_ERR_POP(mpi_errno);
       
       mpi_errno = MPID_nem_mx_get_from_bc (business_card, &VC_FIELD(vc, remote_endpoint_id), &VC_FIELD(vc, remote_nic_id));
       if (mpi_errno)    MPIU_ERR_POP (mpi_errno);

       MPIU_Free(business_card);
       
       ret = mx_connect(MPID_nem_mx_local_endpoint,VC_FIELD(vc, remote_nic_id),VC_FIELD(vc, remote_endpoint_id),
			MPID_NEM_MX_FILTER,MX_INFINITE,&(VC_FIELD(vc, remote_endpoint_addr)));
       MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_connect", "**mx_connect %s", mx_strerror (ret));
       mx_set_endpoint_addr_context(VC_FIELD(vc, remote_endpoint_addr),(void *)vc);

       MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
   }
#endif
   mx_get_info(MPID_nem_mx_local_endpoint, MX_COPY_SEND_MAX, NULL, 0, &threshold, sizeof(uint32_t));

   vc->eager_max_msg_sz = threshold;
   vc->rndvSend_fn      = NULL;
   vc->sendNoncontig_fn = MPID_nem_mx_SendNoncontig;
   vc->comm_ops         = &comm_ops;
 
   vc_ch->iStartContigMsg = MPID_nem_mx_iStartContigMsg;
   vc_ch->iSendContig     = MPID_nem_mx_iSendContig;

 fn_exit:
   return mpi_errno;
 fn_fail:
   goto fn_exit;
}
static int setup_pmi(void)
{
    int max_length, rc;

#if WANT_CRAY_PMI2_EXT
    pmi_vallen_max = PMI2_MAX_VALLEN;
#else
    rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
    if (PMI_SUCCESS != rc) {
        ORTE_PMI_ERROR(rc, "PMI_Get_value_length_max");
        return ORTE_ERROR;
    }
#endif
    pmi_attr_val = malloc(pmi_vallen_max);
    if (NULL == pmi_attr_val) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

#if WANT_CRAY_PMI2_EXT
    /* TODO -- is this ok */
    max_length = 1024;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) {
        ORTE_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        return ORTE_ERROR;
    }
#endif
    pmi_kvs_name = malloc(max_length);
    if (NULL == pmi_kvs_name) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

#if WANT_CRAY_PMI2_EXT
    rc = PMI2_Job_GetId(pmi_kvs_name, max_length);
#else
    rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
#endif
    if (PMI_SUCCESS != rc) {
        ORTE_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        return ORTE_ERROR;
    }

#if WANT_CRAY_PMI2_EXT
    pmi_keylen_max = PMI2_MAX_KEYLEN;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max))) {
        ORTE_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        return ORTE_ERROR;
    }
#endif
    pmi_kvs_key = malloc(pmi_keylen_max);

    return ORTE_SUCCESS;
}
Exemplo n.º 8
0
int MPIDI_check_for_failed_procs(void)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int len;
    char *kvsname = MPIDI_global.jobid;
    char *failed_procs_string = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CHECK_FOR_FAILED_PROCS);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CHECK_FOR_FAILED_PROCS);

    /* FIXME: Currently this only handles failed processes in
     * comm_world.  We need to fix hydra to include the pgid along
     * with the rank, then we need to create the failed group from
     * something bigger than comm_world. */
#ifdef USE_PMIX_API
    MPIR_Assert(0);
#elif defined(USE_PMI2_API)
    {
        int vallen = 0;
        len = PMI2_MAX_VALLEN;
        failed_procs_string = MPL_malloc(len, MPL_MEM_OTHER);
        MPIR_Assert(failed_procs_string);
        pmi_errno =
            PMI2_KVS_Get(kvsname, PMI2_ID_NULL, "PMI_dead_processes", failed_procs_string,
                         len, &vallen);
        MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
        MPL_free(failed_procs_string);
    }
#else
    pmi_errno = PMI_KVS_Get_value_length_max(&len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max");
    failed_procs_string = MPL_malloc(len, MPL_MEM_OTHER);
    MPIR_Assert(failed_procs_string);
    pmi_errno = PMI_KVS_Get(kvsname, "PMI_dead_processes", failed_procs_string, len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
    MPL_free(failed_procs_string);
#endif

    MPL_DBG_MSG_FMT(MPIDI_CH4_DBG_GENERAL, VERBOSE,
                    (MPL_DBG_FDEST, "Received proc fail notification: %s", failed_procs_string));

    /* FIXME: handle ULFM failed groups here */

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CHECK_FOR_FAILED_PROCS);
    return mpi_errno;
  fn_fail:
    MPL_free(failed_procs_string);
    goto fn_exit;
}
Exemplo n.º 9
0
int
MPID_nem_newmad_vc_init (MPIDI_VC_t *vc)
{
   MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
   char          *business_card;
   int            mpi_errno = MPI_SUCCESS;   
   int            val_max_sz;
   int            ret;
   
#ifdef USE_PMI2_API
   val_max_sz = PMI2_MAX_VALLEN;
#else
   mpi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
#endif
   business_card = (char *)MPIU_Malloc(val_max_sz);   
   mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card,val_max_sz,vc->pg);
   if (mpi_errno) MPIU_ERR_POP(mpi_errno);
   
   /* Very important */
   memset(VC_FIELD(vc, url),0,MPID_NEM_NMAD_MAX_SIZE);
   
   mpi_errno = MPID_nem_newmad_get_from_bc (business_card, VC_FIELD(vc, url));
   if (mpi_errno) MPIU_ERR_POP (mpi_errno);

   MPIU_Free(business_card);
   
   ret = nm_session_connect(mpid_nem_newmad_session, &(VC_FIELD(vc,p_gate)), VC_FIELD(vc, url));
   if (ret != NM_ESUCCESS) fprintf(stdout,"nm_session_connect returned ret = %d\n", ret);

   nm_gate_ref_set(VC_FIELD(vc, p_gate),(void*)vc);
   MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
   
   vc->eager_max_msg_sz = 32768;
   vc->rndvSend_fn      = NULL;
   vc->sendNoncontig_fn = MPID_nem_newmad_SendNoncontig;
   vc->comm_ops         = &comm_ops;

   vc_ch->iStartContigMsg = MPID_nem_newmad_iStartContigMsg;
   vc_ch->iSendContig     = MPID_nem_newmad_iSendContig;
   
 fn_exit:
   return mpi_errno;
 fn_fail:
   goto fn_exit;
}
Exemplo n.º 10
0
static int setup_pmi(void)
{
    int max_length, rc;

#if WANT_PMI2_SUPPORT
    pmi_vallen_max = PMI2_MAX_VALLEN;
#else
    rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
    if (PMI_SUCCESS != rc) {
        return OMPI_ERROR;
    }
#endif

#if WANT_PMI2_SUPPORT
    /* TODO -- is this ok */
    max_length = 1024;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) {
        return OMPI_ERROR;
    }
#endif
    pmi_kvs_name = (char*)malloc(max_length);
    if (NULL == pmi_kvs_name) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

#if WANT_PMI2_SUPPORT
    rc = PMI2_Job_GetId(pmi_kvs_name, max_length);
#else
    rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
#endif
    if (PMI_SUCCESS != rc) {
        return OMPI_ERROR;
    }

#if WANT_PMI2_SUPPORT
    pmi_keylen_max = PMI2_MAX_KEYLEN;
#else
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max))) {
        return OMPI_ERROR;
    }
#endif

    return OMPI_SUCCESS;
}
Exemplo n.º 11
0
static int test_item5(void)
{
    int rc = 0;
    char *val = NULL;
    int val_size = 0;
    /* Predefined Job attributes */
    const char *tkeys[] = {
            "PMI_process_mapping",
            NULL
    };
    const char **ptr = tkeys;

    if (_legacy || !_legacy) {
        log_error("%s\n", "PMIx and SLURM/PMI1 do not set 'PMI_process_mapping' (Do not mark test as failed)");
        return rc;
    }

    if (PMI_SUCCESS != (rc = PMI_KVS_Get_value_length_max(&val_size))) {
        log_fatal("PMI_KVS_Get_value_length_max failed: %d\n", rc);
        return rc;
    }

    val = alloca(val_size);
    if (!val) {
        return PMI_FAIL;
    }

    while (*ptr) {
        if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, *ptr, val, val_size))) {
            log_fatal("PMI_KVS_Get: [%s] %d\n", *ptr, rc);
            return rc;
        }
        log_info("key=%s value=%.80s\n", *ptr, val);
        ptr++;
    }

    return rc;
}
Exemplo n.º 12
0
/* gasneti_bootstrapInit
 */
int gasneti_bootstrapInit_pmi(
        int *argc_p, char ***argv_p,
        gasnet_node_t *nodes_p, gasnet_node_t *mynode_p) {
    int size, rank;

#if USE_PMI2_API
    int spawned, appnum;
    if (PMI2_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
        return GASNET_ERR_NOT_INIT;
    }
#else
    if (PMI_SUCCESS != PMI_Initialized(&gasneti_pmi_initialized)) {
        return GASNET_ERR_NOT_INIT;
    }

    if (PMI_FALSE == gasneti_pmi_initialized) {
        int spawned;
        if (PMI_SUCCESS != PMI_Init(&spawned)) {
            return GASNET_ERR_NOT_INIT;
        }
    }

    if (PMI_SUCCESS != PMI_Get_rank(&rank)) {
        gasneti_fatalerror("PMI_Get_rank() failed");
    }

    if (PMI_SUCCESS != PMI_Get_size(&size)) {
        gasneti_fatalerror("PMI_Get_size() failed");
    }
#endif

    *mynode_p = rank;
    *nodes_p = size;

#if USE_PMI2_API
    max_name_len = 1024; /* XXX: can almost certainly be shorter than this! */
    max_key_len = PMI2_MAX_KEYLEN;
    max_val_len = PMI2_MAX_VALLEN;
#else
    if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&max_name_len)) {
        gasneti_fatalerror("PMI_KVS_Get_name_length_max() failed");
    }
    if (PMI_SUCCESS != PMI_KVS_Get_key_length_max(&max_key_len)) {
        gasneti_fatalerror("PMI_KVS_Get_key_length_max() failed");
    }
    if (PMI_SUCCESS != PMI_KVS_Get_value_length_max(&max_val_len)) {
        gasneti_fatalerror("PMI_KVS_Get_value_length_max() failed");
    }
#endif

    kvs_name = (char*) gasneti_malloc(max_name_len);
    kvs_key = (char*) gasneti_malloc(max_key_len);
    kvs_value = (char*) gasneti_malloc(max_val_len);
    max_val_bytes = 4 * (max_val_len / 5);

#if USE_PMI2_API
    if (PMI2_SUCCESS != PMI2_Job_GetId(kvs_name, max_name_len)) {
        gasneti_fatalerror("PMI2_Job_GetId() failed");
    }
#else
    if (PMI_SUCCESS != PMI_KVS_Get_my_name(kvs_name, max_name_len)) {
        gasneti_fatalerror("PMI_KVS_Get_my_name() failed");
    }
#endif

    return GASNET_OK;
}
Exemplo n.º 13
0
int
main(int argc,
     char *argv[])
{
    int initialized, rank, size;
    int i, max_name_len, max_key_len, max_val_len;
    char *name, *key, *val;

    if (PMI_SUCCESS != PMI_Initialized(&initialized)) {
        return 1;
    }

    if (0 == initialized) {
        if (PMI_SUCCESS != PMI_Init(&initialized)) {
            return 1;
        }
    }

    if (PMI_SUCCESS != PMI_Get_rank(&rank)) {
        return 1;
    }

    if (PMI_SUCCESS != PMI_Get_size(&size)) {
        return 1;
    }

    printf("Hello, World.  I am %d of %d\n", rank, size);

    if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&max_name_len)) {
        return 1;
    }
    name = (char*) malloc(max_name_len);
    if (NULL == name) return 1;

    if (PMI_SUCCESS != PMI_KVS_Get_key_length_max(&max_key_len)) {
        return 1;
    }
    key = (char*) malloc(max_key_len);
    if (NULL == key) return 1;

    if (PMI_SUCCESS != PMI_KVS_Get_value_length_max(&max_val_len)) {
        return 1;
    }
    val = (char*) malloc(max_val_len);
    if (NULL == val) return 1;

    if (PMI_SUCCESS != PMI_KVS_Get_my_name(name, max_name_len)) {
        return 1;
    }

    /* put my information */
    snprintf(key, max_key_len, "pmi_hello-%lu-test", (long unsigned) rank);
    snprintf(val, max_val_len, "%lu", (long unsigned) rank);

    if (PMI_SUCCESS != PMI_KVS_Put(name, key, val)) {
        return 1;
    }

    if (PMI_SUCCESS != PMI_KVS_Commit(name)) {
        return 1;
    }

    if (PMI_SUCCESS != PMI_Barrier()) {
        return 1;
    }

    /* verify everyone's information */
    for (i = 0 ; i < size ; ++i) {
        snprintf(key, max_key_len, "pmi_hello-%lu-test", (long unsigned) i);

        if (PMI_SUCCESS != PMI_KVS_Get(name, key, val, max_val_len)) {
            return 1;
        }

        if (i != strtol(val, NULL, 0)) {
            fprintf(stderr, "%d: Error: Expected %d, got %d\n", rank, i, (int) strtol(val, NULL, 0));
            return 1;
        }
    }

    PMI_Finalize();

    return 0;
}
Exemplo n.º 14
0
static int s1_init(void)
{
    PMI_BOOL initialized;
    int spawned;
    int rc, ret = OPAL_ERROR;
    int i;
    char *pmix_id, *tmp;
    uint32_t jobfam, stepid;
    opal_value_t kv;

    if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
        OPAL_PMI_ERROR(rc, "PMI_Initialized");
        return OPAL_ERROR;
    }

    if( PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned)) ) {
        OPAL_PMI_ERROR(rc, "PMI_Init");
        return OPAL_ERROR;
    }

    // Initialize space demands
    rc = PMI_KVS_Get_value_length_max(&pmix_vallen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_name_length_max(&pmix_kvslen_max);
    if (PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_key_length_max(&pmix_keylen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        goto err_exit;
    }

    // Initialize job environment information
    pmix_id = (char*)malloc(pmix_vallen_max);
    if( pmix_id == NULL ){
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    /* Get domain id */
    if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(pmix_id, pmix_vallen_max))) {
        free(pmix_id);
        goto err_exit;
    }
    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation. So we translate
     * this as the overall job number equating to our job family, and
     * the individual number equating to our local jobid
     */
    jobfam = strtoul(pmix_id, &tmp, 10);
    if (NULL == tmp) {
        /* hmmm - no '.', so let's just use zero */
        stepid = 0;
    } else {
        tmp++; /* step over the '.' */
        stepid = strtoul(tmp, NULL, 10);
    }
    /* now build the jobid */
    s1_jobid = (jobfam << 16) | stepid;
    free(pmix_id);

    /* get our rank */
    ret = PMI_Get_rank(&s1_rank);
    if( PMI_SUCCESS != ret ) {
        OPAL_PMI_ERROR(ret, "PMI_Get_rank");
        goto err_exit;
    }
    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    s1_pname.jid = s1_jobid;
    s1_pname.vid = s1_rank;
    opal_proc_set_name((opal_process_name_t*)&s1_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s1: assigned tmp name",
                        OPAL_NAME_PRINT(*(opal_process_name_t*)&s1_pname));

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if( pmix_kvs_name == NULL ){
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }

    rc = PMI_KVS_Get_my_name(pmix_kvs_name, pmix_kvslen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        goto err_exit;
    }

    /* get our local proc info to find our local rank */
    if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&s1_nlranks))) {
        OPAL_PMI_ERROR(rc, "PMI_Get_clique_size");
        return rc;
    }
    /* now get the specific ranks */
    s1_lranks = (int*)calloc(s1_nlranks, sizeof(int));
    if (NULL == s1_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }
    if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(s1_lranks, s1_nlranks))) {
        OPAL_PMI_ERROR(rc, "PMI_Get_clique_ranks");
        free(s1_lranks);
        return rc;
    }
    /* find ourselves */
    for (i=0; i < s1_nlranks; i++) {
        if (s1_rank == s1_lranks[i]) {
            s1_lrank = i;
            s1_nrank = i;
            break;
        }
    }

    /* get universe size */
    ret = PMI_Get_universe_size(&s1_usize);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
        goto err_exit;
    }
    /* push this into the dstore for subsequent fetches */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_DSTORE_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s1_usize;
    if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get job size */
    ret = PMI_Get_size(&s1_jsize);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_size");
        goto err_exit;
    }

    /* get appnum */
    ret = PMI_Get_appnum(&s1_appnum);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_appnum");
        goto err_exit;
    }

    return OPAL_SUCCESS;

 err_exit:
    PMI_Finalize();
    return ret;
}
Exemplo n.º 15
0
int MPIDU_bc_table_create(int rank, int size, int *nodemap, void *bc, int bc_len, int same_len,
                          int roots_only, void **bc_table, size_t ** bc_indices)
{
    int rc, mpi_errno = MPI_SUCCESS;
    int start, end, i;
    int key_max, val_max, name_max, out_len, rem;
    char *kvsname = NULL, *key = NULL, *val = NULL, *val_p;
    int local_rank = -1, local_leader = -1;
    size_t my_bc_len = bc_len;

    MPIR_NODEMAP_get_local_info(rank, size, nodemap, &local_size, &local_rank, &local_leader);

    rc = PMI_KVS_Get_name_length_max(&name_max);
    MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_name_length_max");
    rc = PMI_KVS_Get_key_length_max(&key_max);
    MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_key_length_max");
    rc = PMI_KVS_Get_value_length_max(&val_max);
    MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max");

    /* if business cards can be different length, use the max value length */
    if (!same_len)
        bc_len = val_max;
    mpi_errno = MPIDU_shm_seg_alloc(bc_len * size, (void **) &segment, MPL_MEM_ADDRESS);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    mpi_errno =
        MPIDU_shm_seg_commit(&memory, &barrier, local_size, local_rank, local_leader, rank,
                             MPL_MEM_ADDRESS);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    if (size == 1) {
        memcpy(segment, bc, my_bc_len);
        goto single;
    }

    kvsname = MPL_malloc(name_max, MPL_MEM_ADDRESS);
    MPIR_Assert(kvsname);
    rc = PMI_KVS_Get_my_name(kvsname, name_max);
    MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_my_name");

    val = MPL_malloc(val_max, MPL_MEM_ADDRESS);
    memset(val, 0, val_max);
    val_p = val;
    rem = val_max;
    rc = MPL_str_add_binary_arg(&val_p, &rem, "mpi", (char *) bc, my_bc_len);
    MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**buscard");
    MPIR_Assert(rem >= 0);

    key = MPL_malloc(key_max, MPL_MEM_ADDRESS);
    MPIR_Assert(key);
    if (!roots_only || rank == local_leader) {
        sprintf(key, "bc-%d", rank);
        rc = PMI_KVS_Put(kvsname, key, val);
        MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_put");
        rc = PMI_KVS_Commit(kvsname);
        MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_commit");
    }
    rc = PMI_Barrier();
    MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_barrier");

    if (!roots_only) {
        start = local_rank * (size / local_size);
        end = start + (size / local_size);
        if (local_rank == local_size - 1)
            end += size % local_size;
        for (i = start; i < end; i++) {
            sprintf(key, "bc-%d", i);
            rc = PMI_KVS_Get(kvsname, key, val, val_max);
            MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
            rc = MPL_str_get_binary_arg(val, "mpi", &segment[i * bc_len], bc_len, &out_len);
            MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost");
        }
    } else {
        int num_nodes, *node_roots;
        MPIR_NODEMAP_get_node_roots(nodemap, size, &node_roots, &num_nodes);

        start = local_rank * (num_nodes / local_size);
        end = start + (num_nodes / local_size);
        if (local_rank == local_size - 1)
            end += num_nodes % local_size;
        for (i = start; i < end; i++) {
            sprintf(key, "bc-%d", node_roots[i]);
            rc = PMI_KVS_Get(kvsname, key, val, val_max);
            MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
            rc = MPL_str_get_binary_arg(val, "mpi", &segment[i * bc_len], bc_len, &out_len);
            MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost");
        }
        MPL_free(node_roots);
    }
    mpi_errno = MPIDU_shm_barrier(barrier, local_size);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

  single:
    if (!same_len) {
        indices = MPL_malloc(size * sizeof(size_t), MPL_MEM_ADDRESS);
        MPIR_Assert(indices);
        for (i = 0; i < size; i++)
            indices[i] = bc_len * i;
        *bc_indices = indices;
    }

  fn_exit:
    MPL_free(kvsname);
    MPL_free(key);
    MPL_free(val);
    *bc_table = segment;

    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Exemplo n.º 16
0
int main(int argc, char **argv, char **envp)
{
    int pmi_rank = -1;
    int pmi_process_group_size = -1;
    int num_local_procs = 0;
    int *local_rank_ids = NULL;
    int spawned = PMI_FALSE;
    int rc = EXIT_SUCCESS;
    char *err = NULL;
    PMI_BOOL pmi_initialized = PMI_FALSE;
    int pmi_vallen_max, max_length;
    char *pmi_kvs_name;

    /* sanity */
    if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized) ||
        PMI_TRUE == pmi_initialized) {
        fprintf(stderr, "=== ERROR: PMI sanity failure\n");
        return EXIT_FAILURE;
    }
    if (PMI_SUCCESS != PMI_Init(&spawned)) {
        err = "PMI_Init failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_size(&pmi_process_group_size)) {
        err = "PMI_Get_size failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_rank(&pmi_rank)) {
        err = "PMI_Get_rank failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_clique_size(&num_local_procs)) {
        err = "PMI_Get_clique_size failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_KVS_Get_value_length_max(&pmi_vallen_max)) {
        err = "PMI_KVS_Get_value_length_max failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&max_length)) {
        err = "PMI_KVS_Get_name_length_max failure!";
        goto done;
    }
    pmi_kvs_name = (char*)malloc(max_length);
    if (NULL == pmi_kvs_name) {
        err = "malloc failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_KVS_Get_my_name(pmi_kvs_name,max_length)) {
        err = "PMI_KVS_Get_my_name failure!";
        goto done;
    }

    if (NULL == (local_rank_ids = calloc(num_local_procs, sizeof(int)))) {
        err = "out of resources";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_clique_ranks(local_rank_ids, num_local_procs)) {
        err = "PMI_Get_clique_size failure!";
        goto done;
    }
    /* lowest local rank will print env info and tag its output*/
    //    if (pmi_rank == local_rank_ids[0]) {
    //  for (; NULL != envp && NULL != *envp; ++envp) {
    //      printf("===[%d]: %s\n",  pmi_rank, *envp);
    //  }
    //}

done:
    if (PMI_TRUE == pmi_initialized) {
        if (PMI_SUCCESS != PMI_Finalize()) {
            err = "PMI_Finalize failure!";
        }
    }
    if (NULL != err) {
        fprintf(stderr, "=== ERROR [rank:%d] %s\n", pmi_rank, err);
        rc = EXIT_FAILURE;
    }
    return rc;
}
Exemplo n.º 17
0
main (int argc, char **argv)
{
	int i, j, rc;
	int nprocs, procid;
	int clique_size, *clique_ranks = NULL;
	char *jobid_ptr, *nprocs_ptr, *procid_ptr;
	int pmi_rank, pmi_size, kvs_name_len, key_len, val_len;
	PMI_BOOL initialized;
	char *key, *val, *kvs_name;
	struct timeval tv1, tv2;
	long delta_t;
	char tv_str[20];

	gettimeofday(&tv1, NULL);

	/* Get process count and our id from environment variables */
	jobid_ptr  = getenv("SLURM_JOB_ID");
	nprocs_ptr = getenv("SLURM_NPROCS");
	procid_ptr = getenv("SLURM_PROCID");
	if (jobid_ptr == NULL) {
		printf("WARNING: PMI test not run under SLURM\n");
		nprocs = 1;
		procid = 0;
	} else if ((nprocs_ptr == NULL) || (procid_ptr == NULL)) {
		printf("FAILURE: SLURM environment variables not set\n");
		exit(1);
	} else {
		nprocs = atoi(nprocs_ptr);
		procid = atoi(procid_ptr);
	}

	/* Validate process count and our id */
	if ((nprocs < 1) || (nprocs > 9999)) {
		printf("FAILURE: Invalid nprocs %s\n", nprocs_ptr);
		exit(1);
	}
	if ((procid < 0) || (procid > 9999)) {
		printf("FAILURE: Invalid procid %s\n", procid_ptr);
		exit(1);
	}

	/* Get process count and size from PMI and validate */
	if ((rc = PMI_Init(&i)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_Init: %d\n", rc);
		exit(1);
	}
	initialized = PMI_FALSE;
	if ((rc = PMI_Initialized(&initialized)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_Initialized: %d\n", rc);
		exit(1);
	}
	if (initialized != PMI_TRUE) {
		printf("FAILURE: PMI_Initialized returned false\n");
		exit(1);
	}
	if ((rc = PMI_Get_rank(&pmi_rank)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_Get_rank: %d\n", rc);
		exit(1);
	}
#if _DEBUG
	printf("PMI_Get_rank = %d\n", pmi_rank);
#endif
	if ((rc = PMI_Get_size(&pmi_size)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_Get_size: %d, task %d\n", rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_Get_size = %d\n", pmi_size);
#endif
	if (pmi_rank != procid) {
		printf("FAILURE: Rank(%d) != PROCID(%d)\n",
			pmi_rank, procid);
		exit(1);
	}
	if (pmi_size != nprocs) {
		printf("FAILURE: Size(%d) != NPROCS(%d), task %d\n",
			pmi_size, nprocs, pmi_rank);
		exit(1);
	}

	if ((rc = PMI_Get_clique_size(&clique_size)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_Get_clique_size: %d, task %d\n",
			rc, pmi_rank);
		exit(1);
	}
	clique_ranks = malloc(sizeof(int) * clique_size);
	if ((rc = PMI_Get_clique_ranks(clique_ranks, clique_size)) !=
	     PMI_SUCCESS) {
		printf("FAILURE: PMI_Get_clique_ranks: %d, task %d\n",
			rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	for (i=0; i<clique_size; i++)
		printf("PMI_Get_clique_ranks[%d]=%d\n", i, clique_ranks[i]);
#endif
	free(clique_ranks);

	if ((rc = PMI_KVS_Get_name_length_max(&kvs_name_len)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Get_name_length_max: %d, task %d\n",
			rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Get_name_length_max = %d\n", kvs_name_len);
#endif
	kvs_name = malloc(kvs_name_len);
	if ((rc = PMI_KVS_Get_my_name(kvs_name, kvs_name_len)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Get_my_name: %d, task %d\n", rc,
			pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Get_my_name = %s\n", kvs_name);
#endif
	if ((rc = PMI_KVS_Get_key_length_max(&key_len)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Get_key_length_max: %d, task %d\n",
			rc, pmi_rank);
		exit(1);
	}
	key = malloc(key_len);
	if ((rc = PMI_KVS_Get_value_length_max(&val_len)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Get_value_length_max: %d, task %d\n",
			rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Get_value_length_max = %d\n", val_len);
#endif
	val = malloc(val_len);

	/*  Build and set some key=val pairs */
	snprintf(key, key_len, "ATTR_1_%d", procid);
	snprintf(val, val_len, "A%d", procid+OFFSET_1);
	if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
			kvs_name, key, val, rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val);
#endif
	snprintf(key, key_len, "attr_2_%d", procid);
	snprintf(val, val_len, "B%d", procid+OFFSET_2);
	if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
			kvs_name, key, val, rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val);
#endif
	/* Sync KVS across all tasks */
	if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Commit completed\n");
#endif
	if ((rc = PMI_Barrier()) != PMI_SUCCESS) {
		printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_Barrier completed\n");
#endif
	/* Now lets get all keypairs and validate */
	for (i=0; i<pmi_size; i++) {
		snprintf(key, key_len, "ATTR_1_%d", i);
		if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len))
				!= PMI_SUCCESS) {
			printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n",
				key, rc, pmi_rank);
			exit(1);
		}
		if ((val[0] != 'A')
		||  ((atoi(&val[1])-OFFSET_1) != i)) {
			printf("FAILURE: Bad keypair %s=%s, task %d\n",
				key, val, pmi_rank);
			exit(1);
		}
#if _DEBUG
		if ((pmi_size <= 8) && (pmi_rank == 0))	/* limit output */
			printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val);
#endif
		snprintf(key, key_len, "attr_2_%d", i);
		if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len))
				!= PMI_SUCCESS) {
			printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n",
				key, rc, pmi_rank);
			exit(1);
		}
		if ((val[0] != 'B')
		||  ((atoi(&val[1])-OFFSET_2) != i)) {
			printf("FAILURE: Bad keypair %s=%s, task %d\n",
				key,val, pmi_rank);
			exit(1);
		}
#if _DEBUG
		if ((pmi_size <= 8) && (pmi_rank == 1))	/* limit output */
			printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val);
#endif
	}

	/* use iterator */
	if ((rc = PMI_KVS_Iter_first(kvs_name, key, key_len, val,
			val_len)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_iter_first: %d, task %d\n", rc,
			pmi_rank);
		exit(1);
	}
	for (i=0; ; i++) {
		if (key[0] == '\0') {
			if (i != (pmi_size * 2)) {
				printf("FAILURE: PMI_KVS_iter_next "
					"cycle count(%d, %d), task %d\n",
					i, pmi_size, pmi_rank);
			}
			break;
		}
#if _DEBUG
		if ((pmi_size <= 8) && (pmi_rank == 1)) {	/* limit output */
			printf("PMI_KVS_Iter_next(%s,%d): %s=%s\n", kvs_name,
				i, key, val);
		}
#endif
		if ((rc = PMI_KVS_Iter_next(kvs_name, key, key_len,
				val, val_len)) != PMI_SUCCESS) {
			printf("FAILURE: PMI_KVS_iter_next: %d, task %d\n",
				rc, pmi_rank);
			exit(1);
		}
	}

	/* Build some more key=val pairs */
	snprintf(key, key_len, "ATTR_3_%d", procid);
	snprintf(val, val_len, "C%d", procid+OFFSET_1);
	if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
			kvs_name, key, val, rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val);
#endif
	snprintf(key, key_len, "attr_4_%d", procid);
	snprintf(val, val_len, "D%d", procid+OFFSET_2);
	if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
			kvs_name, key, val, rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val);
#endif
	/* Sync KVS across all tasks */
	if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Commit completed\n");
#endif
	if ((rc = PMI_Barrier()) != PMI_SUCCESS) {
		printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_Barrier completed\n");
#endif
	/* Now lets get some keypairs and validate */
	for (i=0; i<pmi_size; i++) {
		snprintf(key, key_len, "ATTR_1_%d", i);
		if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len))
				!= PMI_SUCCESS) {
			printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n",
				key, rc, pmi_rank);
			exit(1);
		}
		if ((val[0] != 'A')
		||  ((atoi(&val[1])-OFFSET_1) != i)) {
			printf("FAILURE: Bad keypair %s=%s, task %d\n",
				key, val, pmi_rank);
			exit(1);
		}
#if _DEBUG
		if ((pmi_size <= 8) && (pmi_rank == 1))	/* limit output */
			printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val);
#endif
		snprintf(key, key_len, "attr_4_%d", i);
		if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len))
				!= PMI_SUCCESS) {
			printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n",
				key, rc, pmi_rank);
			exit(1);
		}
		if ((val[0] != 'D')
		||  ((atoi(&val[1])-OFFSET_2) != i)) {
			printf("FAILURE: Bad keypair %s=%s, task %d\n",
				key,val, pmi_rank);
			exit(1);
		}
#if _DEBUG
		if ((pmi_size <= 8) && (pmi_rank == 1))	/* limit output */
			printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val);
#endif
	}

	/* Replicate the very heavy load that MVAPICH2 puts on PMI
	 * This load exceeds that of MPICH2 by a very wide margin */
#if _DEBUG
	printf("Starting %d iterations each with %d PMI_KVS_Put and \n"
		"  one each PMI_KVS_Commit and KVS_Barrier\n",
		BARRIER_CNT, PUTS_PER_BARRIER);
	fflush(stdout);
#endif
	for (i=0; i<BARRIER_CNT; i++) {
		for (j=0; j<PUTS_PER_BARRIER; j++) {
			snprintf(key, key_len, "ATTR_%d_%d_%d", i, j, procid);
			snprintf(val, val_len, "C%d", procid+OFFSET_1);
			if ((rc = PMI_KVS_Put(kvs_name, key, val)) !=
					PMI_SUCCESS) {
				printf("FAILURE: PMI_KVS_Put(%s,%s,%s): "
					"%d, task %d\n",
					kvs_name, key, val, rc, pmi_rank);
				exit(1);
			}
		}
		if ((rc= PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) {
			printf("FAILURE: PMI_KVS_Commit: %d, task %d\n",
				rc, pmi_rank);
			exit(1);
		}
		if ((rc = PMI_Barrier()) != PMI_SUCCESS) {
			printf("FAILURE: PMI_Barrier: %d, task %d\n",
				rc, pmi_rank);
			exit(1);
		}
		/* Don't bother with PMI_KVS_Get as those are all local
		 * and do not put a real load on srun or the network */
	}
#if _DEBUG
	printf("Interative PMI calls successful\n");
#endif

	/* create new keyspace and test it */
	if ((rc = PMI_KVS_Create(kvs_name, kvs_name_len)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Create: %d, task %d\n", rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Create %s\n", kvs_name);
#endif
	if ((rc = PMI_KVS_Put(kvs_name, "KVS_KEY", "KVS_VAL")) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Put: %d, task %d\n", rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Put(%s,KVS_KEY,KVS_VAL)\n", kvs_name);
#endif
	if ((rc =  PMI_KVS_Get(kvs_name, "KVS_KEY", val, val_len)) !=
			PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Get(%s, KVS_KEY): %d, task %d\n",
			kvs_name, rc, pmi_rank);
		exit(1);
	}
#if _DEBUG
	printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, "KVS_KEY", val);
#endif
	if ((rc = PMI_KVS_Destroy(kvs_name)) != PMI_SUCCESS) {
		printf("FAILURE: PMI_KVS_Destroy(%s): %d, task %d\n",
			kvs_name, rc, pmi_rank);
		exit(1);
	}
	if ((rc = PMI_KVS_Get(kvs_name, "KVS_KEY", val, val_len)) !=
			PMI_ERR_INVALID_KVS) {
		printf("FAILURE: PMI_KVS_Get(%s, KVS_KEY): %d, task %d\n",
			kvs_name, rc, pmi_rank);
		exit(1);
	}
	if ((rc = PMI_Finalize()) != PMI_SUCCESS) {
		printf("FAILURE: PMI_Finalize: %d, task %d\n", rc, pmi_rank);
		exit(1);
	}

	if (_DEBUG || (pmi_rank < 4)) {
		gettimeofday(&tv2, NULL);
		delta_t  = (tv2.tv_sec  - tv1.tv_sec) * 1000000;
		delta_t +=  tv2.tv_usec - tv1.tv_usec;
		snprintf(tv_str, sizeof(tv_str), "usec=%ld", delta_t);
		printf("PMI test ran successfully, for task %d, %s\n",
			pmi_rank, tv_str);
	}
	if (pmi_rank == 0) {
		printf("NOTE: All failures reported, ");
		printf("but only first four successes reported\n");
	}
	exit(0);
}
Exemplo n.º 18
0
int MPIDI_CH3U_Check_for_failed_procs(void)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int len;
    char *kvsname;
    MPIR_Group *prev_failed_group, *new_failed_group;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    /* FIXME: Currently this only handles failed processes in
       comm_world.  We need to fix hydra to include the pgid along
       with the rank, then we need to create the failed group from
       something bigger than comm_world. */
    mpi_errno = MPIDI_PG_GetConnKVSname(&kvsname);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
#ifdef USE_PMI2_API
    {
        int vallen = 0;
        pmi_errno = PMI2_KVS_Get(kvsname, PMI2_ID_NULL, "PMI_dead_processes", MPIDI_failed_procs_string, PMI2_MAX_VALLEN, &vallen);
        MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
    }
#else
    pmi_errno = PMI_KVS_Get_value_length_max(&len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max");
    pmi_errno = PMI_KVS_Get(kvsname, "PMI_dead_processes", MPIDI_failed_procs_string, len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
#endif

    if (*MPIDI_failed_procs_string == '\0') {
        /* there are no failed processes */
        MPIDI_Failed_procs_group = MPIR_Group_empty;
        goto fn_exit;
    }

    MPL_DBG_MSG_S(MPIDI_CH3_DBG_OTHER, TYPICAL, "Received proc fail notification: %s", MPIDI_failed_procs_string);

    /* save reference to previous group so we can identify new failures */
    prev_failed_group = MPIDI_Failed_procs_group;

    /* Parse the list of failed processes */
    MPIDI_CH3U_Get_failed_group(-2, &MPIDI_Failed_procs_group);

    /* get group of newly failed processes */
    mpi_errno = MPIR_Group_difference_impl(MPIDI_Failed_procs_group, prev_failed_group, &new_failed_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    if (new_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIDI_CH3I_Comm_handle_failed_procs(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = terminate_failed_VCs(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_release(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    /* free prev group */
    if (prev_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIR_Group_release(prev_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
    return mpi_errno;

 fn_oom: /* out-of-memory handler for utarray operations */
    MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
 fn_fail:
    goto fn_exit;
}
Exemplo n.º 19
0
static int s1_init(void)
{
    PMI_BOOL initialized;
    int spawned;
    int rc, ret = OPAL_ERROR;
    int i, rank, lrank, nrank;
    char *pmix_id, tmp[64];
    opal_value_t kv;
    char *str;
    uint32_t ui32;
    opal_process_name_t ldr;
    char **localranks=NULL;

    if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
        OPAL_PMI_ERROR(rc, "PMI_Initialized");
        return OPAL_ERROR;
    }

    if (PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned))) {
        OPAL_PMI_ERROR(rc, "PMI_Init");
        return OPAL_ERROR;
    }

    // setup hash table
    opal_pmix_base_hash_init();

    // Initialize space demands
    rc = PMI_KVS_Get_value_length_max(&pmix_vallen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
        goto err_exit;
    }
    pmix_vallen_threshold = pmix_vallen_max * 3;
    pmix_vallen_threshold >>= 2;

    rc = PMI_KVS_Get_name_length_max(&pmix_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_key_length_max(&pmix_keylen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        goto err_exit;
    }

    // Initialize job environment information
    pmix_id = (char*)malloc(pmix_vallen_max);
    if (pmix_id == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    /* Get domain id */
    if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(pmix_id, pmix_vallen_max))) {
        free(pmix_id);
        goto err_exit;
    }

    /* get our rank */
    ret = PMI_Get_rank(&rank);
    if( PMI_SUCCESS != ret ) {
        OPAL_PMI_ERROR(ret, "PMI_Get_rank");
        free(pmix_id);
        goto err_exit;
    }

    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation. */
    s1_pname.jobid = strtoul(pmix_id, &str, 10);
    s1_pname.jobid = (s1_pname.jobid << 16) & 0xffff0000;
    if (NULL != str) {
        ui32 = strtoul(str, NULL, 10);
        s1_pname.jobid |= (ui32 & 0x0000ffff);
    }
    ldr.jobid = s1_pname.jobid;
    s1_pname.vpid = rank;
    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    opal_proc_set_name(&s1_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s1: assigned tmp name",
                        OPAL_NAME_PRINT(s1_pname));

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s1_pname.jobid;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save it */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_RANK);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = rank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if (pmix_kvs_name == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }

    rc = PMI_KVS_Get_my_name(pmix_kvs_name, pmix_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        goto err_exit;
    }

    /* get our local proc info to find our local rank */
    if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&nlranks))) {
        OPAL_PMI_ERROR(rc, "PMI_Get_clique_size");
        return rc;
    }
    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = nlranks;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    lrank = 0;
    nrank = 0;
    ldr.vpid = rank;
    if (0 < nlranks) {
        /* now get the specific ranks */
        lranks = (int*)calloc(nlranks, sizeof(int));
        if (NULL == lranks) {
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            OPAL_ERROR_LOG(rc);
            return rc;
        }
        if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(lranks, nlranks))) {
            OPAL_PMI_ERROR(rc, "PMI_Get_clique_ranks");
            free(lranks);
            return rc;
        }
        /* note the local ldr */
        ldr.vpid = lranks[0];
        /* save this */
        memset(tmp, 0, 64);
        for (i=0; i < nlranks; i++) {
            (void)snprintf(tmp, 64, "%d", lranks[i]);
            opal_argv_append_nosize(&localranks, tmp);
            if (rank == lranks[i]) {
                lrank = i;
                nrank = i;
            }
        }
        str = opal_argv_join(localranks, ',');
        opal_argv_free(localranks);
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
        kv.type = OPAL_STRING;
        kv.data.string = str;
        if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
            OPAL_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            goto err_exit;
        }
        OBJ_DESTRUCT(&kv);
    }

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = *(uint64_t*)&ldr;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = lrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = nrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get universe size */
    ret = PMI_Get_universe_size(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
        goto err_exit;
    }
    /* push this into the dstore for subsequent fetches */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get job size */
    ret = PMI_Get_size(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_size");
        goto err_exit;
    }
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get appnum */
    ret = PMI_Get_appnum(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_appnum");
        goto err_exit;
    }
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

   /* increment the init count */
    ++pmix_init_count;

    return OPAL_SUCCESS;

 err_exit:
    PMI_Finalize();
    return ret;
}
Exemplo n.º 20
0
static int mca_initialize_pmi_v1(void)
{
    PMI_BOOL initialized;
    int spawned;
    int rc, ret = OPAL_ERROR;

    if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
        OPAL_PMI_ERROR(rc, "PMI_Initialized");
        return OPAL_ERROR;
    }
    
    if( PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned)) ) {
        OPAL_PMI_ERROR(rc, "PMI_Init");
        return OPAL_ERROR;
    }

    // Initialize space demands
    rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_name_length_max(&pmi_kvslen_max);
    if (PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        goto err_exit;
    }

    // Initialize job environment information
    rc = PMI_Get_rank(&pmi_rank);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_rank");
        return OPAL_ERROR;
    }
    rc = PMI_Get_universe_size(&pmi_usize);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }

    rc = PMI_Get_size(&pmi_size);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_size");
        goto err_exit;
    }

    rc = PMI_Get_appnum(&pmi_appnum);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI_Get_appnum");
        goto err_exit;
    }

    pmi_kvs_name = (char*)malloc(pmi_kvslen_max);
    if( pmi_kvs_name == NULL ){
         ret = OPAL_ERR_OUT_OF_RESOURCE;
         goto err_exit;
    }

    rc = PMI_KVS_Get_my_name(pmi_kvs_name,pmi_kvslen_max);
    if( PMI_SUCCESS != rc ) {
        OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
        goto err_exit;
    }

    return OPAL_SUCCESS;

err_exit:
    PMI_Finalize();
    return ret;
}