int orte_grpcomm_base_peer_modex(bool modex_db)
{
    opal_buffer_t buf, rbuf;
    int rc = ORTE_SUCCESS;
    bool modex_reqd;
    
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                         "%s grpcomm:base:peer:modex: performing modex",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* setup the buffer that will actually be sent */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    OBJ_CONSTRUCT(&rbuf, opal_buffer_t);
    
    /* put our process name in the buffer so it can be unpacked later */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    /* pack the entries we have received */
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(&buf, &modex_reqd))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                         "%s grpcomm:base:peer:modex: executing allgather",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* exchange the buffer with my peers */
    if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather(&buf, &rbuf))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                         "%s grpcomm:base:peer:modex: processing modex info",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_modex_unpack(&rbuf, modex_db)) ) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
cleanup:
    OBJ_DESTRUCT(&buf);
    OBJ_DESTRUCT(&rbuf);
    return rc;
}
/***   MODEX SECTION ***/
static int modex(opal_list_t *procs)
{
    opal_buffer_t buf, rbuf;
    orte_std_cntr_t i, num_procs;
    orte_std_cntr_t cnt;
    orte_process_name_t proc_name;
    int rc;
    int32_t arch;
    bool modex_reqd = false;
    
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                         "%s grpcomm:basic: modex entered",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* setup the buffer that will actually be sent */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    OBJ_CONSTRUCT(&rbuf, opal_buffer_t);
    
    /* put our process name in the buffer so it can be unpacked later */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    /* decide if we need to add the architecture to the modex. Check
     * first to see if hetero is enabled - if not, then we clearly
     * don't need to exchange arch's as they are all identical
     */
    if (OMPI_ENABLE_HETEROGENEOUS_SUPPORT) {
        /* Case 1: If different apps in this job were built differently - e.g., some
         * are built 32-bit while others are built 64-bit - then we need to modex
         * regardless of any other consideration. The user is reqd to tell us via a
         * cmd line option if this situation exists, which will result in an mca param
         * being set for us, so all we need to do is check for the global boolean
         * that corresponds to that param
         *
         * Case 2: the nodes are hetero, but the app binaries were built
         * the same - i.e., either they are both 32-bit, or they are both 64-bit, but
         * no mixing of the two. In this case, we include the info in the modex
         */
        if (orte_hetero_apps || !orte_homogeneous_nodes) {
            OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                                 "%s grpcomm:basic: modex is required",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            
            modex_reqd = true;
        }
    }
    
    if (modex_reqd) {
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.arch, 1, OPAL_UINT32))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }        
    }

    /* pack the entries we have received */
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(&buf, &modex_reqd))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    if (modex_reqd) {
        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                             "%s grpcomm:basic:modex: executing allgather",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        
        /* exchange the buffer with the list of peers (if provided) or all my peers */
        if (NULL == procs) {
            if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather(&buf, &rbuf))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        } else {
            if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather_list(procs, &buf, &rbuf))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        }
        
        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                             "%s grpcomm:basic:modex: processing modex info",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        
        /* process the results */
        /* extract the number of procs that put data in the buffer */
        cnt=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &num_procs, &cnt, ORTE_STD_CNTR))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                             "%s grpcomm:basic:modex: received %ld data bytes from %ld procs",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (long)(rbuf.pack_ptr - rbuf.unpack_ptr), (long)num_procs));
        
        /* if the buffer doesn't have any more data, ignore it */
        if (0 >= (rbuf.pack_ptr - rbuf.unpack_ptr)) {
            goto cleanup;
        }
        
        /* otherwise, process it */
        for (i=0; i < num_procs; i++) {
            /* unpack the process name */
            cnt=1;
            if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &proc_name, &cnt, ORTE_NAME))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            
            if (OMPI_ENABLE_HETEROGENEOUS_SUPPORT) {
                /* are the nodes hetero? */
                if (orte_homogeneous_nodes) {
                    goto unpack_entries;
                }
                /* unpack its architecture */
                cnt=1;
                if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &arch, &cnt, OPAL_UINT32))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                /* update the arch in the ESS */
                if (ORTE_SUCCESS != (rc = orte_ess.update_arch(&proc_name, arch))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
            }
            
        unpack_entries:
            /* update the modex database */
            if (ORTE_SUCCESS != (rc = orte_grpcomm_base_update_modex_entries(&proc_name, &rbuf))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        }
    }
    
cleanup:
    OBJ_DESTRUCT(&buf);
    OBJ_DESTRUCT(&rbuf);
    
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                         "%s grpcomm:basic: modex completed",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    return rc;
}
Exemple #3
0
/***************  MODEX SECTION **************/
void orte_grpcomm_base_modex(int fd, short args, void *cbdata)
{
    orte_grpcomm_caddy_t *caddy = (orte_grpcomm_caddy_t*)cbdata;
    orte_grpcomm_collective_t *modex = caddy->op;
    int rc;
    orte_namelist_t *nm;
    opal_list_item_t *item;
    bool found;
    orte_grpcomm_collective_t *cptr;
    opal_scope_t scope;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:modex: performing modex",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* if we are a singleton and routing isn't enabled,
     * then we have nobody with which to communicate, so
     * we can just declare success
     */
    if ((orte_process_info.proc_type & ORTE_PROC_SINGLETON) &&
        !orte_routing_is_enabled) {
        if (NULL != modex->cbfunc) {
            OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                                 "%s CALLING MODEX RELEASE",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            modex->cbfunc(NULL, modex->cbdata);
        }
        /* flag the collective as complete */
        modex->active = false;
        return;
    }

    if (0 == opal_list_get_size(&modex->participants)) {
        /* record the collective */
        modex->next_cbdata = modex;
        opal_list_append(&orte_grpcomm_base.active_colls, &modex->super);

        /* put our process name in the buffer so it can be unpacked later */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* this is between our peers, so only collect info marked for them */
        scope = OPAL_SCOPE_PEER;

        /* add a wildcard name to the participants so the daemon knows
         * the jobid that is involved in this collective
         */
        nm = OBJ_NEW(orte_namelist_t);
        nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
        nm->name.vpid = ORTE_VPID_WILDCARD;
        opal_list_append(&modex->participants, &nm->super);
        modex->next_cb = orte_grpcomm_base_store_modex;
    } else {
        /* see if the collective is already present - a race condition
         * exists where other participants may have already sent us their
         * contribution. This would place the collective on the global
         * array, but leave it marked as "inactive" until we call
         * modex with the list of participants
         */
        found = false;
        for (item = opal_list_get_first(&orte_grpcomm_base.active_colls);
             item != opal_list_get_end(&orte_grpcomm_base.active_colls);
             item = opal_list_get_next(item)) {
            cptr = (orte_grpcomm_collective_t*)item;
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s CHECKING COLL id %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 cptr->id));
            
            if (modex->id == cptr->id) {
                found = true;
                /* remove the old entry - we will replace it
                 * with the modex one
                 */
                opal_list_remove_item(&orte_grpcomm_base.active_colls, item);
                break;
            }
        }
        if (found) {
            /* since it already exists, the list of
             * targets contains the list of procs
             * that have already sent us their info. Cycle
             * thru the targets and move those entries to
             * the modex object
             */
            while (NULL != (item = opal_list_remove_first(&cptr->targets))) {
                opal_list_append(&modex->targets, item);
            }
            /* copy the previously-saved data across */
            opal_dss.copy_payload(&modex->local_bucket, &cptr->local_bucket);
            /* cleanup */
            OBJ_RELEASE(cptr);
        }
        /* now add the modex to the global list of active collectives */
        modex->next_cb = orte_grpcomm_base_store_modex;
        modex->next_cbdata = modex;
        opal_list_append(&orte_grpcomm_base.active_colls, &modex->super);

        /* pack the collective id */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &modex->id, 1, ORTE_GRPCOMM_COLL_ID_T))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* pack our name */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* this is not amongst our peers, but rather between a select
         * group of processes - e.g., during a connect/accept operation.
         * Thus, we need to include the non-peer info as well as our peers
         * since we can't tell what the other participants may already have
         */
        scope = OPAL_SCOPE_GLOBAL;

    }

    /* pack the requested entries */
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(&modex->buffer, scope))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:full:modex: executing allgather",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* execute the allgather */
    if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather(modex))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:modex: modex posted",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    return;

 cleanup:
    return;
}
/***************  MODEX SECTION **************/
int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
{
    opal_buffer_t buf, rbuf;
    int32_t i, n, num_procs;
    orte_std_cntr_t cnt, j, num_recvd_entries;
    orte_process_name_t proc_name;
    int rc=ORTE_SUCCESS;
    bool modex_reqd;
    orte_nid_t *nid;
    orte_local_rank_t local_rank;
    orte_node_rank_t node_rank;
    orte_jmap_t *jmap;
    orte_pmap_t *pmap;
    orte_vpid_t daemon;
    char *hostname;
    
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                         "%s grpcomm:base:full:modex: performing modex",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* setup the buffer that will actually be sent */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    OBJ_CONSTRUCT(&rbuf, opal_buffer_t);
    
    /* put our process name in the buffer so it can be unpacked later */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    /* pack our hostname */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    /* pack our daemon's vpid */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &ORTE_PROC_MY_DAEMON->vpid, 1, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    /* pack our node rank */
    node_rank = orte_ess.get_node_rank(ORTE_PROC_MY_NAME);
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node_rank, 1, ORTE_NODE_RANK))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    /* pack our local rank */
    local_rank = orte_ess.get_local_rank(ORTE_PROC_MY_NAME);
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &local_rank, 1, ORTE_LOCAL_RANK))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    /* pack the entries we have received */
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(&buf, &modex_reqd))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                         "%s grpcomm:base:full:modex: executing allgather",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* exchange the buffer with the list of peers */
    if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather_list(procs, &buf, &rbuf))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                         "%s grpcomm:base:full:modex: processing modex info",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    


    /* extract the number of procs that put data in the buffer */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &num_procs, &cnt, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                         "%s grpcomm:base:full:modex: received %ld data bytes from %d procs",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (long)(rbuf.pack_ptr - rbuf.unpack_ptr), num_procs));
    
    /* if the buffer doesn't have any more data, ignore it */
    if (0 >= (rbuf.pack_ptr - rbuf.unpack_ptr)) {
        goto cleanup;
    }
    
    /* otherwise, process it */
    for (i=0; i < num_procs; i++) {
        /* unpack the process name */
        cnt=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &proc_name, &cnt, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        
        /* unpack the hostname */
        cnt = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &hostname, &cnt, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        
        /* unpack the daemon vpid */
        cnt = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &daemon, &cnt, ORTE_VPID))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        
        /* unpack the node rank */
        cnt = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &node_rank, &cnt, ORTE_NODE_RANK))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        
        /* unpack the local rank */
        cnt = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &local_rank, &cnt, ORTE_LOCAL_RANK))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        
        /* UPDATE THE NIDMAP/PIDMAP TO SUPPORT DYNAMIC OPERATIONS */
        
        /* find this proc's node in the nidmap */
        nid = NULL;
        for (n=0; NULL != (nid = (orte_nid_t *) opal_pointer_array_get_item(&orte_nidmap, n)); n++) {
            if (0 == strcmp(hostname, nid->name)) {
                break;
            }
        }
        if (NULL == nid) {
            /* node wasn't found - let's add it */
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                                 "%s grpcomm:base:full:modex no nidmap entry for node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostname));
            nid = OBJ_NEW(orte_nid_t);
            nid->name = strdup(hostname);
            nid->daemon = daemon;
            nid->index = opal_pointer_array_add(&orte_nidmap, nid);
        }
        
        /* see if we have this job in a jobmap */
        if (NULL == (jmap = orte_util_lookup_jmap(proc_name.jobid))) {
            /* proc wasn't found - let's add it */
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                                 "%s grpcomm:base:full:modex no jobmap entry for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(proc_name.jobid)));
            jmap = OBJ_NEW(orte_jmap_t);
            jmap->job = proc_name.jobid;
            /* unfortunately, job objects cannot be stored
             * by index number as the jobid is a constructed
             * value. So we have to just add it to the end
             * of the array
             */
            opal_pointer_array_add(&orte_jobmap, jmap);
            jmap->num_procs = 1;
            /* have to add the pidmap entry too, but this
             * can be done at the specific site corresponding
             * to the proc's vpid
             */
            pmap = OBJ_NEW(orte_pmap_t);
            pmap->node = nid->index;
            pmap->local_rank = local_rank;
            pmap->node_rank = node_rank;
            opal_pointer_array_set_item(&jmap->pmap, proc_name.vpid, pmap);
        } else {
            /* see if we have this proc in a pidmap */
            if (NULL == orte_util_lookup_pmap(&proc_name)) {
                /* proc wasn't found - let's add it */
                OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                                     "%s grpcomm:base:full:modex no pidmap entry for proc %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_NAME_PRINT(&proc_name)));
                pmap = OBJ_NEW(orte_pmap_t);
                pmap->node = nid->index;
                pmap->local_rank = local_rank;
                pmap->node_rank = node_rank;
                /* this can be done at the specific site corresponding
                 * to the proc's vpid
                 */
                opal_pointer_array_set_item(&jmap->pmap, proc_name.vpid, pmap);
                /* account for the proc entry in the jmap */
                jmap->num_procs++;
            }
        }
        
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                             "%s grpcomm:base:full:modex: adding modex entry for proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&proc_name)));
        
        /* UPDATE THE MODEX INFO FOR THIS PROC */
        
        if (modex_db) {
            /* update the modex database */
            if (ORTE_SUCCESS != (rc = orte_grpcomm_base_update_modex_entries(&proc_name, &rbuf))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }            
        } else {
            /* unpack the number of entries for this proc */
            cnt=1;
            if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &num_recvd_entries, &cnt, ORTE_STD_CNTR))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                                 "%s grpcomm:base:full:modex adding %d entries for proc %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_recvd_entries,
                                 ORTE_NAME_PRINT(&proc_name)));
            
            /*
             * Extract the attribute names and values
             */
            for (j = 0; j < num_recvd_entries; j++) {
                size_t num_bytes;
                orte_attr_t *attr;
                
                attr = OBJ_NEW(orte_attr_t);
                cnt = 1;
                if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &(attr->name), &cnt, OPAL_STRING))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                
                cnt = 1;
                if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &num_bytes, &cnt, OPAL_SIZE))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                attr->size = num_bytes;
                
                if (num_bytes != 0) {
                    if (NULL == (attr->bytes = (uint8_t *) malloc(num_bytes))) {
                        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                        rc = ORTE_ERR_OUT_OF_RESOURCE;
                        goto cleanup;
                    }
                    cnt = (orte_std_cntr_t) num_bytes;
                    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, attr->bytes, &cnt, OPAL_BYTE))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                }
                
                /* add this to the node's attribute list */
                opal_list_append(&nid->attrs, &attr->super);
            }
        }
    }
    
cleanup:
    OBJ_DESTRUCT(&buf);
    OBJ_DESTRUCT(&rbuf);
    return rc;
}
static int modex(opal_list_t *procs)
{
    int rc;
    
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                         "%s grpcomm:bad: modex entered",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    if (NULL == procs) {
        /* The modex will be realized in the background by the daemons. The processes will
         * only be informed when all data has been collected from all processes. The get_attr
         * will realize the blocking, it will not return until the data has been rteceived.
         */
        opal_buffer_t *buf, *rbuf;
        orte_grpcomm_coll_t coll_type = ORTE_GRPCOMM_ALLGATHER;
        bool modex_reqd = true;
        
        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                             "%s grpcomm:bad:peer:modex: performing modex",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        
        /* setup the buffer that will actually be sent */
        buf  = OBJ_NEW(opal_buffer_t);
        rbuf = OBJ_NEW(opal_buffer_t);
        
        /* tell the daemon we are doing an allgather */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &coll_type, 1, ORTE_GRPCOMM_COLL_T))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }    

        /* put our process name in the buffer so it can be unpacked later */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    
        /* pack the entries we have received */
        if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(buf, &modex_reqd))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    
        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                             "%s grpcomm:bad:peer:modex: executing non-blocking allgather",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

        /* send to local daemon */
        if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_DAEMON, buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                             "%s grpcomm:bad allgather buffer sent",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
        /* now receive the final result. Be sure to do this in
         * a manner that allows us to return without being in a recv!
         */
        allgather_complete = false;
        rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER,
                                     ORTE_RML_NON_PERSISTENT, allgather_recv_modex, (void*)rbuf);
        if (rc != ORTE_SUCCESS) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        rbuf = NULL;  /* make sure we don't release it yet */

        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                             "%s grpcomm:bad: modex posted",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 cleanup:
        if( NULL != buf ) {
            OBJ_RELEASE(buf);
        }
        if( NULL != rbuf ) {
            OBJ_RELEASE(rbuf);
        }

        return rc;
    } else {
        if (ORTE_SUCCESS != (rc = orte_grpcomm_base_full_modex(procs, true))) {
            ORTE_ERROR_LOG(rc);
        }        
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                         "%s grpcomm:bad: modex completed",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    return rc;
}
/***************  MODEX SECTION **************/
void orte_grpcomm_base_modex(int fd, short args, void *cbdata)
{
    orte_grpcomm_caddy_t *caddy = (orte_grpcomm_caddy_t*)cbdata;
    orte_grpcomm_collective_t *modex = caddy->op;
    int rc;
    orte_namelist_t *nm;
    opal_list_item_t *item;
    bool found;
    orte_grpcomm_collective_t *cptr;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:modex: performing modex",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    if (0 == opal_list_get_size(&modex->participants)) {
        /* record the collective */
        modex->next_cbdata = modex;
        opal_list_append(&orte_grpcomm_base.active_colls, &modex->super);

        /* put our process name in the buffer so it can be unpacked later */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* add a wildcard name to the participants so the daemon knows
         * the jobid that is involved in this collective
         */
        nm = OBJ_NEW(orte_namelist_t);
        nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
        nm->name.vpid = ORTE_VPID_WILDCARD;
        opal_list_append(&modex->participants, &nm->super);
        modex->next_cb = orte_grpcomm_base_store_modex;
    } else {
        /* see if the collective is already present - a race condition
         * exists where other participants may have already sent us their
         * contribution. This would place the collective on the global
         * array, but leave it marked as "inactive" until we call
         * modex with the list of participants
         */
        found = false;
        for (item = opal_list_get_first(&orte_grpcomm_base.active_colls);
             item != opal_list_get_end(&orte_grpcomm_base.active_colls);
             item = opal_list_get_next(item)) {
            cptr = (orte_grpcomm_collective_t*)item;
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s CHECKING COLL id %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 cptr->id));
            
            if (modex->id == cptr->id) {
                found = true;
                /* remove the old entry - we will replace it
                 * with the modex one
                 */
                opal_list_remove_item(&orte_grpcomm_base.active_colls, item);
                break;
            }
        }
        if (found) {
            /* since it already exists, the list of
             * targets contains the list of procs
             * that have already sent us their info. Cycle
             * thru the targets and move those entries to
             * the modex object
             */
            while (NULL != (item = opal_list_remove_first(&cptr->targets))) {
                opal_list_append(&modex->targets, item);
            }
            /* copy the previously-saved data across */
            opal_dss.copy_payload(&modex->local_bucket, &cptr->local_bucket);
            /* cleanup */
            OBJ_RELEASE(cptr);
        }
        /* now add the modex to the global list of active collectives */
        modex->next_cb = orte_grpcomm_base_store_peer_modex;
        modex->next_cbdata = modex;
        opal_list_append(&orte_grpcomm_base.active_colls, &modex->super);

        /* this is not amongst our peers, but rather between a select
         * group of processes - e.g., during a connect/accept operation.
         * Thus, this requires we send additional info
         */

        /* pack the collective id */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &modex->id, 1, ORTE_GRPCOMM_COLL_ID_T))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* pack our name */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* pack our hostname */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &orte_process_info.nodename, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    
        /* pack our daemon's vpid */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &ORTE_PROC_MY_DAEMON->vpid, 1, ORTE_VPID))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    
        /* pack our node rank */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &orte_process_info.my_node_rank, 1, ORTE_NODE_RANK))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    
        /* pack our local rank */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &orte_process_info.my_local_rank, 1, ORTE_LOCAL_RANK))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    
#if OPAL_HAVE_HWLOC
        /* pack our binding info so other procs can determine our locality */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &orte_process_info.cpuset, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
#endif
    }

    /* pack the entries we have received */
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(&modex->buffer))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:full:modex: executing allgather",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* execute the allgather */
    if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather(modex))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:modex: modex posted",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    return;

 cleanup:
    return;
}