示例#1
0
/*
 * Entry point of the Bruck-style allgather for one collective.
 *
 * coll     tracker for the collective; its bucket accumulates the
 *          contributions and coll->ndmns is the number of participating
 *          daemons
 * sendbuf  this daemon's own contribution
 *
 * Returns ORTE_SUCCESS once the first exchange round has been kicked off,
 * ORTE_ERROR when the number of daemons is an exact power of two (this
 * includes a single daemon), or the error code reported by
 * opal_dss.copy_payload if our own data could not be copied into the bucket.
 */
static int allgather(orte_grpcomm_coll_t *coll,
                     opal_buffer_t *sendbuf)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:bruck algo employed for %d processes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns));

    /* this implementation refuses daemon counts that are an exact power of
     * two: (n & (n - 1)) == 0 holds only for such non-zero n. Report the
     * condition and return an error to the caller.
     */
    if ((coll->ndmns != 0) && ((coll->ndmns & (coll->ndmns - 1)) == 0)) {
        OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
                     "%s grpcomm:coll:bruck number of participating daemons (%d) is power 2",
                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int) coll->ndmns ));
        return ORTE_ERROR;
    }

    /* start by seeding the collection with our own data. Do not start the
     * exchange if the copy failed - the bucket would be missing our
     * contribution.
     */
    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, sendbuf))) {
        OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
                     "%s grpcomm:coll:bruck failed to copy local payload (rc=%d)",
                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc));
        return rc;
    }

    /* Communication step:
     At every step i, rank r:
     - doubles the distance
     - sends message containing all data collected so far to rank r - distance
     - receives message containing all data collected so far from rank (r + distance)
     */
    /* kick off the first round with a distance of 1; later rounds are
     * presumably driven from the receive path - confirm against the rest of
     * the file
     */
    brks_allgather_send_dist(coll, 1);

    return ORTE_SUCCESS;
}
示例#2
0
/*
 * Receive callback for one round of the allgather exchange.
 *
 * The incoming buffer is consumed in this order: collective signature,
 * distance of the round, number of daemons reported by the sender, and then
 * the remaining payload, which is appended to the tracker's bucket.
 * Afterwards the distance is doubled; if it is still below the number of
 * daemons the next round is sent, otherwise the collective is finalized.
 *
 * status, sender, tag and cbdata belong to the callback signature and are
 * not read here.
 */
static void brks_allgather_recv_dist(int status, orte_process_name_t* sender,
                                     opal_buffer_t* buffer, orte_rml_tag_t tag,
                                     void* cbdata)
{
    orte_grpcomm_signature_t *signature;
    orte_grpcomm_coll_t *tracker;
    orte_vpid_t step, next_step;
    int32_t n_vals, n_remote;
    int ret;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:recdub received data",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* the signature identifies which collective this message belongs to */
    n_vals = 1;
    ret = opal_dss.unpack(buffer, &signature, &n_vals, ORTE_SIGNATURE);
    if (OPAL_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        return;
    }

    /* look up the tracker for this signature, asking for it to be created
     * when it does not exist yet
     */
    tracker = orte_grpcomm_base_get_tracker(signature, true);
    if (NULL == tracker) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OBJ_RELEASE(signature);
        return;
    }

    /* distance used by the sender for this round; n_vals is not reset and
     * is presumably still 1 after the successful unpack above - confirm
     */
    step = 1;
    ret = opal_dss.unpack(buffer, &step, &n_vals, OPAL_INT32);
    if (OPAL_SUCCESS != ret) {
        OBJ_RELEASE(signature);
        ORTE_ERROR_LOG(ret);
        goto abort_collective;
    }

    /* how many daemons the sender has already accounted for */
    n_remote = 0;
    ret = opal_dss.unpack(buffer, &n_remote, &n_vals, OPAL_INT32);
    if (OPAL_SUCCESS != ret) {
        OBJ_RELEASE(signature);
        ORTE_ERROR_LOG(ret);
        goto abort_collective;
    }
    tracker->nreported += n_remote;

    /* whatever is left in the buffer is contribution data */
    ret = opal_dss.copy_payload(&tracker->bucket, buffer);
    if (OPAL_SUCCESS != ret) {
        OBJ_RELEASE(signature);
        ORTE_ERROR_LOG(ret);
        goto abort_collective;
    }

    /* double the distance, then either continue with the next round or
     * complete the collective
     */
    step <<= 1;
    next_step = step;
    if (next_step >= tracker->ndmns) {
        brks_finalize_coll(tracker, ORTE_SUCCESS);
    } else {
        brks_allgather_send_dist(tracker, next_step);
    }

    OBJ_RELEASE(signature);
    return;

abort_collective:
    /* the signature was already released and the error logged at the
     * failure site; complete the collective with the error code
     */
    brks_finalize_coll(tracker, ret);
}
示例#3
0
/*
 * Advance the allgather exchange starting at round `distance`.
 *
 * Here `distance` is a round index (an exponent), not a rank offset: in
 * round d the current contents are sent to the daemon at rank
 * (my_rank - 2^d) modulo ndmns. Regular rounds run while
 * d < floor(log2(ndmns)). One extra "final" message is sent to the daemon
 * 2^floor(log2(ndmns)) ranks behind us, in the round computed below.
 *
 * After each send brks_allgather_process_buffered() is consulted: a
 * negative result makes this function return immediately, zero stops the
 * round loop, and any other value moves on to the next round.
 * (Presumably zero means the message for that round has not arrived yet -
 * confirm against that helper.)
 *
 * The collective is finalized once nreported has reached ndmns.
 */
static void brks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance) {
    /* number of regular rounds: floor(log2(ndmns)) */
    uint32_t full_rounds = (uint32_t) log2 (coll->ndmns);
    uint32_t final_send_round;
    orte_process_name_t target;
    orte_vpid_t idx;
    int status;

    /* round in which the final message is sent: the first round in which
     * we hold data from at least (ndmns - 2^full_rounds) daemons. Sending
     * only once all regular rounds are done would also work but could ship
     * more data than needed.
     * NOTE(review): for an ndmns that is an exact power of two the argument
     * of log2 is 0 and the conversion to uint32_t is not well defined;
     * presumably callers never get here with such a count - confirm.
     */
    final_send_round = (uint32_t) ceil (log2 ((double) (coll->ndmns - (1 << full_rounds))));

    target.jobid = ORTE_PROC_MY_NAME->jobid;

    for ( ; distance < full_rounds; ++distance) {
        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
             "%s grpcomm:coll:brks process distance %u)",
              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));

        /* regular send for this round, to the daemon 2^distance ranks behind us */
        idx = (coll->ndmns + coll->my_rank - (1 << distance)) % coll->ndmns;
        target.vpid = coll->dmns[idx];
        brks_allgather_send_dist(coll, &target, distance);

        if (final_send_round == distance) {
            /* enough data collected: send the final message as well */
            idx = (coll->ndmns + coll->my_rank - (1 << full_rounds)) % coll->ndmns;
            target.vpid = coll->dmns[idx];
            brks_allgather_send_dist(coll, &target, full_rounds);
        }

        status = brks_allgather_process_buffered (coll, distance);
        if (status < 0) {
            return;
        }
        if (0 == status) {
            /* leave the loop without advancing to the next round */
            break;
        }
    }

    if (full_rounds == distance) {
        if (final_send_round == distance) {
            /* the final message has not been sent inside the loop, do it now */
            idx = (coll->ndmns + coll->my_rank - (1 << full_rounds)) % coll->ndmns;
            target.vpid = coll->dmns[idx];
            brks_allgather_send_dist(coll, &target, full_rounds);
        }

        /* the final message may already be waiting for us */
        status = brks_allgather_process_buffered (coll, distance);
        if (status < 0) {
            return;
        }
    }

    OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                        "%s grpcomm:coll:brks reported %lu process from %lu",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported,
                        (unsigned long)coll->ndmns));

    /* complete the collective once everybody is accounted for; the count
     * may exceed ndmns because data from more daemons than expected can
     * arrive
     */
    if (coll->nreported >= coll->ndmns){
        brks_finalize_coll(coll, ORTE_SUCCESS);
    }
}