Example #1
static int bad_barrier(orte_grpcomm_collective_t *coll)
{
    int rc;
    opal_buffer_t *buf;
    orte_namelist_t *nm;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:bad entering barrier",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* if I am alone, just execute the callback */
    if (1 == orte_process_info.num_procs) {
        coll->active = false;
        if (NULL != coll->cbfunc) {
            coll->cbfunc(NULL, coll->cbdata);
        }
        return ORTE_SUCCESS;
    }
    
    /* mark the collective as active */
    coll->active = true;

    /* setup the collective */
    opal_list_append(&orte_grpcomm_base.active_colls, &coll->super);

    if (0 == opal_list_get_size(&coll->participants)) {
        /* add a wildcard name to the participants so the daemon knows
         * that everyone in my job must participate
         */
        nm = OBJ_NEW(orte_namelist_t);
        nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
        nm->name.vpid = ORTE_VPID_WILDCARD;
        opal_list_append(&coll->participants, &nm->super);
    }

    /* pack the collective - no data should be involved, but we need
     * to ensure we get the header info correct so it can be
     * unpacked without error
     */
    buf = OBJ_NEW(opal_buffer_t);
    orte_grpcomm_base_pack_collective(buf, coll, ORTE_GRPCOMM_INTERNAL_STG_APP);

    /* send the buffer to my daemon */
    if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buf, ORTE_RML_TAG_COLLECTIVE,
                                          0, orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        opal_list_remove_item(&orte_grpcomm_base.active_colls, &coll->super);
        return rc;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                         "%s grpcomm:bad barrier underway",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    return ORTE_SUCCESS;
}
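
For orientation, here is a minimal usage sketch showing how a caller might drive this barrier. The collective object, the orte_grpcomm.barrier entry point, and the ORTE_WAIT_FOR_COMPLETION spin macro come from the same era of the ORTE code base; the helper names run_barrier and barrier_done, and the id allocator noted below, are assumptions for illustration, not part of the example above.

/* hypothetical callback - invoked when the barrier release arrives */
static void barrier_done(opal_buffer_t *buf, void *cbdata)
{
    opal_output(0, "barrier released");
}

static int run_barrier(void)
{
    int rc;
    orte_grpcomm_collective_t *coll;

    /* create the collective tracker; leaving participants empty
     * lets the module substitute the job-wide wildcard seen above
     */
    coll = OBJ_NEW(orte_grpcomm_collective_t);
    coll->id = orte_grpcomm_base_get_coll_id();  /* assumed id allocator */
    coll->cbfunc = barrier_done;
    coll->cbdata = NULL;
    if (ORTE_SUCCESS != (rc = orte_grpcomm.barrier(coll))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(coll);
        return rc;
    }
    /* spin the event loop until the collective deactivates itself */
    ORTE_WAIT_FOR_COMPLETION(coll->active);
    OBJ_RELEASE(coll);
    return ORTE_SUCCESS;
}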
Example #2
static int bad_allgather(orte_grpcomm_collective_t *gather)
{
    int rc;
    opal_buffer_t *buf;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:bad entering allgather",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* if I am alone and nobody else is participating, then
     * there is nothing to do
     */
    if (1 == orte_process_info.num_procs &&
        0 == opal_list_get_size(&gather->participants)) {
        gather->active = false;
        if (NULL != gather->cbfunc) {
            gather->cbfunc(&gather->buffer, gather->cbdata);
        }
        return ORTE_SUCCESS;
    }
    
    /* mark the collective as active */
    gather->active = true;

    /* if this is an original request, then record the collective */
    if (NULL == gather->next_cb) {
        opal_list_append(&orte_grpcomm_base.active_colls, &gather->super);
    }

    /* start the allgather op by sending the data to our daemon - the
     * user will have put the data in the "buffer" field
     */
    buf = OBJ_NEW(opal_buffer_t);
    orte_grpcomm_base_pack_collective(buf, gather, ORTE_GRPCOMM_INTERNAL_STG_APP);
    /* send to our daemon */
    if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buf,
                                          ORTE_RML_TAG_COLLECTIVE, 0,
                                          orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        opal_list_remove_item(&orte_grpcomm_base.active_colls, &gather->super);
        return rc;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:bad allgather underway",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    return ORTE_SUCCESS;
}
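
Similarly, a sketch of driving the allgather: per the comment in the function above, the caller packs its contribution into the collective's buffer field before invoking the operation. The names run_allgather, allgather_done, and my_rank_info are hypothetical; the opal_dss.pack call and wait macro follow the same-era APIs.

/* hypothetical callback - buf holds the concatenated contributions */
static void allgather_done(opal_buffer_t *buf, void *cbdata)
{
    opal_output(0, "allgather complete");
}

static int run_allgather(void)
{
    int rc;
    int32_t my_rank_info = 42;  /* hypothetical payload */
    orte_grpcomm_collective_t *gather;

    gather = OBJ_NEW(orte_grpcomm_collective_t);
    gather->id = orte_grpcomm_base_get_coll_id();  /* assumed id allocator */
    gather->cbfunc = allgather_done;
    /* the user puts the data in the "buffer" field, as noted above */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&gather->buffer, &my_rank_info,
                                            1, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(gather);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather(gather))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(gather);
        return rc;
    }
    ORTE_WAIT_FOR_COMPLETION(gather->active);
    OBJ_RELEASE(gather);
    return ORTE_SUCCESS;
}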
static void daemon_coll_recv(int status, orte_process_name_t* sender,
                             opal_buffer_t* data, orte_rml_tag_t tag,
                             void* cbdata)
{
    orte_job_t *jdata;
    orte_std_cntr_t n;
    opal_list_item_t *item;
    orte_vpid_t np;
    int rc;
    orte_grpcomm_collective_t *coll;
    orte_namelist_t *nm;
    orte_grpcomm_coll_id_t id;
    bool do_progress;
    opal_buffer_t *relay;
    orte_jobid_t jobid;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:daemon_coll: daemon collective recvd from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));
    
    /* get the collective id */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &id, &n, ORTE_GRPCOMM_COLL_ID_T))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:daemon_coll: WORKING COLLECTIVE %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id));

    /* setup the collective for this id - if it's already present,
     * then this will just return the existing structure
     */
    coll = orte_grpcomm_base_setup_collective(id);

    /* record that we received a bucket */
    coll->num_peer_buckets++;

    /* unpack the jobid */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobid, &n, ORTE_JOBID))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /*  find this job */
    do_progress = true;
    if (NULL == (jdata = orte_get_job_data_object(jobid))) {
        /* if we can't find it, then we haven't processed the
         * launch msg for this job yet - can't happen with
         * our own local procs, but this could involve a proc
         * running remotely that we don't know about yet
         */
        do_progress = false;
    }
    if (do_progress && 0 == jdata->num_local_procs) {
        coll->locally_complete = true;
    }

    /* unpack the number of contributors involved in the incoming data */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &np, &n, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        return;
    }
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:daemon_coll: NUM CONTRIBS: %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_VPID_PRINT(np)));
    /* add it to the count of globally recvd contributions */
    coll->num_global_recvd += np;

    /* transfer the data */
    opal_dss.copy_payload(&coll->buffer, data);

    /* are we done? */
    if (!do_progress || !coll->locally_complete) {
        /* can't continue - missing at least one launch msg
         * or not locally complete
         */
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:base:daemon_coll: CANNOT PROGRESS",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return;
    }

    /* determine how many buckets we should receive from others
     * involved in this collective - need to know the number
     * of total contributors from all buckets being relayed
     * thru us
     */
    orte_routed.get_routing_list(ORTE_GRPCOMM_COLL_PEERS, coll);
    np = 1;  /* account for our own bucket */
    while (NULL != (item = opal_list_remove_first(&coll->targets))) {
        nm = (orte_namelist_t*)item;
        if (ORTE_VPID_WILDCARD == nm->name.vpid) {
            /* wait for input from all daemons */
            np = orte_process_info.num_procs;
            OBJ_RELEASE(nm);  /* removed from the list above - release to avoid a leak */
            break;
        } else {
            np++;
            OBJ_RELEASE(nm);  /* removed from the list above - release to avoid a leak */
        }
    }
    /* clear the list for reuse */
    while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&coll->targets))) {
        OBJ_RELEASE(nm);
    }

    /* relay the data, if required */
    if (np == coll->num_peer_buckets) {
        orte_routed.get_routing_list(ORTE_GRPCOMM_COLL_RELAY, coll);

        while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&coll->targets))) {
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s grpcomm:base:daemon_coll: RELAYING COLLECTIVE TO %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&nm->name)));        
            relay = OBJ_NEW(opal_buffer_t);
            orte_grpcomm_base_pack_collective(relay, jobid,
                                              coll, ORTE_GRPCOMM_INTERNAL_STG_GLOBAL);
            if (ORTE_VPID_WILDCARD == nm->name.vpid) {
                /* this is going to everyone in this job, so use xcast */
                orte_grpcomm.xcast(nm->name.jobid, relay, ORTE_RML_TAG_DAEMON_COLL);
                OBJ_RELEASE(relay);
            } else if (nm->name.vpid == sender->vpid) {
                /* don't send it back to the sender as that can
                 * create an infinite loop - just discard the relay
                 */
                OBJ_RELEASE(relay);
            } else {
                /* otherwise, send it to this member */
                if (0 > orte_rml.send_buffer_nb(&nm->name, relay, ORTE_RML_TAG_DAEMON_COLL, 0,
                                                orte_rml_send_callback, NULL)) {
                    ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
                    OBJ_RELEASE(relay);
                }
            }
            OBJ_RELEASE(nm);
        }
    }
    /* clear the list for reuse */
    while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&coll->targets))) {
        OBJ_RELEASE(nm);
    }

    /* determine how many contributors we need to recv - we know
     * that all job objects were found, so we can skip that test
     * while counting
     */
    np = 0;
    for (item = opal_list_get_first(&coll->participants);
         item != opal_list_get_end(&coll->participants);
         item = opal_list_get_next(item)) {
        nm = (orte_namelist_t*)item;
        /* get the job object for this participant */
        jdata = orte_get_job_data_object(nm->name.jobid);
        if (ORTE_VPID_WILDCARD == nm->name.vpid) {
            /* all procs from this job are required to participate */
            np += jdata->num_procs;
        } else {
            np++;
        }
    }

    /* are we done? */
    if (np != coll->num_global_recvd) {
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:base:daemon_coll: MISSING CONTRIBUTORS: np %s ngr %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_VPID_PRINT(np),
                             ORTE_VPID_PRINT(coll->num_global_recvd)));
        return;
    }

    /* since we discovered that the collective is complete, we
     * need to send it to all the participants
     */
    for (item = opal_list_get_first(&coll->participants);
         item != opal_list_get_end(&coll->participants);
         item = opal_list_get_next(item)) {
        nm = (orte_namelist_t*)item;
        relay = OBJ_NEW(opal_buffer_t);
        opal_dss.pack(relay, &coll->id, 1, ORTE_GRPCOMM_COLL_ID_T);
        opal_dss.copy_payload(relay, &coll->buffer);
        /* if the vpid is wildcard, then this goes to
         * all daemons for relay
         */
        if (ORTE_VPID_WILDCARD == nm->name.vpid) {
            orte_grpcomm.xcast(nm->name.jobid, relay, ORTE_RML_TAG_COLLECTIVE);
            OBJ_RELEASE(relay);
        } else {
            /* send it to this proc */
            if (0 > orte_rml.send_buffer_nb(&nm->name, relay, ORTE_RML_TAG_COLLECTIVE, 0,
                                            orte_rml_send_callback, NULL)) {
                ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
                OBJ_RELEASE(relay);
            }
        }
    }

    /* remove this collective */
    opal_list_remove_item(&orte_grpcomm_base.active_colls, &coll->super);
    OBJ_RELEASE(coll);
}
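
daemon_coll_recv matches the orte_rml_buffer_callback_fn_t signature, so it would be posted as a persistent non-blocking receive somewhere during startup. The registration site is not part of this example; the following is an assumed sketch using the same-era RML API.

/* assumed registration - post a persistent receive on the daemon
 * collective tag so every incoming bucket lands in daemon_coll_recv
 */
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                        ORTE_RML_TAG_DAEMON_COLL,
                        ORTE_RML_PERSISTENT,
                        daemon_coll_recv, NULL);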
void orte_grpcomm_base_progress_collectives(void)
{
    opal_list_item_t *item;
    orte_grpcomm_collective_t *coll;
    orte_namelist_t *nm;
    orte_job_t *jdata;
    opal_buffer_t *relay;
    int rc;

    /* cycle thru all known collectives - any collective on the list
     * must have come from either a local proc or receiving a global
     * collective. Either way, the number of required recipients
     * is the number of local procs for that job
     */
    item = opal_list_get_first(&orte_grpcomm_base.active_colls);
    while (item != opal_list_get_end(&orte_grpcomm_base.active_colls)) {
        coll = (orte_grpcomm_collective_t*)item;
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s PROGRESSING COLL id %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             coll->id));
        /* if this collective is already locally complete, then ignore it */
        if (coll->locally_complete) {
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s COLL %d IS LOCALLY COMPLETE",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 coll->id));
            goto next_coll;
        }
        /* get the jobid of the participants in this collective */
        if (NULL == (nm = (orte_namelist_t*)opal_list_get_first(&coll->participants))) {
            opal_output(0, "NO PARTICIPANTS");
            goto next_coll;
        }
        /* get the job object for this participant */
        if (NULL == (jdata = orte_get_job_data_object(nm->name.jobid))) {
            /* if the job object isn't found, then we can't progress
             * this collective
             */
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s COLL %d JOBID %s NOT FOUND",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 coll->id, ORTE_JOBID_PRINT(nm->name.jobid)));
            goto next_coll;
        }
        /* all local procs from this job are required to participate */
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s ALL LOCAL PROCS FOR JOB %s CONTRIBUTE %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid),
                             (int)jdata->num_local_procs));
        /* see if all reqd participants are done */
        if (jdata->num_local_procs == coll->num_local_recvd) {
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s COLLECTIVE %d LOCALLY COMPLETE - SENDING TO GLOBAL COLLECTIVE",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), coll->id));
            /* mark it as locally complete */
            coll->locally_complete = true;
            /* pack the collective */
            relay = OBJ_NEW(opal_buffer_t);
            orte_grpcomm_base_pack_collective(relay, jdata->jobid,
                                              coll, ORTE_GRPCOMM_INTERNAL_STG_LOCAL);
            /* send it to our global collective handler */
            if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay,
                                                  ORTE_RML_TAG_DAEMON_COLL, 0,
                                                  orte_rml_send_callback, NULL))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(relay);
            }
        }

    next_coll:
        item = opal_list_get_next(item);
    }
}
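
For completeness, a sketch of the local contribution path that feeds this progress loop: when a local proc's bucket arrives, a handler (not shown in this example) would be expected to bump num_local_recvd, absorb the payload, and re-run the loop. This is an assumption consistent with the counters used above, not code from the example.

/* assumed handler fragment for a local proc's contribution;
 * coll would be located or created via orte_grpcomm_base_setup_collective(id)
 */
coll->num_local_recvd++;
opal_dss.copy_payload(&coll->buffer, data);
/* now see if any pending collectives can move forward */
orte_grpcomm_base_progress_collectives();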