Exemplo n.º 1
0
/**
 * Exchange the locally reduced integers with the peer through PMIx and
 * combine both contributions.
 *
 * Steps, in order:
 *   1. pack context->count ints from context->tmpbuf into a buffer;
 *   2. hand the packed bytes to `info` and build the publish/lookup keys
 *      from port_string, pmix_tag and iter (the "send"/"recv" roles are
 *      swapped depending on cid_context->send_first);
 *   3. run OPAL_PMIX_EXCHANGE with a 600 second timeout;
 *   4. unpack the peer's data into context->outbuf and call ompi_op_reduce
 *      on tmpbuf/outbuf with context->op;
 *   5. schedule the follow-up bcast.
 *
 * @param request  communicator request whose context is an
 *                 ompi_comm_allreduce_context_t
 *
 * @return the error code of the failing pack/exchange/unpack step,
 *         OMPI_ERR_OUT_OF_RESOURCE if a key string could not be built,
 *         otherwise whatever ompi_comm_allreduce_bridged_schedule_bcast
 *         returns.
 */
static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *request)
{
    ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context;
    ompi_comm_cid_context_t *cid_context = context->cid_context;
    int32_t size_count = context->count;
    opal_value_t info;
    opal_pmix_pdata_t pdat;
    opal_buffer_t sbuf;
    int rc;
    int bytes_written;
    const int output_id = 0;
    const int verbosity_level = 1;

    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);

    if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, context->tmpbuf, (int32_t)context->count, OPAL_INT))) {
        OBJ_DESTRUCT(&sbuf);
        opal_output_verbose (verbosity_level, output_id, "pack failed. rc  %d\n", rc);
        return rc;
    }

    OBJ_CONSTRUCT(&info, opal_value_t);
    OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t);

    info.type = OPAL_BYTE_OBJECT;
    pdat.value.type = OPAL_BYTE_OBJECT;

    /* move the packed payload out of the buffer and into info */
    opal_dss.unload(&sbuf, (void**)&info.data.bo.bytes, &info.data.bo.size);
    OBJ_DESTRUCT(&sbuf);

    bytes_written = asprintf(&info.key,
                             cid_context->send_first ? "%s:%s:send:%d"
                             : "%s:%s:recv:%d",
                             cid_context->port_string,
                             cid_context->pmix_tag,
                             cid_context->iter);

    if (bytes_written < 0) {
        /* the pointer is undefined after a failed asprintf; reset it so the
         * cleanup below never sees a garbage address */
        info.key = NULL;
        opal_output_verbose (verbosity_level, output_id, "writing info.key failed\n");
    } else {
        bytes_written = asprintf(&pdat.value.key,
                                 cid_context->send_first ? "%s:%s:recv:%d"
                                 : "%s:%s:send:%d",
                                 cid_context->port_string,
                                 cid_context->pmix_tag,
                                 cid_context->iter);

        if (bytes_written < 0) {
            pdat.value.key = NULL;  /* same reasoning as for info.key */
            opal_output_verbose (verbosity_level, output_id, "writing pdat.value.key failed\n");
        }
    }

    if (bytes_written < 0) {
        // write with separate calls,
        // just in case the args are the cause of failure
        opal_output_verbose (verbosity_level, output_id, "send first: %d\n", cid_context->send_first);
        opal_output_verbose (verbosity_level, output_id, "port string: %s\n", cid_context->port_string);
        opal_output_verbose (verbosity_level, output_id, "pmix tag: %s\n", cid_context->pmix_tag);
        opal_output_verbose (verbosity_level, output_id, "iter: %d\n", cid_context->iter);

        /* release both objects exactly as the other exit paths do; without
         * this the unloaded payload (and possibly info.key) would be lost */
        OBJ_DESTRUCT(&info);
        OBJ_DESTRUCT(&pdat);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* this macro is not actually non-blocking. if a non-blocking version becomes available this function
     * needs to be reworked to take advantage of it. */
    OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600);  // give them 10 minutes
    OBJ_DESTRUCT(&info);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&pdat);
        return rc;
    }

    /* hand the received bytes to a buffer; clear pdat's pointer first so
     * destroying pdat cannot touch the memory the buffer now holds */
    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
    opal_dss.load(&sbuf, pdat.value.data.bo.bytes, pdat.value.data.bo.size);
    pdat.value.data.bo.bytes = NULL;
    pdat.value.data.bo.size = 0;
    OBJ_DESTRUCT(&pdat);

    rc = opal_dss.unpack (&sbuf, context->outbuf, &size_count, OPAL_INT);
    OBJ_DESTRUCT(&sbuf);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    /* size_count now holds the element count reported by unpack */
    ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, size_count, MPI_INT);

    return ompi_comm_allreduce_bridged_schedule_bcast (request);
}
Exemplo n.º 2
0
/**
 * Exchange the locally reduced integers with the peer through PMIx and
 * combine both contributions.
 *
 * The function packs context->count ints from context->tmpbuf, publishes
 * them under a "send"/"recv" key (roles chosen by cid_context->send_first),
 * looks up the peer's counterpart key via OPAL_PMIX_EXCHANGE, unpacks the
 * received data into context->outbuf, calls ompi_op_reduce on
 * tmpbuf/outbuf with context->op and finally schedules the bcast step.
 *
 * @param request  communicator request whose context is an
 *                 ompi_comm_allreduce_context_t
 *
 * @return the error code of the failing pack/exchange/unpack step,
 *         OMPI_ERR_OUT_OF_RESOURCE if a key string could not be allocated,
 *         otherwise whatever ompi_comm_allreduce_bridged_schedule_bcast
 *         returns.
 */
static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *request)
{
    ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context;
    ompi_comm_cid_context_t *cid_context = context->cid_context;
    int32_t size_count = context->count;
    opal_value_t info;
    opal_pmix_pdata_t pdat;
    opal_buffer_t sbuf;
    int rc;
    int info_key_len;
    int pdat_key_len;

    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);

    if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, context->tmpbuf, (int32_t)context->count, OPAL_INT))) {
        OBJ_DESTRUCT(&sbuf);
        fprintf (stderr, "pack failed. rc  %d\n", rc);
        return rc;
    }

    OBJ_CONSTRUCT(&info, opal_value_t);
    OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t);

    info.type = OPAL_BYTE_OBJECT;
    pdat.value.type = OPAL_BYTE_OBJECT;

    /* move the packed payload out of the buffer and into info */
    opal_dss.unload(&sbuf, (void**)&info.data.bo.bytes, &info.data.bo.size);
    OBJ_DESTRUCT(&sbuf);

    /* the local side publishes under one role and looks up the other */
    if (cid_context->send_first) {
        info_key_len = asprintf(&info.key, "%s:%s:send:%d", cid_context->port_string,
                                cid_context->pmix_tag, cid_context->iter);
        pdat_key_len = asprintf(&pdat.value.key, "%s:%s:recv:%d", cid_context->port_string,
                                cid_context->pmix_tag, cid_context->iter);
    } else {
        info_key_len = asprintf(&info.key, "%s:%s:recv:%d", cid_context->port_string,
                                cid_context->pmix_tag, cid_context->iter);
        pdat_key_len = asprintf(&pdat.value.key, "%s:%s:send:%d", cid_context->port_string,
                                cid_context->pmix_tag, cid_context->iter);
    }

    /* a failed asprintf leaves its output pointer undefined; reset it so
     * neither the exchange nor the cleanup sees a garbage address */
    if (info_key_len < 0) {
        info.key = NULL;
    }
    if (pdat_key_len < 0) {
        pdat.value.key = NULL;
    }
    if (info_key_len < 0 || pdat_key_len < 0) {
        fprintf (stderr, "key allocation failed\n");
        OBJ_DESTRUCT(&info);
        OBJ_DESTRUCT(&pdat);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* this macro is not actually non-blocking. if a non-blocking version becomes available this function
     * needs to be reworked to take advantage of it. */
    OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600);  // give them 10 minutes
    OBJ_DESTRUCT(&info);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&pdat);
        return rc;
    }

    /* hand the received bytes to a buffer; clear pdat's pointer first so
     * destroying pdat cannot touch the memory the buffer now holds */
    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
    opal_dss.load(&sbuf, pdat.value.data.bo.bytes, pdat.value.data.bo.size);
    pdat.value.data.bo.bytes = NULL;
    pdat.value.data.bo.size = 0;
    OBJ_DESTRUCT(&pdat);

    rc = opal_dss.unpack (&sbuf, context->outbuf, &size_count, OPAL_INT);
    OBJ_DESTRUCT(&sbuf);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    /* size_count now holds the element count reported by unpack */
    ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, size_count, MPI_INT);

    return ompi_comm_allreduce_bridged_schedule_bcast (request);
}