/**
 * Exchange the contents of context->tmpbuf through OPAL_PMIX_EXCHANGE and
 * reduce the received data with the local data.
 *
 * Steps, in order:
 *   1. pack context->count integers from context->tmpbuf into a buffer and
 *      move the packed bytes into info (a byte-object value);
 *   2. format the keys for info and pdat from port_string, pmix_tag and iter;
 *      the "send"/"recv" labels are swapped depending on send_first so the
 *      two keys of one call always differ;
 *   3. run OPAL_PMIX_EXCHANGE with a 600 second timeout;
 *   4. unpack the bytes returned in pdat into context->outbuf;
 *   5. call ompi_op_reduce on tmpbuf/outbuf and schedule the broadcast.
 *
 * @param request  request whose ->context is an ompi_comm_allreduce_context_t
 *
 * @return the value of ompi_comm_allreduce_bridged_schedule_bcast() on the
 *         normal path; the error code of pack, the exchange, or unpack if one
 *         of them fails; OMPI_ERR_OUT_OF_RESOURCE if a key cannot be formatted.
 */
static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *request)
{
    ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context;
    ompi_comm_cid_context_t *cid_context = context->cid_context;
    /* passed by address to unpack; its value afterwards is the element count
     * handed to ompi_op_reduce */
    int32_t size_count = context->count;
    opal_value_t info;
    opal_pmix_pdata_t pdat;
    opal_buffer_t sbuf;
    int rc;
    int bytes_written;
    const int output_id = 0;
    const int verbosity_level = 1;

    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);

    if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, context->tmpbuf, (int32_t)context->count, OPAL_INT))) {
        OBJ_DESTRUCT(&sbuf);
        opal_output_verbose (verbosity_level, output_id, "pack failed. rc %d\n", rc);
        return rc;
    }

    OBJ_CONSTRUCT(&info, opal_value_t);
    OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t);

    info.type = OPAL_BYTE_OBJECT;
    pdat.value.type = OPAL_BYTE_OBJECT;

    /* hand the packed bytes over to info; sbuf is no longer needed afterwards */
    opal_dss.unload(&sbuf, (void**)&info.data.bo.bytes, &info.data.bo.size);
    OBJ_DESTRUCT(&sbuf);

    bytes_written = asprintf(&info.key,
                             cid_context->send_first ? "%s:%s:send:%d" : "%s:%s:recv:%d",
                             cid_context->port_string, cid_context->pmix_tag, cid_context->iter);
    if (bytes_written < 0) {
        /* asprintf leaves the pointer unspecified on failure; clear it so the
         * OBJ_DESTRUCT below never sees a garbage key */
        info.key = NULL;
        opal_output_verbose (verbosity_level, output_id, "writing info.key failed\n");
    } else {
        bytes_written = asprintf(&pdat.value.key,
                                 cid_context->send_first ? "%s:%s:recv:%d" : "%s:%s:send:%d",
                                 cid_context->port_string, cid_context->pmix_tag, cid_context->iter);
        if (bytes_written < 0) {
            pdat.value.key = NULL;
            opal_output_verbose (verbosity_level, output_id, "writing pdat.value.key failed\n");
        }
    }

    if (bytes_written < 0) {
        // write with separate calls,
        // just in case the args are the cause of failure
        opal_output_verbose (verbosity_level, output_id, "send first: %d\n", cid_context->send_first);
        opal_output_verbose (verbosity_level, output_id, "port string: %s\n", cid_context->port_string);
        opal_output_verbose (verbosity_level, output_id, "pmix tag: %s\n", cid_context->pmix_tag);
        opal_output_verbose (verbosity_level, output_id, "iter: %d\n", cid_context->iter);

        /* release the unloaded payload and any key that was allocated, the
         * same way every later exit path does */
        OBJ_DESTRUCT(&info);
        OBJ_DESTRUCT(&pdat);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* this macro is not actually non-blocking. if a non-blocking version becomes available this function
     * needs to be reworked to take advantage of it. */
    OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600);  // give them 10 minutes
    OBJ_DESTRUCT(&info);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&pdat);
        return rc;
    }

    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
    opal_dss.load(&sbuf, pdat.value.data.bo.bytes, pdat.value.data.bo.size);
    /* the bytes were just given to sbuf; detach them from pdat before it is
     * destructed (presumably to avoid releasing them twice -- confirm against
     * the opal_value_t destructor) */
    pdat.value.data.bo.bytes = NULL;
    pdat.value.data.bo.size = 0;
    OBJ_DESTRUCT(&pdat);

    rc = opal_dss.unpack (&sbuf, context->outbuf, &size_count, OPAL_INT);
    OBJ_DESTRUCT(&sbuf);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, size_count, MPI_INT);

    return ompi_comm_allreduce_bridged_schedule_bcast (request);
}
/**
 * Exchange the contents of context->tmpbuf through OPAL_PMIX_EXCHANGE and
 * reduce the received data with the local data.
 *
 * The integers in context->tmpbuf are packed and attached to info as a byte
 * object. info and pdat get keys built from port_string, pmix_tag and iter,
 * with the "send"/"recv" labels chosen by send_first. After the exchange the
 * bytes returned in pdat are unpacked into context->outbuf, ompi_op_reduce is
 * applied to tmpbuf/outbuf, and the broadcast is scheduled.
 *
 * @param request  request whose ->context is an ompi_comm_allreduce_context_t
 *
 * @return the value of ompi_comm_allreduce_bridged_schedule_bcast() on the
 *         normal path; the error code of pack, the exchange, or unpack if one
 *         of them fails; OMPI_ERR_OUT_OF_RESOURCE if a key cannot be allocated.
 */
static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *request)
{
    ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context;
    ompi_comm_cid_context_t *cid_context = context->cid_context;
    /* passed by address to unpack; its value afterwards is the element count
     * handed to ompi_op_reduce */
    int32_t size_count = context->count;
    opal_value_t info;
    opal_pmix_pdata_t pdat;
    opal_buffer_t sbuf;
    int rc;
    int info_key_len;
    int pdat_key_len;

    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);

    if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, context->tmpbuf, (int32_t)context->count, OPAL_INT))) {
        OBJ_DESTRUCT(&sbuf);
        fprintf (stderr, "pack failed. rc %d\n", rc);
        return rc;
    }

    OBJ_CONSTRUCT(&info, opal_value_t);
    OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t);

    info.type = OPAL_BYTE_OBJECT;
    pdat.value.type = OPAL_BYTE_OBJECT;

    /* hand the packed bytes over to info; sbuf is no longer needed afterwards */
    opal_dss.unload(&sbuf, (void**)&info.data.bo.bytes, &info.data.bo.size);
    OBJ_DESTRUCT(&sbuf);

    if (cid_context->send_first) {
        info_key_len = asprintf(&info.key, "%s:%s:send:%d", cid_context->port_string,
                                cid_context->pmix_tag, cid_context->iter);
        pdat_key_len = asprintf(&pdat.value.key, "%s:%s:recv:%d", cid_context->port_string,
                                cid_context->pmix_tag, cid_context->iter);
    } else {
        info_key_len = asprintf(&info.key, "%s:%s:recv:%d", cid_context->port_string,
                                cid_context->pmix_tag, cid_context->iter);
        pdat_key_len = asprintf(&pdat.value.key, "%s:%s:send:%d", cid_context->port_string,
                                cid_context->pmix_tag, cid_context->iter);
    }

    /* asprintf leaves the pointer unspecified on failure; clear it so neither
     * the exchange nor the destructors ever see a garbage key */
    if (info_key_len < 0) {
        info.key = NULL;
    }
    if (pdat_key_len < 0) {
        pdat.value.key = NULL;
    }
    if (info_key_len < 0 || pdat_key_len < 0) {
        fprintf (stderr, "key allocation failed\n");
        /* release the unloaded payload and whichever key was allocated */
        OBJ_DESTRUCT(&info);
        OBJ_DESTRUCT(&pdat);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* this macro is not actually non-blocking. if a non-blocking version becomes available this function
     * needs to be reworked to take advantage of it. */
    OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600);  // give them 10 minutes
    OBJ_DESTRUCT(&info);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&pdat);
        return rc;
    }

    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
    opal_dss.load(&sbuf, pdat.value.data.bo.bytes, pdat.value.data.bo.size);
    /* the bytes were just given to sbuf; detach them from pdat before it is
     * destructed (presumably to avoid releasing them twice -- confirm against
     * the opal_value_t destructor) */
    pdat.value.data.bo.bytes = NULL;
    pdat.value.data.bo.size = 0;
    OBJ_DESTRUCT(&pdat);

    rc = opal_dss.unpack (&sbuf, context->outbuf, &size_count, OPAL_INT);
    OBJ_DESTRUCT(&sbuf);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, size_count, MPI_INT);

    return ompi_comm_allreduce_bridged_schedule_bcast (request);
}