/**
 * Request callback for the bridged allreduce (step 3).
 *
 * Applies the context's reduction operation to the temporary buffer and the
 * output buffer, then hands the request over to the broadcast scheduler.
 *
 * @param request  communicator request whose context is an
 *                 ompi_comm_allreduce_context_t
 *
 * @return whatever ompi_comm_allreduce_bridged_schedule_bcast() returns
 */
static int ompi_comm_allreduce_bridged_xchng_complete (ompi_comm_request_t *request)
{
    ompi_comm_allreduce_context_t *ctx;

    ctx = (ompi_comm_allreduce_context_t *) request->context;

    /* fold the leader data held in tmpbuf into outbuf (count MPI_INT elements) */
    ompi_op_reduce (ctx->op, ctx->tmpbuf, ctx->outbuf, ctx->count, MPI_INT);

    /* the local peers still need the result: queue the broadcast */
    return ompi_comm_allreduce_bridged_schedule_bcast (request);
}
/**
 * Start a non-blocking integer allreduce on an intra-communicator in which
 * the local leader performs an additional completion step
 * (ompi_comm_allreduce_pmix_reduce_complete) after the local reduction.
 *
 * The function allocates an allreduce context, starts a non-blocking reduce
 * to the local leader and schedules the follow-up work on a communicator
 * request:
 *   - on the local leader: ompi_comm_allreduce_pmix_reduce_complete runs
 *     once the reduce has finished;
 *   - on every other rank: the broadcast is scheduled directly behind the
 *     reduce.
 *
 * @param inbuf        send buffer handed to the non-blocking reduce
 * @param outbuf       output buffer recorded in the allreduce context
 * @param count        number of MPI_INT elements
 * @param op           reduction operation
 * @param cid_context  supplies the communicator and the local leader rank
 * @param req          set to the started request on success (untouched on error)
 *
 * @return OMPI_SUCCESS on success; OMPI_ERR_OUT_OF_RESOURCE if the context,
 *         the leader's temporary buffer or the request could not be obtained;
 *         otherwise the error reported by the reduce or by the scheduling calls.
 */
static int ompi_comm_allreduce_intra_pmix_nb (int *inbuf, int *outbuf, int count, struct ompi_op_t *op,
                                              ompi_comm_cid_context_t *cid_context, ompi_request_t **req)
{
    ompi_communicator_t *comm = cid_context->comm;
    ompi_comm_allreduce_context_t *context;
    int local_rank = ompi_comm_rank (comm);
    ompi_comm_request_t *request;
    ompi_request_t *subreq;
    int rc;

    context = ompi_comm_allreduce_context_alloc (inbuf, outbuf, count, op, cid_context);
    if (OPAL_UNLIKELY(NULL == context)) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* only the local leader receives the reduced data, so only it needs tmpbuf */
    if (cid_context->local_leader == local_rank) {
        context->tmpbuf = (int *) calloc (count, sizeof(int));
        if (OPAL_UNLIKELY(NULL == context->tmpbuf)) {
            OBJ_RELEASE(context);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    request = ompi_comm_request_get ();
    if (NULL == request) {
        OBJ_RELEASE(context);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* From here on the context is attached to the request; the error paths
     * below only return the request.
     * NOTE(review): assumes ompi_comm_request_return() also releases
     * request->context -- confirm. */
    request->context = &context->super;

    /* comm is an intra-communicator */
    rc = comm->c_coll.coll_ireduce (inbuf, context->tmpbuf, count, MPI_INT, op,
                                    cid_context->local_leader, comm, &subreq,
                                    comm->c_coll.coll_ireduce_module);
    if (OMPI_SUCCESS != rc) {
        ompi_comm_request_return (request);
        return rc;
    }

    if (cid_context->local_leader == local_rank) {
        rc = ompi_comm_request_schedule_append (request, ompi_comm_allreduce_pmix_reduce_complete,
                                                &subreq, 1);
    } else {
        /* go ahead and schedule the broadcast; use the same function as the
         * bridged variant to do so */
        rc = ompi_comm_request_schedule_append (request, NULL, &subreq, 1);
        if (OMPI_SUCCESS == rc) {
            /* only continue when the reduce was queued successfully, so that
             * its error code is not overwritten and lost */
            rc = ompi_comm_allreduce_bridged_schedule_bcast (request);
        }
    }

    if (OMPI_SUCCESS != rc) {
        ompi_comm_request_return (request);
        return rc;
    }

    ompi_comm_request_start (request);
    *req = (ompi_request_t *) request;

    return OMPI_SUCCESS;
}
/**
 * Request callback run on the local leader after the local reduce.
 *
 * Steps:
 *   1. pack context->tmpbuf (context->count ints) into a byte object;
 *   2. build the key for the local data and the key for the peer's data from
 *      port_string, pmix_tag, iter and the send/recv direction (the two keys
 *      use opposite directions, selected by cid_context->send_first);
 *   3. run OPAL_PMIX_EXCHANGE with both keys (600 second timeout);
 *   4. unpack the received byte object into context->outbuf;
 *   5. apply the reduction operation to tmpbuf/outbuf and schedule the
 *      broadcast.
 *
 * @param request  communicator request whose context is an
 *                 ompi_comm_allreduce_context_t
 *
 * @return the pack, exchange or unpack error code if one of them fails,
 *         OMPI_ERR_OUT_OF_RESOURCE if a key could not be allocated, otherwise
 *         the result of ompi_comm_allreduce_bridged_schedule_bcast().
 */
static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *request)
{
    ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context;
    ompi_comm_cid_context_t *cid_context = context->cid_context;
    int32_t size_count = context->count;
    opal_value_t info;
    opal_pmix_pdata_t pdat;
    opal_buffer_t sbuf;
    int rc;
    int bytes_written;
    const int output_id = 0;
    const int verbosity_level = 1;

    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);

    if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, context->tmpbuf, (int32_t)context->count, OPAL_INT))) {
        OBJ_DESTRUCT(&sbuf);
        opal_output_verbose (verbosity_level, output_id, "pack failed. rc %d\n", rc);
        return rc;
    }

    OBJ_CONSTRUCT(&info, opal_value_t);
    OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t);

    info.type = OPAL_BYTE_OBJECT;
    pdat.value.type = OPAL_BYTE_OBJECT;

    /* move the packed bytes out of the buffer and into info */
    opal_dss.unload(&sbuf, (void**)&info.data.bo.bytes, &info.data.bo.size);
    OBJ_DESTRUCT(&sbuf);

    bytes_written = asprintf(&info.key,
                             cid_context->send_first ? "%s:%s:send:%d"
                                                     : "%s:%s:recv:%d",
                             cid_context->port_string,
                             cid_context->pmix_tag,
                             cid_context->iter);
    if (bytes_written == -1) {
        /* the pointer is unspecified after a failed asprintf; reset it so
         * the destructor below never sees a garbage pointer */
        info.key = NULL;
        opal_output_verbose (verbosity_level, output_id, "writing info.key failed\n");
    } else {
        bytes_written = asprintf(&pdat.value.key,
                                 cid_context->send_first ? "%s:%s:recv:%d"
                                                         : "%s:%s:send:%d",
                                 cid_context->port_string,
                                 cid_context->pmix_tag,
                                 cid_context->iter);
        if (bytes_written == -1) {
            pdat.value.key = NULL;
            opal_output_verbose (verbosity_level, output_id, "writing pdat.value.key failed\n");
        }
    }

    if (bytes_written == -1) {
        // write with separate calls,
        // just in case the args are the cause of failure
        opal_output_verbose (verbosity_level, output_id, "send first: %d\n", cid_context->send_first);
        opal_output_verbose (verbosity_level, output_id, "port string: %s\n", cid_context->port_string);
        opal_output_verbose (verbosity_level, output_id, "pmix tag: %s\n", cid_context->pmix_tag);
        opal_output_verbose (verbosity_level, output_id, "iter: %d\n", cid_context->iter);

        /* release the unloaded byte object and any key that was created,
         * as every other exit path after construction does */
        OBJ_DESTRUCT(&info);
        OBJ_DESTRUCT(&pdat);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* this macro is not actually non-blocking. if a non-blocking version becomes available this function
     * needs to be reworked to take advantage of it. */
    OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600);  // give them 10 minutes
    OBJ_DESTRUCT(&info);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&pdat);
        return rc;
    }

    /* hand the received bytes to the buffer and clear them in pdat so that
     * destructing pdat does not touch them */
    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
    opal_dss.load(&sbuf, pdat.value.data.bo.bytes, pdat.value.data.bo.size);
    pdat.value.data.bo.bytes = NULL;
    pdat.value.data.bo.size = 0;
    OBJ_DESTRUCT(&pdat);

    rc = opal_dss.unpack (&sbuf, context->outbuf, &size_count, OPAL_INT);
    OBJ_DESTRUCT(&sbuf);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, size_count, MPI_INT);

    return ompi_comm_allreduce_bridged_schedule_bcast (request);
}
/**
 * Request callback run on the local leader after the local reduce.
 *
 * Packs context->tmpbuf into a byte object, builds the key for the local
 * data and the key for the peer's data (opposite send/recv directions,
 * selected by cid_context->send_first), runs OPAL_PMIX_EXCHANGE with a
 * 600 second timeout, unpacks the received data into context->outbuf,
 * applies the reduction operation and finally schedules the broadcast.
 *
 * @param request  communicator request whose context is an
 *                 ompi_comm_allreduce_context_t
 *
 * @return the pack, exchange or unpack error code if one of them fails,
 *         OMPI_ERR_OUT_OF_RESOURCE if a key could not be allocated, otherwise
 *         the result of ompi_comm_allreduce_bridged_schedule_bcast().
 */
static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *request)
{
    ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context;
    ompi_comm_cid_context_t *cid_context = context->cid_context;
    int32_t size_count = context->count;
    /* direction tags: the local key and the peer key always differ */
    const char *local_dir = cid_context->send_first ? "send" : "recv";
    const char *remote_dir = cid_context->send_first ? "recv" : "send";
    opal_value_t info;
    opal_pmix_pdata_t pdat;
    opal_buffer_t sbuf;
    int rc;

    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);

    if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, context->tmpbuf, (int32_t)context->count, OPAL_INT))) {
        OBJ_DESTRUCT(&sbuf);
        fprintf (stderr, "pack failed. rc %d\n", rc);
        return rc;
    }

    OBJ_CONSTRUCT(&info, opal_value_t);
    OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t);

    info.type = OPAL_BYTE_OBJECT;
    pdat.value.type = OPAL_BYTE_OBJECT;

    /* move the packed bytes out of the buffer and into info */
    opal_dss.unload(&sbuf, (void**)&info.data.bo.bytes, &info.data.bo.size);
    OBJ_DESTRUCT(&sbuf);

    /* Build "<port>:<tag>:<send|recv>:<iter>" keys.  asprintf can fail, and
     * leaves the target pointer unspecified when it does, so check each call
     * and reset the pointer before the objects are destructed. */
    if (-1 == asprintf (&info.key, "%s:%s:%s:%d", cid_context->port_string,
                        cid_context->pmix_tag, local_dir, cid_context->iter)) {
        info.key = NULL;
        OBJ_DESTRUCT(&info);
        OBJ_DESTRUCT(&pdat);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if (-1 == asprintf (&pdat.value.key, "%s:%s:%s:%d", cid_context->port_string,
                        cid_context->pmix_tag, remote_dir, cid_context->iter)) {
        pdat.value.key = NULL;
        OBJ_DESTRUCT(&info);
        OBJ_DESTRUCT(&pdat);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* this macro is not actually non-blocking. if a non-blocking version becomes available this function
     * needs to be reworked to take advantage of it. */
    OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600);  // give them 10 minutes
    OBJ_DESTRUCT(&info);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&pdat);
        return rc;
    }

    /* hand the received bytes to the buffer and clear them in pdat so that
     * destructing pdat does not touch them */
    OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
    opal_dss.load(&sbuf, pdat.value.data.bo.bytes, pdat.value.data.bo.size);
    pdat.value.data.bo.bytes = NULL;
    pdat.value.data.bo.size = 0;
    OBJ_DESTRUCT(&pdat);

    rc = opal_dss.unpack (&sbuf, context->outbuf, &size_count, OPAL_INT);
    OBJ_DESTRUCT(&sbuf);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, size_count, MPI_INT);

    return ompi_comm_allreduce_bridged_schedule_bcast (request);
}