/**
 * Non-blocking CID allocation, step 1: reserve the lowest free local
 * context id and start an allreduce (MPI_MAX) to compute a candidate id
 * agreed on by all participating processes.
 *
 * This callback is driven by the comm-request progress engine.  If the
 * cid lock is busy, or another communicator creation with a lower
 * ordering id is in flight, the function re-schedules itself instead of
 * blocking, so concurrent allocations cannot deadlock.
 *
 * @param[in] request  active communicator request; request->context is an
 *                     ompi_comm_cid_context_t
 *
 * @return OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE when this process has no
 *         free context id left, or the error returned by the allreduce.
 */
static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
{
    ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
    /* ordering key (parent cid, pml tag): the allocation holding the lowest
     * key proceeds first; everyone else defers and retries */
    int64_t my_id = ((int64_t) ompi_comm_get_cid (context->comm) << 32 | context->pml_tag);
    ompi_request_t *subreq;
    bool flag;
    int ret;

    if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
        /* lock busy -- try again on the next progress iteration */
        return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
    }

    if (ompi_comm_cid_lowest_id < my_id) {
        /* a lower-id allocation is in flight; defer to it and retry later */
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);
        return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
    }

    ompi_comm_cid_lowest_id = my_id;

    /**
     * This is the real algorithm described in the doc
     */
    flag = false;
    context->nextlocal_cid = mca_pml.pml_max_contextid;
    for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) {
        flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i,
                                                     context->comm);
        if (true == flag) {
            context->nextlocal_cid = i;
            break;
        }
    }

    ret = context->allreduce_fn (&context->nextlocal_cid, &context->nextcid, 1, MPI_MAX,
                                 context, &subreq);
    if (OMPI_SUCCESS != ret) {
        /* BUGFIX: the slot reserved above will never be confirmed by
         * ompi_comm_checkcid, so release it here or it leaks.  (An error
         * return from allreduce_fn means the operation was not started.) */
        if (flag) {
            opal_pointer_array_set_item (&ompi_mpi_communicators,
                                         context->nextlocal_cid, NULL);
        }
        ompi_comm_cid_lowest_id = INT64_MAX;
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);
        return ret;
    }

    /* NOTE: context->nextcid is NOT valid yet -- the allreduce above is
     * non-blocking.  This test only detects that *this* process ran out of
     * context ids (the loop found no free slot).  Peer exhaustion is
     * detected later, in ompi_comm_checkcid, once nextcid is available. */
    if ((unsigned int) context->nextlocal_cid == mca_pml.pml_max_contextid) {
        /* this process ran out of CIDs */
        if (flag) {
            /* defensive: release any reserved slot (plain set, not
             * test-and-set -- we are giving the slot back, not claiming it) */
            opal_pointer_array_set_item (&ompi_mpi_communicators,
                                         context->nextlocal_cid, NULL);
        }

        ompi_comm_cid_lowest_id = INT64_MAX;
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    OPAL_THREAD_UNLOCK(&ompi_cid_lock);

    /* next we want to verify that the resulting commid is ok */
    return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, &subreq, 1);
}
/**
 * Non-blocking CID allocation, step 2: verify the agreed candidate.
 *
 * Runs after the MPI_MAX allreduce started in
 * ompi_comm_allreduce_getnextcid has completed, so context->nextcid is
 * now valid on every process.  Each process checks whether it can own the
 * candidate id locally, then starts an MPI_MIN allreduce on that flag so
 * all processes learn whether everyone agreed.
 *
 * @param[in] request  active communicator request; request->context is an
 *                     ompi_comm_cid_context_t
 *
 * @return OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE if any peer exhausted its
 *         context ids, or the error returned by the allreduce.
 */
static int ompi_comm_checkcid (ompi_comm_request_t *request)
{
    ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
    ompi_request_t *subreq;
    int ret;

    if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
        /* lock busy -- try again on the next progress iteration */
        return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, NULL, 0);
    }

    /* BUGFIX: range-check the reduced candidate.  nextcid reaches
     * pml_max_contextid iff at least one peer found no free slot; the
     * MPI_MAX reduction gives every process the same value, so all of them
     * take this error path consistently instead of test-and-setting an
     * out-of-range index (and hanging peers in a second allreduce). */
    if ((unsigned int) context->nextcid >= mca_pml.pml_max_contextid) {
        if ((unsigned int) context->nextlocal_cid < mca_pml.pml_max_contextid) {
            /* give back the slot this process reserved in step 1 */
            opal_pointer_array_set_item (&ompi_mpi_communicators,
                                         context->nextlocal_cid, NULL);
        }
        ompi_comm_cid_lowest_id = INT64_MAX;
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    context->flag = (context->nextcid == context->nextlocal_cid);

    if (!context->flag) {
        /* candidate differs from our reservation: drop ours and try to
         * claim the candidate slot instead */
        opal_pointer_array_set_item (&ompi_mpi_communicators,
                                     context->nextlocal_cid, NULL);

        context->flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators,
                                                              context->nextcid,
                                                              context->comm);
    }

    ++context->iter;

    ret = context->allreduce_fn (&context->flag, &context->rflag, 1, MPI_MIN,
                                 context, &subreq);
    if (OMPI_SUCCESS == ret) {
        ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, &subreq, 1);
    } else {
        /* BUGFIX: on failure, step down as the lowest in-flight allocation.
         * Without this reset, every pending allocation with a higher
         * ordering id keeps re-scheduling itself forever (livelock). */
        ompi_comm_cid_lowest_id = INT64_MAX;
    }

    OPAL_THREAD_UNLOCK(&ompi_cid_lock);
    return ret;
}
/**
 * Blocking context-id allocation for a new communicator.
 *
 * All processes of comm (and, depending on mode, the bridge/remote side)
 * iterate: each reserves its lowest free local cid, an MPI_MAX allreduce
 * picks a common candidate, and an MPI_MIN allreduce confirms that every
 * process could claim it.  On success the agreed cid is installed in
 * newcomm.
 *
 * @param[out] newcomm       communicator receiving the new context id
 * @param[in]  comm          parent communicator the collective runs over
 * @param[in]  bridgecomm    bridge communicator (bridge/OOB modes only)
 * @param[in]  local_leader  local leader rank/port (mode dependent)
 * @param[in]  remote_leader remote leader rank/port (mode dependent)
 * @param[in]  mode          OMPI_COMM_CID_* selecting the allreduce flavor
 * @param[in]  send_first    ordering hint for the inter/OOB allreduce
 *
 * @return MPI_SUCCESS, MPI_UNDEFINED for an unknown mode, or
 *         OMPI_ERR_OUT_OF_RESOURCE when any participant exhausted its
 *         context ids.
 */
int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
                        ompi_communicator_t* comm,
                        ompi_communicator_t* bridgecomm,
                        void* local_leader,
                        void* remote_leader,
                        int mode,
                        int send_first )
{
    int nextcid;
    bool flag;
    int nextlocal_cid;
    int done=0;
    int response, glresponse=0;
    int start;
    unsigned int i;
    ompi_comm_cid_allredfct* allredfnct;

    /**
     * Determine which implementation of allreduce we have to use
     * for the current scenario
     */
    switch (mode) {
    case OMPI_COMM_CID_INTRA:
        allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra;
        break;
    case OMPI_COMM_CID_INTER:
        allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_inter;
        break;
    case OMPI_COMM_CID_INTRA_BRIDGE:
        allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_bridge;
        break;
    case OMPI_COMM_CID_INTRA_OOB:
        allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_oob;
        break;
    default:
        return MPI_UNDEFINED;
        break;
    }

    do {
        /* Only one communicator function allowed in same time on the
         * same communicator.
         */
        OPAL_THREAD_LOCK(&ompi_cid_lock);
        response = ompi_comm_register_cid (comm->c_contextid);
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);
    } while (OMPI_SUCCESS != response );

    start = ompi_mpi_communicators.lowest_free;

    while (!done) {
        /**
         * This is the real algorithm described in the doc
         */
        OPAL_THREAD_LOCK(&ompi_cid_lock);
        if (comm->c_contextid != ompi_comm_lowest_cid() ) {
            /* if not lowest cid, we do not continue, but sleep and try again */
            OPAL_THREAD_UNLOCK(&ompi_cid_lock);
            continue;
        }
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);

        /* BUGFIX: initialize both before the search.  Previously, if no
         * free slot existed below pml_max_contextid, nextlocal_cid was
         * read uninitialized in the allreduce below (undefined behavior),
         * and flag kept a stale value from the previous round. */
        flag = false;
        nextlocal_cid = mca_pml.pml_max_contextid;
        for (i=start; i < mca_pml.pml_max_contextid ; i++) {
            flag=opal_pointer_array_test_and_set_item(&ompi_mpi_communicators,
                                                      i, comm);
            if (true == flag) {
                nextlocal_cid = i;
                break;
            }
        }

        (allredfnct)(&nextlocal_cid, &nextcid, 1, MPI_MAX, comm, bridgecomm,
                     local_leader, remote_leader, send_first );

        /* BUGFIX: cid exhaustion was previously unhandled, producing an
         * infinite loop or an out-of-range context id.  The MPI_MAX
         * reduction yields the same nextcid on every process, so either
         * all processes take this error path together or none does. */
        if ((unsigned int) nextcid >= mca_pml.pml_max_contextid) {
            if (flag) {
                /* give back the slot this process did reserve */
                opal_pointer_array_set_item(&ompi_mpi_communicators,
                                            nextlocal_cid, NULL);
            }
            OPAL_THREAD_LOCK(&ompi_cid_lock);
            ompi_comm_unregister_cid (comm->c_contextid);
            OPAL_THREAD_UNLOCK(&ompi_cid_lock);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        if (nextcid == nextlocal_cid) {
            response = 1; /* fine with me */
        } else {
            /* candidate differs from our reservation: drop ours and try
             * to claim the candidate slot instead */
            opal_pointer_array_set_item(&ompi_mpi_communicators,
                                        nextlocal_cid, NULL);

            flag = opal_pointer_array_test_and_set_item(&ompi_mpi_communicators,
                                                        nextcid, comm );
            if (true == flag) {
                response = 1; /* works as well */
            } else {
                response = 0; /* nope, not acceptable */
            }
        }

        (allredfnct)(&response, &glresponse, 1, MPI_MIN, comm, bridgecomm,
                     local_leader, remote_leader, send_first );
        if (1 == glresponse) {
            done = 1;             /* we are done */
            break;
        }
        else if ( 0 == glresponse ) {
            if ( 1 == response ) {
                /* we could use that, but other don't agree */
                opal_pointer_array_set_item(&ompi_mpi_communicators,
                                            nextcid, NULL);
            }
            start = nextcid+1; /* that's where we can start the next round */
        }
    }

    /* set the according values to the newcomm */
    newcomm->c_contextid = nextcid;
    newcomm->c_f_to_c_index = newcomm->c_contextid;
    opal_pointer_array_set_item (&ompi_mpi_communicators, nextcid, newcomm);

    OPAL_THREAD_LOCK(&ompi_cid_lock);
    ompi_comm_unregister_cid (comm->c_contextid);
    OPAL_THREAD_UNLOCK(&ompi_cid_lock);

    return (MPI_SUCCESS);
}