/*
 * comm shrink impl; assumes that standard error checking has already taken
 * place in the calling function.
 *
 * Makes up to 5 attempts.  Each attempt asks the device for the group of
 * failed processes, subtracts it from the group of comm_ptr, creates a
 * communicator over the remaining group, and runs an allreduce (MPI_MAX) over
 * that group so that every participant learns whether anybody failed to
 * create the communicator.
 *
 *   comm_ptr    - communicator to shrink
 *   newcomm_ptr - out: communicator produced by the last attempt.  It is
 *                 cleared to NULL on entry so the failure path never reads a
 *                 value the caller left uninitialised.
 *
 * Returns MPI_SUCCESS once an attempt finishes with errflag clear; otherwise
 * returns whatever mpi_errno holds when fn_fail is reached.
 */
int MPIR_Comm_shrink(MPID_Comm *comm_ptr, MPID_Comm **newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    /* NULL-initialised so the cleanup code can tell which objects were never
     * obtained (an early jump to fn_fail used to touch indeterminate
     * pointers). */
    MPID_Group *global_failed = NULL, *comm_grp = NULL, *new_group_ptr = NULL;
    int attempts = 0;
    int errflag = 0, tmp_errflag = 0;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_SHRINK);

    *newcomm_ptr = NULL;

    /* TODO - Implement this function for intercommunicators */
    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    do {
        /* Ignore the return value here as it will definitely communicate
         * with failed procs */
        (void) MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);

        mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
        if (MPID_Group_empty != global_failed) {
            MPIR_Group_release(global_failed);
        }
        /* Our reference is gone (or it was the builtin empty group); forget
         * the pointer so fn_fail does not touch it again. */
        global_failed = NULL;

        mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr);
        errflag = mpi_errno || *newcomm_ptr == NULL;

        /* Agree across the surviving group on whether anyone failed. */
        mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX, comm_ptr,
                                         new_group_ptr, MPIR_SHRINK_TAG, &tmp_errflag);
        MPIR_Group_release(new_group_ptr);

        if (errflag) {
            MPIU_Object_set_ref(new_group_ptr, 0);
        }
    } while (errflag && ++attempts < 5);

    if (errflag && attempts >= 5) {
        /* NOTE(review): mpi_errno here is the result of the last
         * MPIR_Allreduce_group call.  If that call returned MPI_SUCCESS this
         * function reports success even though every attempt failed --
         * confirm whether an explicit error code should be set. */
        goto fn_fail;
    } else {
        mpi_errno = MPI_SUCCESS;
    }

  fn_exit:
    if (comm_grp != NULL) {
        MPIR_Group_release(comm_grp);
    }
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
    return mpi_errno;
  fn_fail:
    if (*newcomm_ptr) {
        MPIU_Object_set_ref(*newcomm_ptr, 0);
    }
    /* global_failed is only non-NULL here when the group difference failed
     * before the loop dropped its reference; drop it the same way the loop
     * does, and never modify the builtin empty group. */
    if (global_failed != NULL && global_failed != MPID_Group_empty) {
        MPIR_Group_release(global_failed);
    }
    if (new_group_ptr != NULL) {
        MPIU_Object_set_ref(new_group_ptr, 0);
    }
    goto fn_exit;
}
/*
 * MPIR_Comm_agree - agree on a flag value among the processes of comm_ptr
 * that are not in the globally known failed group.
 *
 *   comm_ptr - communicator to agree over
 *   flag     - in/out: *flag is this process's contribution; on the normal
 *              path it is overwritten with the MPI_BAND reduction of all
 *              contributions.  It is left untouched if an early step fails.
 *
 * Returns mpi_errno.  When the agreed "success" value is 0, an
 * MPIX_ERR_PROC_FAILED error ("**mpix_comm_agree") is added to it.
 *
 * All group references obtained here are released in fn_exit, so the early
 * error exits release them as well.
 */
int MPIR_Comm_agree(MPIR_Comm *comm_ptr, int *flag)
{
    int mpi_errno = MPI_SUCCESS, mpi_errno_tmp = MPI_SUCCESS;
    /* NULL means "not obtained yet"; fn_exit relies on this. */
    MPIR_Group *comm_grp = NULL, *failed_grp = NULL;
    MPIR_Group *new_group_ptr = NULL, *global_failed = NULL;
    int result, success = 1;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    int values[2];

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_AGREE);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_AGREE);

    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    /* Get the locally known (not acknowledged) group of failed procs */
    mpi_errno = MPID_Comm_failure_get_acked(comm_ptr, &failed_grp);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* First decide on the group of failed procs. */
    mpi_errno = MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_AGREE_TAG);
    if (mpi_errno) {
        errflag = MPIR_ERR_PROC_FAILED;
    }

    mpi_errno = MPIR_Group_compare_impl(failed_grp, global_failed, &result);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* Create a subgroup without the failed procs */
    mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* If that group isn't the same as what we think is failed locally, then
     * mark it as such. */
    if (result == MPI_UNEQUAL || errflag) {
        success = 0;
    }

    /* Do an allreduce to decide whether or not anyone thinks the group
     * has changed */
    mpi_errno_tmp = MPIR_Allreduce_group(MPI_IN_PLACE, &success, 1, MPI_INT, MPI_MIN, comm_ptr,
                                         new_group_ptr, MPIR_AGREE_TAG, &errflag);
    if (!success || errflag || mpi_errno_tmp) {
        success = 0;
    }

    values[0] = success;
    values[1] = *flag;

    /* Determine both the result of this function (mpi_errno) and the result
     * of flag that will be returned to the user. */
    MPIR_Allreduce_group(MPI_IN_PLACE, values, 2, MPI_INT, MPI_BAND, comm_ptr,
                         new_group_ptr, MPIR_AGREE_TAG, &errflag);
    /* Ignore the result of the operation this time. Everyone will either
     * return a failure because of !success earlier or they will return
     * something useful for flag because of this operation. If there was a new
     * failure in between the first allreduce and the second one, it's ignored
     * here. */

    success = values[0];
    *flag = values[1];

    if (!success) {
        MPIR_ERR_SET(mpi_errno_tmp, MPIX_ERR_PROC_FAILED, "**mpix_comm_agree");
        MPIR_ERR_ADD(mpi_errno, mpi_errno_tmp);
    }

  fn_exit:
    /* Single cleanup point for both the normal and the error path.  The
     * builtin empty group is never released for failed_grp/global_failed,
     * matching the original checks. */
    if (failed_grp != NULL && failed_grp != MPIR_Group_empty) {
        MPIR_Group_release(failed_grp);
    }
    if (new_group_ptr != NULL) {
        MPIR_Group_release(new_group_ptr);
    }
    if (comm_grp != NULL) {
        MPIR_Group_release(comm_grp);
    }
    if (global_failed != NULL && global_failed != MPIR_Group_empty) {
        MPIR_Group_release(global_failed);
    }
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_AGREE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * comm shrink impl; assumes that standard error checking has already taken
 * place in the calling function.
 *
 * Makes up to 5 attempts.  Each attempt asks the device for the group of
 * failed processes, subtracts it from the group of comm_ptr, creates a
 * communicator over the remaining group, and runs an allreduce (MPI_MAX) of
 * errflag over that group.  If the reduced errflag is set, the communicator
 * and group from that attempt are force-released and the attempt is repeated.
 *
 *   comm_ptr    - communicator to shrink
 *   newcomm_ptr - out: the new communicator on success.  Cleared to NULL on
 *                 entry and after every failed attempt, so it is NULL
 *                 whenever fn_fail is reached.
 *
 * Returns MPI_SUCCESS once an attempt finishes with errflag clear; otherwise
 * returns whatever mpi_errno holds when fn_fail is reached.
 */
int MPIR_Comm_shrink(MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Group *global_failed = NULL, *comm_grp = NULL, *new_group_ptr = NULL;
    int attempts = 0;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_SHRINK);

    *newcomm_ptr = NULL;

    /* TODO - Implement this function for intercommunicators */
    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    do {
        errflag = MPIR_ERR_NONE;

        MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);
        /* Ignore the mpi_errno value here as it will definitely communicate
         * with failed procs */

        mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        if (MPIR_Group_empty != global_failed) {
            MPIR_Group_release(global_failed);
        }
        /* Our reference is gone (or it was the builtin empty group); forget
         * the pointer so fn_fail does not touch it again. */
        global_failed = NULL;

        mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr);
        if (*newcomm_ptr == NULL) {
            errflag = MPIR_ERR_PROC_FAILED;
        } else if (mpi_errno) {
            errflag = MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(mpi_errno) ?
                MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
            MPIR_Comm_release(*newcomm_ptr);
        }

        /* errflag is both the reduced value and the error-flag argument,
         * exactly as in the original call. */
        mpi_errno = MPII_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX, comm_ptr,
                                         new_group_ptr, MPIR_SHRINK_TAG, &errflag);
        MPIR_Group_release(new_group_ptr);

        if (errflag) {
            /* Somebody failed: force this attempt's objects to be released
             * regardless of how many references remain. */
            if (*newcomm_ptr != NULL && MPIR_Object_get_ref(*newcomm_ptr) > 0) {
                MPIR_Object_set_ref(*newcomm_ptr, 1);
                MPIR_Comm_release(*newcomm_ptr);
            }
            /* Do not leave a released communicator in the out-parameter. */
            *newcomm_ptr = NULL;

            if (MPIR_Object_get_ref(new_group_ptr) > 0) {
                MPIR_Object_set_ref(new_group_ptr, 1);
                MPIR_Group_release(new_group_ptr);
            }
        }
        /* The group reference was dropped above on every path. */
        new_group_ptr = NULL;
    } while (errflag && ++attempts < 5);

    if (errflag && attempts >= 5) {
        /* NOTE(review): mpi_errno here is the result of the last
         * MPII_Allreduce_group call.  If that call returned MPI_SUCCESS this
         * function reports success even though every attempt failed --
         * confirm whether an explicit error code should be set. */
        goto fn_fail;
    } else {
        mpi_errno = MPI_SUCCESS;
    }

  fn_exit:
    if (comm_grp != NULL) {
        MPIR_Group_release(comm_grp);
    }
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
    return mpi_errno;
  fn_fail:
    /* global_failed is only non-NULL here when the group difference failed
     * before the loop dropped its reference; drop it the same way the loop
     * does, and never modify the builtin empty group. */
    if (global_failed != NULL && global_failed != MPIR_Group_empty) {
        MPIR_Group_release(global_failed);
    }
    /* new_group_ptr is NULL unless the failed group difference stored
     * something in it; the original unguarded write dereferenced NULL. */
    if (new_group_ptr != NULL) {
        MPIR_Object_set_ref(new_group_ptr, 0);
    }
    *newcomm_ptr = NULL;
    goto fn_exit;
}
int MPIDI_CH3U_Check_for_failed_procs(void) { int mpi_errno = MPI_SUCCESS; int pmi_errno; int len; char *kvsname; MPIR_Group *prev_failed_group, *new_failed_group; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS); /* FIXME: Currently this only handles failed processes in comm_world. We need to fix hydra to include the pgid along with the rank, then we need to create the failed group from something bigger than comm_world. */ mpi_errno = MPIDI_PG_GetConnKVSname(&kvsname); if (mpi_errno) MPIR_ERR_POP(mpi_errno); #ifdef USE_PMI2_API { int vallen = 0; pmi_errno = PMI2_KVS_Get(kvsname, PMI2_ID_NULL, "PMI_dead_processes", MPIDI_failed_procs_string, PMI2_MAX_VALLEN, &vallen); MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get"); } #else pmi_errno = PMI_KVS_Get_value_length_max(&len); MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max"); pmi_errno = PMI_KVS_Get(kvsname, "PMI_dead_processes", MPIDI_failed_procs_string, len); MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get"); #endif if (*MPIDI_failed_procs_string == '\0') { /* there are no failed processes */ MPIDI_Failed_procs_group = MPIR_Group_empty; goto fn_exit; } MPL_DBG_MSG_S(MPIDI_CH3_DBG_OTHER, TYPICAL, "Received proc fail notification: %s", MPIDI_failed_procs_string); /* save reference to previous group so we can identify new failures */ prev_failed_group = MPIDI_Failed_procs_group; /* Parse the list of failed processes */ MPIDI_CH3U_Get_failed_group(-2, &MPIDI_Failed_procs_group); /* get group of newly failed processes */ mpi_errno = MPIR_Group_difference_impl(MPIDI_Failed_procs_group, prev_failed_group, &new_failed_group); if (mpi_errno) MPIR_ERR_POP(mpi_errno); if (new_failed_group != MPIR_Group_empty) { mpi_errno = MPIDI_CH3I_Comm_handle_failed_procs(new_failed_group); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = 
terminate_failed_VCs(new_failed_group); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIR_Group_release(new_failed_group); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } /* free prev group */ if (prev_failed_group != MPIR_Group_empty) { mpi_errno = MPIR_Group_release(prev_failed_group); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS); return mpi_errno; fn_oom: /* out-of-memory handler for utarray operations */ MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray"); fn_fail: goto fn_exit; }