int MPID_Comm_failure_get_acked(MPID_Comm *comm_ptr, MPID_Group **group_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Group *failed_group, *comm_group;
    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);

    /* Get the group of all failed processes */
    MPIDI_CH3U_Check_for_failed_procs();
    MPIDI_CH3U_Get_failed_group(comm_ptr->dev.last_ack_rank, &failed_group);
    if (failed_group == MPID_Group_empty) {
        *group_ptr = MPID_Group_empty;
        goto fn_exit;
    }

    mpi_errno = MPIR_Comm_group_impl(comm_ptr, &comm_group);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Get the intersection of all failed processes in this communicator */
    mpi_errno = MPIR_Group_intersection_impl(failed_group, comm_group, group_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPIR_Group_release(comm_group);
    MPIR_Group_release(failed_group);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
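/* Illustrative sketch (not part of this file): how this device routine is
 * reached from application code through MPICH's ULFM-style MPIX_ interface.
 * MPIX_Comm_failure_ack() appears to record the failures noticed so far via
 * comm_ptr->dev.last_ack_rank, and MPIX_Comm_failure_get_acked() then calls
 * down into MPID_Comm_failure_get_acked() above.  This assumes an MPICH
 * build with fault-tolerance support and MPI_ERRORS_RETURN installed on the
 * communicator so that failures do not abort the job:
 *
 *   MPI_Group failed;
 *   int nfailed;
 *
 *   MPI_Comm_set_errhandler(comm, MPI_ERRORS_RETURN);
 *   MPIX_Comm_failure_ack(comm);                 // acknowledge failures seen so far
 *   MPIX_Comm_failure_get_acked(comm, &failed);  // lands in the routine above
 *   MPI_Group_size(failed, &nfailed);            // how many acked failures
 *   MPI_Group_free(&failed);
 */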
int MPIDI_CH3U_Check_for_failed_procs(void)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int len;
    char *kvsname;
    MPIR_Group *prev_failed_group, *new_failed_group;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    /* FIXME: Currently this only handles failed processes in
       comm_world.  We need to fix hydra to include the pgid along
       with the rank, then we need to create the failed group from
       something bigger than comm_world. */
    mpi_errno = MPIDI_PG_GetConnKVSname(&kvsname);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

#ifdef USE_PMI2_API
    {
        int vallen = 0;
        pmi_errno = PMI2_KVS_Get(kvsname, PMI2_ID_NULL, "PMI_dead_processes",
                                 MPIDI_failed_procs_string, PMI2_MAX_VALLEN, &vallen);
        MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
    }
#else
    pmi_errno = PMI_KVS_Get_value_length_max(&len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max");
    pmi_errno = PMI_KVS_Get(kvsname, "PMI_dead_processes", MPIDI_failed_procs_string, len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
#endif

    if (*MPIDI_failed_procs_string == '\0') {
        /* there are no failed processes */
        MPIDI_Failed_procs_group = MPIR_Group_empty;
        goto fn_exit;
    }

    MPL_DBG_MSG_S(MPIDI_CH3_DBG_OTHER, TYPICAL, "Received proc fail notification: %s",
                  MPIDI_failed_procs_string);

    /* save reference to previous group so we can identify new failures */
    prev_failed_group = MPIDI_Failed_procs_group;

    /* Parse the list of failed processes */
    MPIDI_CH3U_Get_failed_group(-2, &MPIDI_Failed_procs_group);

    /* get group of newly failed processes */
    mpi_errno = MPIR_Group_difference_impl(MPIDI_Failed_procs_group, prev_failed_group,
                                           &new_failed_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    if (new_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIDI_CH3I_Comm_handle_failed_procs(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = terminate_failed_VCs(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_release(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    /* free prev group */
    if (prev_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIR_Group_release(prev_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
    return mpi_errno;
  fn_oom: /* out-of-memory handler for utarray operations */
    MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
  fn_fail:
    goto fn_exit;
}
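/* A minimal sketch (hypothetical helper, not part of this file) of the kind
 * of parsing MPIDI_CH3U_Get_failed_group() must perform on
 * MPIDI_failed_procs_string, assuming the value published by the process
 * manager under "PMI_dead_processes" is a comma-separated list of
 * comm_world ranks such as "1,5,12" (this format is an assumption here;
 * the debug message above only shows the raw string):
 *
 *   static int parse_dead_ranks(const char *str, int *ranks, int max)
 *   {
 *       int n = 0;
 *       char *tmp = MPL_strdup(str);   // copy: strtok modifies its argument
 *       char *tok = strtok(tmp, ",");
 *       while (tok != NULL && n < max) {
 *           ranks[n++] = atoi(tok);    // one comm_world rank per token
 *           tok = strtok(NULL, ",");
 *       }
 *       MPL_free(tmp);
 *       return n;                      // number of ranks parsed
 *   }
 */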
int MPID_Comm_failed_bitarray(MPID_Comm *comm_ptr, uint32_t **bitarray, int acked)
{
    int mpi_errno = MPI_SUCCESS;
    int size, i;
    uint32_t bit;
    int *failed_procs, *group_procs;
    MPID_Group *failed_group, *comm_group;
    MPIU_CHKLMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_COMM_FAILED_BITARRAY);

    MPIDI_FUNC_ENTER(MPID_STATE_COMM_FAILED_BITARRAY);

    /* TODO - Fix this for intercommunicators */
    size = comm_ptr->local_size;

    /* We can fit sizeof(uint32_t) * 8 ranks in one uint32_t so divide the
     * size by that */
    /* This buffer will be handed back to the calling function so we use a
     * "real" malloc here and expect the caller to free the buffer later. The
     * other buffers in this function are temporary and will be automatically
     * cleaned up at the end of the function. */
    *bitarray = (uint32_t *) MPIU_Malloc(sizeof(uint32_t) * (size / (sizeof(uint32_t) * 8) + 1));
    if (!(*bitarray)) {
        fprintf(stderr, "Could not allocate space for bitarray\n");
        PMPI_Abort(MPI_COMM_WORLD, 1);
    }

    for (i = 0; i <= size / (sizeof(uint32_t) * 8); i++)
        (*bitarray)[i] = 0;

    mpi_errno = MPIDI_CH3U_Check_for_failed_procs();
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (acked)
        MPIDI_CH3U_Get_failed_group(comm_ptr->dev.last_ack_rank, &failed_group);
    else
        MPIDI_CH3U_Get_failed_group(-2, &failed_group);

    if (failed_group == MPID_Group_empty) goto fn_exit;

    MPIU_CHKLMEM_MALLOC(group_procs, int *, sizeof(int) * failed_group->size, mpi_errno,
                        "group_procs");
    for (i = 0; i < failed_group->size; i++)
        group_procs[i] = i;

    MPIU_CHKLMEM_MALLOC(failed_procs, int *, sizeof(int) * failed_group->size, mpi_errno,
                        "failed_procs");

    MPIR_Comm_group_impl(comm_ptr, &comm_group);
    MPIR_Group_translate_ranks_impl(failed_group, failed_group->size, group_procs,
                                    comm_group, failed_procs);

    /* The bits will actually be ordered in descending order rather than
     * ascending. This is purely for readability since it makes no practical
     * difference. So if the bits look like this:
     *
     * 10001100 01001000 00000000 00000001
     *
     * Then processes 0, 4, 5, 9, 12, and 31 have failed. */
    for (i = 0; i < failed_group->size; i++) {
        bit = 0x80000000;
        bit >>= failed_procs[i] % (sizeof(uint32_t) * 8);
        (*bitarray)[failed_procs[i] / (sizeof(uint32_t) * 8)] |= bit;
    }

    MPIR_Group_free_impl(comm_group);

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_FUNC_EXIT(MPID_STATE_COMM_FAILED_BITARRAY);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
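/* Illustrative sketch (not part of this file): decoding the bitarray on the
 * caller's side.  Because the routine above stores rank r at bit position
 * (31 - r % 32) of word r / 32 (descending order within each word), a
 * hypothetical caller can test whether rank r has failed like this:
 *
 *   uint32_t *ba;
 *   MPID_Comm_failed_bitarray(comm_ptr, &ba, 1);       // acked failures only
 *   int failed = (ba[r / 32] >> (31 - (r % 32))) & 1;  // 1 if rank r failed
 *   MPIU_Free(ba);  // buffer came from MPIU_Malloc; caller owns and frees it
 */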