/*
 * Report a fatal error and abort.
 *
 * type, comm, name, error_code and arglist are forwarded untouched to the
 * message printer.  error_code may be NULL, in which case the abort code
 * is 1; otherwise the abort code is *error_code.
 */
static void backend_fatal(char *type, struct ompi_communicator_t *comm,
                          char *name, int *error_code, va_list arglist)
{
    int abort_code;

    /* Aggregated output is only used while the rte is initialized */
    if (!ompi_rte_initialized) {
        backend_fatal_no_aggregate(type, comm, name, error_code, arglist);
    } else {
        backend_fatal_aggregate(type, comm, name, error_code, arglist);
    }

    /* A NULL communicator means we are either early in initialization or
     * handling a window error.  Aborting on MPI_COMM_SELF is sufficient
     * in that case; the error will propagate. */
    if (NULL == comm) {
        comm = &ompi_mpi_comm_self.comm;
    }

    abort_code = (NULL == error_code) ? 1 : *error_code;
    ompi_mpi_abort(comm, abort_code);
}
static void backend_fatal(char *type, struct ompi_communicator_t *comm, char *name, int *error_code, va_list arglist) { /* Do we want help message aggregation? Usually yes, but it uses malloc(), which may cause further errors if we're exiting due to a memory problem. So we also have the option to *not* aggregate (which doesn't use malloc during its call stack, meaning that there is a better chance that the error message will actually get printed). Note that we can only do aggregation after MPI_INIT and before MPI_FINALIZE. */ if (orte_help_want_aggregate && ompi_mpi_initialized && !ompi_mpi_finalized) { backend_fatal_aggregate(type, comm, name, error_code, arglist); } else { backend_fatal_no_aggregate(type, comm, name, error_code, arglist); } /* Should we do something more intelligent than just using COMM_SELF? */ if (comm == NULL) { comm = &ompi_mpi_comm_self.comm; } if (NULL != error_code) { ompi_mpi_abort(comm, *error_code, false); } else { ompi_mpi_abort(comm, 1, false); } }
int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module, int topo_index) { int i, rc, hier, *ranks_in_comm, is_used = 0, comm_size = ompi_comm_size(ml_module->comm); int n_hier, tp , max_tp; const mca_coll_ml_topology_t *topo_info; ranks_in_comm = (int *) malloc(comm_size * sizeof(int)); if (OPAL_UNLIKELY(NULL == ranks_in_comm)) { ML_ERROR(("Memory allocation failed.")); ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_NO_MEM, true); } for (i = 0; i < comm_size; ++i) { ranks_in_comm[i] = i; } if (COLL_ML_TOPO_MAX == topo_index) { tp = 0; max_tp = COLL_ML_TOPO_MAX; } else { tp = topo_index; max_tp = topo_index + 1; } for (; tp < max_tp; tp++) { topo_info = &ml_module->topo_list[tp]; n_hier = topo_info->n_levels; for (hier = 0; hier < n_hier; ++hier) { hierarchy_pairs *pair = &topo_info->component_pairs[hier]; mca_bcol_base_component_t *b_cm = pair->bcol_component; if(0 == strcmp(bcol_name, b_cm->bcol_version.mca_component_name)) { is_used = 1; break; } } } rc = comm_allreduce_pml(&is_used, &is_used, 1, MPI_INT, ompi_comm_rank(ml_module->comm), MPI_MAX, comm_size, ranks_in_comm, ml_module->comm); if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { ML_ERROR(("comm_allreduce_pml failed.")); ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_OP, true); } free(ranks_in_comm); return is_used; }
static void sb_mmap_alloc(void) { #if defined(__WINDOWS__) sb.sb_map = CreateFileMapping(sb.sb_fd, NULL, PAGE_READWRITE, 0, (DWORD)sb.sb_offset + sb.sb_length, NULL); if(NULL == sb.sb_map) { V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: CreateFileMapping : %s", GetLastError()); ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE, false); } sb.sb_addr = (uintptr_t) MapViewOfFile(sb.sb_map, FILE_MAP_ALL_ACCESS, 0, sb.sb_offset, sb.sb_length); if(NULL == (void*)sb.sb_addr) { V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: mmap: %s", GetLastError()); CloseHandle(sb.sb_map); CloseHandle(sb.sb_fd); ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE, false); } #else #ifndef MAP_NOCACHE # define MAP_NOCACHE 0 #endif if(-1 == ftruncate(sb.sb_fd, sb.sb_offset + sb.sb_length)) { V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: ftruncate: %s", strerror(errno)); close(sb.sb_fd); ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE, false); } sb.sb_addr = (uintptr_t) mmap((void *) sb.sb_addr, sb.sb_length, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_NOCACHE, sb.sb_fd, sb.sb_offset); if(((uintptr_t) -1) == sb.sb_addr) { V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: mmap: %s", strerror(errno)); close(sb.sb_fd); ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE, false); } #endif }
/*
 * MPI_Abort: log which rank/communicator requested the abort and with
 * which error code, then hand off to ompi_mpi_abort and return its result.
 */
int MPI_Abort(MPI_Comm comm, int errorcode)
{
    int caller_rank;
    const char *comm_name;

    /* comm and errorcode are intentionally not validated; only the
       init/finalize state is checked */
    if (MPI_PARAM_CHECK) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
    }

    caller_rank = ompi_comm_rank(comm);
    comm_name = comm->c_name;

    opal_output(0, "MPI_ABORT invoked on rank %d in communicator %s with errorcode %d\n",
                caller_rank, comm_name, errorcode);

    return ompi_mpi_abort(comm, errorcode, true);
}
/**
 * Default errhandler callback.
 *
 * Invokes cbfunc(cbdata), when cbfunc is non-NULL, so the caller can
 * release its data, then aborts on MPI_COMM_WORLD with `status` as the
 * error code.  procs and info are not examined.
 */
void ompi_errhandler_callback(int status,
                              opal_list_t *procs,
                              opal_list_t *info,
                              opal_pmix_release_cbfunc_t cbfunc,
                              void *cbdata)
{
    /* Give the caller the chance to free its data first */
    if (cbfunc != NULL) {
        cbfunc(cbdata);
    }

    /* Aborting is the default action */
    ompi_mpi_abort(MPI_COMM_WORLD, status);
}
static void sb_mmap_alloc(void) { #ifndef MAP_NOCACHE # define MAP_NOCACHE 0 #endif if(-1 == ftruncate(sb.sb_fd, sb.sb_offset + sb.sb_length)) { V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: ftruncate: %s", strerror(errno)); close(sb.sb_fd); ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE); } sb.sb_addr = (uintptr_t) mmap((void *) sb.sb_addr, sb.sb_length, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_NOCACHE, sb.sb_fd, sb.sb_offset); if(((uintptr_t) -1) == sb.sb_addr) { V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: mmap: %s", strerror(errno)); close(sb.sb_fd); ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE); } }
/*
 * Find the aggregator responsible for file offset `off`.
 *
 * fh              not used by this routine
 * off             file offset being looked up
 * min_off         lowest offset covered by the file domains
 * len             in: requested length; out: reduced to the number of
 *                 bytes left in the selected file domain when the request
 *                 would run past fd_end[index]
 * fd_size         size of one file domain (used when striping_unit <= 0)
 * fd_start        not used by this routine
 * fd_end          last offset of each file domain; indexed by aggregator
 *                 index, so it is expected to hold num_aggregators entries
 * striping_unit   when > 0 the domain is located by scanning fd_end
 *                 instead of by arithmetic on fd_size
 * num_aggregators number of entries in aggregator_list
 * aggregator_list rank of each aggregator
 *
 * Returns aggregator_list[index].  If no valid index can be found an error
 * is printed to stderr and ompi_mpi_abort is called.
 */
int mca_fcoll_two_phase_calc_aggregator(ompio_file_t *fh,
                                        OMPI_MPI_OFFSET_TYPE off,
                                        OMPI_MPI_OFFSET_TYPE min_off,
                                        OMPI_MPI_OFFSET_TYPE *len,
                                        OMPI_MPI_OFFSET_TYPE fd_size,
                                        OMPI_MPI_OFFSET_TYPE *fd_start,
                                        OMPI_MPI_OFFSET_TYPE *fd_end,
                                        int striping_unit,
                                        int num_aggregators,
                                        int *aggregator_list)
{
    int rank_index, rank;
    OMPI_MPI_OFFSET_TYPE avail_bytes;

    rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);

    if (striping_unit > 0){
        /* Linear scan of the domain ends.  The scan is bounded by
         * num_aggregators so an offset beyond the last domain falls into
         * the error path below instead of reading past the end of
         * fd_end. */
        rank_index = 0;
        while (rank_index < num_aggregators && off > fd_end[rank_index]) {
            rank_index++;
        }
    }

    if (rank_index >= num_aggregators || rank_index < 0) {
        fprintf(stderr,
                "Error in mca_fcoll_two_phase_calc_aggregator():");
        fprintf(stderr,
                "rank_index(%d) >= num_aggregators(%d)fd_size=%lld off=%lld\n",
                rank_index, num_aggregators,
                (long long) fd_size, (long long) off);
        ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1);
    }

    /* Clamp the request to what is left in this file domain */
    avail_bytes = fd_end[rank_index] + 1 - off;
    if (avail_bytes < *len){
        *len = avail_bytes;
    }

    rank = aggregator_list[rank_index];

    return rank;
}
/**
 * Runtime errhandler callback.
 *
 * Unconditionally aborts on MPI_COMM_WORLD with error code 1.  The procs
 * array is not examined.
 */
void ompi_errhandler_runtime_callback(opal_pointer_array_t *procs)
{
    (void) procs;

    ompi_mpi_abort(MPI_COMM_WORLD, 1, false);
}
static void fatal_error(char *mesg) { IBOFFLOAD_ERROR(("FATAL ERROR: %s", mesg)); ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_INTERN, true); }
/*
 * Dispatch for callback on message completion.
 *
 * request is the completed receive.  Its req_complete_cb_data is the
 * ompi_osc_pt2pt_buffer_t that received the message, and that buffer's
 * `data` field is the owning module.
 *
 * The message is dispatched on the hdr_type found at the start of
 * buffer->payload.  Afterwards the request is freed and a new receive is
 * posted on the same buffer with this function as its callback.
 *
 * Returns OMPI_ERR_NOT_AVAILABLE if the request was cancelled (no new
 * receive is posted in that case); otherwise returns the result of
 * posting the new receive.  Values stored in `ret` by the individual
 * handlers are overwritten by that final result.
 */
static int component_fragment_cb(ompi_request_t *request)
{
    int ret;
    ompi_osc_pt2pt_buffer_t *buffer;
    ompi_osc_pt2pt_module_t *module;

    if (request->req_status._cancelled) {
        opal_output_verbose(5, ompi_osc_base_framework.framework_output,
                            "pt2pt request was canceled");
        return OMPI_ERR_NOT_AVAILABLE;
    }

    buffer = (ompi_osc_pt2pt_buffer_t*) request->req_complete_cb_data;
    module = (ompi_osc_pt2pt_module_t*) buffer->data;

    /* every message must carry at least the base header */
    assert(request->req_status._ucount >= (int) sizeof(ompi_osc_pt2pt_base_header_t));

    /* handle message */
    switch (((ompi_osc_pt2pt_base_header_t*) buffer->payload)->hdr_type) {
    case OMPI_OSC_PT2PT_HDR_PUT:
        {
            /* get our header and payload (payload starts right after the header) */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            /* presumably converts the header from network byte order -- confirm against the macro */
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            /* window not yet in an exposure epoch but in fence mode:
               additionally flag access and exposure epochs */
            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win,
                                      OMPI_WIN_FENCE |
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* NOTE(review): ret is overwritten before it is ever inspected */
            ret = ompi_osc_pt2pt_sendreq_recv_put(module, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_ACC:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            /* same fence-mode epoch update as for PUT */
            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win,
                                      OMPI_WIN_FENCE |
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* receive into temporary buffer */
            /* NOTE(review): ret is overwritten before it is ever inspected */
            ret = ompi_osc_pt2pt_sendreq_recv_accum(module, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_GET:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);
            ompi_datatype_t *datatype;
            ompi_osc_pt2pt_replyreq_t *replyreq;
            ompi_proc_t *proc;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            /* same fence-mode epoch update as for PUT */
            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win,
                                      OMPI_WIN_FENCE |
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* create or get a pointer to our datatype */
            proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
            datatype = ompi_osc_base_datatype_create(proc, &payload);

            if (NULL == datatype) {
                opal_output(ompi_osc_base_framework.framework_output,
                            "Error recreating datatype.  Aborting.");
                ompi_mpi_abort(module->p2p_comm, 1, false);
            }

            /* create replyreq sendreq */
            /* NOTE(review): ret is not checked here; if allocation fails,
               replyreq may be used below without having been set -- confirm */
            ret = ompi_osc_pt2pt_replyreq_alloc_init(module,
                                                     header->hdr_origin,
                                                     header->hdr_origin_sendreq,
                                                     header->hdr_target_disp,
                                                     header->hdr_target_count,
                                                     datatype,
                                                     &replyreq);

            /* send replyreq */
            ompi_osc_pt2pt_replyreq_send(module, replyreq);

            /* sendreq does the right retain, so we can release safely */
            OBJ_RELEASE(datatype);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_REPLY:
        {
            ompi_osc_pt2pt_reply_header_t *header =
                (ompi_osc_pt2pt_reply_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);
            ompi_osc_pt2pt_sendreq_t *sendreq;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_REPLY_HDR_NTOH(*header);
            }
#endif

            /* get original sendreq pointer */
            sendreq = (ompi_osc_pt2pt_sendreq_t*) header->hdr_origin_sendreq.pval;
            /* from here on `module` is the sendreq's module; the receive
               re-posted at the end of this function uses its communicator */
            module = sendreq->req_module;

            /* receive data */
            ompi_osc_pt2pt_replyreq_recv(module, sendreq, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_POST:
        {
            int32_t count;
            /* one fewer post message outstanding; wake waiters at zero */
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_post_msgs -= 1);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_COMPLETE:
        {
            ompi_osc_pt2pt_control_header_t *header =
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;
            int32_t count;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            /* we've heard from one more place, and have value reqs to
               process */
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_complete_msgs -= 1);
            /* count is the sum of both updated counters */
            count += (module->p2p_num_pending_in += header->hdr_value[0]);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);

            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_LOCK_REQ:
        {
            ompi_osc_pt2pt_control_header_t *header =
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;
            int32_t count;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            /* hdr_value[1] > 0 is handled as a passive lock request;
               anything else is counted as a received lock ack */
            if (header->hdr_value[1] > 0) {
                ompi_osc_pt2pt_passive_lock(module, header->hdr_value[0],
                                            header->hdr_value[1]);
            } else {
                OPAL_THREAD_LOCK(&module->p2p_lock);
                count = (module->p2p_lock_received_ack += 1);
                OPAL_THREAD_UNLOCK(&module->p2p_lock);

                /* NOTE(review): wakes waiters on count != 0, unlike the
                   other cases which test == 0 -- confirm this is intended */
                if (count != 0) opal_condition_broadcast(&module->p2p_cond);
            }
        }
        break;

    case OMPI_OSC_PT2PT_HDR_UNLOCK_REQ:
        {
            ompi_osc_pt2pt_control_header_t *header =
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            ompi_osc_pt2pt_passive_unlock(module, header->hdr_value[0],
                                          header->hdr_value[1]);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_UNLOCK_REPLY:
        {
            int32_t count;

            /* one fewer outgoing operation pending; wake waiters at zero */
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_pending_out -= 1);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    default:
        /* unknown header types are only logged; the receive is still re-posted */
        opal_output_verbose(5, ompi_osc_base_framework.framework_output,
                            "received one-sided packet for with unknown type");
    }

    /* release the completed request, then re-arm the same buffer with a
       fresh receive that calls back into this function */
    ompi_request_free(&request);
    ret = ompi_osc_pt2pt_component_irecv(buffer->payload,
                                         mca_osc_pt2pt_component.p2p_c_eager_size,
                                         MPI_BYTE,
                                         MPI_ANY_SOURCE,
                                         CONTROL_MSG_TAG,
                                         module->p2p_comm,
                                         &buffer->request,
                                         component_fragment_cb,
                                         buffer);

    return ret;
}