Example #1
static void backend_fatal(char *type, struct ompi_communicator_t *comm,
                          char *name, int *error_code,
                          va_list arglist)
{
    /* We only want aggregation while the rte is initialized */
    if (ompi_rte_initialized) {
        backend_fatal_aggregate(type, comm, name, error_code, arglist);
    } else {
        backend_fatal_no_aggregate(type, comm, name, error_code, arglist);
    }

    /* In most instances the communicator will be valid. If not, we are either early in
     * the initialization or we are dealing with a window. Thus, it is good enough to abort
     * on MPI_COMM_SELF, the error will propagate.
     */
    if (comm == NULL) {
        comm = &ompi_mpi_comm_self.comm;
    }

    if (NULL != error_code) {
        ompi_mpi_abort(comm, *error_code);
    } else {
        ompi_mpi_abort(comm, 1);
    }
}
static void backend_fatal(char *type, struct ompi_communicator_t *comm,
                          char *name, int *error_code, 
                          va_list arglist)
{
    /* Do we want help message aggregation?  Usually yes, but it uses
       malloc(), which may cause further errors if we're exiting due
       to a memory problem.  So we also have the option to *not*
       aggregate (which doesn't use malloc during its call stack,
       meaning that there is a better chance that the error message
       will actually get printed).  Note that we can only do
       aggregation after MPI_INIT and before MPI_FINALIZE. */
    if (orte_help_want_aggregate && ompi_mpi_initialized && 
        !ompi_mpi_finalized) {
        backend_fatal_aggregate(type, comm, name, error_code, arglist);
    } else {
        backend_fatal_no_aggregate(type, comm, name, error_code, arglist);
    }

    /* Should we do something more intelligent than just using
       COMM_SELF? */
    if (comm == NULL) {
        comm = &ompi_mpi_comm_self.comm;
    }

    if (NULL != error_code) {
        ompi_mpi_abort(comm, *error_code, false);
    } else {
        ompi_mpi_abort(comm, 1, false);
    }
}
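
The comment above notes that the aggregating path calls malloc(), which can itself fail when the process is aborting precisely because memory is exhausted. Below is a minimal sketch of that defensive idea, keeping a heap-free fallback for the error message; the helper is purely illustrative and not an Open MPI API.

/* Illustrative sketch (not an Open MPI API): prefer the convenient stdio path,
 * but keep a fixed-buffer fallback that avoids the heap when memory may be
 * the very thing that failed. */
#include <stdarg.h>
#include <stdio.h>
#include <unistd.h>

static void report_fatal(int heap_is_trustworthy, const char *fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    if (heap_is_trustworthy) {
        vfprintf(stderr, fmt, ap);          /* stdio may allocate internally */
    } else {
        char buf[256];                      /* stack buffer: no malloc() involved */
        int n = vsnprintf(buf, sizeof(buf), fmt, ap);
        if (n > 0) {
            if (n > (int) sizeof(buf) - 1) {
                n = (int) sizeof(buf) - 1;  /* message was truncated */
            }
            (void) write(STDERR_FILENO, buf, (size_t) n);
        }
    }
    va_end(ap);
}
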
int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module,
        int topo_index)
{
    int i, rc, hier, *ranks_in_comm,
        is_used = 0,
        comm_size = ompi_comm_size(ml_module->comm);
    int n_hier, tp, max_tp;
    const mca_coll_ml_topology_t *topo_info;

    ranks_in_comm = (int *) malloc(comm_size * sizeof(int));
    if (OPAL_UNLIKELY(NULL == ranks_in_comm)) {
        ML_ERROR(("Memory allocation failed."));
        ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_NO_MEM, true);
    }

    for (i = 0; i < comm_size; ++i) {
        ranks_in_comm[i] = i;
    }

    if (COLL_ML_TOPO_MAX == topo_index) {
        tp = 0;
        max_tp = COLL_ML_TOPO_MAX;
    } else {
        tp = topo_index;
        max_tp = topo_index + 1;
    }

    for (; tp < max_tp; tp++) {
        topo_info = &ml_module->topo_list[tp];
        n_hier = topo_info->n_levels;
        for (hier = 0; hier < n_hier; ++hier) {
            hierarchy_pairs *pair = &topo_info->component_pairs[hier];
            mca_bcol_base_component_t *b_cm = pair->bcol_component;
            if(0 == strcmp(bcol_name,
                        b_cm->bcol_version.mca_component_name)) {
                is_used = 1;
                break;
            }
        }
    }

    rc = comm_allreduce_pml(&is_used, &is_used, 1, MPI_INT,
                  ompi_comm_rank(ml_module->comm), MPI_MAX,
                  comm_size, ranks_in_comm, ml_module->comm);

    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        ML_ERROR(("comm_allreduce_pml failed."));
        ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_OP, true);
    }

    free(ranks_in_comm);

    return is_used;
}
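
The routine above raises a local 0/1 flag when the bcol appears at any level of the topology, then combines the flags across the communicator with a MAX reduction so every rank returns the same answer. Here is a minimal sketch of that flag-combining pattern using the public MPI API rather than the internal comm_allreduce_pml helper; the function name is illustrative.

/* Illustrative sketch: MAX over {0,1} acts as a logical OR across all ranks,
 * so everyone learns whether the feature is used anywhere in the communicator. */
#include <mpi.h>

static int used_anywhere(MPI_Comm comm, int locally_used)
{
    int local = locally_used ? 1 : 0;
    int global = 0;

    MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_MAX, comm);
    return global;
}
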
static void sb_mmap_alloc(void)
{
#if defined(__WINDOWS__)
    sb.sb_map = CreateFileMapping(sb.sb_fd, NULL, PAGE_READWRITE, 0, 
                                  (DWORD)sb.sb_offset + sb.sb_length,  NULL); 
    if(NULL == sb.sb_map) 
    {
        V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: CreateFileMapping : %s", 
                     GetLastError());
        ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE, false);
    }
    
    sb.sb_addr = (uintptr_t) MapViewOfFile(sb.sb_map, FILE_MAP_ALL_ACCESS, 0, 
                                           sb.sb_offset, sb.sb_length); 
    if(NULL == (void*)sb.sb_addr) 
    {
        V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: mmap: %s", 
                     GetLastError());
        CloseHandle(sb.sb_map);
        CloseHandle(sb.sb_fd);
        ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE, false);    
    }
#else    
#ifndef MAP_NOCACHE
#   define MAP_NOCACHE 0
#endif
    if(-1 == ftruncate(sb.sb_fd, sb.sb_offset + sb.sb_length))
    {
        V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: ftruncate: %s", 
                     strerror(errno));
        close(sb.sb_fd);
        ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE, false);
    }
    sb.sb_addr = (uintptr_t) mmap((void *) sb.sb_addr, sb.sb_length, 
                                  PROT_WRITE | PROT_READ, 
                                  MAP_PRIVATE | MAP_NOCACHE, sb.sb_fd, 
                                  sb.sb_offset);
    if(((uintptr_t) -1) == sb.sb_addr)
    {
        V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: mmap: %s", 
                     strerror(errno));
        close(sb.sb_fd);
        ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE, false);
    }
#endif
}
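
The POSIX branch above first grows the backing file with ftruncate() so the requested region exists, then maps it with mmap(), aborting the job if either step fails. Below is a self-contained sketch of that ftruncate + mmap pattern; the path, offset, and length are hypothetical, and the offset must be a multiple of the page size.

/* Illustrative sketch of the ftruncate() + mmap() pattern; error handling is
 * simplified to returning NULL instead of aborting the job. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static void *map_file_window(const char *path, off_t offset, size_t length)
{
    int fd = open(path, O_CREAT | O_RDWR, 0600);
    if (fd < 0) {
        fprintf(stderr, "open: %s\n", strerror(errno));
        return NULL;
    }
    /* make sure the file is large enough to back the requested window */
    if (ftruncate(fd, offset + (off_t) length) < 0) {
        fprintf(stderr, "ftruncate: %s\n", strerror(errno));
        close(fd);
        return NULL;
    }
    void *addr = mmap(NULL, length, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE, fd, offset);   /* offset must be page aligned */
    close(fd);   /* the mapping remains valid after the descriptor is closed */
    return (MAP_FAILED == addr) ? NULL : addr;
}
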
Example #5
File: abort.c Project: aosm/openmpi
int MPI_Abort(MPI_Comm comm, int errorcode) 
{
    /* Don't even bother checking comm and errorcode values for
       errors */

    if (MPI_PARAM_CHECK) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
    }

    opal_output(0, "MPI_ABORT invoked on rank %d in communicator %s with errorcode %d\n", 
                ompi_comm_rank(comm), comm->c_name, errorcode);
    return ompi_mpi_abort(comm, errorcode, true);
}
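
MPI_Abort() is the thin public entry point: it logs the rank, communicator name, and error code, then hands off to ompi_mpi_abort(). The following minimal user-level program exercises this path; the error code 42 is arbitrary.

/* Minimal program exercising the MPI_Abort() path shown above. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (0 == rank) {
        fprintf(stderr, "rank 0 hit an unrecoverable condition, aborting\n");
        MPI_Abort(MPI_COMM_WORLD, 42);   /* inside Open MPI this reaches ompi_mpi_abort() */
    }

    MPI_Finalize();
    return 0;
}
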
Example #6
/**
 * Default errhandler callback
 */
void ompi_errhandler_callback(int status,
                              opal_list_t *procs,
                              opal_list_t *info,
                              opal_pmix_release_cbfunc_t cbfunc,
                              void *cbdata)
{
    /* allow the caller to release its data */
    if (NULL != cbfunc) {
        cbfunc(cbdata);
    }
    /* our default action is to abort */
    ompi_mpi_abort(MPI_COMM_WORLD, status);
}
static void sb_mmap_alloc(void)
{
#ifndef MAP_NOCACHE
#   define MAP_NOCACHE 0
#endif
    if(-1 == ftruncate(sb.sb_fd, sb.sb_offset + sb.sb_length))
    {
        V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: ftruncate: %s",
                     strerror(errno));
        close(sb.sb_fd);
        ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE);
    }
    sb.sb_addr = (uintptr_t) mmap((void *) sb.sb_addr, sb.sb_length,
                                  PROT_WRITE | PROT_READ,
                                  MAP_PRIVATE | MAP_NOCACHE, sb.sb_fd,
                                  sb.sb_offset);
    if(((uintptr_t) -1) == sb.sb_addr)
    {
        V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: mmap: %s",
                     strerror(errno));
        close(sb.sb_fd);
        ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE);
    }
}
int mca_fcoll_two_phase_calc_aggregator(ompio_file_t *fh,
					OMPI_MPI_OFFSET_TYPE off,
					OMPI_MPI_OFFSET_TYPE min_off,
					OMPI_MPI_OFFSET_TYPE *len,
					OMPI_MPI_OFFSET_TYPE fd_size,
					OMPI_MPI_OFFSET_TYPE *fd_start,
					OMPI_MPI_OFFSET_TYPE *fd_end,
					int striping_unit,
					int num_aggregators,
					int *aggregator_list)
{


    int rank_index, rank;
    OMPI_MPI_OFFSET_TYPE avail_bytes;

    rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);

    if (striping_unit > 0){
	rank_index = 0;
	while (off > fd_end[rank_index]) rank_index++;
    }


    if (rank_index >= num_aggregators || rank_index < 0) {
       fprintf(stderr,
	       "Error in ompi_io_ompio_calcl_aggregator():");
       fprintf(stderr,
	       "rank_index(%d) >= num_aggregators(%d)fd_size=%lld off=%lld\n",
	       rank_index,num_aggregators,fd_size,off);
       ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1);
    }


    avail_bytes = fd_end[rank_index] + 1 - off;
    if (avail_bytes < *len){
	*len = avail_bytes;
    }

    rank = aggregator_list[rank_index];

    #if 0
    printf("rank : %d, rank_index : %d\n",rank, rank_index);
    #endif

    return rank;
}
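
With evenly sized file domains, the expression (off - min_off + fd_size) / fd_size - 1 maps a file offset to the aggregator whose domain contains it. Below is a small worked check under assumed values (min_off = 0, fd_size = 100, so aggregator i owns offsets [100*i, 100*(i+1))).

/* Worked check of the index formula above with assumed values. */
#include <assert.h>

int main(void)
{
    long long min_off = 0, fd_size = 100, off;

    off = 250;   /* lies in [200, 300) -> aggregator index 2 */
    assert((int) ((off - min_off + fd_size) / fd_size - 1) == 2);

    off = 199;   /* lies in [100, 200) -> aggregator index 1 */
    assert((int) ((off - min_off + fd_size) / fd_size - 1) == 1);

    return 0;
}
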
Example #9
/**
 * Runtime errhandler callback
 */
void ompi_errhandler_runtime_callback(opal_pointer_array_t *procs) {
    ompi_mpi_abort(MPI_COMM_WORLD, 1, false);
}
static void fatal_error(char *mesg)
{
    IBOFFLOAD_ERROR(("FATAL ERROR: %s", mesg));
    ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_INTERN, true);
}
/* dispatch for callback on message completion */
static int
component_fragment_cb(ompi_request_t *request)
{
    int ret;
    ompi_osc_pt2pt_buffer_t *buffer;
    ompi_osc_pt2pt_module_t *module;

    if (request->req_status._cancelled) {
        opal_output_verbose(5, ompi_osc_base_framework.framework_output,
                            "pt2pt request was canceled");
        return OMPI_ERR_NOT_AVAILABLE;
    }

    buffer = (ompi_osc_pt2pt_buffer_t*) request->req_complete_cb_data;
    module = (ompi_osc_pt2pt_module_t*) buffer->data;

    assert(request->req_status._ucount >= (int) sizeof(ompi_osc_pt2pt_base_header_t));

    /* handle message */
    switch (((ompi_osc_pt2pt_base_header_t*) buffer->payload)->hdr_type) {
    case OMPI_OSC_PT2PT_HDR_PUT:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win, 
                                      OMPI_WIN_FENCE | 
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            ret = ompi_osc_pt2pt_sendreq_recv_put(module, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_ACC: 
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win, 
                                      OMPI_WIN_FENCE | 
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* receive into temporary buffer */
            ret = ompi_osc_pt2pt_sendreq_recv_accum(module, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_GET:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);
            ompi_datatype_t *datatype;
            ompi_osc_pt2pt_replyreq_t *replyreq;
            ompi_proc_t *proc;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win, 
                                      OMPI_WIN_FENCE | 
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* create or get a pointer to our datatype */
            proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
            datatype = ompi_osc_base_datatype_create(proc, &payload);

            if (NULL == datatype) {
                opal_output(ompi_osc_base_framework.framework_output,
                            "Error recreating datatype.  Aborting.");
                ompi_mpi_abort(module->p2p_comm, 1, false);
            }

            /* create replyreq sendreq */
            ret = ompi_osc_pt2pt_replyreq_alloc_init(module,
                                                  header->hdr_origin,
                                                  header->hdr_origin_sendreq,
                                                  header->hdr_target_disp,
                                                  header->hdr_target_count,
                                                  datatype,
                                                  &replyreq);

            /* send replyreq */
            ompi_osc_pt2pt_replyreq_send(module, replyreq);

            /* sendreq does the right retain, so we can release safely */
            OBJ_RELEASE(datatype);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_REPLY:
        {
            ompi_osc_pt2pt_reply_header_t *header = 
                (ompi_osc_pt2pt_reply_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);
            ompi_osc_pt2pt_sendreq_t *sendreq;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_REPLY_HDR_NTOH(*header);
            }
#endif

            /* get original sendreq pointer */
            sendreq = (ompi_osc_pt2pt_sendreq_t*) header->hdr_origin_sendreq.pval;
            module = sendreq->req_module;

            /* receive data */
            ompi_osc_pt2pt_replyreq_recv(module, sendreq, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_POST:
        {
            int32_t count;
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_post_msgs -= 1);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_COMPLETE:
        {
            ompi_osc_pt2pt_control_header_t *header = 
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;
            int32_t count;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            /* we've heard from one more place, and have value reqs to
               process */
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_complete_msgs -= 1);
            count += (module->p2p_num_pending_in += header->hdr_value[0]);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);

            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_LOCK_REQ:
        {
            ompi_osc_pt2pt_control_header_t *header = 
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;
            int32_t count;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            if (header->hdr_value[1] > 0) {
                ompi_osc_pt2pt_passive_lock(module, header->hdr_value[0], 
                                            header->hdr_value[1]);
            } else {
                OPAL_THREAD_LOCK(&module->p2p_lock);
                count = (module->p2p_lock_received_ack += 1);
                OPAL_THREAD_UNLOCK(&module->p2p_lock);

                if (count != 0) opal_condition_broadcast(&module->p2p_cond);
            }
        }
        break;

    case OMPI_OSC_PT2PT_HDR_UNLOCK_REQ:
        {
            ompi_osc_pt2pt_control_header_t *header = 
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            ompi_osc_pt2pt_passive_unlock(module, header->hdr_value[0],
                                          header->hdr_value[1]);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_UNLOCK_REPLY:
        {
            int32_t count;

            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_pending_out -= 1);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    default:
        opal_output_verbose(5, ompi_osc_base_framework.framework_output,
                            "received one-sided packet for with unknown type");
    }

    ompi_request_free(&request);
    ret = ompi_osc_pt2pt_component_irecv(buffer->payload,
                                         mca_osc_pt2pt_component.p2p_c_eager_size,
                                         MPI_BYTE,
                                         MPI_ANY_SOURCE,
                                         CONTROL_MSG_TAG,
                                         module->p2p_comm,
                                         &buffer->request,
                                         component_fragment_cb,
                                         buffer);

    return ret;
}
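
After dispatching on the header type, the callback above frees the completed request and immediately re-posts the receive so the next control fragment can be matched. Here is a compact sketch of that handle-then-re-arm pattern written against plain nonblocking MPI; the header struct, tag, buffer size, and function name are hypothetical.

/* Illustrative sketch of the "dispatch on header type, then re-post the
 * receive" loop; this is plain MPI, not the osc/pt2pt component's API. */
#include <mpi.h>
#include <string.h>

#define CTRL_TAG  17
#define FRAG_SIZE 4096

typedef struct { int hdr_type; } frag_header_t;

static void serve_control_fragments(MPI_Comm comm, int iterations)
{
    char buffer[FRAG_SIZE];
    MPI_Request req;

    MPI_Irecv(buffer, FRAG_SIZE, MPI_BYTE, MPI_ANY_SOURCE, CTRL_TAG, comm, &req);

    for (int i = 0; i < iterations; ++i) {
        MPI_Status status;
        MPI_Wait(&req, &status);

        frag_header_t hdr;
        memcpy(&hdr, buffer, sizeof(hdr));
        switch (hdr.hdr_type) {       /* dispatch on the fragment type */
        default:
            break;                    /* real code handles PUT/GET/ACC/... here */
        }

        /* re-arm the receive so the next fragment can be matched */
        MPI_Irecv(buffer, FRAG_SIZE, MPI_BYTE, MPI_ANY_SOURCE, CTRL_TAG, comm, &req);
    }

    MPI_Cancel(&req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);   /* complete the canceled receive */
}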