Ejemplo n.º 1
0
/**
 * Close hook of the PML V component.
 *
 * Records the currently selected host PML (component, module and request
 * functions) in mca_pml_v. When the first entry of
 * mca_vprotocol_base_include_list is empty, the work is handed over to
 * mca_pml_v_component_parasite_close(). Otherwise the component asks the
 * repository to retain it, tags the selected component name, hooks the
 * finalize function and the pml_enable entry point.
 *
 * @return the value of mca_pml_v_component_parasite_close() when nothing is
 *         listed for inclusion, OMPI_SUCCESS otherwise.
 */
static int mca_pml_v_component_close(void)
{
    int retain_rc;

    /* Snapshot the host PML before anything gets modified */
    mca_pml_v.host_pml_component = mca_pml_base_selected_component;
    mca_pml_v.host_pml = mca_pml;
    mca_pml_v.host_request_fns = ompi_request_functions;

    /* Without any FT protocol requested there is nothing to load */
    if (!mca_vprotocol_base_include_list[0]) {
        return mca_pml_v_component_parasite_close();
    }

    V_OUTPUT_VERBOSE(500, "component_close: I don't want to be unloaded now.");
    retain_rc = mca_base_component_repository_retain_component("pml", "v");
    if (OPAL_SUCCESS != retain_rc) {
        V_OUTPUT_ERR("pml_v: component_close: can't retain myself. If Open MPI is build static you can ignore this error. Otherwise it should crash soon.");
    }

    /* Leave a visible trace in the selected component name */
    snprintf(mca_pml_base_selected_component.pmlm_version.mca_component_name,
             MCA_BASE_MAX_TYPE_NAME_LEN,
             "%s]v%s",
             mca_pml_v.host_pml_component.pmlm_version.mca_component_name,
             mca_vprotocol_component.pmlm_version.mca_component_name);

    /* Route finalize through the parasite version */
    mca_pml_base_selected_component.pmlm_finalize =
        mca_pml_v_component_parasite_finalize;

    /* Have pml_enable go through mca_pml_v_enable so a Vprotocol can be set up */
    mca_pml.pml_enable = mca_pml_v_enable;

    return OMPI_SUCCESS;
}
Ejemplo n.º 2
0
void vprotocol_pessimist_matching_replay(int *src) {
#if OPAL_ENABLE_DEBUG
    vprotocol_pessimist_clock_t max = 0;
#endif
    mca_vprotocol_pessimist_event_t *event;

    /* searching this request in the event list */
    for(event = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
        event != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events);
        event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event))
    {
        vprotocol_pessimist_matching_event_t *mevent;
        
        if(VPROTOCOL_PESSIMIST_EVENT_TYPE_MATCHING != event->type) continue;        
        mevent = &(event->u_event.e_matching);
        if(mevent->reqid == mca_vprotocol_pessimist.clock)
        {
            /* this is the event to replay */
            V_OUTPUT_VERBOSE(70, "pessimist: replay\tmatch\t%"PRIpclock"\trecv is forced from %d", mevent->reqid, mevent->src);
            (*src) = mevent->src;
            opal_list_remove_item(&mca_vprotocol_pessimist.replay_events, 
                                  (opal_list_item_t *) event);
            VPESSIMIST_EVENT_RETURN(event);
        }   
#if OPAL_ENABLE_DEBUG
        else if(mevent->reqid > max) 
            max = mevent->reqid;                         
    }
    /* not forcing a ANY SOURCE event whose recieve clock is lower than max
     * is a bug indicating we have missed an event during logging ! */
    assert(((*src) != MPI_ANY_SOURCE) || (mca_vprotocol_pessimist.clock > max));
#else
    }
int vprotocol_pessimist_sender_based_init(const char *mmapfile, size_t size)
{
    char *path;
#ifdef SB_USE_CONVERTOR_METHOD
    mca_pml_base_send_request_t pml_req;
    sb.sb_conv_to_pessimist_offset = (uintptr_t) VPROTOCOL_SEND_REQ(NULL) -
            ((uintptr_t) &pml_req.req_base.req_convertor -
             (uintptr_t) &pml_req);
    V_OUTPUT_VERBOSE(500, "pessimist: conv_to_pessimist_offset: %p", (void *) sb.sb_conv_to_pessimist_offset);
#endif
    sb.sb_offset = 0;
    sb.sb_length = size;
    sb.sb_pagesize = getpagesize();
    sb.sb_cursor = sb.sb_addr = (uintptr_t) NULL;
    sb.sb_available = 0;
#ifdef SB_USE_PROGRESS_METHOD
    OBJ_CONSTRUCT(&sb.sb_sendreq, opal_list_t);
#endif

    asprintf(&path, "%s"OPAL_PATH_SEP"%s", ompi_process_info.proc_session_dir,
                mmapfile);
    if(OPAL_SUCCESS != sb_mmap_file_open(path))
        return OPAL_ERR_FILE_OPEN_FAILURE;
    free(path);
    return OMPI_SUCCESS;
}
Ejemplo n.º 4
0
/*******************************************************************************
 * MCA level functions - parasite setup
 */
/**
 * Open hook of the PML V component.
 *
 * Registers the "priority", "output" and "verbose" parameters as well as the
 * "vprotocol" selection string, opens the PML V output, complains when a
 * priority other than -1 was read, and finally opens the vprotocol base with
 * the requested include list.
 *
 * @return the value returned by mca_vprotocol_base_open().
 */
static int mca_pml_v_component_open(void)
{
    char *out_name;
    char *include_list;
    int verbosity;
    int prio;
    int status;

    prio = mca_pml_v_param_register_int("priority", -1);
    out_name = mca_pml_v_param_register_string("output", "stderr");
    verbosity = mca_pml_v_param_register_int("verbose", 0);

    mca_base_param_reg_string_name("vprotocol", NULL,
                                   "Specify a specific vprotocol to use",
                                   false, false, "", &include_list);

    pml_v_output_open(out_name, verbosity);
    free(out_name);

    if (-1 != prio) {
        V_OUTPUT_ERR("pml_v: Overriding priority setting (%d) with -1. The PML V should NEVER be the selected component; even when enabling fault tolerance.", prio);
    }

    V_OUTPUT_VERBOSE(500, "loaded");

    status = mca_vprotocol_base_open(include_list);
    /* free() accepts NULL, so no guard is needed */
    free(include_list);

    return status;
}
/**
 * Convertor advance hook that also copies packed data to the sender-based log.
 *
 * Temporarily restores the flags and advance function saved in the request,
 * packs through opal_convertor_pack(), then copies every produced iovec
 * segment to the request's sender-based cursor until *max_data bytes have
 * been accounted for. Before returning, CONVERTOR_NO_OP is cleared from the
 * convertor flags and this function is installed again as fAdvance.
 *
 * @param pConvertor  convertor being advanced
 * @param iov         iovec array filled by the pack operation
 * @param out_size    in/out number of iovec entries
 * @param max_data    in/out number of bytes packed
 * @return the value returned by opal_convertor_pack().
 */
int32_t vprotocol_pessimist_sender_based_convertor_advance(opal_convertor_t* pConvertor,
                                                            struct iovec* iov,
                                                            uint32_t* out_size,
                                                            size_t* max_data) {
    mca_vprotocol_pessimist_send_request_t *ftreq;
    size_t remaining;
    unsigned int idx;
    int pack_rc;

    ftreq = VPESSIMIST_CONV_REQ(pConvertor);

    /* Run the pack with the convertor settings saved in the request */
    pConvertor->flags = ftreq->sb.conv_flags;
    pConvertor->fAdvance = ftreq->sb.conv_advance;
    pack_rc = opal_convertor_pack(pConvertor, iov, out_size, max_data);
    V_OUTPUT_VERBOSE(39, "pessimist:\tsb\tpack\t%"PRIsize_t, *max_data);

    /* Mirror each packed segment at the sender-based cursor */
    idx = 0;
    remaining = *max_data;
    while (remaining > 0) {
        assert(idx < *out_size);
        MEMCPY((void *) ftreq->sb.cursor, iov[idx].iov_base, iov[idx].iov_len);
        remaining -= iov[idx].iov_len;
        ftreq->sb.cursor += iov[idx].iov_len;
        idx++;
    }
    assert(0 == remaining);

    /* Put the interception back in place for the next call */
    pConvertor->flags &= ~CONVERTOR_NO_OP;
    pConvertor->fAdvance = &vprotocol_pessimist_sender_based_convertor_advance;
    return pack_rc;
}
/** Manage mmap floating window, allocating enough memory for the message to be
  * asynchronously copied to disk.
  *
  * Releases the current window (when sb.sb_addr is set), recomputes the
  * offset/cursor/length fields of the file-level `sb` state so that `len`
  * payload bytes plus one sender-based header fit, then calls sb_mmap_alloc().
  *
  * @param len  size in bytes of the application message that must fit in the
  *             window (the header size is added internally).
  */
void vprotocol_pessimist_sender_based_alloc(size_t len)
{
    /* A non-NULL base address means a window is currently held: release it */
    if(((uintptr_t) NULL) != sb.sb_addr)
        sb_mmap_free();
#ifdef SB_USE_SELFCOMM_METHOD
    /* No window yet: duplicate MPI_COMM_SELF into sb.sb_comm */
    else
        ompi_comm_dup(MPI_COMM_SELF, &sb.sb_comm, 1);
#endif

    /* Take care of alignment of sb_offset: advance it by the distance between
     * cursor and base address, keep the remainder modulo the page size in
     * sb_cursor, and round sb_offset down to a multiple of sb_pagesize */
    sb.sb_offset += sb.sb_cursor - sb.sb_addr;
    sb.sb_cursor = sb.sb_offset % sb.sb_pagesize;
    sb.sb_offset -= sb.sb_cursor;

    /* Adjusting sb_length for the largest application message to fit   */
    len += sb.sb_cursor + sizeof(vprotocol_pessimist_sender_based_header_t);
    if(sb.sb_length < len)
        sb.sb_length = len;
    /* How much space left for application data */
    sb.sb_available = sb.sb_length - sb.sb_cursor;

    /* NOTE(review): presumably maps the window and updates sb.sb_addr -
     * confirm against sb_mmap_alloc() */
    sb_mmap_alloc();

    sb.sb_cursor += sb.sb_addr; /* set absolute addr of sender_based buffer */
    V_OUTPUT_VERBOSE(30, "pessimist:\tsb\tgrow\toffset %llu\tlength %llu\tbase %p\tcursor %p", (unsigned long long) sb.sb_offset, (unsigned long long) sb.sb_length, (void *) sb.sb_addr, (void *) sb.sb_cursor);
}
Ejemplo n.º 7
0
/**
 * Finalize hook of the PML V component itself.
 *
 * Only emits a verbose trace: at this stage it is not known whether the
 * component has to be unloaded, so no cleanup is performed.
 *
 * @return OMPI_SUCCESS, always.
 */
static int mca_pml_v_component_finalize(void)
{
    V_OUTPUT_VERBOSE(1, "finalize: I'm not supposed to be here until BTL loading stuff gets fixed!? That's strange...");

    /* Deliberately nothing else to do here */
    return OMPI_SUCCESS;
}
Ejemplo n.º 8
0
/**
 * Finalize hook of the pessimist vprotocol component.
 *
 * Frees the event buffer and destructs the replay list, the pending list and
 * the events pool held in mca_vprotocol_pessimist.
 *
 * @return OMPI_SUCCESS, always.
 */
static int mca_vprotocol_pessimist_component_finalize(void)
{
    V_OUTPUT_VERBOSE(500, "vprotocol_pessimist_finalize");
    free(mca_vprotocol_pessimist.event_buffer);
    /* Do not leave a dangling pointer behind in the global module state */
    mca_vprotocol_pessimist.event_buffer = NULL;
    OBJ_DESTRUCT(&mca_vprotocol_pessimist.replay_events);
    OBJ_DESTRUCT(&mca_vprotocol_pessimist.pending_events);
    OBJ_DESTRUCT(&mca_vprotocol_pessimist.events_pool);
    return OMPI_SUCCESS;
}
/**
 * Request free hook installed by the pessimist protocol.
 *
 * Finishes the matching log for the request, resets the MPI_SOURCE status
 * field to -1 (the "no matching made" marker), flushes the sender-based
 * copy, and then chains to the free function saved in the request.
 *
 * @param req  address of the request handle being freed
 * @return the value returned by the saved pml_req_free function.
 */
int mca_vprotocol_pessimist_request_free(ompi_request_t **req)
{
    mca_pml_base_request_t *base_req = (mca_pml_base_request_t *) *req;

    V_OUTPUT_VERBOSE(50, "pessimist:\treq\tfree\t%"PRIpclock"\tpeer %d\ttag %d\tsize %lu", VPESSIMIST_FTREQ(base_req)->reqid, base_req->req_peer, base_req->req_tag, (unsigned long) base_req->req_count);

    vprotocol_pessimist_matching_log_finish(*req);

    /* -1 flags that no matching has been made */
    base_req->req_ompi.req_status.MPI_SOURCE = -1;

    vprotocol_pessimist_sender_based_flush(*req);

    /* Hand over to the free function that was saved at construction */
    return VPESSIMIST_FTREQ(base_req)->pml_req_free(req);
}
Ejemplo n.º 10
0
/**
 * Finalize hook of the example vprotocol component.
 *
 * Template function: it only emits a verbose trace.
 *
 * @return OMPI_SUCCESS, always.
 */
int mca_vprotocol_example_component_finalize(void)
{
  V_OUTPUT_VERBOSE(10, "vprotocol_example_finalize");

  /* Protocol-specific cleanup would be inserted here */

  return OMPI_SUCCESS;
}
Ejemplo n.º 11
0
/**
 * Open hook of the PML V component.
 *
 * Opens the PML V output, publishes the requested vprotocol include list and
 * opens the vprotocol framework.
 *
 * @return the value returned by mca_base_framework_open().
 */
static int mca_pml_v_component_open(void)
{
    int open_rc;

    pml_v_output_open(ompi_pml_v_output, ompi_pml_v_verbose);
    V_OUTPUT_VERBOSE(500, "loaded");

    mca_vprotocol_base_set_include_list(ompi_pml_vprotocol_include_list);

    open_rc = mca_base_framework_open(&ompi_vprotocol_base_framework, 0);
    return open_rc;
}
/**
 * Blocking send of the example vprotocol.
 *
 * Emits a verbose trace and forwards every argument unchanged to the host
 * PML's pml_send.
 *
 * @return the value returned by the host PML's pml_send.
 */
int mca_vprotocol_example_send(void *addr,
                      size_t count,
                      ompi_datatype_t * datatype,
                      int dst,
                      int tag,
                      mca_pml_base_send_mode_t sendmode,
                      struct ompi_communicator_t *comm)
{
  int host_rc;

  V_OUTPUT_VERBOSE(50, "request\tsend \tcomm %d\tto %d\ttag %d\tsize %ld", comm->c_contextid, dst, tag, (long) count);

  host_rc = mca_pml_v.host_pml.pml_send(addr, count, datatype, dst, tag,
                                        sendmode, comm);
  return host_rc;
}
Ejemplo n.º 13
0
/*******************************************************************************
 * Enable the PML V (and initialize the Vprotocol)
 *
 * First forwards the request to the host PML. When enabling, a vprotocol is
 * selected if none is selected yet; on success the vprotocol parasites the
 * host PML and its own enable function (if any) is called. In every other
 * case the host PML module and request functions are put back, with
 * pml_enable still pointing to this function.
 *
 * Returns the host PML error, the parasite error, the vprotocol enable
 * result, or OMPI_SUCCESS.
 */
static int mca_pml_v_enable(bool enable)
{
    int rc;

    /* The real PML goes first (no threading issues there as threads are
     * started later) */
    rc = mca_pml_v.host_pml.pml_enable(enable);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }

    if (enable) {
        /* Select a protocol now unless it already happened during init */
        if (!mca_vprotocol_base_selected()) {
            mca_vprotocol_base_select(pml_v_enable_progress_treads,
                                      pml_v_enable_mpi_thread_multiple);
        }

        /* Did we end up with a protocol? */
        if (mca_vprotocol_base_selected()) {
            V_OUTPUT_VERBOSE(1, "I don't want to die: I will parasite %s host component %s with %s %s",
                             mca_pml_base_selected_component.pmlm_version.mca_type_name,
                             mca_pml_base_selected_component.pmlm_version.mca_component_name,
                             mca_vprotocol_component.pmlm_version.mca_type_name,
                             mca_vprotocol_component.pmlm_version.mca_component_name);

            rc = mca_vprotocol_base_parasite();
            if (OMPI_SUCCESS != rc) {
                return rc;
            }
            return mca_vprotocol.enable ? mca_vprotocol.enable(enable)
                                        : OMPI_SUCCESS;
        }
        V_OUTPUT_VERBOSE(1, "No fault tolerant protocol selected. All are unloaded");
    }

    /* Disable: hand everything back to the host PML */
    mca_pml = mca_pml_v.host_pml;
    mca_pml.pml_enable = mca_pml_v_enable;
    /* /!\ This is incorrect if another component also changed the requests */
    ompi_request_functions = mca_pml_v.host_request_fns;
    return OMPI_SUCCESS;
}
Ejemplo n.º 14
0
/**
 * Real close of the PML V component.
 *
 * Puts the saved host PML component back as the selected component, closes
 * the vprotocol framework and the PML V output, and restores the host PML's
 * pml_enable entry point. The host component's own close is not called here.
 *
 * @return OMPI_SUCCESS, always (the framework close result is ignored).
 */
static int mca_pml_v_component_parasite_close(void)
{
    V_OUTPUT_VERBOSE(500, "parasite_close: Ok, I accept to die and let %s component finish",
                          mca_pml_v.host_pml_component.pmlm_version.mca_component_name);

    /* Give the selected-component slot back to the host */
    mca_pml_base_selected_component = mca_pml_v.host_pml_component;

    /* Errors are irrelevant from here on: we are leaving anyway */
    (void) mca_base_framework_close(&ompi_vprotocol_base_framework);
    pml_v_output_close();

    mca_pml.pml_enable = mca_pml_v.host_pml.pml_enable;

    /* pml_base takes care of calling the host component's close */
    return OMPI_SUCCESS;
}
/**
 * Constructor for the pessimist part of a PML request.
 *
 * Saves the request's current free function in the pessimist request,
 * clears its event pointer and progress counter, installs
 * mca_vprotocol_pessimist_request_free as the new free function and
 * constructs the embedded list item.
 *
 * @param req  PML base request being constructed
 */
static void vprotocol_pessimist_request_construct(mca_pml_base_request_t *req)
{
    mca_vprotocol_pessimist_request_t *pess_req = VPESSIMIST_FTREQ(req);

    V_OUTPUT_VERBOSE(250, "pessimist:\treq\tnew\treq=%p\tPreq=%p (aligned to %p)", (void *) req, (void *) pess_req, (void *) &pess_req->pml_req_free);

    /* -1 flags that no matching has been made */
    req->req_ompi.req_status.MPI_SOURCE = -1;

    pess_req->pml_req_free = req->req_ompi.req_free;
    pess_req->event = NULL;
    pess_req->sb.bytes_progressed = 0;

    /* The saved pointer must survive the writes above; a mismatch reveals
     * alignment issues on some architectures */
    assert(pess_req->pml_req_free == req->req_ompi.req_free);

    req->req_ompi.req_free = mca_vprotocol_pessimist_request_free;
    OBJ_CONSTRUCT(&pess_req->list_item, opal_list_item_t);
}
Ejemplo n.º 16
0
/*******************************************************************************
 * Init/finalize for MCA PML components
 */
/**
 * Init hook of the PML V component.
 *
 * Stores the two threading flags for later use and declines selection by
 * reporting priority -1 together with a NULL module.
 *
 * @param priority                    out: always set to -1
 * @param enable_progress_threads     saved in pml_v_enable_progress_treads
 * @param enable_mpi_thread_multiple  saved in pml_v_enable_mpi_thread_multiple
 * @return NULL, always.
 */
static mca_pml_base_module_t *mca_pml_v_component_init(int *priority,
                                                      bool enable_progress_threads,
                                                      bool enable_mpi_thread_multiple)
{
    V_OUTPUT_VERBOSE(1, "init: I'm not supposed to be here until BTL loading stuff gets fixed!? That's strange...");

    /* This component must NEVER be the selected PML: report a priority
     * below anything acceptable and no module at all */
    *priority = -1;

    /* Keep the threading settings around */
    pml_v_enable_progress_treads = enable_progress_threads;
    pml_v_enable_mpi_thread_multiple = enable_mpi_thread_multiple;

    return NULL;
}
Ejemplo n.º 17
0
/**
 * Connect to the event logger of rank `el_rank` and perform the handshake.
 *
 * Looks the event logger port up through opal_pmix.lookup(), connects to it
 * with ompi_dpm_connect_accept(), then sends this process' rank in
 * MPI_COMM_WORLD and receives two clock-sized values back over
 * mca_vprotocol_pessimist.el_comm.
 *
 * @param el_rank  rank of the event logger, used to build the lookup key
 * @param el_comm  out: communicator produced by ompi_dpm_connect_accept()
 * @return OMPI_ERR_NOT_FOUND when the lookup yields no string port,
 *         OMPI_ERR_OUT_OF_RESOURCE when the port cannot be duplicated,
 *         the connect error when the connection fails, otherwise the result
 *         of the last handshake operation.
 */
int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t **el_comm)
{
    int rc;
    char *port;
    int rank;
    vprotocol_pessimist_clock_t connect_info[2];
    opal_list_t results;
    opal_pmix_pdata_t *pdat;

    OBJ_CONSTRUCT(&results, opal_list_t);
    pdat = OBJ_NEW(opal_pmix_pdata_t);
    opal_asprintf(&pdat->value.key, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank);
    opal_list_append(&results, &pdat->super);

    rc = opal_pmix.lookup(&results, NULL);
    if (OPAL_SUCCESS != rc ||
        OPAL_STRING != pdat->value.type ||
        NULL == pdat->value.data.string) {
        OPAL_LIST_DESTRUCT(&results);
        return OMPI_ERR_NOT_FOUND;
    }
    /* Keep a private copy: the lookup results are destroyed right after */
    port = strdup(pdat->value.data.string);
    OPAL_LIST_DESTRUCT(&results);
    if (NULL == port) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    V_OUTPUT_VERBOSE(45, "Found port < %s >", port);

    rc = ompi_dpm_connect_accept(MPI_COMM_SELF, 0, port, true, el_comm);
    /* The duplicated port string is not used past this point */
    free(port);
    if(OMPI_SUCCESS != rc) {
        OMPI_ERROR_LOG(rc);
        /* Without a connection the handshake below cannot succeed */
        return rc;
    }

    /* Send Rank, receive max buffer size and max_clock back */
    rank = ompi_comm_rank(&ompi_mpi_comm_world.comm);
    rc = mca_pml_v.host_pml.pml_send(&rank, 1, MPI_INTEGER, 0,
                                     VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD,
                                     MCA_PML_BASE_SEND_STANDARD,
                                     mca_vprotocol_pessimist.el_comm);
    if(OPAL_UNLIKELY(MPI_SUCCESS != rc))
        OMPI_ERRHANDLER_INVOKE(mca_vprotocol_pessimist.el_comm, rc,
                               __FILE__ ": failed sending event logger handshake");
    rc = mca_pml_v.host_pml.pml_recv(&connect_info, 2, MPI_UNSIGNED_LONG_LONG,
                                     0, VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD,
                                     mca_vprotocol_pessimist.el_comm, MPI_STATUS_IGNORE);
    if(OPAL_UNLIKELY(MPI_SUCCESS != rc))
        OMPI_ERRHANDLER_INVOKE(mca_vprotocol_pessimist.el_comm, rc,
                               __FILE__ ": failed receiving event logger handshake");

    return rc;
}
Ejemplo n.º 18
0
/**
 * Open hook of the PML V component.
 *
 * Opens the PML V output, publishes the requested vprotocol include list and
 * opens the vprotocol framework. When the framework opened successfully but
 * mca_vprotocol_base_include_list is NULL, the output and the framework are
 * closed again.
 *
 * @return the framework open error, the framework close result when the
 *         include list is NULL, or the (successful) open result.
 */
static int mca_pml_v_component_open(void)
{
    int rc;

    ompi_pml_v_output_open(ompi_pml_v_output, ompi_pml_v_verbose);
    V_OUTPUT_VERBOSE(500, "loaded");

    mca_vprotocol_base_set_include_list(ompi_pml_vprotocol_include_list);

    rc = mca_base_framework_open(&ompi_vprotocol_base_framework, 0);
    if (OMPI_SUCCESS == rc && NULL == mca_vprotocol_base_include_list) {
        /* Nothing requested: undo what was just opened */
        ompi_pml_v_output_close();
        rc = mca_base_framework_close(&ompi_vprotocol_base_framework);
    }

    return rc;
}
Ejemplo n.º 19
0
/**
 * Non-blocking send of the pessimist vprotocol.
 *
 * Flushes pending events, posts the send through the host PML, initializes
 * the pessimist part of the resulting request and starts the sender-based
 * copy for it.
 *
 * @return the value returned by the host PML's pml_isend.
 */
int mca_vprotocol_pessimist_isend(void *buf,
                       size_t count,
                       ompi_datatype_t* datatype,
                       int dst,
                       int tag,
                       mca_pml_base_send_mode_t sendmode,
                       ompi_communicator_t* comm,
                       ompi_request_t** request )
{
    int host_rc;

    V_OUTPUT_VERBOSE(50, "pessimist:\tisend\tposted\t%"PRIpclock"\tto %d\ttag %d\tsize %lu",
                     mca_vprotocol_pessimist.clock, dst, tag, (unsigned long) count);

    /* Events are flushed before the message is handed to the host PML */
    vprotocol_pessimist_event_flush();

    host_rc = mca_pml_v.host_pml.pml_isend(buf, count, datatype, dst, tag,
                                           sendmode, comm, request);

    VPESSIMIST_FTREQ_INIT(*request);
    vprotocol_pessimist_sender_based_copy_start(*request);

    return host_rc;
}
Ejemplo n.º 20
0
/**
 * Non-blocking receive of the pessimist vprotocol.
 *
 * Applies the matching replay to `src`, posts the receive through the host
 * PML, initializes the pessimist part of the resulting request and prepares
 * its matching log.
 *
 * @return the value returned by the host PML's pml_irecv.
 */
int mca_vprotocol_pessimist_irecv(void *addr,
                     size_t count,
                     ompi_datatype_t * datatype,
                     int src,
                     int tag,
                     struct ompi_communicator_t *comm,
                     struct ompi_request_t **request)
{
  int host_rc;

  V_OUTPUT_VERBOSE(50, "pessimist:\tirecv\trequest\t%"PRIpclock"\tfrom %d\ttag %d\tsize %lu",
                        mca_vprotocol_pessimist.clock, src, tag, (unsigned long) count);

  /* Matching order enforcement comes first */
  VPROTOCOL_PESSIMIST_MATCHING_REPLAY(src);

  /* The host PML then performs the actual receive */
  host_rc = mca_pml_v.host_pml.pml_irecv(addr, count, datatype, src, tag,
                                         comm, request);

  VPESSIMIST_FTREQ_INIT(*request);
  vprotocol_pessimist_matching_log_prepare(*request);

  return host_rc;
}
Ejemplo n.º 21
0
/**
 * Init hook of the example vprotocol component.
 *
 * Template function: reports the configured priority and returns the example
 * module. Neither threading flag is examined.
 *
 * @param priority                 out: set to _priority
 * @param enable_progress_threads  unused
 * @param enable_mpi_threads       unused
 * @return address of mca_vprotocol_example.super.
 */
mca_pml_v_protocol_base_module_t *mca_vprotocol_example_component_init( int* priority,
                                                                          bool enable_progress_threads,
                                                                          bool enable_mpi_threads)
{
  V_OUTPUT_VERBOSE(10, "vprotocol_example_init");
  *priority = _priority;

  /* Some protocols (those making a piecewise deterministic assumption) need
   * a sanity check on thread support. Such a protocol would test
   * enable_mpi_threads here, report that threads are not supported and
   * return NULL so that it does not get loaded. */

  /* Protocol-specific initialization would be inserted here */

  return &mca_vprotocol_example.super;
}
Ejemplo n.º 22
0
/**
 * Blocking send of the pessimist vprotocol.
 *
 * Flushes pending events, posts a non-blocking send through the host PML,
 * initializes the pessimist part of the request, starts the sender-based
 * copy and waits for the request through VPROTOCOL_PESSIMIST_WAIT.
 *
 * @return the code produced by VPROTOCOL_PESSIMIST_WAIT (the value returned
 *         by pml_isend itself is not examined).
 */
int mca_vprotocol_pessimist_send(void *buf,
                      size_t count,
                      ompi_datatype_t* datatype,
                      int dst,
                      int tag,
                      mca_pml_base_send_mode_t sendmode,
                      ompi_communicator_t* comm )
{
    int wait_rc;
    ompi_request_t *request = MPI_REQUEST_NULL;

    V_OUTPUT_VERBOSE(50, "pessimist:\tsend\tposted\t%"PRIpclock"\tto %d\ttag %d\tsize %lu",
                     mca_vprotocol_pessimist.clock, dst, tag, (unsigned long) count);

    /* Events are flushed before the message is handed to the host PML */
    vprotocol_pessimist_event_flush();

    mca_pml_v.host_pml.pml_isend(buf, count, datatype, dst, tag, sendmode,
                                 comm, &request);
    VPESSIMIST_FTREQ_INIT(request);
    vprotocol_pessimist_sender_based_copy_start(request);

    /* Block until the send request completes */
    VPROTOCOL_PESSIMIST_WAIT(&request, MPI_STATUS_IGNORE, wait_rc);
    return wait_rc;
}
Ejemplo n.º 23
0
/**
 * Blocking receive of the pessimist vprotocol.
 *
 * Applies the matching replay to `src`, posts a non-blocking receive through
 * the host PML, initializes the pessimist part of the request, prepares its
 * matching log and waits for the request through VPROTOCOL_PESSIMIST_WAIT.
 *
 * @return the code left in the local result by VPROTOCOL_PESSIMIST_WAIT.
 */
int mca_vprotocol_pessimist_recv(void *addr,
                      size_t count,
                      ompi_datatype_t * datatype,
                      int src,
                      int tag,
                      struct ompi_communicator_t *comm,
                      ompi_status_public_t * status )
{
  int rc;
  ompi_request_t *request = MPI_REQUEST_NULL;

  V_OUTPUT_VERBOSE(50, "pessimist:\trecv\tposted\t%"PRIpclock"\tfrom %d\ttag %d\tsize %lu",
                       mca_vprotocol_pessimist.clock, src, tag, (unsigned long) count);

  /* Matching order enforcement comes first */
  VPROTOCOL_PESSIMIST_MATCHING_REPLAY(src);

  /* The host PML then performs the actual receive */
  rc = mca_pml_v.host_pml.pml_irecv(addr, count, datatype, src, tag, comm,
                                    &request);
  VPESSIMIST_FTREQ_INIT(request);
  vprotocol_pessimist_matching_log_prepare(request);

  /* Wait here for the request to complete */
  VPROTOCOL_PESSIMIST_WAIT(&request, status, rc);
  return rc;
}
Ejemplo n.º 24
0
/*******************************************************************************
 * Parasite cleanup
 */
/**
 * Finalize function installed in place of the host PML component's finalize.
 *
 * Points the PML V component's close function to
 * mca_pml_v_component_parasite_close and prepends the component to the PML
 * framework's component list, finalizes the selected vprotocol component (if
 * any), and finally chains to the host PML component's finalize.
 *
 * @return the host component's finalize result, or OMPI_SUCCESS when the
 *         host has no finalize function.
 */
static int mca_pml_v_component_parasite_finalize(void)
{
    mca_base_component_list_item_t *cli = NULL;

    V_OUTPUT_VERBOSE(500, "parasite_finalize");

    /* Make sure we'll get closed again with the true close function */
    mca_pml_v_component.pmlm_version.mca_close_component =
        mca_pml_v_component_parasite_close;
    cli = OBJ_NEW(mca_base_component_list_item_t);
    if(NULL != cli) {
        cli->cli_component = (mca_base_component_t *) &mca_pml_v_component;
        opal_list_prepend(&ompi_pml_base_framework.framework_components,
                          (opal_list_item_t *) cli);
    }
    /* else: the list item could not be allocated. The component cannot be
     * queued for closing, but the finalization below must still run rather
     * than dereferencing a NULL pointer. */

    /* finalize vprotocol component */
    if(mca_vprotocol_base_selected())
        mca_vprotocol_component.pmlm_finalize();

    if(mca_pml_v.host_pml_component.pmlm_finalize != NULL)
        return mca_pml_v.host_pml_component.pmlm_finalize();
    else
        return OMPI_SUCCESS;
}
Ejemplo n.º 25
0
/** VPROTOCOL level functions (same as PML one)
  */
static mca_vprotocol_base_module_t *mca_vprotocol_pessimist_component_init( int* priority,
                                                                          bool enable_progress_threads,
                                                                          bool enable_mpi_threads)
{  
    V_OUTPUT_VERBOSE(500, "vprotocol_pessimist: component_init");
    *priority = _priority;

    /* sanity check */
    if(enable_mpi_threads)
    {
        opal_output(0, "vprotocol_pessimist: component_init: threads are enabled, and not supported by vprotocol pessimist fault tolerant layer, will not load");
        return NULL;
    }

    mca_vprotocol_pessimist.clock = 1;
    mca_vprotocol_pessimist.replay = false;
    OBJ_CONSTRUCT(&mca_vprotocol_pessimist.replay_events, opal_list_t);
    OBJ_CONSTRUCT(&mca_vprotocol_pessimist.pending_events, opal_list_t);
    OBJ_CONSTRUCT(&mca_vprotocol_pessimist.events_pool, opal_free_list_t);
    opal_free_list_init (&mca_vprotocol_pessimist.events_pool,
			 sizeof(mca_vprotocol_pessimist_event_t),
                         opal_cache_line_size,
                         OBJ_CLASS(mca_vprotocol_pessimist_event_t),
                         0,opal_cache_line_size,
                         _free_list_num,
                         _free_list_max,
                         _free_list_inc,
                         NULL, 0, NULL, NULL, NULL);
    mca_vprotocol_pessimist.event_buffer_max_length = 
                _event_buffer_size / sizeof(vprotocol_pessimist_mem_event_t);
    mca_vprotocol_pessimist.event_buffer_length = 0;
    mca_vprotocol_pessimist.event_buffer = 
                (vprotocol_pessimist_mem_event_t *) malloc(_event_buffer_size);
    mca_vprotocol_pessimist.el_comm = MPI_COMM_NULL;
    
    return &mca_vprotocol_pessimist.super;
}
Ejemplo n.º 26
0
/**
 * Open hook of the pessimist vprotocol component.
 *
 * Only traces the priority that was read.
 *
 * @return OMPI_SUCCESS, always.
 */
static int mca_vprotocol_pessimist_component_open(void)
{
    V_OUTPUT_VERBOSE(500, "vprotocol_pessimist: component_open: read priority %d", _priority);

    return OMPI_SUCCESS;
}
Ejemplo n.º 27
0
/**
 * Close hook of the pessimist vprotocol component.
 *
 * Only emits a verbose trace.
 *
 * @return OMPI_SUCCESS, always.
 */
static int mca_vprotocol_pessimist_component_close(void)
{
    V_OUTPUT_VERBOSE(500, "vprotocol_pessimist: component_close");

    return OMPI_SUCCESS;
}
Ejemplo n.º 28
0
/**
 * Start entry point of the example vprotocol.
 *
 * Emits a verbose trace and forwards the call unchanged to the host PML's
 * pml_start.
 *
 * @param count     number of entries in `requests`
 * @param requests  requests to start
 * @return the value returned by the host PML's pml_start.
 */
OMPI_DECLSPEC int mca_vprotocol_example_start(size_t count, ompi_request_t **requests)
{
  int host_rc;

  V_OUTPUT_VERBOSE(50, "starting %ld requests", (long) count);

  host_rc = mca_pml_v.host_pml.pml_start(count, requests);
  return host_rc;
}
Ejemplo n.º 29
0
/**
 * Connect to the event logger of rank `el_rank` and perform the handshake.
 *
 * Looks the event logger port up through ompi_pubsub.lookup(), parses it,
 * makes sure RML messages can be routed to the logger, wakes the logger up
 * with an empty RML message, connects with ompi_dpm.connect_accept(), then
 * sends this process' rank in MPI_COMM_WORLD and receives two clock-sized
 * values back over mca_vprotocol_pessimist.el_comm.
 *
 * @param el_rank  rank of the event logger, used to build the lookup name
 * @param el_comm  out: communicator produced by connect_accept()
 * @return OMPI_ERR_NOT_FOUND when the lookup fails, the error of the first
 *         failing setup step (parse, route, RML send, connect), otherwise
 *         the result of the last handshake operation.
 */
int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t **el_comm)
{
    int rc;
    opal_buffer_t buffer;
    char *port;
    orte_process_name_t el_proc;
    char *hnp_uri, *rml_uri;
    orte_rml_tag_t el_tag;
    char name[MPI_MAX_PORT_NAME];
    int rank;
    vprotocol_pessimist_clock_t connect_info[2];

    snprintf(name, MPI_MAX_PORT_NAME, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank);
    /* NOTE(review): ownership of the string returned by lookup() is not
     * visible from here; confirm whether the caller has to free it */
    port = ompi_pubsub.lookup(name, MPI_INFO_NULL);
    if(NULL == port)
    {
        return OMPI_ERR_NOT_FOUND;
    }
    V_OUTPUT_VERBOSE(45, "Found port < %s >", port);

    /* separate the string into the HNP and RML URI and tag */
    if (OMPI_SUCCESS != (rc = ompi_dpm.parse_port(port, &hnp_uri, &rml_uri, &el_tag))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    /* extract the originating proc's name */
    if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(rml_uri, &el_proc, NULL))) {
        ORTE_ERROR_LOG(rc);
        free(rml_uri); free(hnp_uri);
        return rc;
    }
    /* make sure we can route rml messages to the destination */
    if (OMPI_SUCCESS != (rc = ompi_dpm.route_to_port(hnp_uri, &el_proc))) {
        ORTE_ERROR_LOG(rc);
        free(rml_uri); free(hnp_uri);
        return rc;
    }
    free(rml_uri); free(hnp_uri);

    /* Send an rml message to tell the remote end to wake up and jump into
     * connect/accept */
    OBJ_CONSTRUCT(&buffer, opal_buffer_t);
    rc = orte_rml.send_buffer(&el_proc, &buffer, el_tag+1, 0);
    OBJ_DESTRUCT(&buffer);
    if(ORTE_SUCCESS > rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    rc = ompi_dpm.connect_accept(MPI_COMM_SELF, 0, port, true, el_comm);
    if(OMPI_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        /* Without a connection the handshake below cannot succeed */
        return rc;
    }

    /* Send Rank, receive max buffer size and max_clock back */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    rc = mca_pml_v.host_pml.pml_send(&rank, 1, MPI_INTEGER, 0,
                                     VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD,
                                     MCA_PML_BASE_SEND_STANDARD,
                                     mca_vprotocol_pessimist.el_comm);
    if(OPAL_UNLIKELY(MPI_SUCCESS != rc))
        OMPI_ERRHANDLER_INVOKE(mca_vprotocol_pessimist.el_comm, rc,
                               __FILE__ ": failed sending event logger handshake");
    rc = mca_pml_v.host_pml.pml_recv(&connect_info, 2, MPI_UNSIGNED_LONG_LONG,
                                     0, VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD,
                                     mca_vprotocol_pessimist.el_comm, MPI_STATUS_IGNORE);
    if(OPAL_UNLIKELY(MPI_SUCCESS != rc))
        OMPI_ERRHANDLER_INVOKE(mca_vprotocol_pessimist.el_comm, rc,
                               __FILE__ ": failed receiving event logger handshake");

    return rc;
}
/*
 * Function for selecting one component from all those that are
 * available.
 *
 * Call the init function on all available components that match the
 * include list and get their priorities.  Select the component with the
 * highest priority.  All other initialized components are finalized, and
 * the available components other than the winner are closed.  The selected
 * component and module are copied into mca_vprotocol_component and
 * mca_vprotocol.
 *
 * Returns OMPI_SUCCESS when a component was selected, OMPI_ERR_NOT_FOUND
 * when none was, and OMPI_ERR_OUT_OF_RESOURCE when the bookkeeping for an
 * initialized component cannot be allocated.
 */
int mca_vprotocol_base_select(bool enable_progress_threads,
                              bool enable_mpi_threads)
{
    int priority = 0, best_priority = -1;
    opal_list_item_t *item = NULL;
    mca_base_component_list_item_t *cli = NULL;
    mca_vprotocol_base_component_t *component = NULL, *best_component = NULL;
    mca_vprotocol_base_module_t *module = NULL, *best_module = NULL;
    opal_list_t opened;
    opened_component_t *om = NULL;

    /* Traverse the list of available components; call their init
        functions. */
    OBJ_CONSTRUCT(&opened, opal_list_t);
    for(item = opal_list_get_first(&mca_vprotocol_base_components_available);
        opal_list_get_end(&mca_vprotocol_base_components_available) != item;
        item = opal_list_get_next(item))
    {
        cli = (mca_base_component_list_item_t *) item;
        component = (mca_vprotocol_base_component_t *) cli->cli_component;

        V_OUTPUT_VERBOSE(500, "vprotocol select: initializing %s component %s", component->pmlm_version.mca_type_name, component->pmlm_version.mca_component_name);
        if(strcmp(component->pmlm_version.mca_component_name,
                  mca_vprotocol_base_include_list)) {
            V_OUTPUT_VERBOSE(500, "This component is not in the include list: skipping %s", component->pmlm_version.mca_component_name);
            continue;
        }
        if(NULL == component->pmlm_init) {
            V_OUTPUT_VERBOSE(2, "vprotocol select: no init function; ignoring component %s", component->pmlm_version.mca_component_name);
            continue;
        }
        module = component->pmlm_init(&priority, enable_progress_threads, enable_mpi_threads);
        if (NULL == module) {
            V_OUTPUT_VERBOSE(2, "vprotocol select: init returned failure for component %s", component->pmlm_version.mca_component_name);
            continue;
        }
        V_OUTPUT_VERBOSE(500, "vprotocol select: component %s init returned priority %d", component->pmlm_version.mca_component_name, priority);
        if (priority > best_priority)
        {
            best_priority = priority;
            best_component = component;
            best_module = module;
        }

        om = malloc(sizeof(opened_component_t));
        if (NULL == om) {
            /* Release the bookkeeping entries collected so far and the
             * temporary list itself before reporting the failure */
            for (item = opal_list_remove_first(&opened);
                 NULL != item;
                 item = opal_list_remove_first(&opened))
            {
                om = (opened_component_t *) item;
                OBJ_DESTRUCT(om);
                free(om);
            }
            OBJ_DESTRUCT(&opened);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        OBJ_CONSTRUCT(om, opal_list_item_t);
        om->om_component = component;
        opal_list_append(&opened, (opal_list_item_t*) om);
    }

    /* Finished querying all components.  Check for the bozo case. */
    if (NULL == best_component) {
        V_OUTPUT_VERBOSE(2, "vprotocol select: no protocol has returned a positive priority, fault tolerance is OFF");
    }
    else
    {
        /* Save the winner */
        mca_vprotocol_component = *best_component;
        mca_vprotocol = *best_module;
    }

    /* Finalize all non-selected components */
    for (item = opal_list_remove_first(&opened);
         NULL != item;
         item = opal_list_remove_first(&opened))
    {
        om = (opened_component_t *) item;
        if (om->om_component != best_component) {
            /* Finalize */
            V_OUTPUT_VERBOSE(500, "vprotocol select: component %s not selected / finalized", om->om_component->pmlm_version.mca_component_name);
            if (NULL != om->om_component->pmlm_finalize) {
                /* Blatantly ignore the return code (what would we do to
                recover, anyway?  This component is going away, so errors
                don't matter anymore) */
                om->om_component->pmlm_finalize();
            }
        }
        OBJ_DESTRUCT(om);
        free(om);
    }
    /* The temporary list is empty now: destruct it to match OBJ_CONSTRUCT */
    OBJ_DESTRUCT(&opened);

    mca_base_components_close(mca_pml_v.output,
                              &mca_vprotocol_base_components_available,
                              (mca_base_component_t *) best_component, false);

    /* All done */
    if(best_component != NULL)
    {
        V_OUTPUT_VERBOSE(500, "vprotocol select: component %s selected", mca_vprotocol_component.pmlm_version.mca_component_name);
        return OMPI_SUCCESS;
    }
    else
        return OMPI_ERR_NOT_FOUND;
}