Ejemplo n.º 1
0
int mca_btl_self_component_open(void)
{
    /* register SELF component parameters */
    mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_num",
                            "Number of fragments by default", false, false,
                            0, &mca_btl_self_component.free_list_num );
    mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_max",
                            "Maximum number of fragments", false, false,
                            -1, &mca_btl_self_component.free_list_max );
    mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_inc",
                            "Increment by this number of fragments", false, false,
                            32, &mca_btl_self_component.free_list_inc );

    mca_btl_self.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
    mca_btl_self.btl_eager_limit = 128 * 1024;
    mca_btl_self.btl_rndv_eager_limit = 128 * 1024;
    mca_btl_self.btl_max_send_size = 256 * 1024;
    mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX;
    mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
    mca_btl_self.btl_min_rdma_pipeline_size = 0;
    mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE;
    mca_btl_self.btl_bandwidth = 100;
    mca_btl_self.btl_latency = 0;
    mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,
            &mca_btl_self);

    /* initialize objects */
    OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_eager, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_send, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_rdma, ompi_free_list_t);
    return OMPI_SUCCESS;
}
Ejemplo n.º 2
0
static int
mca_btl_portals4_component_open(void)
{
    OPAL_OUTPUT_VERBOSE((1, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_open\n"));

    /*
     * fill default module state
     */
    mca_btl_portals4_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW + 100;
    mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024;
    mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024;
    mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024;
    mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024;
    mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
    mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
    mca_btl_portals4_module.super.btl_flags =
        MCA_BTL_FLAGS_RDMA |
        MCA_BTL_FLAGS_RDMA_MATCHED;

    mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);

    mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX;
    mca_btl_portals4_module.super.btl_put_limit = 0;        /* not implemented */
    mca_btl_portals4_module.super.btl_get_alignment = 0;
    mca_btl_portals4_module.super.btl_put_alignment = 0;

    mca_btl_portals4_module.super.btl_get_local_registration_threshold = 0;
    mca_btl_portals4_module.super.btl_put_local_registration_threshold = 0;

    mca_btl_portals4_module.super.btl_bandwidth = 1000;
    mca_btl_portals4_module.super.btl_latency = 0;

    mca_btl_base_param_register(&mca_btl_portals4_component.super.btl_version, &mca_btl_portals4_module.super);

    mca_btl_portals4_module.portals_num_procs = 0;

    mca_btl_portals4_module.recv_eq_h = PTL_EQ_NONE;

#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    mca_btl_portals4_module.send_md_hs = NULL;
#else
    mca_btl_portals4_module.send_md_h = PTL_INVALID_HANDLE;
#endif

    mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE;
    mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE;

    mca_btl_portals4_module.long_overflow_me_h = PTL_INVALID_HANDLE;
    mca_btl_portals4_module.portals_outstanding_ops = 0;
    mca_btl_portals4_module.recv_idx = (ptl_pt_index_t) ~0UL;

    return OPAL_SUCCESS;
}
Ejemplo n.º 3
0
static int mca_btl_sctp_component_register(void)
{
    /* register SCTP component parameters */
    /* num links */
    mca_btl_sctp_component.sctp_if_include =
        mca_btl_sctp_param_register_string("if_include", "");
    mca_btl_sctp_component.sctp_if_exclude =
        mca_btl_sctp_param_register_string("if_exclude", "lo");
    mca_btl_sctp_component.sctp_free_list_num =
        mca_btl_sctp_param_register_int ("free_list_num", 8);
    mca_btl_sctp_component.sctp_free_list_max =
        mca_btl_sctp_param_register_int ("free_list_max", -1);
    mca_btl_sctp_component.sctp_free_list_inc =
        mca_btl_sctp_param_register_int ("free_list_inc", 32);
    mca_btl_sctp_component.sctp_sndbuf =
        mca_btl_sctp_param_register_int ("sndbuf", 128*1024);
    mca_btl_sctp_component.sctp_rcvbuf =
        mca_btl_sctp_param_register_int ("rcvbuf", 128*1024);
    mca_btl_sctp_component.sctp_endpoint_cache =
        mca_btl_sctp_param_register_int ("endpoint_cache", 30*1024);
    mca_btl_sctp_component.sctp_use_nodelay =
        !mca_btl_sctp_param_register_int ("use_nagle", 0);
    /* port_min */
    /* port_range */
    /* use a single one-to-many socket by default except in Solaris (see
     *  the configure.m4 file)
     */
    mca_base_param_reg_int(&mca_btl_sctp_component.super.btl_version,
                           "if_11", "If 0, have one SCTP BTL module and let SCTP do multilink scheduling. If non-zero, have an SCTP BTL module per link and let the PML do the scheduling.",
                           false, false,
                           OMPI_MCA_BTL_SCTP_USE_ONE_TO_ONE_SOCKET,
                           &mca_btl_sctp_component.sctp_if_11);

    /* have lower exclusivity than tcp */
    mca_btl_sctp_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW;
    mca_btl_sctp_module.super.btl_eager_limit = 64*1024;
    mca_btl_sctp_module.super.btl_rndv_eager_limit = 64*1024;
    mca_btl_sctp_module.super.btl_max_send_size = 128*1024;
    mca_btl_sctp_module.super.btl_rdma_pipeline_send_length = 128*1024;
    mca_btl_sctp_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
    mca_btl_sctp_module.super.btl_min_rdma_pipeline_size = 0;
    mca_btl_sctp_module.super.btl_flags  = MCA_BTL_FLAGS_PUT |
                                       MCA_BTL_FLAGS_SEND_INPLACE |
                                       MCA_BTL_FLAGS_NEED_CSUM | 
                                       MCA_BTL_FLAGS_NEED_ACK |
                                       MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
    mca_btl_sctp_module.super.btl_bandwidth = 100;
    mca_btl_sctp_module.super.btl_latency = 100;
    mca_btl_base_param_register(&mca_btl_sctp_component.super.btl_version,
            &mca_btl_sctp_module.super);

    return OMPI_SUCCESS;
}
static int mca_btl_vader_component_register (void)
{
    /* register VADER component parameters */
    mca_btl_vader_component.vader_free_list_num =
        mca_btl_vader_param_register_int("free_list_num", 8);
    mca_btl_vader_component.vader_free_list_max =
        mca_btl_vader_param_register_int("free_list_max", -1);
    mca_btl_vader_component.vader_free_list_inc =
        mca_btl_vader_param_register_int("free_list_inc", 64);
    mca_btl_vader_component.vader_mpool_name =
        mca_btl_vader_param_register_string("mpool", "sm");
    mca_btl_vader_memcpy_limit =
        mca_btl_vader_param_register_int("memcpy_limit", mca_btl_vader_memcpy_limit);
    mca_btl_vader_log_align =
        mca_btl_vader_param_register_int("log_align", mca_btl_vader_log_align);

    /* limit segment alignment to be between 4k and 16M */
    if (mca_btl_vader_log_align < 12) {
        mca_btl_vader_log_align = 12;
    } else if (mca_btl_vader_log_align > 25) {
        mca_btl_vader_log_align = 25;
    }

    mca_btl_vader_max_inline_send =
        mca_btl_vader_param_register_int("max_inline_send", mca_btl_vader_max_inline_send);

    mca_btl_vader.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
    mca_btl_vader.super.btl_eager_limit = 64 * 1024;
    mca_btl_vader.super.btl_rndv_eager_limit = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_max_send_size    = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_min_rdma_pipeline_size = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_PUT |
        MCA_BTL_FLAGS_SEND_INPLACE;
    mca_btl_vader.super.btl_seg_size = sizeof (mca_btl_base_segment_t);

    mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */
    mca_btl_vader.super.btl_latency   = 1;     /* Microsecs */

    /* Call the BTL based to register its MCA params */
    mca_btl_base_param_register(&mca_btl_vader_component.super.btl_version,
                                &mca_btl_vader.super);

    return OMPI_SUCCESS;
}
static int mca_btl_template_component_register(void)
{    
    /* initialize state */
    mca_btl_template_component.template_num_btls=0;
    mca_btl_template_component.template_btls=NULL;
    
    /* initialize objects */ 
    OBJ_CONSTRUCT(&mca_btl_template_component.template_procs, opal_list_t);

    /* register TEMPLATE component parameters */
    mca_btl_template_component.template_free_list_num = 8;
    (void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version,
                                           "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, 0, OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_btl_template_component.template_free_list_num);
    (void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version,
                                           "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, 0, OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_btl_template_component.template_free_list_max);
    (void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version,
                                           "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, 0, OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_btl_template_component.template_free_list_inc);

    mca_btl_template_component.template_mpool_name = "grdma";
    (void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version,
                                           "mpool", NULL, MCA_BASE_VAR_TYPE_STRING,
                                           NULL, 0, 0, OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_btl_template_component.template_mpool_name);

    mca_btl_template_module.super.btl_exclusivity = 0;
    mca_btl_template_module.super.btl_eager_limit = 64*1024;
    mca_btl_template_module.super.btl_rndv_eager_limit = 64*1024;
    mca_btl_template_module.super.btl_max_send_size = 128*1024;
    mca_btl_template_module.super.btl_min_rdma_pipeline_size = 1024*1024;
    mca_btl_template_module.super.btl_rdma_pipeline_frag_size = 1024*1024;
    mca_btl_template_module.super.btl_rdma_pipeline_send_length = 1024*1024;
    mca_btl_template_module.super.btl_flags  = MCA_BTL_FLAGS_PUT;

    return mca_btl_base_param_register(&mca_btl_template_component.super.btl_version,
                                       &mca_btl_template_module.super);
}
static int mca_btl_self_component_register(void)
{
    mca_base_var_group_component_register(&mca_btl_self_component.super.btl_version,
                                          "BTL for self communication");

    /* register SELF component parameters */
    mca_btl_self_component.free_list_num = 0;
    (void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_num",
                                           "Number of fragments by default",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_btl_self_component.free_list_num);
    mca_btl_self_component.free_list_max = -1;
    (void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_max",
                                           "Maximum number of fragments",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_btl_self_component.free_list_max);
    mca_btl_self_component.free_list_inc = 32;
    (void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_inc",
                                           "Increment by this number of fragments",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_btl_self_component.free_list_inc);

    mca_btl_self.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
    mca_btl_self.btl_eager_limit = 128 * 1024;
    mca_btl_self.btl_rndv_eager_limit = 128 * 1024;
    mca_btl_self.btl_max_send_size = 256 * 1024;
    mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX;
    mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
    mca_btl_self.btl_min_rdma_pipeline_size = 0;
    mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE;
    mca_btl_self.btl_seg_size = sizeof (mca_btl_base_segment_t);
    mca_btl_self.btl_bandwidth = 100;
    mca_btl_self.btl_latency = 0;
    mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,
            &mca_btl_self);

    return OMPI_SUCCESS;
}
Ejemplo n.º 7
0
int mca_btl_elan_component_open(void)
{
    /* initialize state */
    mca_btl_elan_component.elan_num_btls = 0;
    mca_btl_elan_component.elan_btls = NULL;

    mca_btl_elan_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT;
    mca_btl_elan_module.super.btl_eager_limit =  32*1024;
    mca_btl_elan_module.super.btl_rndv_eager_limit = mca_btl_elan_module.super.btl_eager_limit;
    mca_btl_elan_module.super.btl_max_send_size = 64*1024; /*64*1024;*/
    mca_btl_elan_module.super.btl_rdma_pipeline_send_length = 512 * 1024;
    mca_btl_elan_module.super.btl_rdma_pipeline_frag_size = 128 * 1024;
    mca_btl_elan_module.super.btl_min_rdma_pipeline_size = 128 * 1024;
    mca_btl_elan_module.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND;
    mca_btl_elan_module.super.btl_bandwidth = 1959;
    mca_btl_elan_module.super.btl_latency = 4;
    mca_btl_base_param_register(&mca_btl_elan_component.super.btl_version,
                                &mca_btl_elan_module.super);

    mca_base_param_reg_string( (mca_base_component_t*)&mca_btl_elan_component, "elanidmap",
                               "System-wide configuration file for the Quadrics network (elanidmap)",
                               false, false, "/etc/elanidmap", &mca_btl_elan_component.elanidmap_file );
    mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_elan_component, "max_posted_recv",
                            "Number of received posted in advance. Increasing this number for"
                            " communication bound application can lead to visible improvement"
                            " in performances",
                            false, false, 128, &mca_btl_elan_component.elan_max_posted_recv );

    /* register Elan4 component parameters */
    mca_btl_elan_component.elan_free_list_num =
        mca_btl_elan_param_register_int( "free_list_num", 8 );
    mca_btl_elan_component.elan_free_list_max =
        mca_btl_elan_param_register_int( "free_list_max",
                                         (mca_btl_elan_component.elan_free_list_num +
                                          mca_btl_elan_component.elan_max_posted_recv) );
    mca_btl_elan_component.elan_free_list_inc =
        mca_btl_elan_param_register_int( "free_list_inc", 32 );

    return OMPI_SUCCESS;
}
Ejemplo n.º 8
0
int mca_btl_tcp_component_open(void)
{
    char* message;
#ifdef __WINDOWS__
    WSADATA win_sock_data;
    if( WSAStartup(MAKEWORD(2,2), &win_sock_data) != 0 ) {
        BTL_ERROR(("failed to initialise windows sockets:%d", WSAGetLastError()));
        return OMPI_ERROR;
    }
#endif

    /* initialize state */
    mca_btl_tcp_component.tcp_listen_sd = -1;
#if OPAL_WANT_IPV6
    mca_btl_tcp_component.tcp6_listen_sd = -1;
#endif
    mca_btl_tcp_component.tcp_num_btls=0;
    mca_btl_tcp_component.tcp_addr_count = 0;
    mca_btl_tcp_component.tcp_btls=NULL;

    /* initialize objects */
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_procs, opal_hash_table_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_events, opal_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_eager, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_max, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_user, ompi_free_list_t);
    opal_hash_table_init(&mca_btl_tcp_component.tcp_procs, 256);

    /* register TCP component parameters */
    mca_btl_tcp_component.tcp_num_links =
        mca_btl_tcp_param_register_int("links", NULL, 1);
    mca_btl_tcp_component.tcp_if_include =
        mca_btl_tcp_param_register_string("if_include", NULL, "");
    mca_btl_tcp_component.tcp_if_exclude =
        mca_btl_tcp_param_register_string("if_exclude", NULL, "lo");
    mca_btl_tcp_component.tcp_free_list_num =
        mca_btl_tcp_param_register_int ("free_list_num", NULL, 8);
    mca_btl_tcp_component.tcp_free_list_max =
        mca_btl_tcp_param_register_int ("free_list_max", NULL, -1);
    mca_btl_tcp_component.tcp_free_list_inc =
        mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32);
    mca_btl_tcp_component.tcp_sndbuf =
        mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024);
    mca_btl_tcp_component.tcp_rcvbuf =
        mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024);
    mca_btl_tcp_component.tcp_endpoint_cache =
        mca_btl_tcp_param_register_int ("endpoint_cache",
                                        "The size of the internal cache for each TCP connection. This cache is"
                                        " used to reduce the number of syscalls, by replacing them with memcpy."
                                        " Every read will read the expected data plus the amount of the"
                                        " endpoint_cache", 30*1024);
    mca_btl_tcp_component.tcp_use_nodelay =
        !mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0);
    mca_btl_tcp_component.tcp_port_min =
        mca_btl_tcp_param_register_int( "port_min_v4",
                                        "The minimum port where the TCP BTL will try to bind (default 1024)", 1024 );
    if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) {
        orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
                       true, "v4", orte_process_info.nodename,
                       mca_btl_tcp_component.tcp_port_min );
        mca_btl_tcp_component.tcp_port_min = 1024;
    }
    asprintf( &message,
              "The number of ports where the TCP BTL will try to bind (default %d)."
              " This parameter together with the port min, define a range of ports"
              " where Open MPI will open sockets.",
              (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1 );
    mca_btl_tcp_component.tcp_port_range =
        mca_btl_tcp_param_register_int( "port_range_v4", message,
                                        (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1);
    free(message);
#if OPAL_WANT_IPV6
    mca_btl_tcp_component.tcp6_port_min =
        mca_btl_tcp_param_register_int( "port_min_v6",
                                        "The minimum port where the TCP BTL will try to bind (default 1024)", 1024 );
    if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) {
        orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
                       true, "v6", orte_process_info.nodename,
                       mca_btl_tcp_component.tcp6_port_min );
        mca_btl_tcp_component.tcp6_port_min = 1024;
    }
    asprintf( &message,
              "The number of ports where the TCP BTL will try to bind (default %d)."
              " This parameter together with the port min, define a range of ports"
              " where Open MPI will open sockets.",
              (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1 );
    mca_btl_tcp_component.tcp6_port_range =
        mca_btl_tcp_param_register_int( "port_range_v6", message,
                                        (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1);
    free(message);
#endif
    mca_btl_tcp_module.super.btl_exclusivity =  MCA_BTL_EXCLUSIVITY_LOW + 100;
    mca_btl_tcp_module.super.btl_eager_limit = 64*1024;
    mca_btl_tcp_module.super.btl_rndv_eager_limit = 64*1024;
    mca_btl_tcp_module.super.btl_max_send_size = 128*1024;
    mca_btl_tcp_module.super.btl_rdma_pipeline_send_length = 128*1024;
    mca_btl_tcp_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
    mca_btl_tcp_module.super.btl_min_rdma_pipeline_size = 0;
    mca_btl_tcp_module.super.btl_flags = MCA_BTL_FLAGS_PUT |
                                         MCA_BTL_FLAGS_SEND_INPLACE |
                                         MCA_BTL_FLAGS_NEED_CSUM |
                                         MCA_BTL_FLAGS_NEED_ACK |
                                         MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
    mca_btl_tcp_module.super.btl_bandwidth = 100;
    mca_btl_tcp_module.super.btl_latency = 100;
    mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version,
                                &mca_btl_tcp_module.super);

    mca_btl_tcp_component.tcp_disable_family =
        mca_btl_tcp_param_register_int ("disable_family", NULL, 0);

    return OMPI_SUCCESS;
}
Ejemplo n.º 9
0
static int btl_scif_component_register(void)
{
    (void) mca_base_var_group_component_register(&mca_btl_scif_component.super.btl_version,
                                                 "SCIF byte transport layer");

    mca_btl_scif_component.scif_free_list_num = 8;
    (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
                                           "free_list_num", "Initial fragment free list size",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_scif_component.scif_free_list_num);
    mca_btl_scif_component.scif_free_list_max = 16384;
    (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
                                           "free_list_max", "Maximum fragment free list size",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_scif_component.scif_free_list_max);
    mca_btl_scif_component.scif_free_list_inc = 64;
    (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
                                           "free_list_inc", "Fragment free list size increment",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_scif_component.scif_free_list_inc);

    mca_btl_scif_component.segment_size = 8 * 1024;
    (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
                                           "segment_size", "Size of memory segment to "
                                           "allocate for each remote process (default: "
                                           "8k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_scif_component.segment_size);

    mca_btl_scif_component.rma_use_cpu = false;
    (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
                                           "rma_use_cpu", "Use CPU instead of DMA "
                                           "for RMA copies (default: false)", MCA_BASE_VAR_TYPE_BOOL,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_scif_component.rma_use_cpu);


    mca_btl_scif_component.rma_sync = true;
    (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
                                           "rma_sync", "Use synchronous RMA instead of "
                                           "an RMA fence (default: true)", MCA_BASE_VAR_TYPE_BOOL,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_scif_component.rma_sync);

#if defined(SCIF_TIMING)
    mca_btl_scif_component.aquire_buffer_time = 0.0;
    (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
                                            "aquire_buffer_time", "Aggregate time spent "
                                            "aquiring send buffers", OPAL_INFO_LVL_9,
                                            MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE,
                                            NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY |
                                            MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL,
                                            &mca_btl_scif_component.aquire_buffer_time);

    mca_btl_scif_component.send_time = 0.0;
    (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
                                            "send_time", "Aggregate time spent writing to "
                                            "send buffers", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
                                            MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                            MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                            NULL, NULL, NULL, &mca_btl_scif_component.send_time);

    mca_btl_scif_component.sendi_time = 0.0;
    (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
                                            "sendi_time", "Aggregate time spent writing to "
                                            "send buffers in sendi", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
                                            MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                            MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                            NULL, NULL, NULL, &mca_btl_scif_component.sendi_time);

    mca_btl_scif_component.get_time = 0.0;
    (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
                                            "get_time", "Aggregate time spent in DMA read (scif_readfrom)",
                                            OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
                                            MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                            MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                            NULL, NULL, NULL, &mca_btl_scif_component.get_time);

    mca_btl_scif_component.get_count = 0;
    (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
                                            "get_count", "Number of times btl_scif_get was called",
                                            OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
                                            MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                            MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                            NULL, NULL, NULL, &mca_btl_scif_component.get_count);

    mca_btl_scif_component.put_time = 0.0;
    (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
                                            "put_time", "Aggregate time spent in DMA write (scif_writeto)",
                                            OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
                                            MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                            MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                            NULL, NULL, NULL, &mca_btl_scif_component.put_time);

    mca_btl_scif_component.put_count = 0;
    (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
                                            "put_count", "Number of times btl_scif_put was called",
                                            OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
                                            MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                            MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                            NULL, NULL, NULL, &mca_btl_scif_component.put_count);
#endif

    mca_btl_scif_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
    mca_btl_scif_module.super.btl_eager_limit               = 1 * 1024;
    mca_btl_scif_module.super.btl_rndv_eager_limit          = 1 * 1024;
    mca_btl_scif_module.super.btl_rdma_pipeline_frag_size   = 4 * 1024 * 1024;
    mca_btl_scif_module.super.btl_max_send_size             = 1 * 1024;
    mca_btl_scif_module.super.btl_rdma_pipeline_send_length = 1 * 1024;

    /* threshold for put */
    mca_btl_scif_module.super.btl_min_rdma_pipeline_size    = 1 * 1024;

    mca_btl_scif_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
        MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;

    mca_btl_scif_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);

    mca_btl_scif_module.super.btl_bandwidth = 50000; /* Mbs */
    mca_btl_scif_module.super.btl_latency   = 2;     /* Microsecs */

    /* Call the BTL based to register its MCA params */
    mca_btl_base_param_register(&mca_btl_scif_component.super.btl_version,
                                &mca_btl_scif_module.super);

    return OPAL_SUCCESS;
}
Ejemplo n.º 10
0
static int mca_btl_tcp_component_register(void)
{
    char* message;

    /* register TCP component parameters */
    mca_btl_tcp_param_register_uint("links", NULL, 1, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_num_links);
    mca_btl_tcp_param_register_string("if_include", "Comma-delimited list of devices and/or CIDR notation of networks to use for MPI communication (e.g., \"eth0,192.168.0.0/16\").  Mutually exclusive with btl_tcp_if_exclude.", "", OPAL_INFO_LVL_1, &mca_btl_tcp_component.tcp_if_include);
    mca_btl_tcp_param_register_string("if_exclude", "Comma-delimited list of devices and/or CIDR notation of networks to NOT use for MPI communication -- all devices not matching these specifications will be used (e.g., \"eth0,192.168.0.0/16\").  If set to a non-default value, it is mutually exclusive with btl_tcp_if_include.", 
                                      "127.0.0.1/8,sppp",
                                      OPAL_INFO_LVL_1, &mca_btl_tcp_component.tcp_if_exclude);

    mca_btl_tcp_param_register_int ("free_list_num", NULL, 8, OPAL_INFO_LVL_5,  &mca_btl_tcp_component.tcp_free_list_num);
    mca_btl_tcp_param_register_int ("free_list_max", NULL, -1, OPAL_INFO_LVL_5,  &mca_btl_tcp_component.tcp_free_list_max);
    mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32, OPAL_INFO_LVL_5,  &mca_btl_tcp_component.tcp_free_list_inc);
    mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_sndbuf);
    mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_rcvbuf);
    mca_btl_tcp_param_register_int ("endpoint_cache",
        "The size of the internal cache for each TCP connection. This cache is"
        " used to reduce the number of syscalls, by replacing them with memcpy."
        " Every read will read the expected data plus the amount of the"
                                    " endpoint_cache", 30*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_endpoint_cache);
    mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_not_use_nodelay);
    mca_btl_tcp_param_register_int( "port_min_v4", 
                                    "The minimum port where the TCP BTL will try to bind (default 1024)",
                                    1024, OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp_port_min);

    asprintf( &message, 
              "The number of ports where the TCP BTL will try to bind (default %d)."
              " This parameter together with the port min, define a range of ports"
              " where Open MPI will open sockets.",
              (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1 );
    mca_btl_tcp_param_register_int( "port_range_v4", message,
                                    (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1,
                                    OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp_port_range);
    free(message);
#if OPAL_WANT_IPV6
    mca_btl_tcp_param_register_int( "port_min_v6",
                                    "The minimum port where the TCP BTL will try to bind (default 1024)", 1024,
                                    OPAL_INFO_LVL_2, & mca_btl_tcp_component.tcp6_port_min );
    asprintf( &message, 
              "The number of ports where the TCP BTL will try to bind (default %d)."
              " This parameter together with the port min, define a range of ports"
              " where Open MPI will open sockets.",
              (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1 );
    mca_btl_tcp_param_register_int( "port_range_v6", message,
                                    (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1,
                                    OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp6_port_range );
    free(message);
#endif

    mca_btl_tcp_module.super.btl_exclusivity =  MCA_BTL_EXCLUSIVITY_LOW + 100;
    mca_btl_tcp_module.super.btl_eager_limit = 64*1024;
    mca_btl_tcp_module.super.btl_rndv_eager_limit = 64*1024;
    mca_btl_tcp_module.super.btl_max_send_size = 128*1024;
    mca_btl_tcp_module.super.btl_rdma_pipeline_send_length = 128*1024;
    mca_btl_tcp_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
    mca_btl_tcp_module.super.btl_min_rdma_pipeline_size = 0;
    mca_btl_tcp_module.super.btl_flags = MCA_BTL_FLAGS_PUT |
                                       MCA_BTL_FLAGS_SEND_INPLACE |
                                       MCA_BTL_FLAGS_NEED_CSUM |
                                       MCA_BTL_FLAGS_NEED_ACK |
                                       MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
    mca_btl_tcp_module.super.btl_seg_size = sizeof (mca_btl_base_segment_t);
    mca_btl_tcp_module.super.btl_bandwidth = 100;
    mca_btl_tcp_module.super.btl_latency = 100;

    mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version,
                                &mca_btl_tcp_module.super);

    mca_btl_tcp_param_register_int ("disable_family", NULL, 0, OPAL_INFO_LVL_2,  &mca_btl_tcp_component.tcp_disable_family);

    /* Register a list of interfaces to use in sequence */
    mca_btl_tcp_param_register_string("if_seq", 
                                      "If specified, a comma-delimited list of TCP interfaces.  Interfaces will be assigned, one to each MPI process, in a round-robin fashion on each server.  For example, if the list is \"eth0,eth1\" and four MPI processes are run on a single server, then local ranks 0 and 2 will use eth0 and local ranks 1 and 3 will use eth1.", NULL, OPAL_INFO_LVL_9, &mca_btl_tcp_if_seq_string);

    mca_btl_tcp_component.tcp_if_seq = NULL;
    if (NULL != mca_btl_tcp_if_seq_string && '\0' != *mca_btl_tcp_if_seq_string) {
        char **argv = opal_argv_split(mca_btl_tcp_if_seq_string, ',');

        if (NULL != argv && '\0' != *(argv[0])) {
            int if_index, rc, count;
            ompi_node_rank_t node_rank;
            char name[256];

            node_rank = ompi_process_info.my_node_rank;

            /* Now that we've got that local rank, take the
               corresponding entry from the tcp_if_seq list (wrapping
               if necessary) */
            count = opal_argv_count(argv);
            mca_btl_tcp_component.tcp_if_seq = 
                strdup(argv[node_rank % count]);
            opal_argv_free(argv);

            /* Double check that the selected interface actually exists */
            for (if_index = opal_ifbegin(); if_index >= 0; 
                 if_index = opal_ifnext(if_index)){
                if (OPAL_SUCCESS != 
                    (rc = opal_ifindextoname(if_index, name, sizeof(name)))) {
                    return rc;
                }
                if (0 == strcmp(name, mca_btl_tcp_component.tcp_if_seq)) {
                    break;
                }
            }
            if (if_index < 0) {
                opal_show_help("help-mpi-btl-tcp.txt", 
                               "invalid if_inexclude",
                               true, "if_seq",
                               ompi_process_info.nodename,
                               mca_btl_tcp_component.tcp_if_seq,
                               "Interface does not exist");
                free(mca_btl_tcp_component.tcp_if_seq);
                mca_btl_tcp_component.tcp_if_seq = NULL;
            } else {
                BTL_VERBOSE(("Node rank %d using TCP interface %s",
                             node_rank, mca_btl_tcp_component.tcp_if_seq));
            }
        }
    }

    return mca_btl_tcp_component_verify();
}
Ejemplo n.º 11
0
static int
btl_ugni_component_register(void)
{
    mca_base_var_enum_t *new_enum;
    gni_nic_device_t device_type;
    int rc;

    (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
                                                 "uGNI byte transport layer");

    mca_btl_ugni_component.ugni_free_list_num = 8;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_num);
    mca_btl_ugni_component.ugni_free_list_max = 4096;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_max);
    mca_btl_ugni_component.ugni_free_list_inc = 64;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_inc);

    mca_btl_ugni_component.ugni_eager_num = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_num);
    mca_btl_ugni_component.ugni_eager_max = 128;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_max);
    mca_btl_ugni_component.ugni_eager_inc = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_inc);

    mca_btl_ugni_component.remote_cq_size = 40000;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "remote_cq_size", "Remote SMSG completion queue "
                                           "size (default 40000)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.remote_cq_size);
    mca_btl_ugni_component.local_cq_size = 8192;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "local_cq_size", "Local completion queue size "
                                           "(default 8192)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.local_cq_size);

    mca_btl_ugni_component.ugni_smsg_limit = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_limit", "Maximum size message that "
                                           "will be sent using the SMSG/MSGQ protocol "
                                           "(0 - autoselect(default), 16k max)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_smsg_limit);

    mca_btl_ugni_component.smsg_max_credits = 32;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_max_credits", "Maximum number of "
                                           "outstanding SMSG/MSGQ message (default 32)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_max_credits);

    mca_btl_ugni_component.ugni_fma_limit = 1024;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "fma_limit", "Maximum size message that "
                                           "will be sent using the FMA (Fast Memory "
                                           "Access) protocol (default 1024, 64k max)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_fma_limit);

    mca_btl_ugni_component.rdma_max_retries = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.rdma_max_retries);

    mca_btl_ugni_component.smsg_max_retries = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_max_retries);

    mca_btl_ugni_component.max_mem_reg = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "max_mem_reg", "Maximum number of "
                                           "memory registrations a process can "
                                           "hold (0 - autoselect, -1 - unlimited)"
                                           " (default 0)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.max_mem_reg);

    mca_btl_ugni_component.mbox_increment = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "mbox_inc", "Number of SMSG mailboxes to "
                                           "allocate in each block (0 - autoselect(default))",
                                           MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);

    mca_btl_ugni_component.smsg_page_size = 2 << 20;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_page_size", "Page size to use for SMSG "
                                           "mailbox allocation (default 2M)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_page_size);

    mca_btl_ugni_component.progress_thread_requested = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "request_progress_thread",
                                           "Enable to request ugni btl progress thread - requires MPI_THREAD_MULTIPLE support",
                                           MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.progress_thread_requested);

    /* performance variables */
    mca_btl_ugni_progress_thread_wakeups = 0;
    (void) mca_base_component_pvar_register(&mca_btl_ugni_component.super.btl_version,
                                            "progress_thread_wakeups", "Number of times the progress thread "
                                            "has been woken", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
                                            MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                            MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL,
                                            NULL, NULL, &mca_btl_ugni_progress_thread_wakeups);

    /* btl/ugni can only support only a fixed set of mpools (these mpools have compatible resource
     * structures) */
    rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum);
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    mca_btl_ugni_component.mpool_type = MCA_BTL_UGNI_MPOOL_UDREG;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "mpool", "mpool to use", MCA_BASE_VAR_TYPE_INT, new_enum,
                                           0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_type);
    OBJ_RELEASE(new_enum);

    /* ensure we loose send exclusivity to sm and vader if they are enabled */
    mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2;

    /* smsg threshold */
    mca_btl_ugni_module.super.btl_eager_limit               = 8 * 1024;
    mca_btl_ugni_module.super.btl_rndv_eager_limit          = 8 * 1024;
    mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size   = 4 * 1024 * 1024;
    mca_btl_ugni_module.super.btl_max_send_size             = 8 * 1024;
    mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024;

    mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024;

    /* determine if there are get alignment restrictions */
    GNI_GetDeviceType (&device_type);

    /*
     * see def. of ALIGNMENT_MASK to figure this one out
     */
    /* both gemini and aries have a 4-byte alignment requirement on remote addresses */
    mca_btl_ugni_module.super.btl_get_alignment = 4;

    /* threshold for put */
    mca_btl_ugni_module.super.btl_min_rdma_pipeline_size    = 8 * 1024;

    mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
        MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_ATOMIC_OPS |
        MCA_BTL_FLAGS_ATOMIC_FOPS;
    mca_btl_ugni_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD |
        MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR |
        MCA_BTL_ATOMIC_SUPPORTS_CSWAP;

    mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);

    mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
    mca_btl_ugni_module.super.btl_latency   = 2;     /* Microsecs */

    mca_btl_ugni_module.super.btl_get_local_registration_threshold = 0;
    mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_limit;

    /* Call the BTL based to register its MCA params */
    mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
                                &mca_btl_ugni_module.super);

    return OPAL_SUCCESS;
}
static int mca_btl_vader_component_register (void)
{
    (void) mca_base_var_group_component_register(&mca_btl_vader_component.super.btl_version,
                                                 "XPMEM shared memory byte transport later");

    /* register VADER component variables */
    mca_btl_vader_component.vader_free_list_num = 8;
    (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
                                           "free_list_num", "Initial number of fragments "
                                           "to allocate for shared memory communication.",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_vader_component.vader_free_list_num);
    mca_btl_vader_component.vader_free_list_max = 16384;
    (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
                                           "free_list_max", "Maximum number of fragments "
                                           "to allocate for shared memory communication.",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_vader_component.vader_free_list_max);
    mca_btl_vader_component.vader_free_list_inc = 64;
    (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
                                           "free_list_inc", "Number of fragments to create "
                                           "on each allocation.", MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_vader_component.vader_free_list_inc);

    mca_btl_vader_component.memcpy_limit = 524288;
    (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
                                           "memcpy_limit", "Message size to switch from using "
                                           "memove to memcpy. The relative speed of these two "
                                           "routines can vary by size.", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_vader_component.memcpy_limit);
    mca_btl_vader_component.log_attach_align = 21;
    (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
                                           "log_align", "Log base 2 of the alignment to use for xpmem "
                                           "segments (default: 21, minimum: 12, maximum: 25)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_vader_component.log_attach_align);

    mca_btl_vader_component.segment_size = 1 << 24;
    (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
                                           "segment_size", "Maximum size of all shared "
                                           "memory buffers (default: 16M)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_vader_component.segment_size);

    mca_btl_vader_component.max_inline_send = 256;
    (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
                                           "max_inline_send", "Maximum size to transfer "
                                           "using copy-in copy-out semantics",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_vader_component.max_inline_send);

    mca_btl_vader.super.btl_exclusivity               = MCA_BTL_EXCLUSIVITY_HIGH;
    mca_btl_vader.super.btl_eager_limit               = 32 * 1024;
    mca_btl_vader.super.btl_rndv_eager_limit          = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_max_send_size             = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_rdma_pipeline_frag_size   = mca_btl_vader.super.btl_eager_limit;
    mca_btl_vader.super.btl_min_rdma_pipeline_size    = mca_btl_vader.super.btl_eager_limit;

    mca_btl_vader.super.btl_flags     = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
    mca_btl_vader.super.btl_seg_size  = sizeof (mca_btl_base_segment_t);
    mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */
    mca_btl_vader.super.btl_latency   = 1;     /* Microsecs */

    /* Call the BTL based to register its MCA params */
    mca_btl_base_param_register(&mca_btl_vader_component.super.btl_version,
                                &mca_btl_vader.super);

    return OMPI_SUCCESS;
}
Ejemplo n.º 13
0
/*
 * Register and check all MCA parameters
 */
int btl_openib_register_mca_params(void)
{
    char default_qps[100];
    uint32_t mid_qp_size;
    int i;
    char *msg, *str, *pkey;
    int ival, ival2, ret, tmp;

    ret = OMPI_SUCCESS;
#define CHECK(expr) do {\
        tmp = (expr); \
        if (OMPI_SUCCESS != tmp) ret = tmp; \
     } while (0)

    /* register openib component parameters */
    CHECK(reg_int("verbose", NULL,
                  "Output some verbose OpenIB BTL information "
                  "(0 = no output, nonzero = output)", 0, &ival, 0));
    mca_btl_openib_component.verbose = (0 != ival);

    CHECK(reg_int("warn_no_device_params_found",
                  "warn_no_hca_params_found",
                  "Warn when no device-specific parameters are found in the INI file specified by the btl_openib_device_param_files MCA parameter (0 = do not warn; any other value = warn)",
                  1, &ival, 0));
    mca_btl_openib_component.warn_no_device_params_found = (0 != ival);
    CHECK(reg_int("warn_default_gid_prefix", NULL,
                  "Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
                  1, &ival, 0));
    mca_btl_openib_component.warn_default_gid_prefix = (0 != ival);
    CHECK(reg_int("warn_nonexistent_if", NULL,
                  "Warn if non-existent devices and/or ports are specified in the btl_openib_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
                  1, &ival, 0));
    mca_btl_openib_component.warn_nonexistent_if = (0 != ival);

    if (OMPI_HAVE_IBV_FORK_INIT) {
        ival2 = -1;
    } else {
        ival2 = 0;
    }
    CHECK(reg_int("want_fork_support", NULL,
                  "Whether fork support is desired or not "
                  "(negative = try to enable fork support, but continue even if it is not available, 0 = do not enable fork support, positive = try to enable fork support and fail if it is not available)",
                  ival2, &ival, 0));
#ifdef HAVE_IBV_FORK_INIT
    mca_btl_openib_component.want_fork_support = ival;
#else
    if (0 != ival) {
        orte_show_help("help-mpi-btl-openib.txt",
                       "ibv_fork requested but not supported", true,
                       orte_process_info.nodename);
        return OMPI_ERROR;
    }
#endif

    asprintf(&str, "%s/mca-btl-openib-device-params.ini",
             opal_install_dirs.pkgdatadir);
    if (NULL == str) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    CHECK(reg_string("device_param_files", "hca_param_files",
                     "Colon-delimited list of INI-style files that contain device vendor/part-specific parameters",
                     str, &mca_btl_openib_component.device_params_file_names, 
                     0));
    free(str);

    CHECK(reg_string("device_type", NULL,
                     "Specify to only use IB or iWARP network adapters (infiniband = only use InfiniBand HCAs; iwarp = only use iWARP NICs; all = use any available adapters)",
                     "all", &str, 0));
    if (0 == strcasecmp(str, "ib") ||
        0 == strcasecmp(str, "infiniband")) {
        mca_btl_openib_component.device_type = BTL_OPENIB_DT_IB;
    } else if (0 == strcasecmp(str, "iw") ||
               0 == strcasecmp(str, "iwarp")) {
        mca_btl_openib_component.device_type = BTL_OPENIB_DT_IWARP;
    } else if (0 == strcasecmp(str, "all")) {
        mca_btl_openib_component.device_type = BTL_OPENIB_DT_ALL;
    } else {
        orte_show_help("help-mpi-btl-openib.txt",
                       "ibv_fork requested but not supported", true,
                       orte_process_info.nodename);
        return OMPI_ERROR;
    }
    free(str);

    CHECK(reg_int("max_btls", NULL,
                  "Maximum number of device ports to use "
                  "(-1 = use all available, otherwise must be >= 1)",
                  -1, &mca_btl_openib_component.ib_max_btls,
                  REGINT_NEG_ONE_OK | REGINT_GE_ONE));
    CHECK(reg_int("free_list_num", NULL,
                  "Intial size of free lists (must be >= 1)",
                  8, &mca_btl_openib_component.ib_free_list_num,
                  REGINT_GE_ONE));
    CHECK(reg_int("free_list_max", NULL,
                  "Maximum size of free lists "
                  "(-1 = infinite, otherwise must be >= 0)",
                  -1, &mca_btl_openib_component.ib_free_list_max,
                  REGINT_NEG_ONE_OK | REGINT_GE_ONE));
    CHECK(reg_int("free_list_inc", NULL,
                  "Increment size of free lists (must be >= 1)",
                  32, &mca_btl_openib_component.ib_free_list_inc,
                  REGINT_GE_ONE));
    CHECK(reg_string("mpool", NULL,
                     "Name of the memory pool to be used (it is unlikely that you will ever want to change this",
                     "rdma", &mca_btl_openib_component.ib_mpool_name,
                     0));
    CHECK(reg_int("reg_mru_len", NULL,
                  "Length of the registration cache most recently used list "
                  "(must be >= 1)",
                  16, (int*) &mca_btl_openib_component.reg_mru_len,
                  REGINT_GE_ONE));

    CHECK(reg_int("cq_size", "ib_cq_size",
                  "Size of the OpenFabrics completion "
                  "queue (will automatically be set to a minimum of "
                  "(2 * number_of_peers * btl_openib_rd_num))",
                  1000, &ival, REGINT_GE_ONE));
    mca_btl_openib_component.ib_cq_size[BTL_OPENIB_LP_CQ] =
        mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival;

    CHECK(reg_int("max_inline_data", "ib_max_inline_data",
                  "Maximum size of inline data segment "
                  "(-1 = run-time probe to discover max value, "
                  "otherwise must be >= 0). "
                  "If not explicitly set, use max_inline_data from "
                  "the INI file containing device-specific parameters",
                  -1, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
    mca_btl_openib_component.ib_max_inline_data = (int32_t) ival;

    CHECK(reg_string("pkey", "ib_pkey_val", 
                     "OpenFabrics partition key (pkey) value. "
                     "Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)",
                     "0", &pkey, 0));
    mca_btl_openib_component.ib_pkey_val = 
        ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK;
    free(pkey);

    CHECK(reg_int("psn", "ib_psn",
                  "OpenFabrics packet sequence starting number "
                  "(must be >= 0)",
                  0, &ival, REGINT_GE_ZERO));
    mca_btl_openib_component.ib_psn = (uint32_t) ival;

    CHECK(reg_int("ib_qp_ous_rd_atom", NULL, 
                  "InfiniBand outstanding atomic reads "
                  "(must be >= 0)",
                  4, &ival, REGINT_GE_ZERO));
    mca_btl_openib_component.ib_qp_ous_rd_atom = (uint32_t) ival;

    asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files).  Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes",
             IBV_MTU_256,
             IBV_MTU_512,
             IBV_MTU_1024,
             IBV_MTU_2048,
             IBV_MTU_4096);
    if (NULL == msg) {
        /* Don't try to recover from this */
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    CHECK(reg_int("mtu", "ib_mtu", msg, IBV_MTU_1024, &ival, 0));
    free(msg);
    if (ival < IBV_MTU_1024 || ival > IBV_MTU_4096) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                       true, "invalid value for btl_openib_ib_mtu",
                       "btl_openib_ib_mtu reset to 1024");
        mca_btl_openib_component.ib_mtu = IBV_MTU_1024;
    } else {
        mca_btl_openib_component.ib_mtu = (uint32_t) ival;
    }

    CHECK(reg_int("ib_min_rnr_timer", NULL, "InfiniBand minimum "
                  "\"receiver not ready\" timer, in seconds "
                  "(must be >= 0 and <= 31)",
                  25, &ival, 0));
    if (ival > 31) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                       true, "btl_openib_ib_min_rnr_timer > 31",
                       "btl_openib_ib_min_rnr_timer reset to 31");
        ival = 31;
    } else if (ival < 0){
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                   true, "btl_openib_ib_min_rnr_timer < 0",
                   "btl_openib_ib_min_rnr_timer reset to 0");
        ival = 0;
    }
    mca_btl_openib_component.ib_min_rnr_timer = (uint32_t) ival;

    CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * (2^btl_openib_ib_timeout)"
                  "(must be >= 0 and <= 31)",
                  20, &ival, 0));
    if (ival > 31) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                       true, "btl_openib_ib_timeout > 31",
                       "btl_openib_ib_timeout reset to 31");
        ival = 31;
    } else if (ival < 0) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                   true, "btl_openib_ib_timeout < 0",
                   "btl_openib_ib_timeout reset to 0");
        ival = 0;
    }
    mca_btl_openib_component.ib_timeout = (uint32_t) ival;

    CHECK(reg_int("ib_retry_count", NULL, "InfiniBand transmit retry count "
                  "(must be >= 0 and <= 7)",
                  7, &ival, 0));
    if (ival > 7) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                       true, "btl_openib_ib_retry_count > 7",
                       "btl_openib_ib_retry_count reset to 7");
        ival = 7;
    } else if (ival < 0) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                   true, "btl_openib_ib_retry_count < 0",
                   "btl_openib_ib_retry_count reset to 0");
        ival = 0;
    }
    mca_btl_openib_component.ib_retry_count = (uint32_t) ival;

    CHECK(reg_int("ib_rnr_retry", NULL, "InfiniBand \"receiver not ready\" "
                  "retry count; applies *only* to SRQ/XRC queues.  PP queues "
                  "use RNR retry values of 0 because Open MPI performs "
                  "software flow control to guarantee that RNRs never occur "
                  "(must be >= 0 and <= 7; 7 = \"infinite\")",
                  7, &ival, 0));
    if (ival > 7) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                       true, "btl_openib_ib_rnr_retry > 7",
                       "btl_openib_ib_rnr_retry reset to 7");
        ival = 7;
    } else if (ival < 0) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                   true, "btl_openib_ib_rnr_retry < 0",
                   "btl_openib_ib_rnr_retry reset to 0");
        ival = 0;
    }
    mca_btl_openib_component.ib_rnr_retry = (uint32_t) ival;

    CHECK(reg_int("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA "
                  "destination operations "
                  "(must be >= 0)",
                  4, &ival, REGINT_GE_ZERO));
    mca_btl_openib_component.ib_max_rdma_dst_ops = (uint32_t) ival;

    CHECK(reg_int("ib_service_level", NULL, "InfiniBand service level "
                  "(must be >= 0 and <= 15)",
                  0, &ival, 0));
    if (ival > 15) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                       true, "btl_openib_ib_service_level > 15",
                       "btl_openib_ib_service_level reset to 15");
        ival = 15;
    } else if (ival < 0) {
        orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
                   true, "btl_openib_ib_service_level < 0",
                   "btl_openib_ib_service_level reset to 0");
        ival = 0;
    }
    mca_btl_openib_component.ib_service_level = (uint32_t) ival;

    CHECK(reg_int("use_eager_rdma", NULL, "Use RDMA for eager messages "
                  "(-1 = use device default, 0 = do not use eager RDMA, "
                  "1 = use eager RDMA)",
                  -1, &ival, 0));
    mca_btl_openib_component.use_eager_rdma = (int32_t) ival;

    CHECK(reg_int("eager_rdma_threshold", NULL,
                  "Use RDMA for short messages after this number of "
                  "messages are received from a given peer "
                  "(must be >= 1)",
                  16, &ival, REGINT_GE_ONE));
    mca_btl_openib_component.eager_rdma_threshold = (int32_t) ival;

    CHECK(reg_int("max_eager_rdma", NULL, "Maximum number of peers allowed to use "
                  "RDMA for short messages (RDMA is used for all long "
                  "messages, except if explicitly disabled, such as "
                  "with the \"dr\" pml) "
                  "(must be >= 0)",
                  16, &ival, REGINT_GE_ZERO));
    mca_btl_openib_component.max_eager_rdma = (int32_t) ival;

    CHECK(reg_int("eager_rdma_num", NULL, "Number of RDMA buffers to allocate "
                  "for small messages"
                  "(must be >= 1)",
                  16, &ival, REGINT_GE_ONE));
    mca_btl_openib_component.eager_rdma_num = (int32_t) (ival + 1);

    CHECK(reg_int("btls_per_lid", NULL, "Number of BTLs to create for each "
                  "InfiniBand LID "
                  "(must be >= 1)",
                  1, &ival, REGINT_GE_ONE));
    mca_btl_openib_component.btls_per_lid = (uint32_t) ival;

    CHECK(reg_int("max_lmc", NULL, "Maximum number of LIDs to use for each device port "
                  "(must be >= 0, where 0 = use all available)",
                  0, &ival, REGINT_GE_ZERO));
    mca_btl_openib_component.max_lmc = (uint32_t) ival;

#if OMPI_HAVE_THREADS
    CHECK(reg_int("enable_apm_over_lmc", NULL, "Maximum number of alterative paths for each device port "
                  "(must be >= -1, where 0 = disable apm, -1 = all availible alternative paths )",
                  0, &ival, REGINT_NEG_ONE_OK|REGINT_GE_ZERO));
    mca_btl_openib_component.apm_lmc = (uint32_t) ival;
    CHECK(reg_int("enable_apm_over_ports", NULL, "Enable alterative path migration (APM) over different ports of the same device "
                  "(must be >= 0, where 0 = disable APM over ports , 1 = enable APM over ports of the same device)",
                  0, &ival, REGINT_GE_ZERO));
    mca_btl_openib_component.apm_ports = (uint32_t) ival;

    CHECK(reg_int("enable_apm_over_lmc", NULL, "Maximum number of alterative paths for each device port "
                  "(must be >= -1, where 0 = disable APM, -1 = all availible alternative paths)",
                  0, &ival, REGINT_NEG_ONE_OK|REGINT_GE_ZERO));
    mca_btl_openib_component.apm_lmc = (uint32_t) ival;
    CHECK(reg_int("enable_apm_over_ports", NULL, "Enable alterative path migration (APM) over different ports of the same device "
                  "(must be >= 0, where 0 = disable APM over ports, 1 = enable APM over ports of the same device)",
                  0, &ival, REGINT_GE_ZERO));
    mca_btl_openib_component.apm_ports = (uint32_t) ival;

    CHECK(reg_int("use_async_event_thread", NULL,
                "If nonzero, use the thread that will handle InfiniBand asyncihronous events ",
                1, &ival, 0));
    mca_btl_openib_component.use_async_event_thread = (0 != ival);
#endif

    CHECK(reg_int("buffer_alignment", NULL,
                  "Prefered communication buffer alignment, in bytes "
                  "(must be > 0 and power of two)",
                  64, &ival, REGINT_GE_ZERO));
    if(ival <= 1 || (ival & (ival - 1))) {
        orte_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
                true, ival, orte_process_info.nodename, 64);
        mca_btl_openib_component.buffer_alignment = 64;
    } else {
        mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
    }

    CHECK(reg_int("use_message_coalescing", NULL,
                  "Use message coalescing", 1, &ival, 0));
    mca_btl_openib_component.use_message_coalescing = (0 != ival);

    CHECK(reg_int("cq_poll_ratio", NULL,
                  "how often poll high priority CQ versus low priority CQ",
                  100, &ival, REGINT_GE_ONE));
    mca_btl_openib_component.cq_poll_ratio = (uint32_t)ival;
    CHECK(reg_int("eager_rdma_poll_ratio", NULL,
                  "how often poll eager RDMA channel versus CQ",
                  100, &ival, REGINT_GE_ONE));
    mca_btl_openib_component.eager_rdma_poll_ratio = (uint32_t)ival;
    CHECK(reg_int("hp_cq_poll_per_progress", NULL,
                  "max number of completion events to process for each call "
                  "of BTL progress engine",
                  10, &ival, REGINT_GE_ONE));
    mca_btl_openib_component.cq_poll_progress = (uint32_t)ival;

    /* Info only */
    mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
                           "have_fork_support",
                           "Whether the OpenFabrics stack supports applications that invoke the \"fork()\" system call or not (0 = no, 1 = yes).  Note that this value does NOT indicate whether the system being run on supports \"fork()\" with OpenFabrics applications or not.",
                           false, true,
                           OMPI_HAVE_IBV_FORK_INIT ? 1 : 0,
                           NULL);

    mca_btl_openib_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT;

    mca_btl_openib_module.super.btl_eager_limit = 12 * 1024;
    mca_btl_openib_module.super.btl_rndv_eager_limit = 12 * 1024;
    mca_btl_openib_module.super.btl_max_send_size = 64 * 1024;
    mca_btl_openib_module.super.btl_rdma_pipeline_send_length = 1024 * 1024;
    mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024;
    mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024;
    mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA |
        MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
    mca_btl_openib_module.super.btl_bandwidth = 800;
    mca_btl_openib_module.super.btl_latency = 10;
    CHECK(mca_btl_base_param_register(
            &mca_btl_openib_component.super.btl_version,
            &mca_btl_openib_module.super));

    /* setup all the qp stuff */
    mid_qp_size = mca_btl_openib_module.super.btl_eager_limit / 4;
    /* round mid_qp_size to smallest power of two */
    for(i = 31; i > 0; i--) {
        if(!(mid_qp_size & (1<<i))) {
            continue;
        }
        mid_qp_size = (1<<i);
        break;
    }

    if(mid_qp_size <= 128) {
        mid_qp_size = 1024;
    }

    snprintf(default_qps, 100,
            "P,128,256,192,128:S,%u,256,128,32:S,%u,256,128,32:S,%u,256,128,32",
            mid_qp_size,
            (uint32_t)mca_btl_openib_module.super.btl_eager_limit,
            (uint32_t)mca_btl_openib_module.super.btl_max_send_size);

    mca_btl_openib_component.default_recv_qps = strdup(default_qps);
    if(NULL == mca_btl_openib_component.default_recv_qps) {
        BTL_ERROR(("Unable to allocate memory for default receive queues string.\n"));
        return OMPI_ERROR;
    }

    CHECK(reg_string("receive_queues", NULL,
                     "Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
                     default_qps, &mca_btl_openib_component.receive_queues, 
                     0));
    mca_btl_openib_component.receive_queues_source = 
        (0 == strcmp(default_qps, 
                     mca_btl_openib_component.receive_queues)) ? 
        BTL_OPENIB_RQ_SOURCE_DEFAULT : BTL_OPENIB_RQ_SOURCE_MCA;

    CHECK(reg_string("if_include", NULL,
                     "Comma-delimited list of devices/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found).  Mutually exclusive with btl_openib_if_exclude.",
                     NULL, &mca_btl_openib_component.if_include,
                     0));

    CHECK(reg_string("if_exclude", NULL,
                     "Comma-delimited list of device/ports to be excluded (empty value means to not exclude any ports).  Mutually exclusive with btl_openib_if_include.",
                     NULL, &mca_btl_openib_component.if_exclude,
                     0));

    CHECK(reg_string("ipaddr_include", NULL,
                     "Comma-delimited list of IP Addresses to be used (e.g. \"192.168.1.0/24\").  Mutually exclusive with btl_openib_ipaddr_exclude.",
                     NULL, &mca_btl_openib_component.ipaddr_include,
                     0));

    CHECK(reg_string("ipaddr_exclude", NULL,
                     "Comma-delimited list of IP Addresses to be excluded (e.g. \"192.168.1.0/24\").  Mutually exclusive with btl_openib_ipaddr_include.",
                     NULL, &mca_btl_openib_component.ipaddr_exclude,
                     0));

    /* Register any MCA params for the connect pseudo-components */
    if (OMPI_SUCCESS == ret) {
        ret = ompi_btl_openib_connect_base_register();
    }

    return ret;
}
Ejemplo n.º 14
0
static int btl_ugni_component_register(void)
{
    mca_base_var_enum_t *new_enum;
    gni_nic_device_t device_type;
    char *mpool_hints_tmp = NULL;
    int rc;

    (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
                                                 "uGNI byte transport layer");

    mca_btl_ugni_component.ugni_free_list_num = 8;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_num);
    mca_btl_ugni_component.ugni_free_list_max = 4096;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_max);
    mca_btl_ugni_component.ugni_free_list_inc = 64;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_inc);

    mca_btl_ugni_component.ugni_eager_num = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_num);
    mca_btl_ugni_component.ugni_eager_max = 128;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_max);
    mca_btl_ugni_component.ugni_eager_inc = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_inc);

    mca_btl_ugni_component.remote_cq_size = 40000;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "remote_cq_size", "Remote SMSG completion queue "
                                           "size (default 40000)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.remote_cq_size);
    mca_btl_ugni_component.local_cq_size = 8192;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "local_cq_size", "Local completion queue size "
                                           "(default 8192)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.local_cq_size);

    mca_btl_ugni_component.ugni_smsg_limit = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_limit", "Maximum size message that "
                                           "will be sent using the SMSG/MSGQ protocol "
                                           "(0 - autoselect(default), 16k max)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_smsg_limit);

    mca_btl_ugni_component.smsg_max_credits = 32;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_max_credits", "Maximum number of "
                                           "outstanding SMSG/MSGQ message (default 32)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_max_credits);

    mca_btl_ugni_component.ugni_fma_limit = 1024;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "fma_limit", "Maximum size message that "
                                           "will be sent using the FMA (Fast Memory "
                                           "Access) protocol (default 1024, 64k max)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_fma_limit);

    mca_btl_ugni_component.rdma_max_retries = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.rdma_max_retries);

    mca_btl_ugni_component.smsg_max_retries = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_max_retries);

    mca_btl_ugni_component.max_mem_reg = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "max_mem_reg", "Maximum number of "
                                           "memory registrations a process can "
                                           "hold (0 - autoselect, -1 - unlimited)"
                                           " (default 0)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.max_mem_reg);

    mca_btl_ugni_component.mbox_increment = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "mbox_inc", "Number of SMSG mailboxes to "
                                           "allocate in each block (0 - autoselect(default))",
                                           MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);

    /* communication domain flags */
    rc = mca_base_var_enum_create_flag ("btl_ugni_cdm_flags", cdm_flags, (mca_base_var_enum_flag_t **) &new_enum);
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    mca_btl_ugni_component.cdm_flags = GNI_CDM_MODE_FORK_PARTCOPY | GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL |
        GNI_CDM_MODE_MDD_SHARED | GNI_CDM_MODE_FMA_SHARED | GNI_CDM_MODE_FMA_SMALL_WINDOW;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "cdm_flags", "Flags to set when creating a communication domain "
                                           " (default: fork-fullcopy,cached-amo-enabled,err-no-kill,fast-datagram-poll,"
                                           "fma-shared,fma-small-window)",
                                           MCA_BASE_VAR_TYPE_UNSIGNED_INT, new_enum, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.cdm_flags);
    OBJ_RELEASE(new_enum);

    mca_btl_ugni_component.virtual_device_count = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "virtual_device_count", "Number of virtual devices to create. Higher numbers may "
                                           "result in better performance when using threads. (default: auto, max: 8)",
                                           MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.virtual_device_count);

    /* determine if there are get alignment restrictions */
    GNI_GetDeviceType (&device_type);


    mca_btl_ugni_component.smsg_page_size = 2 << 20;
    if (GNI_DEVICE_GEMINI == device_type) {
        if (access ("/sys/class/gemini/ghal0/mrt", R_OK)) {
            int fd = open ("/sys/class/gemini/ghal0/mrt", O_RDONLY);
            char buffer[10];

            if (0 <= fd) {
                memset (buffer, 0, sizeof (buffer));
                read (fd, buffer, sizeof (buffer) - 1);
                close (fd);
                mca_btl_ugni_ugni_page_size = strtol (buffer, NULL, 10) * 1024;
                mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size;
            }
        }
    }

    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_page_size", "Page size to use for SMSG mailbox allocation (default: detect)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_page_size);

    mca_btl_ugni_component.progress_thread_requested = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "request_progress_thread",
                                           "Enable to request ugni btl progress thread - requires MPI_THREAD_MULTIPLE support",
                                           MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.progress_thread_requested);

    /* performance variables */
    mca_btl_ugni_progress_thread_wakeups = 0;
    (void) mca_base_component_pvar_register(&mca_btl_ugni_component.super.btl_version,
                                            "progress_thread_wakeups", "Number of times the progress thread "
                                            "has been woken", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
                                            MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                            MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL,
                                            NULL, NULL, &mca_btl_ugni_progress_thread_wakeups);

    /* register network statistics as performance variables */
    for (int i = 0 ; i < GNI_NUM_STATS ; ++i) {
        char name[128], desc[128];
        size_t str_len = strlen (gni_statistic_str[i]);

        assert (str_len < sizeof (name));

        /* we can get an all-caps string for the variable from gni_statistic_str. need to make it lowercase
         * to match ompi standards */
        for (size_t j = 0 ; j < str_len ; ++j) {
            name[j] = tolower (gni_statistic_str[i][j]);
            desc[j] = ('_' == name[j]) ? ' ' : name[j];
        }

        name[str_len] = '\0';
        desc[str_len] = '\0';

        (void) mca_base_component_pvar_register (&mca_btl_ugni_component.super.btl_version, name, desc,
                                                 OPAL_INFO_LVL_4, MCA_BASE_PVAR_CLASS_COUNTER,
                                                 MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                                 MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                                 mca_btl_ugni_get_stat, NULL, mca_btl_ugni_notify_stat,
                                                 (void *) (intptr_t) i);
    }

    /* btl/ugni can only support only a fixed set of rcache components (these rcache components have compatible resource
     * structures) */
    rc = mca_base_var_enum_create ("btl_ugni_rcache", rcache_values, &new_enum);
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    /* NTH: there are known *serious* performance issues with udreg. if they are ever resolved it is the preferred rcache */
    mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_GRDMA;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "rcache", "registration cache to use (default: grdma)", MCA_BASE_VAR_TYPE_INT, new_enum,
                                           0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type);
    OBJ_RELEASE(new_enum);

    if (mca_btl_ugni_ugni_page_size) {
        rc = asprintf (&mpool_hints_tmp, "page_size=%lu", mca_btl_ugni_ugni_page_size);
        if (rc < 0) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        mca_btl_ugni_component.mpool_hints = mpool_hints_tmp;
    } else {
        mca_btl_ugni_component.mpool_hints = "page_size=2M";
    }

    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "mpool_hints", "hints to use when selecting a memory pool (default: "
                                           "\"page_size=2M\")", MCA_BASE_VAR_TYPE_STRING, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_hints);
    free (mpool_hints_tmp);

    /* ensure we loose send exclusivity to sm and vader if they are enabled */
    mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2;

    /* smsg threshold */
    mca_btl_ugni_module.super.btl_eager_limit               = 8 * 1024;
    mca_btl_ugni_module.super.btl_rndv_eager_limit          = 8 * 1024;
    mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size   = 4 * 1024 * 1024;
    mca_btl_ugni_module.super.btl_max_send_size             = 8 * 1024;
    mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024;

    mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024;

    /*
     * see def. of ALIGNMENT_MASK to figure this one out
     */
    /* both gemini and aries have a 4-byte alignment requirement on remote addresses */
    mca_btl_ugni_module.super.btl_get_alignment = 4;

    /* threshold for put */
    mca_btl_ugni_module.super.btl_min_rdma_pipeline_size    = 8 * 1024;

    mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
        MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_ATOMIC_OPS |
        MCA_BTL_FLAGS_ATOMIC_FOPS;
    mca_btl_ugni_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD |
        MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR |
        MCA_BTL_ATOMIC_SUPPORTS_CSWAP;

    if (GNI_DEVICE_ARIES == device_type) {
        /* aries supports additional atomic operations */
        mca_btl_ugni_module.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX |
            MCA_BTL_ATOMIC_SUPPORTS_LAND | MCA_BTL_ATOMIC_SUPPORTS_LOR | MCA_BTL_ATOMIC_SUPPORTS_LXOR |
            MCA_BTL_ATOMIC_SUPPORTS_32BIT | MCA_BTL_ATOMIC_SUPPORTS_FLOAT;
    }

    mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);

    mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
    mca_btl_ugni_module.super.btl_latency   = 2;     /* Microsecs */

    mca_btl_ugni_module.super.btl_get_local_registration_threshold = 0;
    mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_limit;

    /* Call the BTL based to register its MCA params */
    mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
                                &mca_btl_ugni_module.super);

    return OPAL_SUCCESS;
}
static int
btl_ugni_component_register(void)
{
    (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
                                                 "Gemini byte transport layer");

    mca_btl_ugni_component.ugni_free_list_num = 8;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_num);
    mca_btl_ugni_component.ugni_free_list_max = 16384;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_max);
    mca_btl_ugni_component.ugni_free_list_inc = 64;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_inc);

    mca_btl_ugni_component.ugni_eager_num = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                          "eager_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                          NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                          OPAL_INFO_LVL_5,
                                          MCA_BASE_VAR_SCOPE_LOCAL,
                                          &mca_btl_ugni_component.ugni_eager_num);
    mca_btl_ugni_component.ugni_eager_max = 128;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_max);
    mca_btl_ugni_component.ugni_eager_inc = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_inc);

    mca_btl_ugni_component.remote_cq_size = 40000;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "remote_cq_size", "Remote SMSG completion queue "
                                           "size (default 40000)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.remote_cq_size);
    mca_btl_ugni_component.local_cq_size = 8192;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "local_cq_size", "Local completion queue size "
                                           "(default 8192)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.local_cq_size);

    mca_btl_ugni_component.ugni_smsg_limit = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_limit", "Maximum size message that "
                                           "will be sent using the SMSG/MSGQ protocol "
                                           "(0 - autoselect(default), 16k max)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_smsg_limit);

    mca_btl_ugni_component.smsg_max_credits = 32;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_max_credits", "Maximum number of "
                                           "outstanding SMSG/MSGQ message (default 32)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_max_credits);

    mca_btl_ugni_component.ugni_fma_limit = 1024;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "fma_limit", "Maximum size message that "
                                           "will be sent using the FMA (Fast Memory "
                                           "Access) protocol (default 1024, 64k max)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_fma_limit);

    mca_btl_ugni_component.ugni_get_limit = 1 * 1024 * 1024;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "get_limit", "Maximum size message that "
                                           "will be sent using a get protocol "
                                           "(default 1M)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_get_limit);

    mca_btl_ugni_component.rdma_max_retries = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.rdma_max_retries);

    mca_btl_ugni_component.smsg_max_retries = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_max_retries);

    mca_btl_ugni_component.max_mem_reg = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "max_mem_reg", "Maximum number of "
                                           "memory registrations a process can "
                                           "hold (0 - autoselect, -1 - unlimited)"
                                           " (default 0)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.max_mem_reg);

    mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;

    /* smsg threshold */
    mca_btl_ugni_module.super.btl_eager_limit               = 8 * 1024;
    mca_btl_ugni_module.super.btl_rndv_eager_limit          = 8 * 1024;
    mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size   = 4 * 1024 * 1024;
    mca_btl_ugni_module.super.btl_max_send_size             = 8 * 1024;
    mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024;

    /* threshold for put */
    mca_btl_ugni_module.super.btl_min_rdma_pipeline_size    = 8 * 1024;

    mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
        MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;

    mca_btl_ugni_module.super.btl_seg_size = sizeof (mca_btl_ugni_segment_t);

    mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
    mca_btl_ugni_module.super.btl_latency   = 2;     /* Microsecs */

    /* Call the BTL based to register its MCA params */
    mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
                                &mca_btl_ugni_module.super);

    return OMPI_SUCCESS;
}
Ejemplo n.º 16
0
/*
 * Register and check all MCA parameters
 *
 * @return                 OMPI_SUCCESS or OMPI_ERR_BAD_PARAM
 */
int mca_btl_udapl_register_mca_params(void) 
{
    int rc, tmp_rc;

    rc = OMPI_SUCCESS;

    /* register uDAPL component parameters */
    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_num", 
        "Initial size of free lists (must be >= 1).", 
        8,
        &mca_btl_udapl_component.udapl_free_list_num,
        REGINT_GE_ONE), tmp_rc, rc);
    
    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_max",
        "Maximum size of free lists "
        "(-1 = infinite, otherwise must be >= 1).",
        -1,
        &mca_btl_udapl_component.udapl_free_list_max,
        REGINT_NEG_ONE_OK | REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_inc", 
        "Increment size of free lists (must be >= 1).",
        8,
        &mca_btl_udapl_component.udapl_free_list_inc,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("mpool",
        "Name of the memory pool to be used.",
        "grdma",
        &mca_btl_udapl_component.udapl_mpool_name,
        REGSTR_EMPTY_NOT_OK), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_modules",
        "Maximum number of supported HCAs.",
        8,
        &mca_btl_udapl_component.udapl_max_btls,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("num_recvs",
        "Total number of receive buffers to keep posted "
        "per endpoint (must be >= 1).",
        8,
        &mca_btl_udapl_component.udapl_num_recvs,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("num_sends",
        "Maximum number of sends to post on an endpoint "
        "(must be >= 1).",
        7,
        &mca_btl_udapl_component.udapl_num_sends,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("sr_win",
        "Window size at which point an explicit "
        "credit message will be generated (must be >= 1).",
        4,
        &mca_btl_udapl_component.udapl_sr_win,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("use_eager_rdma",
        "Use of RDMA for small messages : "
        "1 = default, use RDMA for small messages; "
        "0 = do not use RDMA for small messages. ",
        1,
        &mca_btl_udapl_component.udapl_use_eager_rdma,
        REGINT_GE_ZERO), tmp_rc, rc); 

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("eager_rdma_num",
        "Number of RDMA buffers to allocate "
        "for small messages (must be >= 1).",
        32,
        &mca_btl_udapl_component.udapl_eager_rdma_num,
        REGINT_GE_ONE), tmp_rc, rc); 
        
    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_eager_rdma_peers",
        "Maximum number of peers allowed to use "
        "RDMA for short messages (independently RDMA will "
        "still be used for large messages, (must be >= 0; "
        "if zero then RDMA will not be used for short messages).",
        16,
        &mca_btl_udapl_component.udapl_max_eager_rdma_peers,
        REGINT_GE_ZERO), tmp_rc, rc);
        
    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("eager_rdma_win",
        "Window size at which point an explicit "
        "credit message will be generated (must be >= 1).",
        28,
        &mca_btl_udapl_component.udapl_eager_rdma_win,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("timeout",
        "Connection timeout, in microseconds.",
        MCA_BTL_UDAPL_CONN_TIMEOUT_DEFAULT,
        &mca_btl_udapl_component.udapl_timeout,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("conn_priv_data",
        "Use connect private data to establish connections "
        "(not supported by all uDAPL implementations).",
        0,
        &mca_btl_udapl_component.udapl_conn_priv_data,
        REGINT_GE_ZERO), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_events",
        "The asynchronous event queue will only be "
        "checked after entering progress this number of times.",
        100000000,
        &mca_btl_udapl_component.udapl_async_events,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("buffer_alignment",
        "Preferred communication buffer alignment, "
        "in bytes (must be >= 1).",
        DAT_OPTIMAL_ALIGNMENT,
        &mca_btl_udapl_component.udapl_buffer_alignment,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("if_include",
        "Comma-delimited list of interfaces to be included "
        "(e.g. \"ibd0,ibd1 or OpenIB-cma,OpenIB-cma-1\"; empty value means "
        "to use all interfaces found). Mutually exclusive with "
        "btl_udapl_if_exclude.",
        NULL, &mca_btl_udapl_component.if_include,
        REGSTR_EMPTY_OK), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("if_exclude",
        "Comma-delimited list of interfaces to be excluded from use "
        "(e.g. \"ibd0,ibd1 or OpenIB-cma,OpenIB-cma-1\"; empty value means "
        "not to exclude any). Mutually exclusive with btl_udapl_if_include.",
        NULL, &mca_btl_udapl_component.if_exclude,
        REGSTR_EMPTY_OK), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("verbose",
        "Verbosity level of the uDAPL BTL (-1 thru 100)",
        VERBOSE_SHOW_HELP,
        &(mca_btl_udapl_component.udapl_verbosity),
        REGINT_NEG_ONE_OK), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("compare_subnet",
        "By default uDAPL BTL will compare subnets using netmask to "
        "determine if an interface is reachable. Setting this parameter to "
        "0 will essentially turn this comparison off and the uDAPL BTL will "
        "assume all uDAPL interfaces are reachable (0 or 1, default==1).",
        1,
        &(mca_btl_udapl_component.udapl_compare_subnet),
        REGINT_GE_ZERO), tmp_rc, rc);

    /* register uDAPL module parameters */
    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_evd_qlen",
        "The asynchronous event dispatcher queue length.",
        MCA_BTL_UDAPL_ASYNC_EVD_QLEN_DEFAULT,
        (int*)&mca_btl_udapl_module.udapl_async_evd_qlen,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("conn_evd_qlen",
        "The connection event dispatcher queue length is "
        "a function of the number of connections expected.",
        MCA_BTL_UDAPL_CONN_EVD_QLEN_DEFAULT,
        (int*)&mca_btl_udapl_module.udapl_conn_evd_qlen,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("dto_evd_qlen",
        "The data transfer operation event dispatcher queue length is "
        "a function of the number of connections as well as the "
        "maximum number of outstanding data transfer operations.",
        MCA_BTL_UDAPL_DTO_EVD_QLEN_DEFAULT,
        (int*)&mca_btl_udapl_module.udapl_dto_evd_qlen,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_request_dtos",
        "Maximum number of outstanding "
        "submitted sends and rdma operations per endpoint, (see Section "
        "6.6.6 of uDAPL Spec.).",
        MCA_BTL_UDAPL_MAX_REQUEST_DTOS_DEFAULT,
        (int*)&mca_btl_udapl_module.udapl_max_request_dtos,
        REGINT_GE_ONE), tmp_rc, rc);

    CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_recv_dtos",
        "Maximum number of outstanding "
        "submitted receive operations per endpoint, (see Section "
        "6.6.6 of uDAPL Spec.).",
        MCA_BTL_UDAPL_MAX_RECV_DTOS_DEFAULT,
        (int*)&mca_btl_udapl_module.udapl_max_recv_dtos,
        REGINT_GE_ONE), tmp_rc, rc);

    mca_btl_udapl_module.super.btl_exclusivity =
        MCA_BTL_EXCLUSIVITY_DEFAULT - 10;
    mca_btl_udapl_module.super.btl_eager_limit = 8*1024;
    mca_btl_udapl_module.super.btl_rndv_eager_limit = 8*1024;
    mca_btl_udapl_module.super.btl_max_send_size = 64*1024;
    mca_btl_udapl_module.super.btl_rdma_pipeline_send_length = 512*1024;
    mca_btl_udapl_module.super.btl_rdma_pipeline_frag_size = 128 * 1024;
    mca_btl_udapl_module.super.btl_min_rdma_pipeline_size = 0;
    mca_btl_udapl_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND;
    mca_btl_udapl_module.super.btl_bandwidth = 225;
    mca_btl_udapl_module.super.btl_latency = 0;

    mca_btl_base_param_register(&mca_btl_udapl_component.super.btl_version,
            &mca_btl_udapl_module.super);

    return rc;
}
Ejemplo n.º 17
0
static int
btl_ugni_component_register(void)
{
    mca_base_var_enum_t *new_enum;
    int rc;

    (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
                                                 "Gemini byte transport layer");

    mca_btl_ugni_component.ugni_free_list_num = 8;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_num);
    mca_btl_ugni_component.ugni_free_list_max = 16384;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_max);
    mca_btl_ugni_component.ugni_free_list_inc = 64;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_free_list_inc);

    mca_btl_ugni_component.ugni_eager_num = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_num", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_num);
    mca_btl_ugni_component.ugni_eager_max = 128;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_max", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_max);
    mca_btl_ugni_component.ugni_eager_inc = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "eager_inc", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_eager_inc);

    mca_btl_ugni_component.remote_cq_size = 40000;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "remote_cq_size", "Remote SMSG completion queue "
                                           "size (default 40000)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.remote_cq_size);
    mca_btl_ugni_component.local_cq_size = 8192;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "local_cq_size", "Local completion queue size "
                                           "(default 8192)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.local_cq_size);

    mca_btl_ugni_component.ugni_smsg_limit = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_limit", "Maximum size message that "
                                           "will be sent using the SMSG/MSGQ protocol "
                                           "(0 - autoselect(default), 16k max)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_smsg_limit);

    mca_btl_ugni_component.smsg_max_credits = 32;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_max_credits", "Maximum number of "
                                           "outstanding SMSG/MSGQ message (default 32)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_max_credits);

    mca_btl_ugni_component.ugni_fma_limit = 1024;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "fma_limit", "Maximum size message that "
                                           "will be sent using the FMA (Fast Memory "
                                           "Access) protocol (default 1024, 64k max)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_fma_limit);

    mca_btl_ugni_component.ugni_get_limit = 1 * 1024 * 1024;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "get_limit", "Maximum size message that "
                                           "will be sent using a get protocol "
                                           "(default 1M)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.ugni_get_limit);

    mca_btl_ugni_component.rdma_max_retries = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.rdma_max_retries);

    mca_btl_ugni_component.smsg_max_retries = 16;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_max_retries);

    mca_btl_ugni_component.max_mem_reg = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "max_mem_reg", "Maximum number of "
                                           "memory registrations a process can "
                                           "hold (0 - autoselect, -1 - unlimited)"
                                           " (default 0)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.max_mem_reg);

    mca_btl_ugni_component.mbox_increment = 0;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "mbox_inc", "Number of SMSG mailboxes to "
                                           "allocate in each block (0 - autoselect(default))",
                                           MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);

    mca_btl_ugni_component.smsg_page_size = 2 << 20;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "smsg_page_size", "Page size to use for SMSG "
                                           "mailbox allocation (default 2M)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_btl_ugni_component.smsg_page_size);

    /* btl/ugni can only support only a fixed set of mpools (these mpools have compatible resource
     * structures) */
    rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum);
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    mca_btl_ugni_component.mpool_type = MCA_BTL_UGNI_MPOOL_UDREG;
    (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
                                           "mpool", "mpool to use", MCA_BASE_VAR_TYPE_INT, new_enum,
                                           0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_type);
    OBJ_RELEASE(new_enum);

    mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;

    /* smsg threshold */
    mca_btl_ugni_module.super.btl_eager_limit               = 8 * 1024;
    mca_btl_ugni_module.super.btl_rndv_eager_limit          = 8 * 1024;
    mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size   = 4 * 1024 * 1024;
    mca_btl_ugni_module.super.btl_max_send_size             = 8 * 1024;
    mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024;

    /* threshold for put */
    mca_btl_ugni_module.super.btl_min_rdma_pipeline_size    = 8 * 1024;

    mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
        MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;

    mca_btl_ugni_module.super.btl_seg_size = sizeof (mca_btl_ugni_segment_t);

    mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
    mca_btl_ugni_module.super.btl_latency   = 2;     /* Microsecs */

    /* Call the BTL based to register its MCA params */
    mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
                                &mca_btl_ugni_module.super);

    return OMPI_SUCCESS;
}
Ejemplo n.º 18
0
int mca_btl_tcp_component_open(void)
{
    char* message;
#ifdef __WINDOWS__
    WSADATA win_sock_data;
    if( WSAStartup(MAKEWORD(2,2), &win_sock_data) != 0 ) {
        BTL_ERROR(("failed to initialise windows sockets:%d", WSAGetLastError()));
        return OMPI_ERROR;
    }
#endif

    /* initialize state */
    mca_btl_tcp_component.tcp_listen_sd = -1;
#if OPAL_WANT_IPV6
    mca_btl_tcp_component.tcp6_listen_sd = -1;
#endif
    mca_btl_tcp_component.tcp_num_btls=0;
    mca_btl_tcp_component.tcp_addr_count = 0;
    mca_btl_tcp_component.tcp_btls=NULL;
    
    /* initialize objects */ 
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_procs, opal_hash_table_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_events, opal_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_eager, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_max, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_user, ompi_free_list_t);
    opal_hash_table_init(&mca_btl_tcp_component.tcp_procs, 256);

    /* register TCP component parameters */
    mca_btl_tcp_component.tcp_num_links =
        mca_btl_tcp_param_register_int("links", NULL, 1);
    mca_btl_tcp_component.tcp_if_include =
        mca_btl_tcp_param_register_string("if_include", "Comma-delimited list of devices or CIDR notation of networks to use for MPI communication (e.g., \"eth0,eth1\" or \"192.168.0.0/16,10.1.4.0/24\").  Mutually exclusive with btl_tcp_if_exclude.", "");
    mca_btl_tcp_component.tcp_if_exclude =
        mca_btl_tcp_param_register_string("if_exclude", "Comma-delimited list of devices or CIDR notation of networks to NOT use for MPI communication -- all devices not matching these specifications will be used (e.g., \"eth0,eth1\" or \"192.168.0.0/16,10.1.4.0/24\").  Mutually exclusive with btl_tcp_if_include.", "lo,sppp");
    mca_btl_tcp_component.tcp_free_list_num =
        mca_btl_tcp_param_register_int ("free_list_num", NULL, 8);
    mca_btl_tcp_component.tcp_free_list_max =
        mca_btl_tcp_param_register_int ("free_list_max", NULL, -1);
    mca_btl_tcp_component.tcp_free_list_inc =
        mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32);
    mca_btl_tcp_component.tcp_sndbuf =
        mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024);
    mca_btl_tcp_component.tcp_rcvbuf =
        mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024);
    mca_btl_tcp_component.tcp_endpoint_cache =
        mca_btl_tcp_param_register_int ("endpoint_cache",
            "The size of the internal cache for each TCP connection. This cache is"
            " used to reduce the number of syscalls, by replacing them with memcpy."
            " Every read will read the expected data plus the amount of the"
            " endpoint_cache", 30*1024);
    mca_btl_tcp_component.tcp_use_nodelay =
        !mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0);
    mca_btl_tcp_component.tcp_port_min =
        mca_btl_tcp_param_register_int( "port_min_v4",
            "The minimum port where the TCP BTL will try to bind (default 1024)", 1024 );
    if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) {
        orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
                       true, "v4", orte_process_info.nodename,
                       mca_btl_tcp_component.tcp_port_min );
        mca_btl_tcp_component.tcp_port_min = 1024;
    }
    asprintf( &message, 
              "The number of ports where the TCP BTL will try to bind (default %d)."
              " This parameter together with the port min, define a range of ports"
              " where Open MPI will open sockets.",
              (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1 );
    mca_btl_tcp_component.tcp_port_range =
        mca_btl_tcp_param_register_int( "port_range_v4", message,
                                        (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1);
    free(message);
#if OPAL_WANT_IPV6
    mca_btl_tcp_component.tcp6_port_min =
        mca_btl_tcp_param_register_int( "port_min_v6",
            "The minimum port where the TCP BTL will try to bind (default 1024)", 1024 );
    if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) {
        orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
                       true, "v6", orte_process_info.nodename,
                       mca_btl_tcp_component.tcp6_port_min );
        mca_btl_tcp_component.tcp6_port_min = 1024;
    }
    asprintf( &message, 
              "The number of ports where the TCP BTL will try to bind (default %d)."
              " This parameter together with the port min, define a range of ports"
              " where Open MPI will open sockets.",
              (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1 );
    mca_btl_tcp_component.tcp6_port_range =
        mca_btl_tcp_param_register_int( "port_range_v6", message,
                                        (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1);
    free(message);
#endif
    mca_btl_tcp_module.super.btl_exclusivity =  MCA_BTL_EXCLUSIVITY_LOW + 100;
    mca_btl_tcp_module.super.btl_eager_limit = 64*1024;
    mca_btl_tcp_module.super.btl_rndv_eager_limit = 64*1024;
    mca_btl_tcp_module.super.btl_max_send_size = 128*1024;
    mca_btl_tcp_module.super.btl_rdma_pipeline_send_length = 128*1024;
    mca_btl_tcp_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
    mca_btl_tcp_module.super.btl_min_rdma_pipeline_size = 0;
    mca_btl_tcp_module.super.btl_flags = MCA_BTL_FLAGS_PUT |
                                       MCA_BTL_FLAGS_SEND_INPLACE |
                                       MCA_BTL_FLAGS_NEED_CSUM |
                                       MCA_BTL_FLAGS_NEED_ACK |
                                       MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
    mca_btl_tcp_module.super.btl_bandwidth = 100;
    mca_btl_tcp_module.super.btl_latency = 100;
    mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version,
                                &mca_btl_tcp_module.super);

    mca_btl_tcp_component.tcp_disable_family =
        mca_btl_tcp_param_register_int ("disable_family", NULL, 0);

    /* Register a list of interfaces to use in sequence */
    message = mca_btl_tcp_param_register_string("if_seq", 
                                                "If specified, a comma-delimited list of TCP interfaces.  Interfaces will be assigned, one to each MPI process, in a round-robin fashion on each server.  For example, if the list is \"eth0,eth1\" and four MPI processes are run on a single server, then local ranks 0 and 2 will use eth0 and local ranks 1 and 3 will use eth1.", NULL);
    mca_btl_tcp_component.tcp_if_seq = NULL;
    if (NULL != message && '\0' != *message) {
        char **argv = opal_argv_split(message, ',');

        if (NULL != argv && '\0' != *(argv[0])) {
            int if_index, rc, count;
            orte_node_rank_t node_rank;
            char name[256];

            node_rank = orte_ess.get_node_rank(ORTE_PROC_MY_NAME);

            /* Now that we've got that local rank, take the
               corresponding entry from the tcp_if_seq list (wrapping
               if necessary) */
            count = opal_argv_count(argv);
            mca_btl_tcp_component.tcp_if_seq = 
                strdup(argv[node_rank % count]);
            opal_argv_free(argv);

            /* Double check that the selected interface actually exists */
            for (if_index = opal_ifbegin(); if_index >= 0; 
                 if_index = opal_ifnext(if_index)){
                if (OPAL_SUCCESS != 
                    (rc = opal_ifindextoname(if_index, name, sizeof(name)))) {
                    return rc;
                }
                if (0 == strcmp(name, mca_btl_tcp_component.tcp_if_seq)) {
                    break;
                }
            }
            if (if_index < 0) {
                orte_show_help("help-mpi-btl-tcp.txt", 
                               "invalid if_inexclude",
                               true, "if_seq",
                               orte_process_info.nodename,
                               mca_btl_tcp_component.tcp_if_seq,
                               "Interface does not exist");
                return OMPI_ERR_BAD_PARAM;
            }
            BTL_VERBOSE(("Node rank %d using TCP interface %s",
                         node_rank, mca_btl_tcp_component.tcp_if_seq));
        }
    }

    return OMPI_SUCCESS;
}