int mca_btl_self_component_open(void) { /* register SELF component parameters */ mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_num", "Number of fragments by default", false, false, 0, &mca_btl_self_component.free_list_num ); mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_max", "Maximum number of fragments", false, false, -1, &mca_btl_self_component.free_list_max ); mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_inc", "Increment by this number of fragments", false, false, 32, &mca_btl_self_component.free_list_inc ); mca_btl_self.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; mca_btl_self.btl_eager_limit = 128 * 1024; mca_btl_self.btl_rndv_eager_limit = 128 * 1024; mca_btl_self.btl_max_send_size = 256 * 1024; mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX; mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_self.btl_min_rdma_pipeline_size = 0; mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_self.btl_bandwidth = 100; mca_btl_self.btl_latency = 0; mca_btl_base_param_register(&mca_btl_self_component.super.btl_version, &mca_btl_self); /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_eager, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_send, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_rdma, ompi_free_list_t); return OMPI_SUCCESS; }
static int mca_btl_portals4_component_open(void) { OPAL_OUTPUT_VERBOSE((1, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_open\n")); /* * fill default module state */ mca_btl_portals4_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW + 100; mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024; mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024; mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024; mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024; mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0; mca_btl_portals4_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_RDMA_MATCHED; mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX; mca_btl_portals4_module.super.btl_put_limit = 0; /* not implemented */ mca_btl_portals4_module.super.btl_get_alignment = 0; mca_btl_portals4_module.super.btl_put_alignment = 0; mca_btl_portals4_module.super.btl_get_local_registration_threshold = 0; mca_btl_portals4_module.super.btl_put_local_registration_threshold = 0; mca_btl_portals4_module.super.btl_bandwidth = 1000; mca_btl_portals4_module.super.btl_latency = 0; mca_btl_base_param_register(&mca_btl_portals4_component.super.btl_version, &mca_btl_portals4_module.super); mca_btl_portals4_module.portals_num_procs = 0; mca_btl_portals4_module.recv_eq_h = PTL_EQ_NONE; #if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE mca_btl_portals4_module.send_md_hs = NULL; #else mca_btl_portals4_module.send_md_h = PTL_INVALID_HANDLE; #endif mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE; mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE; mca_btl_portals4_module.long_overflow_me_h = PTL_INVALID_HANDLE; mca_btl_portals4_module.portals_outstanding_ops = 0; mca_btl_portals4_module.recv_idx = (ptl_pt_index_t) ~0UL; return OPAL_SUCCESS; }
static int mca_btl_sctp_component_register(void) { /* register SCTP component parameters */ /* num links */ mca_btl_sctp_component.sctp_if_include = mca_btl_sctp_param_register_string("if_include", ""); mca_btl_sctp_component.sctp_if_exclude = mca_btl_sctp_param_register_string("if_exclude", "lo"); mca_btl_sctp_component.sctp_free_list_num = mca_btl_sctp_param_register_int ("free_list_num", 8); mca_btl_sctp_component.sctp_free_list_max = mca_btl_sctp_param_register_int ("free_list_max", -1); mca_btl_sctp_component.sctp_free_list_inc = mca_btl_sctp_param_register_int ("free_list_inc", 32); mca_btl_sctp_component.sctp_sndbuf = mca_btl_sctp_param_register_int ("sndbuf", 128*1024); mca_btl_sctp_component.sctp_rcvbuf = mca_btl_sctp_param_register_int ("rcvbuf", 128*1024); mca_btl_sctp_component.sctp_endpoint_cache = mca_btl_sctp_param_register_int ("endpoint_cache", 30*1024); mca_btl_sctp_component.sctp_use_nodelay = !mca_btl_sctp_param_register_int ("use_nagle", 0); /* port_min */ /* port_range */ /* use a single one-to-many socket by default except in Solaris (see * the configure.m4 file) */ mca_base_param_reg_int(&mca_btl_sctp_component.super.btl_version, "if_11", "If 0, have one SCTP BTL module and let SCTP do multilink scheduling. If non-zero, have an SCTP BTL module per link and let the PML do the scheduling.", false, false, OMPI_MCA_BTL_SCTP_USE_ONE_TO_ONE_SOCKET, &mca_btl_sctp_component.sctp_if_11); /* have lower exclusivity than tcp */ mca_btl_sctp_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW; mca_btl_sctp_module.super.btl_eager_limit = 64*1024; mca_btl_sctp_module.super.btl_rndv_eager_limit = 64*1024; mca_btl_sctp_module.super.btl_max_send_size = 128*1024; mca_btl_sctp_module.super.btl_rdma_pipeline_send_length = 128*1024; mca_btl_sctp_module.super.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_sctp_module.super.btl_min_rdma_pipeline_size = 0; mca_btl_sctp_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; mca_btl_sctp_module.super.btl_bandwidth = 100; mca_btl_sctp_module.super.btl_latency = 100; mca_btl_base_param_register(&mca_btl_sctp_component.super.btl_version, &mca_btl_sctp_module.super); return OMPI_SUCCESS; }
static int mca_btl_vader_component_register (void) { /* register VADER component parameters */ mca_btl_vader_component.vader_free_list_num = mca_btl_vader_param_register_int("free_list_num", 8); mca_btl_vader_component.vader_free_list_max = mca_btl_vader_param_register_int("free_list_max", -1); mca_btl_vader_component.vader_free_list_inc = mca_btl_vader_param_register_int("free_list_inc", 64); mca_btl_vader_component.vader_mpool_name = mca_btl_vader_param_register_string("mpool", "sm"); mca_btl_vader_memcpy_limit = mca_btl_vader_param_register_int("memcpy_limit", mca_btl_vader_memcpy_limit); mca_btl_vader_log_align = mca_btl_vader_param_register_int("log_align", mca_btl_vader_log_align); /* limit segment alignment to be between 4k and 16M */ if (mca_btl_vader_log_align < 12) { mca_btl_vader_log_align = 12; } else if (mca_btl_vader_log_align > 25) { mca_btl_vader_log_align = 25; } mca_btl_vader_max_inline_send = mca_btl_vader_param_register_int("max_inline_send", mca_btl_vader_max_inline_send); mca_btl_vader.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; mca_btl_vader.super.btl_eager_limit = 64 * 1024; mca_btl_vader.super.btl_rndv_eager_limit = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_max_send_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_min_rdma_pipeline_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_vader.super.btl_seg_size = sizeof (mca_btl_base_segment_t); mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */ mca_btl_vader.super.btl_latency = 1; /* Microsecs */ /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_vader_component.super.btl_version, &mca_btl_vader.super); return OMPI_SUCCESS; }
static int mca_btl_template_component_register(void) { /* initialize state */ mca_btl_template_component.template_num_btls=0; mca_btl_template_component.template_btls=NULL; /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_template_component.template_procs, opal_list_t); /* register TEMPLATE component parameters */ mca_btl_template_component.template_free_list_num = 8; (void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version, "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_template_component.template_free_list_num); (void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version, "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_template_component.template_free_list_max); (void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version, "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_template_component.template_free_list_inc); mca_btl_template_component.template_mpool_name = "grdma"; (void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version, "mpool", NULL, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_template_component.template_mpool_name); mca_btl_template_module.super.btl_exclusivity = 0; mca_btl_template_module.super.btl_eager_limit = 64*1024; mca_btl_template_module.super.btl_rndv_eager_limit = 64*1024; mca_btl_template_module.super.btl_max_send_size = 128*1024; mca_btl_template_module.super.btl_min_rdma_pipeline_size = 1024*1024; mca_btl_template_module.super.btl_rdma_pipeline_frag_size = 1024*1024; mca_btl_template_module.super.btl_rdma_pipeline_send_length = 1024*1024; mca_btl_template_module.super.btl_flags = MCA_BTL_FLAGS_PUT; return mca_btl_base_param_register(&mca_btl_template_component.super.btl_version, &mca_btl_template_module.super); }
static int mca_btl_self_component_register(void) { mca_base_var_group_component_register(&mca_btl_self_component.super.btl_version, "BTL for self communication"); /* register SELF component parameters */ mca_btl_self_component.free_list_num = 0; (void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_num", "Number of fragments by default", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_self_component.free_list_num); mca_btl_self_component.free_list_max = -1; (void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_max", "Maximum number of fragments", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_self_component.free_list_max); mca_btl_self_component.free_list_inc = 32; (void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_inc", "Increment by this number of fragments", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_self_component.free_list_inc); mca_btl_self.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; mca_btl_self.btl_eager_limit = 128 * 1024; mca_btl_self.btl_rndv_eager_limit = 128 * 1024; mca_btl_self.btl_max_send_size = 256 * 1024; mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX; mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_self.btl_min_rdma_pipeline_size = 0; mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_self.btl_seg_size = sizeof (mca_btl_base_segment_t); mca_btl_self.btl_bandwidth = 100; mca_btl_self.btl_latency = 0; mca_btl_base_param_register(&mca_btl_self_component.super.btl_version, &mca_btl_self); return OMPI_SUCCESS; }
int mca_btl_elan_component_open(void) { /* initialize state */ mca_btl_elan_component.elan_num_btls = 0; mca_btl_elan_component.elan_btls = NULL; mca_btl_elan_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT; mca_btl_elan_module.super.btl_eager_limit = 32*1024; mca_btl_elan_module.super.btl_rndv_eager_limit = mca_btl_elan_module.super.btl_eager_limit; mca_btl_elan_module.super.btl_max_send_size = 64*1024; /*64*1024;*/ mca_btl_elan_module.super.btl_rdma_pipeline_send_length = 512 * 1024; mca_btl_elan_module.super.btl_rdma_pipeline_frag_size = 128 * 1024; mca_btl_elan_module.super.btl_min_rdma_pipeline_size = 128 * 1024; mca_btl_elan_module.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND; mca_btl_elan_module.super.btl_bandwidth = 1959; mca_btl_elan_module.super.btl_latency = 4; mca_btl_base_param_register(&mca_btl_elan_component.super.btl_version, &mca_btl_elan_module.super); mca_base_param_reg_string( (mca_base_component_t*)&mca_btl_elan_component, "elanidmap", "System-wide configuration file for the Quadrics network (elanidmap)", false, false, "/etc/elanidmap", &mca_btl_elan_component.elanidmap_file ); mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_elan_component, "max_posted_recv", "Number of received posted in advance. Increasing this number for" " communication bound application can lead to visible improvement" " in performances", false, false, 128, &mca_btl_elan_component.elan_max_posted_recv ); /* register Elan4 component parameters */ mca_btl_elan_component.elan_free_list_num = mca_btl_elan_param_register_int( "free_list_num", 8 ); mca_btl_elan_component.elan_free_list_max = mca_btl_elan_param_register_int( "free_list_max", (mca_btl_elan_component.elan_free_list_num + mca_btl_elan_component.elan_max_posted_recv) ); mca_btl_elan_component.elan_free_list_inc = mca_btl_elan_param_register_int( "free_list_inc", 32 ); return OMPI_SUCCESS; }
int mca_btl_tcp_component_open(void) { char* message; #ifdef __WINDOWS__ WSADATA win_sock_data; if( WSAStartup(MAKEWORD(2,2), &win_sock_data) != 0 ) { BTL_ERROR(("failed to initialise windows sockets:%d", WSAGetLastError())); return OMPI_ERROR; } #endif /* initialize state */ mca_btl_tcp_component.tcp_listen_sd = -1; #if OPAL_WANT_IPV6 mca_btl_tcp_component.tcp6_listen_sd = -1; #endif mca_btl_tcp_component.tcp_num_btls=0; mca_btl_tcp_component.tcp_addr_count = 0; mca_btl_tcp_component.tcp_btls=NULL; /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_procs, opal_hash_table_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_events, opal_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_eager, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_max, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_user, ompi_free_list_t); opal_hash_table_init(&mca_btl_tcp_component.tcp_procs, 256); /* register TCP component parameters */ mca_btl_tcp_component.tcp_num_links = mca_btl_tcp_param_register_int("links", NULL, 1); mca_btl_tcp_component.tcp_if_include = mca_btl_tcp_param_register_string("if_include", NULL, ""); mca_btl_tcp_component.tcp_if_exclude = mca_btl_tcp_param_register_string("if_exclude", NULL, "lo"); mca_btl_tcp_component.tcp_free_list_num = mca_btl_tcp_param_register_int ("free_list_num", NULL, 8); mca_btl_tcp_component.tcp_free_list_max = mca_btl_tcp_param_register_int ("free_list_max", NULL, -1); mca_btl_tcp_component.tcp_free_list_inc = mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32); mca_btl_tcp_component.tcp_sndbuf = mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024); mca_btl_tcp_component.tcp_rcvbuf = mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024); mca_btl_tcp_component.tcp_endpoint_cache = mca_btl_tcp_param_register_int ("endpoint_cache", "The size of the internal cache for each TCP connection. This cache is" " used to reduce the number of syscalls, by replacing them with memcpy." " Every read will read the expected data plus the amount of the" " endpoint_cache", 30*1024); mca_btl_tcp_component.tcp_use_nodelay = !mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0); mca_btl_tcp_component.tcp_port_min = mca_btl_tcp_param_register_int( "port_min_v4", "The minimum port where the TCP BTL will try to bind (default 1024)", 1024 ); if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) { orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port", true, "v4", orte_process_info.nodename, mca_btl_tcp_component.tcp_port_min ); mca_btl_tcp_component.tcp_port_min = 1024; } asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1 ); mca_btl_tcp_component.tcp_port_range = mca_btl_tcp_param_register_int( "port_range_v4", message, (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1); free(message); #if OPAL_WANT_IPV6 mca_btl_tcp_component.tcp6_port_min = mca_btl_tcp_param_register_int( "port_min_v6", "The minimum port where the TCP BTL will try to bind (default 1024)", 1024 ); if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) { orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port", true, "v6", orte_process_info.nodename, mca_btl_tcp_component.tcp6_port_min ); mca_btl_tcp_component.tcp6_port_min = 1024; } asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1 ); mca_btl_tcp_component.tcp6_port_range = mca_btl_tcp_param_register_int( "port_range_v6", message, (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1); free(message); #endif mca_btl_tcp_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW + 100; mca_btl_tcp_module.super.btl_eager_limit = 64*1024; mca_btl_tcp_module.super.btl_rndv_eager_limit = 64*1024; mca_btl_tcp_module.super.btl_max_send_size = 128*1024; mca_btl_tcp_module.super.btl_rdma_pipeline_send_length = 128*1024; mca_btl_tcp_module.super.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_tcp_module.super.btl_min_rdma_pipeline_size = 0; mca_btl_tcp_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; mca_btl_tcp_module.super.btl_bandwidth = 100; mca_btl_tcp_module.super.btl_latency = 100; mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version, &mca_btl_tcp_module.super); mca_btl_tcp_component.tcp_disable_family = mca_btl_tcp_param_register_int ("disable_family", NULL, 0); return OMPI_SUCCESS; }
static int btl_scif_component_register(void) { (void) mca_base_var_group_component_register(&mca_btl_scif_component.super.btl_version, "SCIF byte transport layer"); mca_btl_scif_component.scif_free_list_num = 8; (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, "free_list_num", "Initial fragment free list size", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_scif_component.scif_free_list_num); mca_btl_scif_component.scif_free_list_max = 16384; (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, "free_list_max", "Maximum fragment free list size", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_scif_component.scif_free_list_max); mca_btl_scif_component.scif_free_list_inc = 64; (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, "free_list_inc", "Fragment free list size increment", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_scif_component.scif_free_list_inc); mca_btl_scif_component.segment_size = 8 * 1024; (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, "segment_size", "Size of memory segment to " "allocate for each remote process (default: " "8k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_scif_component.segment_size); mca_btl_scif_component.rma_use_cpu = false; (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, "rma_use_cpu", "Use CPU instead of DMA " "for RMA copies (default: false)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_scif_component.rma_use_cpu); mca_btl_scif_component.rma_sync = true; (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, "rma_sync", "Use synchronous RMA instead of " "an RMA fence (default: true)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_scif_component.rma_sync); #if defined(SCIF_TIMING) mca_btl_scif_component.aquire_buffer_time = 0.0; (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, "aquire_buffer_time", "Aggregate time spent " "aquiring send buffers", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_scif_component.aquire_buffer_time); mca_btl_scif_component.send_time = 0.0; (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, "send_time", "Aggregate time spent writing to " "send buffers", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_scif_component.send_time); mca_btl_scif_component.sendi_time = 0.0; (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, "sendi_time", "Aggregate time spent writing to " "send buffers in sendi", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_scif_component.sendi_time); mca_btl_scif_component.get_time = 0.0; (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, "get_time", "Aggregate time spent in DMA read (scif_readfrom)", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_scif_component.get_time); mca_btl_scif_component.get_count = 0; (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, "get_count", "Number of times btl_scif_get was called", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER, MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_scif_component.get_count); mca_btl_scif_component.put_time = 0.0; (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, "put_time", "Aggregate time spent in DMA write (scif_writeto)", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_scif_component.put_time); mca_btl_scif_component.put_count = 0; (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, "put_count", "Number of times btl_scif_put was called", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER, MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_scif_component.put_count); #endif mca_btl_scif_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; mca_btl_scif_module.super.btl_eager_limit = 1 * 1024; mca_btl_scif_module.super.btl_rndv_eager_limit = 1 * 1024; mca_btl_scif_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024; mca_btl_scif_module.super.btl_max_send_size = 1 * 1024; mca_btl_scif_module.super.btl_rdma_pipeline_send_length = 1 * 1024; /* threshold for put */ mca_btl_scif_module.super.btl_min_rdma_pipeline_size = 1 * 1024; mca_btl_scif_module.super.btl_flags = MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_scif_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_scif_module.super.btl_bandwidth = 50000; /* Mbs */ mca_btl_scif_module.super.btl_latency = 2; /* Microsecs */ /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_scif_component.super.btl_version, &mca_btl_scif_module.super); return OPAL_SUCCESS; }
static int mca_btl_tcp_component_register(void) { char* message; /* register TCP component parameters */ mca_btl_tcp_param_register_uint("links", NULL, 1, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_num_links); mca_btl_tcp_param_register_string("if_include", "Comma-delimited list of devices and/or CIDR notation of networks to use for MPI communication (e.g., \"eth0,192.168.0.0/16\"). Mutually exclusive with btl_tcp_if_exclude.", "", OPAL_INFO_LVL_1, &mca_btl_tcp_component.tcp_if_include); mca_btl_tcp_param_register_string("if_exclude", "Comma-delimited list of devices and/or CIDR notation of networks to NOT use for MPI communication -- all devices not matching these specifications will be used (e.g., \"eth0,192.168.0.0/16\"). If set to a non-default value, it is mutually exclusive with btl_tcp_if_include.", "127.0.0.1/8,sppp", OPAL_INFO_LVL_1, &mca_btl_tcp_component.tcp_if_exclude); mca_btl_tcp_param_register_int ("free_list_num", NULL, 8, OPAL_INFO_LVL_5, &mca_btl_tcp_component.tcp_free_list_num); mca_btl_tcp_param_register_int ("free_list_max", NULL, -1, OPAL_INFO_LVL_5, &mca_btl_tcp_component.tcp_free_list_max); mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32, OPAL_INFO_LVL_5, &mca_btl_tcp_component.tcp_free_list_inc); mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_sndbuf); mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_rcvbuf); mca_btl_tcp_param_register_int ("endpoint_cache", "The size of the internal cache for each TCP connection. This cache is" " used to reduce the number of syscalls, by replacing them with memcpy." " Every read will read the expected data plus the amount of the" " endpoint_cache", 30*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_endpoint_cache); mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_not_use_nodelay); mca_btl_tcp_param_register_int( "port_min_v4", "The minimum port where the TCP BTL will try to bind (default 1024)", 1024, OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp_port_min); asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1 ); mca_btl_tcp_param_register_int( "port_range_v4", message, (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1, OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp_port_range); free(message); #if OPAL_WANT_IPV6 mca_btl_tcp_param_register_int( "port_min_v6", "The minimum port where the TCP BTL will try to bind (default 1024)", 1024, OPAL_INFO_LVL_2, & mca_btl_tcp_component.tcp6_port_min ); asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1 ); mca_btl_tcp_param_register_int( "port_range_v6", message, (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1, OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp6_port_range ); free(message); #endif mca_btl_tcp_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW + 100; mca_btl_tcp_module.super.btl_eager_limit = 64*1024; mca_btl_tcp_module.super.btl_rndv_eager_limit = 64*1024; mca_btl_tcp_module.super.btl_max_send_size = 128*1024; mca_btl_tcp_module.super.btl_rdma_pipeline_send_length = 128*1024; mca_btl_tcp_module.super.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_tcp_module.super.btl_min_rdma_pipeline_size = 0; mca_btl_tcp_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; mca_btl_tcp_module.super.btl_seg_size = sizeof (mca_btl_base_segment_t); mca_btl_tcp_module.super.btl_bandwidth = 100; mca_btl_tcp_module.super.btl_latency = 100; mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version, &mca_btl_tcp_module.super); mca_btl_tcp_param_register_int ("disable_family", NULL, 0, OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp_disable_family); /* Register a list of interfaces to use in sequence */ mca_btl_tcp_param_register_string("if_seq", "If specified, a comma-delimited list of TCP interfaces. Interfaces will be assigned, one to each MPI process, in a round-robin fashion on each server. For example, if the list is \"eth0,eth1\" and four MPI processes are run on a single server, then local ranks 0 and 2 will use eth0 and local ranks 1 and 3 will use eth1.", NULL, OPAL_INFO_LVL_9, &mca_btl_tcp_if_seq_string); mca_btl_tcp_component.tcp_if_seq = NULL; if (NULL != mca_btl_tcp_if_seq_string && '\0' != *mca_btl_tcp_if_seq_string) { char **argv = opal_argv_split(mca_btl_tcp_if_seq_string, ','); if (NULL != argv && '\0' != *(argv[0])) { int if_index, rc, count; ompi_node_rank_t node_rank; char name[256]; node_rank = ompi_process_info.my_node_rank; /* Now that we've got that local rank, take the corresponding entry from the tcp_if_seq list (wrapping if necessary) */ count = opal_argv_count(argv); mca_btl_tcp_component.tcp_if_seq = strdup(argv[node_rank % count]); opal_argv_free(argv); /* Double check that the selected interface actually exists */ for (if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)){ if (OPAL_SUCCESS != (rc = opal_ifindextoname(if_index, name, sizeof(name)))) { return rc; } if (0 == strcmp(name, mca_btl_tcp_component.tcp_if_seq)) { break; } } if (if_index < 0) { opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude", true, "if_seq", ompi_process_info.nodename, mca_btl_tcp_component.tcp_if_seq, "Interface does not exist"); free(mca_btl_tcp_component.tcp_if_seq); mca_btl_tcp_component.tcp_if_seq = NULL; } else { BTL_VERBOSE(("Node rank %d using TCP interface %s", node_rank, mca_btl_tcp_component.tcp_if_seq)); } } } return mca_btl_tcp_component_verify(); }
static int btl_ugni_component_register(void) { mca_base_var_enum_t *new_enum; gni_nic_device_t device_type; int rc; (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version, "uGNI byte transport layer"); mca_btl_ugni_component.ugni_free_list_num = 8; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_num); mca_btl_ugni_component.ugni_free_list_max = 4096; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_max); mca_btl_ugni_component.ugni_free_list_inc = 64; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_inc); mca_btl_ugni_component.ugni_eager_num = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_num); mca_btl_ugni_component.ugni_eager_max = 128; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_max); mca_btl_ugni_component.ugni_eager_inc = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_inc); mca_btl_ugni_component.remote_cq_size = 40000; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "remote_cq_size", "Remote SMSG completion queue " "size (default 40000)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.remote_cq_size); mca_btl_ugni_component.local_cq_size = 8192; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "local_cq_size", "Local completion queue size " "(default 8192)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.local_cq_size); mca_btl_ugni_component.ugni_smsg_limit = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_limit", "Maximum size message that " "will be sent using the SMSG/MSGQ protocol " "(0 - autoselect(default), 16k max)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_smsg_limit); mca_btl_ugni_component.smsg_max_credits = 32; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_max_credits", "Maximum number of " "outstanding SMSG/MSGQ message (default 32)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_max_credits); mca_btl_ugni_component.ugni_fma_limit = 1024; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "fma_limit", "Maximum size message that " "will be sent using the FMA (Fast Memory " "Access) protocol (default 1024, 64k max)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_fma_limit); mca_btl_ugni_component.rdma_max_retries = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rdma_max_retries); mca_btl_ugni_component.smsg_max_retries = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_max_retries); mca_btl_ugni_component.max_mem_reg = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "max_mem_reg", "Maximum number of " "memory registrations a process can " "hold (0 - autoselect, -1 - unlimited)" " (default 0)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.max_mem_reg); mca_btl_ugni_component.mbox_increment = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "mbox_inc", "Number of SMSG mailboxes to " "allocate in each block (0 - autoselect(default))", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment); mca_btl_ugni_component.smsg_page_size = 2 << 20; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_page_size", "Page size to use for SMSG " "mailbox allocation (default 2M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_page_size); mca_btl_ugni_component.progress_thread_requested = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "request_progress_thread", "Enable to request ugni btl progress thread - requires MPI_THREAD_MULTIPLE support", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.progress_thread_requested); /* performance variables */ mca_btl_ugni_progress_thread_wakeups = 0; (void) mca_base_component_pvar_register(&mca_btl_ugni_component.super.btl_version, "progress_thread_wakeups", "Number of times the progress thread " "has been woken", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER, MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_ugni_progress_thread_wakeups); /* btl/ugni can only support only a fixed set of mpools (these mpools have compatible resource * structures) */ rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum); if (OPAL_SUCCESS != rc) { return rc; } mca_btl_ugni_component.mpool_type = MCA_BTL_UGNI_MPOOL_UDREG; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "mpool", "mpool to use", MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_type); OBJ_RELEASE(new_enum); /* ensure we loose send exclusivity to sm and vader if they are enabled */ mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2; /* smsg threshold */ mca_btl_ugni_module.super.btl_eager_limit = 8 * 1024; mca_btl_ugni_module.super.btl_rndv_eager_limit = 8 * 1024; mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024; mca_btl_ugni_module.super.btl_max_send_size = 8 * 1024; mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024; mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024; /* determine if there are get alignment restrictions */ GNI_GetDeviceType (&device_type); /* * see def. of ALIGNMENT_MASK to figure this one out */ /* both gemini and aries have a 4-byte alignment requirement on remote addresses */ mca_btl_ugni_module.super.btl_get_alignment = 4; /* threshold for put */ mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024; mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_ATOMIC_OPS | MCA_BTL_FLAGS_ATOMIC_FOPS; mca_btl_ugni_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | MCA_BTL_ATOMIC_SUPPORTS_CSWAP; mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */ mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */ mca_btl_ugni_module.super.btl_get_local_registration_threshold = 0; mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_limit; /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version, &mca_btl_ugni_module.super); return OPAL_SUCCESS; }
static int mca_btl_vader_component_register (void) { (void) mca_base_var_group_component_register(&mca_btl_vader_component.super.btl_version, "XPMEM shared memory byte transport later"); /* register VADER component variables */ mca_btl_vader_component.vader_free_list_num = 8; (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "free_list_num", "Initial number of fragments " "to allocate for shared memory communication.", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.vader_free_list_num); mca_btl_vader_component.vader_free_list_max = 16384; (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "free_list_max", "Maximum number of fragments " "to allocate for shared memory communication.", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.vader_free_list_max); mca_btl_vader_component.vader_free_list_inc = 64; (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "free_list_inc", "Number of fragments to create " "on each allocation.", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.vader_free_list_inc); mca_btl_vader_component.memcpy_limit = 524288; (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "memcpy_limit", "Message size to switch from using " "memove to memcpy. The relative speed of these two " "routines can vary by size.", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.memcpy_limit); mca_btl_vader_component.log_attach_align = 21; (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "log_align", "Log base 2 of the alignment to use for xpmem " "segments (default: 21, minimum: 12, maximum: 25)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.log_attach_align); mca_btl_vader_component.segment_size = 1 << 24; (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "segment_size", "Maximum size of all shared " "memory buffers (default: 16M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.segment_size); mca_btl_vader_component.max_inline_send = 256; (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "max_inline_send", "Maximum size to transfer " "using copy-in copy-out semantics", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.max_inline_send); mca_btl_vader.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; mca_btl_vader.super.btl_eager_limit = 32 * 1024; mca_btl_vader.super.btl_rndv_eager_limit = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_max_send_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_min_rdma_pipeline_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_vader.super.btl_seg_size = sizeof (mca_btl_base_segment_t); mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */ mca_btl_vader.super.btl_latency = 1; /* Microsecs */ /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_vader_component.super.btl_version, &mca_btl_vader.super); return OMPI_SUCCESS; }
/* * Register and check all MCA parameters */ int btl_openib_register_mca_params(void) { char default_qps[100]; uint32_t mid_qp_size; int i; char *msg, *str, *pkey; int ival, ival2, ret, tmp; ret = OMPI_SUCCESS; #define CHECK(expr) do {\ tmp = (expr); \ if (OMPI_SUCCESS != tmp) ret = tmp; \ } while (0) /* register openib component parameters */ CHECK(reg_int("verbose", NULL, "Output some verbose OpenIB BTL information " "(0 = no output, nonzero = output)", 0, &ival, 0)); mca_btl_openib_component.verbose = (0 != ival); CHECK(reg_int("warn_no_device_params_found", "warn_no_hca_params_found", "Warn when no device-specific parameters are found in the INI file specified by the btl_openib_device_param_files MCA parameter (0 = do not warn; any other value = warn)", 1, &ival, 0)); mca_btl_openib_component.warn_no_device_params_found = (0 != ival); CHECK(reg_int("warn_default_gid_prefix", NULL, "Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)", 1, &ival, 0)); mca_btl_openib_component.warn_default_gid_prefix = (0 != ival); CHECK(reg_int("warn_nonexistent_if", NULL, "Warn if non-existent devices and/or ports are specified in the btl_openib_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)", 1, &ival, 0)); mca_btl_openib_component.warn_nonexistent_if = (0 != ival); if (OMPI_HAVE_IBV_FORK_INIT) { ival2 = -1; } else { ival2 = 0; } CHECK(reg_int("want_fork_support", NULL, "Whether fork support is desired or not " "(negative = try to enable fork support, but continue even if it is not available, 0 = do not enable fork support, positive = try to enable fork support and fail if it is not available)", ival2, &ival, 0)); #ifdef HAVE_IBV_FORK_INIT mca_btl_openib_component.want_fork_support = ival; #else if (0 != ival) { orte_show_help("help-mpi-btl-openib.txt", "ibv_fork requested but not supported", true, orte_process_info.nodename); return OMPI_ERROR; } #endif asprintf(&str, "%s/mca-btl-openib-device-params.ini", opal_install_dirs.pkgdatadir); if (NULL == str) { return OMPI_ERR_OUT_OF_RESOURCE; } CHECK(reg_string("device_param_files", "hca_param_files", "Colon-delimited list of INI-style files that contain device vendor/part-specific parameters", str, &mca_btl_openib_component.device_params_file_names, 0)); free(str); CHECK(reg_string("device_type", NULL, "Specify to only use IB or iWARP network adapters (infiniband = only use InfiniBand HCAs; iwarp = only use iWARP NICs; all = use any available adapters)", "all", &str, 0)); if (0 == strcasecmp(str, "ib") || 0 == strcasecmp(str, "infiniband")) { mca_btl_openib_component.device_type = BTL_OPENIB_DT_IB; } else if (0 == strcasecmp(str, "iw") || 0 == strcasecmp(str, "iwarp")) { mca_btl_openib_component.device_type = BTL_OPENIB_DT_IWARP; } else if (0 == strcasecmp(str, "all")) { mca_btl_openib_component.device_type = BTL_OPENIB_DT_ALL; } else { orte_show_help("help-mpi-btl-openib.txt", "ibv_fork requested but not supported", true, orte_process_info.nodename); return OMPI_ERROR; } free(str); CHECK(reg_int("max_btls", NULL, "Maximum number of device ports to use " "(-1 = use all available, otherwise must be >= 1)", -1, &mca_btl_openib_component.ib_max_btls, REGINT_NEG_ONE_OK | REGINT_GE_ONE)); CHECK(reg_int("free_list_num", NULL, "Intial size of free lists (must be >= 1)", 8, &mca_btl_openib_component.ib_free_list_num, REGINT_GE_ONE)); CHECK(reg_int("free_list_max", NULL, "Maximum size of free lists " "(-1 = infinite, otherwise must be >= 0)", -1, &mca_btl_openib_component.ib_free_list_max, REGINT_NEG_ONE_OK | REGINT_GE_ONE)); CHECK(reg_int("free_list_inc", NULL, "Increment size of free lists (must be >= 1)", 32, &mca_btl_openib_component.ib_free_list_inc, REGINT_GE_ONE)); CHECK(reg_string("mpool", NULL, "Name of the memory pool to be used (it is unlikely that you will ever want to change this", "rdma", &mca_btl_openib_component.ib_mpool_name, 0)); CHECK(reg_int("reg_mru_len", NULL, "Length of the registration cache most recently used list " "(must be >= 1)", 16, (int*) &mca_btl_openib_component.reg_mru_len, REGINT_GE_ONE)); CHECK(reg_int("cq_size", "ib_cq_size", "Size of the OpenFabrics completion " "queue (will automatically be set to a minimum of " "(2 * number_of_peers * btl_openib_rd_num))", 1000, &ival, REGINT_GE_ONE)); mca_btl_openib_component.ib_cq_size[BTL_OPENIB_LP_CQ] = mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival; CHECK(reg_int("max_inline_data", "ib_max_inline_data", "Maximum size of inline data segment " "(-1 = run-time probe to discover max value, " "otherwise must be >= 0). " "If not explicitly set, use max_inline_data from " "the INI file containing device-specific parameters", -1, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO)); mca_btl_openib_component.ib_max_inline_data = (int32_t) ival; CHECK(reg_string("pkey", "ib_pkey_val", "OpenFabrics partition key (pkey) value. " "Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)", "0", &pkey, 0)); mca_btl_openib_component.ib_pkey_val = ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK; free(pkey); CHECK(reg_int("psn", "ib_psn", "OpenFabrics packet sequence starting number " "(must be >= 0)", 0, &ival, REGINT_GE_ZERO)); mca_btl_openib_component.ib_psn = (uint32_t) ival; CHECK(reg_int("ib_qp_ous_rd_atom", NULL, "InfiniBand outstanding atomic reads " "(must be >= 0)", 4, &ival, REGINT_GE_ZERO)); mca_btl_openib_component.ib_qp_ous_rd_atom = (uint32_t) ival; asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes", IBV_MTU_256, IBV_MTU_512, IBV_MTU_1024, IBV_MTU_2048, IBV_MTU_4096); if (NULL == msg) { /* Don't try to recover from this */ return OMPI_ERR_OUT_OF_RESOURCE; } CHECK(reg_int("mtu", "ib_mtu", msg, IBV_MTU_1024, &ival, 0)); free(msg); if (ival < IBV_MTU_1024 || ival > IBV_MTU_4096) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "invalid value for btl_openib_ib_mtu", "btl_openib_ib_mtu reset to 1024"); mca_btl_openib_component.ib_mtu = IBV_MTU_1024; } else { mca_btl_openib_component.ib_mtu = (uint32_t) ival; } CHECK(reg_int("ib_min_rnr_timer", NULL, "InfiniBand minimum " "\"receiver not ready\" timer, in seconds " "(must be >= 0 and <= 31)", 25, &ival, 0)); if (ival > 31) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_min_rnr_timer > 31", "btl_openib_ib_min_rnr_timer reset to 31"); ival = 31; } else if (ival < 0){ orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_min_rnr_timer < 0", "btl_openib_ib_min_rnr_timer reset to 0"); ival = 0; } mca_btl_openib_component.ib_min_rnr_timer = (uint32_t) ival; CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * (2^btl_openib_ib_timeout)" "(must be >= 0 and <= 31)", 20, &ival, 0)); if (ival > 31) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_timeout > 31", "btl_openib_ib_timeout reset to 31"); ival = 31; } else if (ival < 0) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_timeout < 0", "btl_openib_ib_timeout reset to 0"); ival = 0; } mca_btl_openib_component.ib_timeout = (uint32_t) ival; CHECK(reg_int("ib_retry_count", NULL, "InfiniBand transmit retry count " "(must be >= 0 and <= 7)", 7, &ival, 0)); if (ival > 7) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_retry_count > 7", "btl_openib_ib_retry_count reset to 7"); ival = 7; } else if (ival < 0) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_retry_count < 0", "btl_openib_ib_retry_count reset to 0"); ival = 0; } mca_btl_openib_component.ib_retry_count = (uint32_t) ival; CHECK(reg_int("ib_rnr_retry", NULL, "InfiniBand \"receiver not ready\" " "retry count; applies *only* to SRQ/XRC queues. PP queues " "use RNR retry values of 0 because Open MPI performs " "software flow control to guarantee that RNRs never occur " "(must be >= 0 and <= 7; 7 = \"infinite\")", 7, &ival, 0)); if (ival > 7) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_rnr_retry > 7", "btl_openib_ib_rnr_retry reset to 7"); ival = 7; } else if (ival < 0) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_rnr_retry < 0", "btl_openib_ib_rnr_retry reset to 0"); ival = 0; } mca_btl_openib_component.ib_rnr_retry = (uint32_t) ival; CHECK(reg_int("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA " "destination operations " "(must be >= 0)", 4, &ival, REGINT_GE_ZERO)); mca_btl_openib_component.ib_max_rdma_dst_ops = (uint32_t) ival; CHECK(reg_int("ib_service_level", NULL, "InfiniBand service level " "(must be >= 0 and <= 15)", 0, &ival, 0)); if (ival > 15) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_service_level > 15", "btl_openib_ib_service_level reset to 15"); ival = 15; } else if (ival < 0) { orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value", true, "btl_openib_ib_service_level < 0", "btl_openib_ib_service_level reset to 0"); ival = 0; } mca_btl_openib_component.ib_service_level = (uint32_t) ival; CHECK(reg_int("use_eager_rdma", NULL, "Use RDMA for eager messages " "(-1 = use device default, 0 = do not use eager RDMA, " "1 = use eager RDMA)", -1, &ival, 0)); mca_btl_openib_component.use_eager_rdma = (int32_t) ival; CHECK(reg_int("eager_rdma_threshold", NULL, "Use RDMA for short messages after this number of " "messages are received from a given peer " "(must be >= 1)", 16, &ival, REGINT_GE_ONE)); mca_btl_openib_component.eager_rdma_threshold = (int32_t) ival; CHECK(reg_int("max_eager_rdma", NULL, "Maximum number of peers allowed to use " "RDMA for short messages (RDMA is used for all long " "messages, except if explicitly disabled, such as " "with the \"dr\" pml) " "(must be >= 0)", 16, &ival, REGINT_GE_ZERO)); mca_btl_openib_component.max_eager_rdma = (int32_t) ival; CHECK(reg_int("eager_rdma_num", NULL, "Number of RDMA buffers to allocate " "for small messages" "(must be >= 1)", 16, &ival, REGINT_GE_ONE)); mca_btl_openib_component.eager_rdma_num = (int32_t) (ival + 1); CHECK(reg_int("btls_per_lid", NULL, "Number of BTLs to create for each " "InfiniBand LID " "(must be >= 1)", 1, &ival, REGINT_GE_ONE)); mca_btl_openib_component.btls_per_lid = (uint32_t) ival; CHECK(reg_int("max_lmc", NULL, "Maximum number of LIDs to use for each device port " "(must be >= 0, where 0 = use all available)", 0, &ival, REGINT_GE_ZERO)); mca_btl_openib_component.max_lmc = (uint32_t) ival; #if OMPI_HAVE_THREADS CHECK(reg_int("enable_apm_over_lmc", NULL, "Maximum number of alterative paths for each device port " "(must be >= -1, where 0 = disable apm, -1 = all availible alternative paths )", 0, &ival, REGINT_NEG_ONE_OK|REGINT_GE_ZERO)); mca_btl_openib_component.apm_lmc = (uint32_t) ival; CHECK(reg_int("enable_apm_over_ports", NULL, "Enable alterative path migration (APM) over different ports of the same device " "(must be >= 0, where 0 = disable APM over ports , 1 = enable APM over ports of the same device)", 0, &ival, REGINT_GE_ZERO)); mca_btl_openib_component.apm_ports = (uint32_t) ival; CHECK(reg_int("enable_apm_over_lmc", NULL, "Maximum number of alterative paths for each device port " "(must be >= -1, where 0 = disable APM, -1 = all availible alternative paths)", 0, &ival, REGINT_NEG_ONE_OK|REGINT_GE_ZERO)); mca_btl_openib_component.apm_lmc = (uint32_t) ival; CHECK(reg_int("enable_apm_over_ports", NULL, "Enable alterative path migration (APM) over different ports of the same device " "(must be >= 0, where 0 = disable APM over ports, 1 = enable APM over ports of the same device)", 0, &ival, REGINT_GE_ZERO)); mca_btl_openib_component.apm_ports = (uint32_t) ival; CHECK(reg_int("use_async_event_thread", NULL, "If nonzero, use the thread that will handle InfiniBand asyncihronous events ", 1, &ival, 0)); mca_btl_openib_component.use_async_event_thread = (0 != ival); #endif CHECK(reg_int("buffer_alignment", NULL, "Prefered communication buffer alignment, in bytes " "(must be > 0 and power of two)", 64, &ival, REGINT_GE_ZERO)); if(ival <= 1 || (ival & (ival - 1))) { orte_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment", true, ival, orte_process_info.nodename, 64); mca_btl_openib_component.buffer_alignment = 64; } else { mca_btl_openib_component.buffer_alignment = (uint32_t) ival; } CHECK(reg_int("use_message_coalescing", NULL, "Use message coalescing", 1, &ival, 0)); mca_btl_openib_component.use_message_coalescing = (0 != ival); CHECK(reg_int("cq_poll_ratio", NULL, "how often poll high priority CQ versus low priority CQ", 100, &ival, REGINT_GE_ONE)); mca_btl_openib_component.cq_poll_ratio = (uint32_t)ival; CHECK(reg_int("eager_rdma_poll_ratio", NULL, "how often poll eager RDMA channel versus CQ", 100, &ival, REGINT_GE_ONE)); mca_btl_openib_component.eager_rdma_poll_ratio = (uint32_t)ival; CHECK(reg_int("hp_cq_poll_per_progress", NULL, "max number of completion events to process for each call " "of BTL progress engine", 10, &ival, REGINT_GE_ONE)); mca_btl_openib_component.cq_poll_progress = (uint32_t)ival; /* Info only */ mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version, "have_fork_support", "Whether the OpenFabrics stack supports applications that invoke the \"fork()\" system call or not (0 = no, 1 = yes). Note that this value does NOT indicate whether the system being run on supports \"fork()\" with OpenFabrics applications or not.", false, true, OMPI_HAVE_IBV_FORK_INIT ? 1 : 0, NULL); mca_btl_openib_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT; mca_btl_openib_module.super.btl_eager_limit = 12 * 1024; mca_btl_openib_module.super.btl_rndv_eager_limit = 12 * 1024; mca_btl_openib_module.super.btl_max_send_size = 64 * 1024; mca_btl_openib_module.super.btl_rdma_pipeline_send_length = 1024 * 1024; mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024; mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024; mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; mca_btl_openib_module.super.btl_bandwidth = 800; mca_btl_openib_module.super.btl_latency = 10; CHECK(mca_btl_base_param_register( &mca_btl_openib_component.super.btl_version, &mca_btl_openib_module.super)); /* setup all the qp stuff */ mid_qp_size = mca_btl_openib_module.super.btl_eager_limit / 4; /* round mid_qp_size to smallest power of two */ for(i = 31; i > 0; i--) { if(!(mid_qp_size & (1<<i))) { continue; } mid_qp_size = (1<<i); break; } if(mid_qp_size <= 128) { mid_qp_size = 1024; } snprintf(default_qps, 100, "P,128,256,192,128:S,%u,256,128,32:S,%u,256,128,32:S,%u,256,128,32", mid_qp_size, (uint32_t)mca_btl_openib_module.super.btl_eager_limit, (uint32_t)mca_btl_openib_module.super.btl_max_send_size); mca_btl_openib_component.default_recv_qps = strdup(default_qps); if(NULL == mca_btl_openib_component.default_recv_qps) { BTL_ERROR(("Unable to allocate memory for default receive queues string.\n")); return OMPI_ERROR; } CHECK(reg_string("receive_queues", NULL, "Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4", default_qps, &mca_btl_openib_component.receive_queues, 0)); mca_btl_openib_component.receive_queues_source = (0 == strcmp(default_qps, mca_btl_openib_component.receive_queues)) ? BTL_OPENIB_RQ_SOURCE_DEFAULT : BTL_OPENIB_RQ_SOURCE_MCA; CHECK(reg_string("if_include", NULL, "Comma-delimited list of devices/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found). Mutually exclusive with btl_openib_if_exclude.", NULL, &mca_btl_openib_component.if_include, 0)); CHECK(reg_string("if_exclude", NULL, "Comma-delimited list of device/ports to be excluded (empty value means to not exclude any ports). Mutually exclusive with btl_openib_if_include.", NULL, &mca_btl_openib_component.if_exclude, 0)); CHECK(reg_string("ipaddr_include", NULL, "Comma-delimited list of IP Addresses to be used (e.g. \"192.168.1.0/24\"). Mutually exclusive with btl_openib_ipaddr_exclude.", NULL, &mca_btl_openib_component.ipaddr_include, 0)); CHECK(reg_string("ipaddr_exclude", NULL, "Comma-delimited list of IP Addresses to be excluded (e.g. \"192.168.1.0/24\"). Mutually exclusive with btl_openib_ipaddr_include.", NULL, &mca_btl_openib_component.ipaddr_exclude, 0)); /* Register any MCA params for the connect pseudo-components */ if (OMPI_SUCCESS == ret) { ret = ompi_btl_openib_connect_base_register(); } return ret; }
static int btl_ugni_component_register(void) { mca_base_var_enum_t *new_enum; gni_nic_device_t device_type; char *mpool_hints_tmp = NULL; int rc; (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version, "uGNI byte transport layer"); mca_btl_ugni_component.ugni_free_list_num = 8; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_num); mca_btl_ugni_component.ugni_free_list_max = 4096; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_max); mca_btl_ugni_component.ugni_free_list_inc = 64; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_inc); mca_btl_ugni_component.ugni_eager_num = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_num); mca_btl_ugni_component.ugni_eager_max = 128; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_max); mca_btl_ugni_component.ugni_eager_inc = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_inc); mca_btl_ugni_component.remote_cq_size = 40000; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "remote_cq_size", "Remote SMSG completion queue " "size (default 40000)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.remote_cq_size); mca_btl_ugni_component.local_cq_size = 8192; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "local_cq_size", "Local completion queue size " "(default 8192)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.local_cq_size); mca_btl_ugni_component.ugni_smsg_limit = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_limit", "Maximum size message that " "will be sent using the SMSG/MSGQ protocol " "(0 - autoselect(default), 16k max)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_smsg_limit); mca_btl_ugni_component.smsg_max_credits = 32; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_max_credits", "Maximum number of " "outstanding SMSG/MSGQ message (default 32)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_max_credits); mca_btl_ugni_component.ugni_fma_limit = 1024; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "fma_limit", "Maximum size message that " "will be sent using the FMA (Fast Memory " "Access) protocol (default 1024, 64k max)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_fma_limit); mca_btl_ugni_component.rdma_max_retries = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rdma_max_retries); mca_btl_ugni_component.smsg_max_retries = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_max_retries); mca_btl_ugni_component.max_mem_reg = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "max_mem_reg", "Maximum number of " "memory registrations a process can " "hold (0 - autoselect, -1 - unlimited)" " (default 0)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.max_mem_reg); mca_btl_ugni_component.mbox_increment = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "mbox_inc", "Number of SMSG mailboxes to " "allocate in each block (0 - autoselect(default))", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment); /* communication domain flags */ rc = mca_base_var_enum_create_flag ("btl_ugni_cdm_flags", cdm_flags, (mca_base_var_enum_flag_t **) &new_enum); if (OPAL_SUCCESS != rc) { return rc; } mca_btl_ugni_component.cdm_flags = GNI_CDM_MODE_FORK_PARTCOPY | GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL | GNI_CDM_MODE_MDD_SHARED | GNI_CDM_MODE_FMA_SHARED | GNI_CDM_MODE_FMA_SMALL_WINDOW; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "cdm_flags", "Flags to set when creating a communication domain " " (default: fork-fullcopy,cached-amo-enabled,err-no-kill,fast-datagram-poll," "fma-shared,fma-small-window)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.cdm_flags); OBJ_RELEASE(new_enum); mca_btl_ugni_component.virtual_device_count = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "virtual_device_count", "Number of virtual devices to create. Higher numbers may " "result in better performance when using threads. (default: auto, max: 8)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.virtual_device_count); /* determine if there are get alignment restrictions */ GNI_GetDeviceType (&device_type); mca_btl_ugni_component.smsg_page_size = 2 << 20; if (GNI_DEVICE_GEMINI == device_type) { if (access ("/sys/class/gemini/ghal0/mrt", R_OK)) { int fd = open ("/sys/class/gemini/ghal0/mrt", O_RDONLY); char buffer[10]; if (0 <= fd) { memset (buffer, 0, sizeof (buffer)); read (fd, buffer, sizeof (buffer) - 1); close (fd); mca_btl_ugni_ugni_page_size = strtol (buffer, NULL, 10) * 1024; mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size; } } } (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_page_size", "Page size to use for SMSG mailbox allocation (default: detect)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_page_size); mca_btl_ugni_component.progress_thread_requested = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "request_progress_thread", "Enable to request ugni btl progress thread - requires MPI_THREAD_MULTIPLE support", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.progress_thread_requested); /* performance variables */ mca_btl_ugni_progress_thread_wakeups = 0; (void) mca_base_component_pvar_register(&mca_btl_ugni_component.super.btl_version, "progress_thread_wakeups", "Number of times the progress thread " "has been woken", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER, MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_ugni_progress_thread_wakeups); /* register network statistics as performance variables */ for (int i = 0 ; i < GNI_NUM_STATS ; ++i) { char name[128], desc[128]; size_t str_len = strlen (gni_statistic_str[i]); assert (str_len < sizeof (name)); /* we can get an all-caps string for the variable from gni_statistic_str. need to make it lowercase * to match ompi standards */ for (size_t j = 0 ; j < str_len ; ++j) { name[j] = tolower (gni_statistic_str[i][j]); desc[j] = ('_' == name[j]) ? ' ' : name[j]; } name[str_len] = '\0'; desc[str_len] = '\0'; (void) mca_base_component_pvar_register (&mca_btl_ugni_component.super.btl_version, name, desc, OPAL_INFO_LVL_4, MCA_BASE_PVAR_CLASS_COUNTER, MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, mca_btl_ugni_get_stat, NULL, mca_btl_ugni_notify_stat, (void *) (intptr_t) i); } /* btl/ugni can only support only a fixed set of rcache components (these rcache components have compatible resource * structures) */ rc = mca_base_var_enum_create ("btl_ugni_rcache", rcache_values, &new_enum); if (OPAL_SUCCESS != rc) { return rc; } /* NTH: there are known *serious* performance issues with udreg. if they are ever resolved it is the preferred rcache */ mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_GRDMA; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "rcache", "registration cache to use (default: grdma)", MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type); OBJ_RELEASE(new_enum); if (mca_btl_ugni_ugni_page_size) { rc = asprintf (&mpool_hints_tmp, "page_size=%lu", mca_btl_ugni_ugni_page_size); if (rc < 0) { return OPAL_ERR_OUT_OF_RESOURCE; } mca_btl_ugni_component.mpool_hints = mpool_hints_tmp; } else { mca_btl_ugni_component.mpool_hints = "page_size=2M"; } (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "mpool_hints", "hints to use when selecting a memory pool (default: " "\"page_size=2M\")", MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_hints); free (mpool_hints_tmp); /* ensure we loose send exclusivity to sm and vader if they are enabled */ mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2; /* smsg threshold */ mca_btl_ugni_module.super.btl_eager_limit = 8 * 1024; mca_btl_ugni_module.super.btl_rndv_eager_limit = 8 * 1024; mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024; mca_btl_ugni_module.super.btl_max_send_size = 8 * 1024; mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024; mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024; /* * see def. of ALIGNMENT_MASK to figure this one out */ /* both gemini and aries have a 4-byte alignment requirement on remote addresses */ mca_btl_ugni_module.super.btl_get_alignment = 4; /* threshold for put */ mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024; mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_ATOMIC_OPS | MCA_BTL_FLAGS_ATOMIC_FOPS; mca_btl_ugni_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | MCA_BTL_ATOMIC_SUPPORTS_CSWAP; if (GNI_DEVICE_ARIES == device_type) { /* aries supports additional atomic operations */ mca_btl_ugni_module.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX | MCA_BTL_ATOMIC_SUPPORTS_LAND | MCA_BTL_ATOMIC_SUPPORTS_LOR | MCA_BTL_ATOMIC_SUPPORTS_LXOR | MCA_BTL_ATOMIC_SUPPORTS_32BIT | MCA_BTL_ATOMIC_SUPPORTS_FLOAT; } mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */ mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */ mca_btl_ugni_module.super.btl_get_local_registration_threshold = 0; mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_limit; /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version, &mca_btl_ugni_module.super); return OPAL_SUCCESS; }
static int btl_ugni_component_register(void) { (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version, "Gemini byte transport layer"); mca_btl_ugni_component.ugni_free_list_num = 8; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_num); mca_btl_ugni_component.ugni_free_list_max = 16384; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_max); mca_btl_ugni_component.ugni_free_list_inc = 64; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_inc); mca_btl_ugni_component.ugni_eager_num = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_num); mca_btl_ugni_component.ugni_eager_max = 128; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_max); mca_btl_ugni_component.ugni_eager_inc = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_inc); mca_btl_ugni_component.remote_cq_size = 40000; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "remote_cq_size", "Remote SMSG completion queue " "size (default 40000)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.remote_cq_size); mca_btl_ugni_component.local_cq_size = 8192; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "local_cq_size", "Local completion queue size " "(default 8192)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.local_cq_size); mca_btl_ugni_component.ugni_smsg_limit = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_limit", "Maximum size message that " "will be sent using the SMSG/MSGQ protocol " "(0 - autoselect(default), 16k max)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_smsg_limit); mca_btl_ugni_component.smsg_max_credits = 32; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_max_credits", "Maximum number of " "outstanding SMSG/MSGQ message (default 32)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_max_credits); mca_btl_ugni_component.ugni_fma_limit = 1024; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "fma_limit", "Maximum size message that " "will be sent using the FMA (Fast Memory " "Access) protocol (default 1024, 64k max)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_fma_limit); mca_btl_ugni_component.ugni_get_limit = 1 * 1024 * 1024; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "get_limit", "Maximum size message that " "will be sent using a get protocol " "(default 1M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_get_limit); mca_btl_ugni_component.rdma_max_retries = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rdma_max_retries); mca_btl_ugni_component.smsg_max_retries = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_max_retries); mca_btl_ugni_component.max_mem_reg = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "max_mem_reg", "Maximum number of " "memory registrations a process can " "hold (0 - autoselect, -1 - unlimited)" " (default 0)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.max_mem_reg); mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; /* smsg threshold */ mca_btl_ugni_module.super.btl_eager_limit = 8 * 1024; mca_btl_ugni_module.super.btl_rndv_eager_limit = 8 * 1024; mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024; mca_btl_ugni_module.super.btl_max_send_size = 8 * 1024; mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024; /* threshold for put */ mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024; mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_ugni_module.super.btl_seg_size = sizeof (mca_btl_ugni_segment_t); mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */ mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */ /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version, &mca_btl_ugni_module.super); return OMPI_SUCCESS; }
/* * Register and check all MCA parameters * * @return OMPI_SUCCESS or OMPI_ERR_BAD_PARAM */ int mca_btl_udapl_register_mca_params(void) { int rc, tmp_rc; rc = OMPI_SUCCESS; /* register uDAPL component parameters */ CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_num", "Initial size of free lists (must be >= 1).", 8, &mca_btl_udapl_component.udapl_free_list_num, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_max", "Maximum size of free lists " "(-1 = infinite, otherwise must be >= 1).", -1, &mca_btl_udapl_component.udapl_free_list_max, REGINT_NEG_ONE_OK | REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_inc", "Increment size of free lists (must be >= 1).", 8, &mca_btl_udapl_component.udapl_free_list_inc, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("mpool", "Name of the memory pool to be used.", "grdma", &mca_btl_udapl_component.udapl_mpool_name, REGSTR_EMPTY_NOT_OK), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_modules", "Maximum number of supported HCAs.", 8, &mca_btl_udapl_component.udapl_max_btls, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("num_recvs", "Total number of receive buffers to keep posted " "per endpoint (must be >= 1).", 8, &mca_btl_udapl_component.udapl_num_recvs, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("num_sends", "Maximum number of sends to post on an endpoint " "(must be >= 1).", 7, &mca_btl_udapl_component.udapl_num_sends, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("sr_win", "Window size at which point an explicit " "credit message will be generated (must be >= 1).", 4, &mca_btl_udapl_component.udapl_sr_win, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("use_eager_rdma", "Use of RDMA for small messages : " "1 = default, use RDMA for small messages; " "0 = do not use RDMA for small messages. ", 1, &mca_btl_udapl_component.udapl_use_eager_rdma, REGINT_GE_ZERO), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("eager_rdma_num", "Number of RDMA buffers to allocate " "for small messages (must be >= 1).", 32, &mca_btl_udapl_component.udapl_eager_rdma_num, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_eager_rdma_peers", "Maximum number of peers allowed to use " "RDMA for short messages (independently RDMA will " "still be used for large messages, (must be >= 0; " "if zero then RDMA will not be used for short messages).", 16, &mca_btl_udapl_component.udapl_max_eager_rdma_peers, REGINT_GE_ZERO), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("eager_rdma_win", "Window size at which point an explicit " "credit message will be generated (must be >= 1).", 28, &mca_btl_udapl_component.udapl_eager_rdma_win, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("timeout", "Connection timeout, in microseconds.", MCA_BTL_UDAPL_CONN_TIMEOUT_DEFAULT, &mca_btl_udapl_component.udapl_timeout, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("conn_priv_data", "Use connect private data to establish connections " "(not supported by all uDAPL implementations).", 0, &mca_btl_udapl_component.udapl_conn_priv_data, REGINT_GE_ZERO), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_events", "The asynchronous event queue will only be " "checked after entering progress this number of times.", 100000000, &mca_btl_udapl_component.udapl_async_events, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("buffer_alignment", "Preferred communication buffer alignment, " "in bytes (must be >= 1).", DAT_OPTIMAL_ALIGNMENT, &mca_btl_udapl_component.udapl_buffer_alignment, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("if_include", "Comma-delimited list of interfaces to be included " "(e.g. \"ibd0,ibd1 or OpenIB-cma,OpenIB-cma-1\"; empty value means " "to use all interfaces found). Mutually exclusive with " "btl_udapl_if_exclude.", NULL, &mca_btl_udapl_component.if_include, REGSTR_EMPTY_OK), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("if_exclude", "Comma-delimited list of interfaces to be excluded from use " "(e.g. \"ibd0,ibd1 or OpenIB-cma,OpenIB-cma-1\"; empty value means " "not to exclude any). Mutually exclusive with btl_udapl_if_include.", NULL, &mca_btl_udapl_component.if_exclude, REGSTR_EMPTY_OK), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("verbose", "Verbosity level of the uDAPL BTL (-1 thru 100)", VERBOSE_SHOW_HELP, &(mca_btl_udapl_component.udapl_verbosity), REGINT_NEG_ONE_OK), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("compare_subnet", "By default uDAPL BTL will compare subnets using netmask to " "determine if an interface is reachable. Setting this parameter to " "0 will essentially turn this comparison off and the uDAPL BTL will " "assume all uDAPL interfaces are reachable (0 or 1, default==1).", 1, &(mca_btl_udapl_component.udapl_compare_subnet), REGINT_GE_ZERO), tmp_rc, rc); /* register uDAPL module parameters */ CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_evd_qlen", "The asynchronous event dispatcher queue length.", MCA_BTL_UDAPL_ASYNC_EVD_QLEN_DEFAULT, (int*)&mca_btl_udapl_module.udapl_async_evd_qlen, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("conn_evd_qlen", "The connection event dispatcher queue length is " "a function of the number of connections expected.", MCA_BTL_UDAPL_CONN_EVD_QLEN_DEFAULT, (int*)&mca_btl_udapl_module.udapl_conn_evd_qlen, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("dto_evd_qlen", "The data transfer operation event dispatcher queue length is " "a function of the number of connections as well as the " "maximum number of outstanding data transfer operations.", MCA_BTL_UDAPL_DTO_EVD_QLEN_DEFAULT, (int*)&mca_btl_udapl_module.udapl_dto_evd_qlen, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_request_dtos", "Maximum number of outstanding " "submitted sends and rdma operations per endpoint, (see Section " "6.6.6 of uDAPL Spec.).", MCA_BTL_UDAPL_MAX_REQUEST_DTOS_DEFAULT, (int*)&mca_btl_udapl_module.udapl_max_request_dtos, REGINT_GE_ONE), tmp_rc, rc); CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_recv_dtos", "Maximum number of outstanding " "submitted receive operations per endpoint, (see Section " "6.6.6 of uDAPL Spec.).", MCA_BTL_UDAPL_MAX_RECV_DTOS_DEFAULT, (int*)&mca_btl_udapl_module.udapl_max_recv_dtos, REGINT_GE_ONE), tmp_rc, rc); mca_btl_udapl_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT - 10; mca_btl_udapl_module.super.btl_eager_limit = 8*1024; mca_btl_udapl_module.super.btl_rndv_eager_limit = 8*1024; mca_btl_udapl_module.super.btl_max_send_size = 64*1024; mca_btl_udapl_module.super.btl_rdma_pipeline_send_length = 512*1024; mca_btl_udapl_module.super.btl_rdma_pipeline_frag_size = 128 * 1024; mca_btl_udapl_module.super.btl_min_rdma_pipeline_size = 0; mca_btl_udapl_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND; mca_btl_udapl_module.super.btl_bandwidth = 225; mca_btl_udapl_module.super.btl_latency = 0; mca_btl_base_param_register(&mca_btl_udapl_component.super.btl_version, &mca_btl_udapl_module.super); return rc; }
static int btl_ugni_component_register(void) { mca_base_var_enum_t *new_enum; int rc; (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version, "Gemini byte transport layer"); mca_btl_ugni_component.ugni_free_list_num = 8; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_num); mca_btl_ugni_component.ugni_free_list_max = 16384; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_max); mca_btl_ugni_component.ugni_free_list_inc = 64; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_free_list_inc); mca_btl_ugni_component.ugni_eager_num = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_num); mca_btl_ugni_component.ugni_eager_max = 128; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_max); mca_btl_ugni_component.ugni_eager_inc = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "eager_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_eager_inc); mca_btl_ugni_component.remote_cq_size = 40000; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "remote_cq_size", "Remote SMSG completion queue " "size (default 40000)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.remote_cq_size); mca_btl_ugni_component.local_cq_size = 8192; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "local_cq_size", "Local completion queue size " "(default 8192)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.local_cq_size); mca_btl_ugni_component.ugni_smsg_limit = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_limit", "Maximum size message that " "will be sent using the SMSG/MSGQ protocol " "(0 - autoselect(default), 16k max)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_smsg_limit); mca_btl_ugni_component.smsg_max_credits = 32; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_max_credits", "Maximum number of " "outstanding SMSG/MSGQ message (default 32)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_max_credits); mca_btl_ugni_component.ugni_fma_limit = 1024; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "fma_limit", "Maximum size message that " "will be sent using the FMA (Fast Memory " "Access) protocol (default 1024, 64k max)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_fma_limit); mca_btl_ugni_component.ugni_get_limit = 1 * 1024 * 1024; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "get_limit", "Maximum size message that " "will be sent using a get protocol " "(default 1M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.ugni_get_limit); mca_btl_ugni_component.rdma_max_retries = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rdma_max_retries); mca_btl_ugni_component.smsg_max_retries = 16; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_max_retries); mca_btl_ugni_component.max_mem_reg = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "max_mem_reg", "Maximum number of " "memory registrations a process can " "hold (0 - autoselect, -1 - unlimited)" " (default 0)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.max_mem_reg); mca_btl_ugni_component.mbox_increment = 0; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "mbox_inc", "Number of SMSG mailboxes to " "allocate in each block (0 - autoselect(default))", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment); mca_btl_ugni_component.smsg_page_size = 2 << 20; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_page_size", "Page size to use for SMSG " "mailbox allocation (default 2M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_page_size); /* btl/ugni can only support only a fixed set of mpools (these mpools have compatible resource * structures) */ rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum); if (OPAL_SUCCESS != rc) { return rc; } mca_btl_ugni_component.mpool_type = MCA_BTL_UGNI_MPOOL_UDREG; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "mpool", "mpool to use", MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_type); OBJ_RELEASE(new_enum); mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; /* smsg threshold */ mca_btl_ugni_module.super.btl_eager_limit = 8 * 1024; mca_btl_ugni_module.super.btl_rndv_eager_limit = 8 * 1024; mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024; mca_btl_ugni_module.super.btl_max_send_size = 8 * 1024; mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024; /* threshold for put */ mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024; mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_ugni_module.super.btl_seg_size = sizeof (mca_btl_ugni_segment_t); mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */ mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */ /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version, &mca_btl_ugni_module.super); return OMPI_SUCCESS; }
int mca_btl_tcp_component_open(void) { char* message; #ifdef __WINDOWS__ WSADATA win_sock_data; if( WSAStartup(MAKEWORD(2,2), &win_sock_data) != 0 ) { BTL_ERROR(("failed to initialise windows sockets:%d", WSAGetLastError())); return OMPI_ERROR; } #endif /* initialize state */ mca_btl_tcp_component.tcp_listen_sd = -1; #if OPAL_WANT_IPV6 mca_btl_tcp_component.tcp6_listen_sd = -1; #endif mca_btl_tcp_component.tcp_num_btls=0; mca_btl_tcp_component.tcp_addr_count = 0; mca_btl_tcp_component.tcp_btls=NULL; /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_procs, opal_hash_table_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_events, opal_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_eager, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_max, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_user, ompi_free_list_t); opal_hash_table_init(&mca_btl_tcp_component.tcp_procs, 256); /* register TCP component parameters */ mca_btl_tcp_component.tcp_num_links = mca_btl_tcp_param_register_int("links", NULL, 1); mca_btl_tcp_component.tcp_if_include = mca_btl_tcp_param_register_string("if_include", "Comma-delimited list of devices or CIDR notation of networks to use for MPI communication (e.g., \"eth0,eth1\" or \"192.168.0.0/16,10.1.4.0/24\"). Mutually exclusive with btl_tcp_if_exclude.", ""); mca_btl_tcp_component.tcp_if_exclude = mca_btl_tcp_param_register_string("if_exclude", "Comma-delimited list of devices or CIDR notation of networks to NOT use for MPI communication -- all devices not matching these specifications will be used (e.g., \"eth0,eth1\" or \"192.168.0.0/16,10.1.4.0/24\"). Mutually exclusive with btl_tcp_if_include.", "lo,sppp"); mca_btl_tcp_component.tcp_free_list_num = mca_btl_tcp_param_register_int ("free_list_num", NULL, 8); mca_btl_tcp_component.tcp_free_list_max = mca_btl_tcp_param_register_int ("free_list_max", NULL, -1); mca_btl_tcp_component.tcp_free_list_inc = mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32); mca_btl_tcp_component.tcp_sndbuf = mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024); mca_btl_tcp_component.tcp_rcvbuf = mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024); mca_btl_tcp_component.tcp_endpoint_cache = mca_btl_tcp_param_register_int ("endpoint_cache", "The size of the internal cache for each TCP connection. This cache is" " used to reduce the number of syscalls, by replacing them with memcpy." " Every read will read the expected data plus the amount of the" " endpoint_cache", 30*1024); mca_btl_tcp_component.tcp_use_nodelay = !mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0); mca_btl_tcp_component.tcp_port_min = mca_btl_tcp_param_register_int( "port_min_v4", "The minimum port where the TCP BTL will try to bind (default 1024)", 1024 ); if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) { orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port", true, "v4", orte_process_info.nodename, mca_btl_tcp_component.tcp_port_min ); mca_btl_tcp_component.tcp_port_min = 1024; } asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1 ); mca_btl_tcp_component.tcp_port_range = mca_btl_tcp_param_register_int( "port_range_v4", message, (0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1); free(message); #if OPAL_WANT_IPV6 mca_btl_tcp_component.tcp6_port_min = mca_btl_tcp_param_register_int( "port_min_v6", "The minimum port where the TCP BTL will try to bind (default 1024)", 1024 ); if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) { orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port", true, "v6", orte_process_info.nodename, mca_btl_tcp_component.tcp6_port_min ); mca_btl_tcp_component.tcp6_port_min = 1024; } asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1 ); mca_btl_tcp_component.tcp6_port_range = mca_btl_tcp_param_register_int( "port_range_v6", message, (0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1); free(message); #endif mca_btl_tcp_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW + 100; mca_btl_tcp_module.super.btl_eager_limit = 64*1024; mca_btl_tcp_module.super.btl_rndv_eager_limit = 64*1024; mca_btl_tcp_module.super.btl_max_send_size = 128*1024; mca_btl_tcp_module.super.btl_rdma_pipeline_send_length = 128*1024; mca_btl_tcp_module.super.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_tcp_module.super.btl_min_rdma_pipeline_size = 0; mca_btl_tcp_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; mca_btl_tcp_module.super.btl_bandwidth = 100; mca_btl_tcp_module.super.btl_latency = 100; mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version, &mca_btl_tcp_module.super); mca_btl_tcp_component.tcp_disable_family = mca_btl_tcp_param_register_int ("disable_family", NULL, 0); /* Register a list of interfaces to use in sequence */ message = mca_btl_tcp_param_register_string("if_seq", "If specified, a comma-delimited list of TCP interfaces. Interfaces will be assigned, one to each MPI process, in a round-robin fashion on each server. For example, if the list is \"eth0,eth1\" and four MPI processes are run on a single server, then local ranks 0 and 2 will use eth0 and local ranks 1 and 3 will use eth1.", NULL); mca_btl_tcp_component.tcp_if_seq = NULL; if (NULL != message && '\0' != *message) { char **argv = opal_argv_split(message, ','); if (NULL != argv && '\0' != *(argv[0])) { int if_index, rc, count; orte_node_rank_t node_rank; char name[256]; node_rank = orte_ess.get_node_rank(ORTE_PROC_MY_NAME); /* Now that we've got that local rank, take the corresponding entry from the tcp_if_seq list (wrapping if necessary) */ count = opal_argv_count(argv); mca_btl_tcp_component.tcp_if_seq = strdup(argv[node_rank % count]); opal_argv_free(argv); /* Double check that the selected interface actually exists */ for (if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)){ if (OPAL_SUCCESS != (rc = opal_ifindextoname(if_index, name, sizeof(name)))) { return rc; } if (0 == strcmp(name, mca_btl_tcp_component.tcp_if_seq)) { break; } } if (if_index < 0) { orte_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude", true, "if_seq", orte_process_info.nodename, mca_btl_tcp_component.tcp_if_seq, "Interface does not exist"); return OMPI_ERR_BAD_PARAM; } BTL_VERBOSE(("Node rank %d using TCP interface %s", node_rank, mca_btl_tcp_component.tcp_if_seq)); } } return OMPI_SUCCESS; }