Beispiel #1
0
/*
 * Look up the INI-file parameters recorded for a device.
 *
 * Called once the component has found a device and wants to know whether
 * an INI file specified any parameters for it.
 *
 * vendor_id / vendor_part_id  identify the device to search for.
 * values                      is reset via reset_values() and, when a
 *                             matching entry exists, overwritten with the
 *                             values stored for that entry.
 *
 * Returns OMPI_SUCCESS when an entry with both IDs equal was found,
 * OMPI_ERR_NOT_FOUND when the saved device list holds no such entry, or
 * the error returned by ompi_btl_openib_ini_init() if that call was
 * needed (the `initialized` flag was false) and failed.
 */
int ompi_btl_openib_ini_query(uint32_t vendor_id, uint32_t vendor_part_id,
                              ompi_btl_openib_ini_values_t *values)
{
    opal_list_item_t *cur;
    device_values_t *match = NULL;

    /* Run the INI initialization on demand */
    if (!initialized) {
        int rc = ompi_btl_openib_ini_init();
        if (OMPI_SUCCESS != rc) {
            return rc;
        }
    }

    if (mca_btl_openib_component.verbose) {
        BTL_OUTPUT(("Querying INI files for vendor 0x%04x, part ID %d",
                    vendor_id, vendor_part_id));
    }

    /* Start from a clean slate so a miss leaves well-defined contents */
    reset_values(values);

    /* Walk the saved devices until both IDs match */
    for (cur = opal_list_get_first(&devices);
         opal_list_get_end(&devices) != cur;
         cur = opal_list_get_next(cur)) {
        device_values_t *candidate = (device_values_t *) cur;

        if (candidate->vendor_id == vendor_id &&
            candidate->vendor_part_id == vendor_part_id) {
            match = candidate;
            break;
        }
    }

    /* Nothing matched: report and bail out */
    if (NULL == match) {
        if (mca_btl_openib_component.verbose) {
            BTL_OUTPUT(("Did not find corresponding INI values"));
        }
        return OMPI_ERR_NOT_FOUND;
    }

    /* NOTE: the PGI 6.2 series compilers choke when structs containing
       bool members are copied by value, hence memcpy rather than a
       struct assignment. */
    memcpy(values, &match->values, sizeof(match->values));
    if (mca_btl_openib_component.verbose) {
        BTL_OUTPUT(("Found corresponding INI values: %s",
                    match->section_name));
    }
    return OMPI_SUCCESS;
}
/*
 * Create the TCP BTL modules for one interface.
 *
 * One module is allocated per link (mca_btl_tcp_component.tcp_num_links
 * iterations).  Each module starts as a byte copy of the
 * mca_btl_tcp_module template, is appended to
 * mca_btl_tcp_component.tcp_btls, and then has its bandwidth/latency
 * parameters registered under names derived from if_name.
 *
 * if_kindex  interface index stored (cast to uint16_t) in the module.
 * if_name    interface name used to build the parameter names.
 *
 * Returns OMPI_ERR_OUT_OF_RESOURCE if malloc() fails; modules created in
 * earlier iterations stay in tcp_btls in that case.
 *
 * NOTE(review): in this file the definition is not closed -- the loop ends
 * and the next function begins immediately, so the final return and the
 * closing brace are missing from this view.
 * NOTE(review): the sprintf() calls below write into param[256] without a
 * bound; consider snprintf(param, sizeof(param), ...).
 */
static int mca_btl_tcp_create(int if_kindex, const char* if_name)
{
    struct mca_btl_tcp_module_t* btl;
    char param[256];
    int i;

    for( i = 0; i < (int)mca_btl_tcp_component.tcp_num_links; i++ ) {
        btl = (struct mca_btl_tcp_module_t *)malloc(sizeof(mca_btl_tcp_module_t));
        if(NULL == btl)
            return OMPI_ERR_OUT_OF_RESOURCE;
        /* start from the module template, then give it its own endpoint list */
        memcpy(btl, &mca_btl_tcp_module, sizeof(mca_btl_tcp_module));
        OBJ_CONSTRUCT(&btl->tcp_endpoints, opal_list_t);
        /* NOTE(review): no capacity check on tcp_btls is visible here */
        mca_btl_tcp_component.tcp_btls[mca_btl_tcp_component.tcp_num_btls++] = btl;

        /* initialize the btl */
        btl->tcp_ifkindex = (uint16_t) if_kindex;
#if MCA_BTL_TCP_STATISTICS
        btl->tcp_bytes_recv = 0;
        btl->tcp_bytes_sent = 0;
        btl->tcp_send_handler = 0;
#endif

        /* allow user to specify interface bandwidth ("bandwidth_<if_name>");
         * the current value is passed in and the same field is passed as
         * storage -- presumably default and destination, confirm against
         * mca_btl_tcp_param_register_uint */
        sprintf(param, "bandwidth_%s", if_name);
        mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_bandwidth, OPAL_INFO_LVL_5, &btl->super.btl_bandwidth);

        /* allow user to override/specify latency ranking ("latency_<if_name>") */
        sprintf(param, "latency_%s", if_name);
        mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_latency, OPAL_INFO_LVL_5,  &btl->super.btl_latency);
        /* every link after the first gets half the bandwidth and twice the
         * latency of the per-interface values */
        if( i > 0 ) {
            btl->super.btl_bandwidth >>= 1;
            btl->super.btl_latency   <<= 1;
        }

        /* allow user to specify bandwidth for this particular link
         * ("bandwidth_<if_name>:<i>") */
        sprintf(param, "bandwidth_%s:%d", if_name, i);
        mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_bandwidth, OPAL_INFO_LVL_5, &btl->super.btl_bandwidth);

        /* allow user to override/specify latency ranking for this particular
         * link ("latency_<if_name>:<i>") */
        sprintf(param, "latency_%s:%d", if_name, i);
        mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_latency, OPAL_INFO_LVL_5, &btl->super.btl_latency);
        /* debug trace, compiled out by the leading "0 &&" */
#if 0 && OPAL_ENABLE_DEBUG
        BTL_OUTPUT(("interface %s instance %i: bandwidth %d latency %d\n", if_name, i,
                    btl->super.btl_bandwidth, btl->super.btl_latency));
#endif
    }
static int mca_btl_sctp_create(int if_index, const char* if_name)
{
    if(mca_btl_sctp_component.sctp_if_11) {

        char param[256];
        struct mca_btl_sctp_module_t* btl = (struct mca_btl_sctp_module_t *)malloc(sizeof(mca_btl_sctp_module_t));
        if(NULL == btl) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        memcpy(btl, &mca_btl_sctp_module, sizeof(mca_btl_sctp_module));
        OBJ_CONSTRUCT(&btl->sctp_endpoints, opal_list_t);
        mca_btl_sctp_component.sctp_btls[mca_btl_sctp_component.sctp_num_btls++] = btl;

        /* initialize the btl */
        btl->sctp_ifindex = if_index;
#if MCA_BTL_SCTP_STATISTICS
        btl->sctp_bytes_recv = 0;
        btl->sctp_bytes_sent = 0;
        btl->sctp_send_handler = 0;
#endif
        opal_ifindextoaddr(if_index, (struct sockaddr*)&btl->sctp_ifaddr, sizeof(btl->sctp_ifaddr));
        /* prepare for bind call later before connect */
        btl->sctp_ifaddr.sin_family = AF_INET;
#ifdef FREEBSD
        btl->sctp_ifaddr.sin_len = sizeof(struct sockaddr);
#endif
        btl->sctp_ifaddr.sin_port = 0;       
        opal_ifindextomask(if_index, (uint32_t *)&btl->sctp_ifmask, sizeof(btl->sctp_ifmask));

        /* allow user to specify interface bandwidth */
        sprintf(param, "bandwidth_%s", if_name);
        btl->super.btl_bandwidth = mca_btl_sctp_param_register_int(param, 0);

        /* allow user to override/specify latency ranking */
        sprintf(param, "latency_%s", if_name);
        btl->super.btl_latency = mca_btl_sctp_param_register_int(param, 0);

#if 0 && OPAL_ENABLE_DEBUG
        BTL_OUTPUT(("interface: %s bandwidth %d latency %d",
                    if_name, btl->super.btl_bandwidth, btl->super.btl_latency));
#endif
        return OMPI_SUCCESS;
    }

    else {
        /* 1 to many */
        struct mca_btl_sctp_module_t* btl;
        char param[256];
        struct sockaddr_in next_ifaddr;
        socklen_t len = sizeof(struct sockaddr_in);
        opal_socklen_t addrlen;
        
        /* check if this is the first time this function is being called */
        if(0 == mca_btl_sctp_component.sctp_num_btls) {

            /* fill in btl struct with first interface's information (arbitary) */

            btl = (struct mca_btl_sctp_module_t *)malloc(sizeof(mca_btl_sctp_module_t));
            if(NULL == btl) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            memcpy(btl, &mca_btl_sctp_module, sizeof(mca_btl_sctp_module));
            OBJ_CONSTRUCT(&btl->sctp_endpoints, opal_list_t);
            mca_btl_sctp_component.sctp_btls[mca_btl_sctp_component.sctp_num_btls++] = btl;

            /* initialize the btl */
            btl->sctp_ifindex = if_index;
#if MCA_BTL_SCTP_STATISTICS
            btl->sctp_bytes_recv = 0;
            btl->sctp_bytes_sent = 0;
            btl->sctp_send_handler = 0;
#endif
            opal_ifindextoaddr(if_index, (struct sockaddr*)&btl->sctp_ifaddr, sizeof(btl->sctp_ifaddr));
            opal_ifindextomask(if_index, (uint32_t *)&btl->sctp_ifmask, sizeof(btl->sctp_ifmask));

            /* allow user to specify interface bandwidth */
            sprintf(param, "bandwidth_%s", if_name);
            btl->super.btl_bandwidth = mca_btl_sctp_param_register_int(param, 0);

            /* allow user to override/specify latency ranking */
            sprintf(param, "latency_%s", if_name);
            btl->super.btl_latency = mca_btl_sctp_param_register_int(param, 0);

#if 0 && OPAL_ENABLE_DEBUG
            BTL_OUTPUT(("interface: %s bandwidth %d latency %d",
                        if_name, btl->super.btl_bandwidth, btl->super.btl_latency));
#endif
            /* call bind to this (initial) addr */
            opal_ifindextoaddr(if_index, (struct sockaddr*)&next_ifaddr, sizeof(next_ifaddr));
            next_ifaddr.sin_family = AF_INET;
#ifdef FREEBSD
            next_ifaddr.sin_len = sizeof(struct sockaddr);
#endif
            next_ifaddr.sin_port = 0;

            if(bind(mca_btl_sctp_component.sctp_listen_sd, (struct sockaddr *) &next_ifaddr, len) < 0) {
                return OMPI_ERR_FATAL;
            }

            /* resolve system assignend port */
            addrlen = sizeof(struct sockaddr_in);
            if(getsockname(mca_btl_sctp_component.sctp_listen_sd, (struct sockaddr*)&next_ifaddr, &addrlen) < 0) {
                BTL_ERROR(("getsockname() failed with errno=%d", opal_socket_errno));
                return OMPI_ERROR;
            }
            /* need to get the port after the first bind call for subsequent
             *  sctp_bindx calls.
             */
            mca_btl_sctp_component.sctp_listen_port = next_ifaddr.sin_port;
            
        } 

        else {
            next_ifaddr.sin_port = htons((unsigned short) mca_btl_sctp_component.sctp_listen_port);

            /* add this addr to bindx */
            opal_ifindextoaddr(if_index, (struct sockaddr*)&next_ifaddr, sizeof(next_ifaddr));
            next_ifaddr.sin_family = AF_INET;
#ifdef FREEBSD
            next_ifaddr.sin_len = sizeof(struct sockaddr);
#endif

            if(sctp_bindx(mca_btl_sctp_component.sctp_listen_sd, (struct sockaddr *) &next_ifaddr,
                          1, SCTP_BINDX_ADD_ADDR) < 0) {
                return OMPI_ERR_FATAL;
            }

        }

        return OMPI_SUCCESS;
    }
}
/*
 * Connect the local ends of all qp's to the remote side.
 *
 * For each QP index below mca_btl_openib_component.num_qps the local QP of
 * the endpoint is moved to RTR and then to RTS with ibv_modify_qp(), using
 * the remote QP number, PSN, LID and MTU stored in endpoint->rem_info and
 * the tunables from mca_btl_openib_component.
 *
 * Returns OMPI_SUCCESS when every QP reached RTS.  Returns OMPI_ERROR as
 * soon as one ibv_modify_qp() call fails; QPs handled in earlier
 * iterations are left in the state they reached.
 */
static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint)
{
    int i;
    int rc;
    mca_btl_openib_module_t* openib_btl =
        (mca_btl_openib_module_t*)endpoint->endpoint_btl;
    /* The path MTU is the smaller of the local device MTU and the MTU
     * reported by the remote side; it is the same for every QP. */
    const enum ibv_mtu mtu = (openib_btl->device->mtu < endpoint->rem_info.rem_mtu) ?
        openib_btl->device->mtu : endpoint->rem_info.rem_mtu;

    for (i = 0; i < mca_btl_openib_component.num_qps; i++) {
        struct ibv_qp_attr attr;
        struct ibv_qp* qp = endpoint->qps[i].qp->lcl_qp;

        /* --- transition to RTR --- */
        memset(&attr, 0, sizeof(attr));
        attr.qp_state           = IBV_QPS_RTR;
        attr.path_mtu           = mtu;
        attr.dest_qp_num        = endpoint->rem_info.rem_qps[i].rem_qp_num;
        attr.rq_psn             = endpoint->rem_info.rem_qps[i].rem_psn;
        attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
        attr.min_rnr_timer      = mca_btl_openib_component.ib_min_rnr_timer;
        attr.ah_attr.is_global     = 0;
        attr.ah_attr.dlid          = endpoint->rem_info.rem_lid;
        attr.ah_attr.sl            = mca_btl_openib_component.ib_service_level;
        attr.ah_attr.src_path_bits = openib_btl->src_path_bits;
        attr.ah_attr.port_num      = openib_btl->port_num;
        /* JMS to be filled in later dynamically */
        attr.ah_attr.static_rate   = 0;

        if (mca_btl_openib_component.verbose) {
            BTL_OUTPUT(("Set MTU to IBV value %d (%s bytes)", mtu,
                        (mtu == IBV_MTU_256) ? "256" :
                        (mtu == IBV_MTU_512) ? "512" :
                        (mtu == IBV_MTU_1024) ? "1024" :
                        (mtu == IBV_MTU_2048) ? "2048" :
                        (mtu == IBV_MTU_4096) ? "4096" :
                        "unknown (!)"));
        }

        /* ibv_modify_qp() reports failure through its return value (the
         * errno code), so print that instead of a possibly stale errno. */
        rc = ibv_modify_qp(qp, &attr,
                           IBV_QP_STATE              |
                           IBV_QP_AV                 |
                           IBV_QP_PATH_MTU           |
                           IBV_QP_DEST_QPN           |
                           IBV_QP_RQ_PSN             |
                           IBV_QP_MAX_DEST_RD_ATOMIC |
                           IBV_QP_MIN_RNR_TIMER);
        if (0 != rc) {
            BTL_ERROR(("error modifing QP to RTR errno says %s",
                       strerror(rc)));
            return OMPI_ERROR;
        }

        /* --- transition to RTS --- */
        /* attr still carries the RTR fields; only the ones named in the
         * mask below are applied by this call. */
        attr.qp_state       = IBV_QPS_RTS;
        attr.timeout        = mca_btl_openib_component.ib_timeout;
        attr.retry_cnt      = mca_btl_openib_component.ib_retry_count;
        /* On PP QPs we have SW flow control, no need for rnr retries. Setting
         * it to zero helps to catch bugs */
        attr.rnr_retry      = BTL_OPENIB_QP_TYPE_PP(i) ? 0 :
            mca_btl_openib_component.ib_rnr_retry;
        attr.sq_psn         = endpoint->qps[i].qp->lcl_psn;
        /* NOTE(review): max_rd_atomic is taken from ib_max_rdma_dst_ops, the
         * same tunable used for max_dest_rd_atomic above -- confirm that is
         * intended. */
        attr.max_rd_atomic  = mca_btl_openib_component.ib_max_rdma_dst_ops;

        rc = ibv_modify_qp(qp, &attr,
                           IBV_QP_STATE              |
                           IBV_QP_TIMEOUT            |
                           IBV_QP_RETRY_CNT          |
                           IBV_QP_RNR_RETRY          |
                           IBV_QP_SQ_PSN             |
                           IBV_QP_MAX_QP_RD_ATOMIC);
        if (0 != rc) {
            BTL_ERROR(("error modifying QP to RTS errno says %s",
                       strerror(rc)));
            return OMPI_ERROR;
        }
    }

    return OMPI_SUCCESS;
}