void * mad_rpc_open_port(char *dev_name, int dev_port, int *mgmt_classes, int num_classes) { struct ibmad_port *p; int port_id; if (num_classes >= MAX_CLASS) { IBWARN("too many classes %d requested", num_classes); errno = EINVAL; return NULL; } if (umad_init() < 0) { IBWARN("can't init UMAD library"); errno = ENODEV; return NULL; } p = malloc(sizeof(*p)); if (!p) { errno = ENOMEM; return NULL; } memset(p, 0, sizeof(*p)); if ((port_id = umad_open_port(dev_name, dev_port)) < 0) { IBWARN("can't open UMAD port (%s:%d)", dev_name, dev_port); if (!errno) errno = EIO; free(p); return NULL; } while (num_classes--) { int rmpp_version = 0; int mgmt = *mgmt_classes++; int agent; if (mgmt == IB_SA_CLASS) rmpp_version = 1; if (mgmt < 0 || mgmt >= MAX_CLASS || (agent = mad_register_port_client(port_id, mgmt, rmpp_version)) < 0) { IBWARN("client_register for mgmt %d failed", mgmt); if(!errno) errno = EINVAL; umad_close_port(port_id); free(p); return NULL; } p->class_agents[mgmt] = agent; } p->port_id = port_id; return p; }
/**
 * =========================================================================
 * Init sub libraries like umad here.
 *
 * The work is done at most once: after the first successful call, later
 * calls return immediately.
 *
 * Returns 0 on success, EIO if umad_init() fails.
 */
static int init_sub_lib(void)
{
	static int initialized = 0;

	if (!initialized) {
		if (umad_init() < 0) {
			OUTPUT_ERROR("can't init UMAD library\n");
			return EIO;
		}
		initialized = 1;
	}

	return 0;
}
/**
 * Open a UMAD port and register an SA-class agent on it.
 *
 * @param CA     CA name handed to umad_open_port().
 * @param port   Port number handed to umad_open_port().
 * @param fd     Out: the value returned by umad_open_port().
 * @param agent  Out: the value returned by umad_register().
 *
 * Every failure (library init, port open, agent registration) is reported
 * through FATAL().
 */
static void setup(char *CA, SInt16 port, int *fd, int *agent)
{
	int rc;

	/*
	 * Report an initialisation failure here instead of letting the
	 * following umad_open_port() fail with a less specific error.
	 */
	rc = umad_init();
	if (UNLIKELY(rc < 0)) {
		FATAL("umad_init() failed with error %d", rc);
	}

	*fd = umad_open_port(CA, port);
	if (UNLIKELY((*fd) < 0)) {
		FATAL("umad_open_port() failed with error %d: %s",
		      -(*fd), strerror(-(*fd)));
	}

#define MGMT_VERSION MAD_HEADER_CLASS_VERSION_SA
#define RMPP_VERSION 1

	*agent = umad_register((*fd), IB_SA_CLASS, MGMT_VERSION, RMPP_VERSION,
			       NULL);
	if (UNLIKELY((*agent) < 0)) {
		FATAL("umad_register() failed with error %d: %s",
		      -(*agent), strerror(-(*agent)));
	}
}
/*
 * Initialise the library-wide MAD port.
 *
 * Opens dev_name:dev_port, stores the resulting id in mad_portid and
 * registers a client for each of the num_classes ids in mgmt_classes.
 * Any failure is reported through IBPANIC().
 */
void
madrpc_init(char *dev_name, int dev_port, int *mgmt_classes, int num_classes)
{
	if (umad_init() < 0) {
		IBPANIC("can't init UMAD library");
	}

	mad_portid = umad_open_port(dev_name, dev_port);
	if (mad_portid < 0) {
		IBPANIC("can't open UMAD port (%s:%d)", dev_name, dev_port);
	}

	if (num_classes >= MAX_CLASS) {
		IBPANIC("too many classes %d requested", num_classes);
	}

	for (; num_classes != 0; num_classes--, mgmt_classes++) {
		int mgmt = *mgmt_classes;
		/* Only the SA class is registered with RMPP enabled. */
		int rmpp_version = (mgmt == IB_SA_CLASS) ? 1 : 0;

		if (mad_register_client(mgmt, rmpp_version) < 0) {
			IBPANIC("client_register for mgmt class %d failed", mgmt);
		}
	}
}
/** * use libumad to discover IB ports */ static void init_ib_counter( ) { char names[20][UMAD_CA_NAME_LEN]; int n, i; char *ca_name; umad_ca_t ca; int r; int portnum; if ( umad_init( ) < 0 ) { fprintf( stderr, "can't init UMAD library\n" ); exit( 1 ); } if ( ( n = umad_get_cas_names( ( void * ) names, UMAD_CA_NAME_LEN ) ) < 0 ) { fprintf( stderr, "can't list IB device names\n" ); exit( 1 ); } for ( i = 0; i < n; i++ ) { ca_name = names[i]; if ( ( r = umad_get_ca( ca_name, &ca ) ) < 0 ) { fprintf( stderr, "can't read ca from IB device\n" ); exit( 1 ); } if ( !ca.node_type ) continue; /* port numbers are '1' based in OFED */ for ( portnum = 1; portnum <= ca.numports; portnum++ ) addIBPort( ca.ca_name, ca.ports[portnum] ); } }
/******************************************************************************
 * join_multicast_group
 *
 * Send a single SubnAdm MAD built by prepare_mcast_mad() for the given method
 * and wait for the response.
 *
 * method - SUBN_ADM_METHOD_SET is treated as a join: the mlid is read from
 *          the response into params->mlid and MCAST_IS_JOINED is set in
 *          params->mcast_state.  Any other method is treated as a leave and
 *          clears MCAST_IS_JOINED.
 * params - supplies ib_devname, ib_port, sm_lid and sm_sl; updated as above.
 *
 * Returns 0 on success.  Returns 1 if any step fails, including failure to
 * unregister the agent or close the port during cleanup.
 ******************************************************************************/
int join_multicast_group(subn_adm_method method,struct mcast_parameters *params)
{
	int portid = -1;
	int agentid = -1;
	void *umad_buff = NULL;
	void *mad = NULL;
	int length = MAD_SIZE;
	/* Assume failure; only the fully successful path clears this. */
	int test_result = 1;

	// mlid will be assigned to the new LID after the join
	if (umad_init() < 0) {
		fprintf(stderr, "failed to init the UMAD library\n");
		goto cleanup;
	}

	/* cast away the const qualifier: umad_open_port() takes a char * */
	portid = umad_open_port((char*)params->ib_devname,params->ib_port);
	if (portid < 0) {
		fprintf(stderr,"failed to open UMAD port %d\n",params->ib_port);
		goto cleanup;
	}

	agentid = umad_register(portid,MANAGMENT_CLASS_SUBN_ADM, 2, 0, 0);
	if (agentid < 0) {
		fprintf(stderr,"failed to register UMAD agent for MADs\n");
		goto cleanup;
	}

	umad_buff = umad_alloc(1, umad_size() + MAD_SIZE);
	if (!umad_buff) {
		fprintf(stderr, "failed to allocate MAD buffer\n");
		goto cleanup;
	}

	mad = umad_get_mad(umad_buff);
	prepare_mcast_mad(method,params,(struct sa_mad_packet_t *)mad);

	if (umad_set_addr(umad_buff,params->sm_lid,1,params->sm_sl,QP1_WELL_KNOWN_Q_KEY) < 0) {
		fprintf(stderr, "failed to set the destination address of the SMP\n");
		goto cleanup;
	}

	if (umad_send(portid,agentid,umad_buff,MAD_SIZE,100,5) < 0) {
		fprintf(stderr, "failed to send MAD\n");
		goto cleanup;
	}

	if (umad_recv(portid,umad_buff,&length,5000) < 0) {
		fprintf(stderr, "failed to receive MAD response\n");
		goto cleanup;
	}

	if (check_mad_status((struct sa_mad_packet_t*)mad)) {
		fprintf(stderr, "failed to get mlid from MAD\n");
		goto cleanup;
	}

	if (method == SUBN_ADM_METHOD_SET) {
		// "Join multicast group" message was sent
		get_mlid_from_mad((struct sa_mad_packet_t*)mad,&params->mlid);
		params->mcast_state |= MCAST_IS_JOINED;
	} else {
		// "Leave multicast group" message was sent
		params->mcast_state &= ~MCAST_IS_JOINED;
	}

	test_result = 0;

cleanup:
	if (umad_buff)
		umad_free(umad_buff);

	if (portid >= 0) {
		if (agentid >= 0) {
			if (umad_unregister(portid, agentid)) {
				fprintf(stderr, "failed to deregister UMAD agent for MADs\n");
				test_result = 1;
			}
		}

		if (umad_close_port(portid)) {
			fprintf(stderr, "failed to close UMAD portid\n");
			test_result = 1;
		}
	}

	return test_result;
}
/*
 * Command-line entry point.
 *
 * Options: -d debug, -l list device names only, -s short output,
 * -p list ports, -V print version and exit; anything else calls usage().
 * Positional arguments: [ca_name [portnum]].
 *
 * Without a ca_name every device reported by umad_get_cas_names() is
 * processed; with one, only that device is.  Returns 0 on success; failures
 * are reported through IBPANIC().
 */
int main(int argc, char *argv[])
{
	char names[UMAD_MAX_DEVICES][UMAD_CA_NAME_LEN];
	int dev_port = -1;
	int list_only = 0, short_format = 0, list_ports = 0;
	int n, i;

	static char const str_opts[] = "dlspVhu";
	static const struct option long_opts[] = {
		{ "debug", 0, 0, 'd'},
		{ "list_of_cas", 0, 0, 'l'},
		{ "short", 0, 0, 's'},
		{ "port_list", 0, 0, 'p'},
		{ "Version", 0, 0, 'V'},
		{ "help", 0, 0, 'h'},
		{ "usage", 0, 0, 'u'},
		{ 0, 0, 0, 0 }
	};

	argv0 = argv[0];

	while (1) {
		int ch = getopt_long(argc, argv, str_opts, long_opts, NULL);
		if ( ch == -1 )
			break;
		switch(ch) {
		case 'd':
			debug++;
			break;
		case 'l':
			list_only++;
			break;
		case 's':
			short_format++;
			break;
		case 'p':
			list_ports++;
			break;
		case 'V':
			fprintf(stderr, "%s %s\n", argv0, get_build_version() );
			exit(-1);
		default:
			usage();
			break;
		}
	}
	argc -= optind;
	argv += optind;

	if (argc > 1)
		dev_port = strtol(argv[1], 0, 0);

	if (umad_init() < 0)
		IBPANIC("can't init UMAD library");

	if ((n = umad_get_cas_names(names, UMAD_MAX_DEVICES)) < 0)
		IBPANIC("can't list IB device names");

	if (argc) {
		for (i = 0; i < n; i++)
			if (!strncmp(names[i], argv[0], sizeof names[i]))
				break;
		if (i >= n)
			IBPANIC("'%s' IB device can't be found", argv[0]);

		/*
		 * From here on the selection is addressed as names[0..n-1] with
		 * n == 1, so the requested name has to live in slot 0 (not in the
		 * slot where it was found) and must be NUL-terminated.
		 */
		strncpy(names[0], argv[0], sizeof(names[0]) - 1);
		names[0][sizeof(names[0]) - 1] = '\0';
		n = 1;
	}

	if (list_ports) {
		if (ports_list(names, n) < 0)
			IBPANIC("can't list ports");
		return 0;
	}

	if (!list_only && argc) {
		if (ca_stat(argv[0], dev_port, short_format) < 0)
			IBPANIC("stat of IB device '%s' failed", argv[0]);
		return 0;
	}

	for (i = 0; i < n; i++) {
		if (list_only)
			printf("%s\n", names[i]);
		else if (ca_stat(names[i], -1, short_format) < 0)
			IBPANIC("stat of IB device '%s' failed", names[i]);
	}

	return 0;
}
/**
 * Initialize the HCAs
 * Look at rdma_open_hca() & rdma_iba_hca_init_noqp() in
 * mvapich2/trunk/src/mpid/ch3/channels/mrail/src/gen2/rdma_iba_priv.c
 *
 * Store all the HCA info in mv2_nem_dev_info_t->hca[hca_num]
 *
 * Steps performed:
 *   1. zero hca_list and fetch the verbs device list;
 *   2. initialise libumad;
 *   3. clamp num_devices to MAX_NUM_HCAS and ib_hca_num_hcas to num_devices;
 *   4. for each of the ib_hca_num_hcas entries: pick a device, open it,
 *      allocate a protection domain and determine the HCA type;
 *   5. optionally re-select entry 0 by scanning for a device with an active
 *      port (Trac #376).
 *
 * Output:
 *   hca_list: fill it with the HCAs information
 *
 * Returns mpi_errno: MPI_SUCCESS unless one of the MPIU_ERR_* macros or
 * get_hca_type()/rdma_cm_get_hca_type() set an error.  The device list is
 * freed on every exit path.
 *
 * \see hca_list
 */
int MPID_nem_ib_init_hca() {
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_INIT_HCA);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_INIT_HCA);
    struct ibv_device *ib_dev = NULL;
    struct ibv_device **dev_list = NULL;
    int nHca;
    int num_devices = 0;
#ifdef CRC_CHECK
    gen_crc_table();
#endif
    memset( hca_list, 0, sizeof(hca_list) );
    /* Get the list of devices */
    dev_list = ibv_get_device_list(&num_devices);
    if (dev_list==NULL) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s", "No IB device found");
    }
    if (umad_init() < 0)
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s", "Can't init UMAD library");
    /* Runtime checks */
    /* NOTE(review): if MPIU_Assert is active, the clamp below is only
     * reachable when the assertion is compiled out - confirm intent. */
    MPIU_Assert( num_devices<=MAX_NUM_HCAS );
    if ( num_devices> MAX_NUM_HCAS) {
        MPIU_Error_printf( "WARNING: found %d IB devices, the maximum is %d (MAX_NUM_HCAS). ", num_devices, MAX_NUM_HCAS);
        num_devices = MAX_NUM_HCAS;
    }
    if ( ib_hca_num_hcas > num_devices) {
        /* The string literal below spans a physical line break in the file;
         * it is kept exactly as found. */
        MPIU_Error_printf( "WARNING: user requested %d IB devices, the available number is %d. 
", ib_hca_num_hcas, num_devices);
        ib_hca_num_hcas = num_devices;
    }
    MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] Found %d HCAs\n", num_devices);
    MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] User requested %d\n", ib_hca_num_hcas);
    /* Retrieve information for each found device */
    for (nHca = 0; nHca < ib_hca_num_hcas; nHca++) {
        /* Check for user choice */
        /* NOTE(review): the middle test is true when rdma_iba_hca DIFFERS
         * from RDMA_IBA_NULL_HCA, which reads opposite to the "hasn't
         * specified" comment inside the branch - confirm intended polarity. */
        if( (rdma_iba_hca[0]==0) || (strncmp(rdma_iba_hca, RDMA_IBA_NULL_HCA, 32)!=0) || (ib_hca_num_hcas > 1)) {
            /* User hasn't specified any HCA name, or the number of HCAs is greater than 1 */
            ib_dev = dev_list[nHca];
        } else {
            /* User specified a HCA, try to look for it */
            int dev_count;
            dev_count = 0;
            /* Walk the device list up to its NULL entry, comparing at most
             * 32 characters of each device name. */
            while(dev_list[dev_count]) {
                if(!strncmp(ibv_get_device_name(dev_list[dev_count]), rdma_iba_hca, 32)) {
                    ib_dev = dev_list[dev_count];
                    break;
                }
                dev_count++;
            }
        }
        /* Check if device has been identified */
        hca_list[nHca].ib_dev = ib_dev;
        if (!ib_dev) {
            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s", "No IB device found");
        }
        MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] HCA device %d found\n", nHca);
        hca_list[nHca].nic_context = ibv_open_device(ib_dev);
        if (hca_list[nHca].nic_context==NULL) {
            MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**fail", "%s %d", "Failed to open HCA number", nHca);
        }
        hca_list[nHca].ptag = ibv_alloc_pd(hca_list[nHca].nic_context);
        if (!hca_list[nHca].ptag) {
            MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**fail", "%s%d", "Failed to alloc pd number ", nHca);
        }
        /* Set the hca type */
        /* With RDMA_CM defined, the get_hca_type() call after #endif is the
         * else-branch of the iWARP test; without it, the call is
         * unconditional. */
#if defined(RDMA_CM)
        if (process_info.use_iwarp_mode) {
            if ((mpi_errno = rdma_cm_get_hca_type(process_info.use_iwarp_mode, &process_info.hca_type)) != MPI_SUCCESS) {
                MPIU_ERR_POP(mpi_errno);
            }
            if (process_info.hca_type == CHELSIO_T3) {
                process_info.use_iwarp_mode = 1;
            }
        } else
#endif /* defined(RDMA_CM) */
        mpi_errno = get_hca_type(hca_list[nHca].ib_dev, hca_list[nHca].nic_context, &hca_list[nHca].hca_type);
        if (mpi_errno != MPI_SUCCESS) {
            fprintf(stderr, "[%s, %d] Error in get_hca_type",
                    __FILE__, __LINE__ );
            MPIU_ERR_POP(mpi_errno);
        }
    }
    /* NOTE(review): when the loop above runs to completion nHca equals
     * ib_hca_num_hcas, so with ib_hca_num_hcas==1 the test below reads
     * hca_list[1].ib_dev, an entry the loop did not fill - confirm whether
     * hca_list[0].ib_dev was intended. */
    if (!strncmp(rdma_iba_hca, RDMA_IBA_NULL_HCA, 32) && (ib_hca_num_hcas==1) && (num_devices > nHca) && (rdma_find_active_port(hca_list[0].nic_context, hca_list[nHca].ib_dev)==-1)) {
        /* Trac #376 - There are multiple rdma capable devices (num_devices) in
         * the system. The user has asked us to use ANY (!strncmp) ONE device
         * (rdma_num_hcas), and the first device does not have an active port. So
         * try to find some other device with an active port. */
        int j;
        /* NOTE(review): no ibv_close_device() is visible for contexts opened
         * here (nor for the one opened in the loop above) and the scan does
         * not stop at the first device with an active port - confirm whether
         * that is intentional. */
        for (j = 0; dev_list[j]; j++) {
            ib_dev = dev_list[j];
            if (ib_dev) {
                hca_list[0].nic_context = ibv_open_device(ib_dev);
                if (!hca_list[0].nic_context) {
                    /* Go to next device */
                    continue;
                }
                if (rdma_find_active_port(hca_list[0].nic_context, ib_dev)!=-1) {
                    hca_list[0].ib_dev = ib_dev;
                    hca_list[0].ptag = ibv_alloc_pd(hca_list[0].nic_context);
                    if (!hca_list[0].ptag) {
                        MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**fail", "%s%d", "Failed to alloc pd number ", nHca);
                    }
                }
            }
        }
    }
fn_exit:
    /* Clean up before exit */
    if (dev_list!=NULL) ibv_free_device_list(dev_list);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_INIT_HCA);
    return mpi_errno;
fn_fail:
    /* Error paths share the cleanup above. */
    goto fn_exit;
}
/**
 * Get the type of a device, from name.
 *
 * The decision is made on the prefix of dev_name:
 *   - "mlx4" / "mthca": libumad is queried for the CA, and the type is
 *     refined from the value returned by get_rate() (and, for "mthca", from
 *     the first four characters of the CA type string);
 *   - "ipath", "ehca", "cxgb3", "cxgb4": mapped directly;
 *   - anything else: UNKNOWN_HCA.
 *
 * \param dev_name  device name to classify
 * \param hca_type  output: the detected type.  It is always written; on a
 *                  libumad failure it holds MLX_PCI_X.
 *
 * \return MPI_SUCCESS, or the error set by MPIU_ERR_SETANDJUMP when
 *         umad_init(), umad_get_ca() or get_rate() fails.
 */
int hcaNameToType(char *dev_name, HCA_Type* hca_type)
{
    MPIDI_STATE_DECL(MPID_STATE_HCANAMETOTYPE);
    MPIDI_FUNC_ENTER(MPID_STATE_HCANAMETOTYPE);
    int mpi_errno = MPI_SUCCESS;
    int rate;

    *hca_type = UNKNOWN_HCA;

    if (!strncmp(dev_name, "mlx4", 4) || !strncmp(dev_name, "mthca", 5)) {
        umad_ca_t umad_ca;

        *hca_type = MLX_PCI_X;

        if (umad_init() < 0) {
            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**umadinit");
        }

        memset(&umad_ca, 0, sizeof(umad_ca_t));

        if (umad_get_ca(dev_name, &umad_ca) < 0) {
            /* umad_init() succeeded, so balance it on this path as well. */
            umad_done();
            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**umadgetca");
        }

        rate = get_rate(&umad_ca);
        if (!rate) {
            umad_release_ca(&umad_ca);
            umad_done();
            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**umadgetrate");
        }

        if (!strncmp(dev_name, "mthca", 5)) {
            *hca_type = MLX_PCI_X;

            if (!strncmp(umad_ca.ca_type, "MT25", 4)) {
                switch (rate) {
                case 20:
                    *hca_type = MLX_PCI_EX_DDR;
                    break;
                case 10:
                    *hca_type = MLX_PCI_EX_SDR;
                    break;
                default:
                    *hca_type = MLX_PCI_EX_SDR;
                    break;
                }
            } else if (!strncmp(umad_ca.ca_type, "MT23", 4)) {
                *hca_type = MLX_PCI_X;
            } else {
                *hca_type = MLX_PCI_EX_SDR;
            }
        } else { /* mlx4 */
            switch(rate) {
            case 40:
                *hca_type = MLX_CX_QDR;
                break;
            case 20:
                *hca_type = MLX_CX_DDR;
                break;
            case 10:
                *hca_type = MLX_CX_SDR;
                break;
            default:
                *hca_type = MLX_CX_SDR;
                break;
            }
        }

        umad_release_ca(&umad_ca);
        umad_done();
    } else if(!strncmp(dev_name, "ipath", 5)) {
        *hca_type = PATH_HT;
    } else if(!strncmp(dev_name, "ehca", 4)) {
        *hca_type = IBM_EHCA;
    } else if (!strncmp(dev_name, "cxgb3", 5)) {
        *hca_type = CHELSIO_T3;
    } else if (!strncmp(dev_name, "cxgb4", 5)) {
        *hca_type = CHELSIO_T4;
    } else {
        *hca_type = UNKNOWN_HCA;
    }

    /* Reached on both success and failure. */
fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_HCANAMETOTYPE);
    return mpi_errno;
}
int main(int argc, char *argv[]) { char names[UMAD_MAX_DEVICES][UMAD_CA_NAME_LEN]; int dev_port = -1; int n, i; const struct ibdiag_opt opts[] = { {"list_of_cas", 'l', 0, NULL, "list all IB devices"}, {"short", 's', 0, NULL, "short output"}, {"port_list", 'p', 0, NULL, "show port list"}, {0} }; char usage_args[] = "<ca_name> [portnum]"; const char *usage_examples[] = { "-l # list all IB devices", "mthca0 2 # stat port 2 of 'mthca0'", NULL }; ibdiag_process_opts(argc, argv, NULL, "CDeGKLPsty", opts, process_opt, usage_args, usage_examples); argc -= optind; argv += optind; if (argc > 1) dev_port = strtol(argv[1], 0, 0); if (umad_init() < 0) IBPANIC("can't init UMAD library"); if ((n = umad_get_cas_names(names, UMAD_MAX_DEVICES)) < 0) IBPANIC("can't list IB device names"); if (argc) { for (i = 0; i < n; i++) if (!strncmp(names[i], argv[0], sizeof names[i])) break; if (i >= n) IBPANIC("'%s' IB device can't be found", argv[0]); strncpy(names[0], argv[0], sizeof(names[0])-1); names[0][sizeof(names[0])-1] = '\0'; n = 1; } if (list_ports) { if (ports_list(names, n) < 0) IBPANIC("can't list ports"); return 0; } for (i = 0; i < n; i++) { if (list_only) printf("%s\n", names[i]); else if (ca_stat(names[i], dev_port, short_format) < 0) IBPANIC("stat of IB device '%s' failed", names[i]); } return 0; }