/*
 * Issue a performance-management query for attribute id
 * IB_GSI_PORT_COUNTERS.
 *
 * All arguments are handed to pma_query_via() untouched and its return
 * value is passed back to the caller as-is.
 */
uint8_t *
port_performance_query_via(void *rcvbuf, ib_portid_t *dest, int port,
			   unsigned timeout, const void *srcport)
{
	uint8_t *reply;

	reply = pma_query_via(rcvbuf, dest, port, timeout,
			      IB_GSI_PORT_COUNTERS, srcport);
	return reply;
}
/*
 * Issue a performance-management query for attribute id
 * IB_GSI_PORT_SAMPLES_CONTROL.
 *
 * All arguments are handed to pma_query_via() untouched and its return
 * value is passed back to the caller as-is.
 */
uint8_t *
port_samples_control_query_via(void *rcvbuf, ib_portid_t *dest, int port,
			       unsigned timeout, const void *srcport)
{
	uint8_t *reply;

	reply = pma_query_via(rcvbuf, dest, port, timeout,
			      IB_GSI_PORT_SAMPLES_CONTROL, srcport);
	return reply;
}
/*
 * Issue a performance-management query for attribute id CLASS_PORT_INFO.
 *
 * All arguments are handed to pma_query_via() untouched and its return
 * value is passed back to the caller as-is.
 */
uint8_t *
perf_classportinfo_query_via(void *rcvbuf, ib_portid_t *dest, int port,
			     unsigned timeout, const void *srcport)
{
	uint8_t *reply;

	reply = pma_query_via(rcvbuf, dest, port, timeout,
			      CLASS_PORT_INFO, srcport);
	return reply;
}
/*
 * Issue a performance-management query for attribute id
 * IB_GSI_PORT_SAMPLES_RESULT.
 *
 * All arguments are handed to pma_query_via() untouched and its return
 * value is passed back to the caller as-is.
 */
uint8_t *
port_samples_result_query_via(void *rcvbuf, ib_portid_t *dest, int port,
			      unsigned timeout, const void *srcport)
{
	uint8_t *reply;

	reply = pma_query_via(rcvbuf, dest, port, timeout,
			      IB_GSI_PORT_SAMPLES_RESULT, srcport);
	return reply;
}
/*
 * Query one performance attribute and append every non-zero field in
 * [start_field, end_field) to buf as " [<field name> == <value>]".
 *
 * buf/size    destination buffer and its capacity in bytes
 * portid      port to query; portnum is the port number sent in the query
 * node        unused here
 * node_name   only used in the warning printed when the query fails
 * attr_name   human-readable attribute name for the warning
 * attr_id     attribute id handed to pma_query_via()
 *
 * Returns the number of characters stored in buf (excluding the
 * terminating NUL), or 0 when the query fails.  The result is always
 * smaller than size (0 when size is 0), so it is safe to use as an offset
 * into buf; when the output does not fit it is truncated and the
 * remaining fields are dropped.
 */
static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
			  ibnd_node_t * node, char *node_name, int portnum,
			  const char *attr_name, uint16_t attr_id,
			  int start_field, int end_field)
{
	uint8_t pc[1024];
	size_t used = 0;
	int i;

	memset(pc, 0, sizeof(pc));

	if (!pma_query_via(pc, portid, portnum, ibd_timeout, attr_id,
			   ibmad_port)) {
		IBWARN("%s query failed on %s, %s port %d", attr_name,
		       node_name, portid2str(portid), portnum);
		summary.pma_query_failures++;
		return 0;
	}

	for (i = start_field; i < end_field; i++) {
		uint32_t val = 0;
		int len;

		mad_decode_field(pc, i, (void *)&val);
		if (!val)
			continue;

		/* No room left, not even for a terminator. */
		if (used >= size)
			break;

		len = snprintf(buf + used, size - used, " [%s == %u]",
			       mad_field_name(i), val);
		if (len < 0)
			break;

		/*
		 * snprintf() reports the length it WOULD have written.
		 * Adding that blindly would push the offset past the end
		 * of buf and make "size - used" wrap around on the next
		 * iteration, so clamp to what was really stored and stop.
		 */
		if ((size_t)len >= size - used) {
			used = size - 1;
			break;
		}
		used += (size_t)len;
	}

	return (int)used;
}
/** * read and reset IB counters (reset on demand) */ static int read_ib_counter( ) { uint32_t send_val; uint32_t recv_val; uint8_t pc[1024]; /* 32 bit counter FFFFFFFF */ uint32_t max_val = 4294967295; /* if it is bigger than this -> reset */ uint32_t reset_limit = max_val * 0.7; int mask = 0xFFFF; if ( active_ib_port == NULL ) return 0; /* reading cost ~70 mirco secs */ if ( !pma_query_via ( pc, &portid, ibportnum, ib_timeout, IB_GSI_PORT_COUNTERS, srcport ) ) { fprintf( stderr, "perfquery\n" ); exit( 1 ); } mad_decode_field( pc, IB_PC_XMT_BYTES_F, &send_val ); mad_decode_field( pc, IB_PC_RCV_BYTES_F, &recv_val ); /* multiply the numbers read by 4 as the IB port counters are not counting bytes. they always count 32dwords. see man page of perfquery for details internally a uint64_t ia used to sum up the values */ active_ib_port->sum_send_val += ( send_val - active_ib_port->last_send_val ) * 4; active_ib_port->sum_recv_val += ( recv_val - active_ib_port->last_recv_val ) * 4; active_ib_port->send_cntr->value = active_ib_port->sum_send_val; active_ib_port->recv_cntr->value = active_ib_port->sum_recv_val; if ( send_val > reset_limit || recv_val > reset_limit ) { /* reset cost ~70 mirco secs */ if ( !performance_reset_via ( pc, &portid, ibportnum, mask, ib_timeout, IB_GSI_PORT_COUNTERS, srcport ) ) { fprintf( stderr, "perf reset\n" ); exit( 1 ); } mad_decode_field( pc, IB_PC_XMT_BYTES_F, &active_ib_port->last_send_val ); mad_decode_field( pc, IB_PC_RCV_BYTES_F, &active_ib_port->last_recv_val ); } else { active_ib_port->last_send_val = send_val; active_ib_port->last_recv_val = recv_val; } return 0; }
/*
 * Fetch the error counters of one port and hand them to print_results().
 *
 * The basic IB_GSI_PORT_COUNTERS attribute is always queried.  The
 * extended attribute is queried as well when cap_mask advertises either
 * extended-width capability; otherwise print_results() receives a NULL
 * extended buffer.
 *
 * Returns 0 when a query fails (a warning is emitted and
 * summary.pma_query_failures is incremented), otherwise whatever
 * print_results() returns.
 */
static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
			char *node_name, ibnd_node_t * node, int portnum,
			int *header_printed)
{
	uint8_t basic[1024];
	uint8_t extended[1024];
	uint8_t *ext_counters = NULL;

	memset(basic, 0, sizeof(basic));
	memset(extended, 0, sizeof(extended));

	/* service level for this destination, looked up by LID */
	portid->sl = lid2sl_table[portid->lid];

	if (!pma_query_via(basic, portid, portnum, ibd_timeout,
			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
		       node_name, portid2str(portid), portnum);
		summary.pma_query_failures++;
		return 0;
	}

	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
		if (!pma_query_via(extended, portid, portnum, ibd_timeout,
				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
			IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d",
			       node_name, portid2str(portid), portnum);
			summary.pma_query_failures++;
			return 0;
		}
		ext_counters = extended;
	}

	if (!(cap_mask & IB_PM_PC_XMIT_WAIT_SUP)) {
		/* PortCounters:PortXmitWait is not supported: force it to 0 */
		uint32_t zero = 0;

		mad_encode_field(basic, IB_PC_XMT_WAIT_F, &zero);
	}

	return print_results(portid, node_name, node, basic, portnum,
			     header_printed, ext_counters, cap_mask);
}
/**
 * initialize one IB port so that we are able to read values from it
 *
 * Opens the MAD port for portdata->port_number on the default CA,
 * resolves the local port id, resets the port counters and records the
 * counter values from the reset reply as the baseline for later reads.
 *
 * Returns 0 on success.  Any failure prints a message to stderr and
 * terminates the process with exit(1).
 */
static int
init_ib_port( ib_port * portdata )
{
	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
		IB_PERFORMANCE_CLASS
	};
	/* NULL: no CA name is given to mad_rpc_open_port() */
	char *ca = NULL;
	static uint8_t pc[1024];
	int mask = 0xFFFF;

	srcport = mad_rpc_open_port( ca, portdata->port_number, mgmt_classes, 4 );
	if ( !srcport ) {
		/* Passing a NULL pointer for %s is undefined behaviour, so
		   substitute a printable placeholder. */
		fprintf( stderr, "Failed to open '%s' port '%d'\n",
				 ca ? ca : "(null)", portdata->port_number );
		exit( 1 );
	}

	if ( ib_resolve_self_via( &portid, &ibportnum, 0, srcport ) < 0 ) {
		fprintf( stderr, "can't resolve self port\n" );
		exit( 1 );
	}

	/* PerfMgt ClassPortInfo is a required attribute */
	/* might be redundant, could be left out for fast implementation */
	if ( !pma_query_via
		 ( pc, &portid, ibportnum, ib_timeout, CLASS_PORT_INFO, srcport ) ) {
		fprintf( stderr, "classportinfo query\n" );
		exit( 1 );
	}

	if ( !performance_reset_via
		 ( pc, &portid, ibportnum, mask, ib_timeout, IB_GSI_PORT_COUNTERS,
		   srcport ) ) {
		fprintf( stderr, "perf reset\n" );
		exit( 1 );
	}

	/* read the initial values */
	mad_decode_field( pc, IB_PC_XMT_BYTES_F, &portdata->last_send_val );
	portdata->sum_send_val = 0;
	mad_decode_field( pc, IB_PC_RCV_BYTES_F, &portdata->last_recv_val );
	portdata->sum_recv_val = 0;

	portdata->is_initialized = 1;

	return 0;
}
/*
 * Read the capability mask from the port's performance-management
 * ClassPortInfo.
 *
 * On success *cap_mask receives the two bytes found at offset 2 of the
 * reply, copied verbatim (no byte-order conversion is applied here), and
 * 0 is returned.  When the query fails a warning is emitted,
 * summary.pma_query_failures is incremented, *cap_mask is left untouched
 * and -1 is returned.
 */
static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
			  uint16_t * cap_mask)
{
	uint8_t cpi[1024];

	memset(cpi, 0, sizeof(cpi));

	/* PerfMgt ClassPortInfo is a required attribute */
	if (pma_query_via(cpi, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
			  ibmad_port)) {
		/* ClassPortInfo should be supported as part of libibmad */
		memcpy(cap_mask, &cpi[2], sizeof(*cap_mask));	/* CapabilityMask */
		return 0;
	}

	IBWARN("classportinfo query failed on %s, %s port %d", node_name,
	       portid2str(portid), portnum);
	summary.pma_query_failures++;
	return -1;
}
/*
 * Compatibility wrapper around pma_query_via().
 *
 * When HAVE_OFED_PMA_QUERY_VIA is defined the call is forwarded directly.
 * Otherwise the query is dispatched on the attribute id to the matching
 * attribute-specific helper; only CLASS_PORT_INFO and
 * IB_GSI_PORT_COUNTERS_EXT are handled, any other id logs an error and
 * yields NULL.
 *
 * Both variants honour the caller's rcvbuf, dest and timeout, so the
 * function behaves the same whichever branch is compiled in.
 */
static uint8_t *_slurm_pma_query_via(void *rcvbuf, ib_portid_t * dest, int port,
				     unsigned timeout, unsigned id,
				     const struct ibmad_port *srcport)
{
#ifdef HAVE_OFED_PMA_QUERY_VIA
	return pma_query_via(rcvbuf, dest, port, timeout, id, srcport);
#else
	switch (id) {
	case CLASS_PORT_INFO:
		return perf_classportinfo_query_via(rcvbuf, dest, port,
						    timeout, srcport);
	case IB_GSI_PORT_COUNTERS_EXT:
		return port_performance_ext_query_via(rcvbuf, dest, port,
						      timeout, srcport);
	default:
		error("_slurm_pma_query_via: unhandled id");
		return NULL;
	}
#endif
}
/*
 * Convenience form of pma_query_via() for callers that have no source
 * port handle: forwards every argument and passes NULL as the source
 * port.  The result of pma_query_via() is returned unchanged.
 */
uint8_t *
pma_query(void *rcvbuf, ib_portid_t *dest, int port, unsigned timeout,
	  unsigned id)
{
	uint8_t *reply;

	reply = pma_query_via(rcvbuf, dest, port, timeout, id, NULL);
	return reply;
}
/*
 * Print the data (byte/packet) counters of one port.
 *
 * When cap_mask advertises extended-width counters the
 * IB_GSI_PORT_COUNTERS_EXT attribute is queried and the extended fields
 * are printed; otherwise the basic IB_GSI_PORT_COUNTERS attribute and its
 * fields are used.  A header line for the node is printed once, tracked
 * through *header_printed.  portnum 0xFF is reported as "ALL".
 *
 * Returns 0 on success and 1 when the query fails (a warning is emitted
 * and summary.pma_query_failures is incremented).
 */
static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
			   char *node_name, ibnd_node_t * node, int portnum,
			   int *header_printed)
{
	uint8_t pc[1024];
	int i;
	int start_field;
	int end_field;
	int use_ext = 0;

	memset(pc, 0, sizeof(pc));

	/* service level for this destination, looked up by LID */
	portid->sl = lid2sl_table[portid->lid];

	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
			IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d",
			       node_name, portid2str(portid), portnum);
			summary.pma_query_failures++;
			return (1);
		}
		use_ext = 1;
		start_field = IB_PC_EXT_XMT_BYTES_F;
		if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
			end_field = IB_PC_EXT_RCV_MPKTS_F;
		else
			end_field = IB_PC_EXT_RCV_PKTS_F;
	} else {
		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
				   IB_GSI_PORT_COUNTERS, ibmad_port)) {
			IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
			       node_name, portid2str(portid), portnum);
			summary.pma_query_failures++;
			return (1);
		}
		start_field = IB_PC_XMT_BYTES_F;
		end_field = IB_PC_RCV_PKTS_F;
	}

	if (!*header_printed) {
		printf("Data Counters for 0x%" PRIx64 " \"%s\"\n", node->guid,
		       node_name);
		*header_printed = 1;
	}

	if (portnum == 0xFF)
		printf("   GUID 0x%" PRIx64 " port ALL:", node->guid);
	else
		printf("   GUID 0x%" PRIx64 " port %d:", node->guid, portnum);

	/* end_field is inclusive */
	for (i = start_field; i <= end_field; i++) {
		uint64_t val64 = 0;
		float val = 0;
		char *unit;
		int data = 0;

		if (use_ext) {
			mad_decode_field(pc, i, (void *)&val64);
		} else {
			/*
			 * The basic PortCounters data fields are 32 bits
			 * wide and libibmad's decoder stores a uint32_t for
			 * them.  Decode into a correctly sized object and
			 * widen afterwards; decoding straight into the
			 * uint64_t would fill the wrong half on big-endian
			 * hosts.
			 */
			uint32_t val32 = 0;

			mad_decode_field(pc, i, (void *)&val32);
			val64 = val32;
		}

		/* byte counters get data-size units, packet counters do not */
		if (i == IB_PC_EXT_XMT_BYTES_F || i == IB_PC_EXT_RCV_BYTES_F ||
		    i == IB_PC_XMT_BYTES_F || i == IB_PC_RCV_BYTES_F)
			data = 1;

		unit = conv_cnt_human_readable(val64, &val, data);
		printf(" [%s == %" PRIu64 " (%5.3f%s)]", mad_field_name(i),
		       val64, val, unit);
	}
	printf("\n");

	if (portnum != 0xFF && port_config)
		print_port_config(node_name, node, portnum);

	return (0);
}
static void collect_hca_port(struct stats *stats, char *hca_name, int hca_port) { struct ibmad_port *mad_port = NULL; int mad_timeout = 15; int mad_classes[] = { IB_SMI_DIRECT_CLASS, IB_PERFORMANCE_CLASS, }; mad_port = mad_rpc_open_port(hca_name, hca_port, mad_classes, 2); if (mad_port == NULL) { ERROR("cannot open MAD port for HCA `%s' port %d\n", hca_name, hca_port); goto out; } /* For reasons we don't understand, PMA queries can only be LID addressed. But we don't know the LID of the switch to which the HCA is connected, so we send a SMP on the directed route 0,1 and ask the port to identify itself. */ ib_portid_t sw_port_id = { .drpath = { .cnt = 1, .p = { 0, 1, }, }, }; uint8_t sw_info[64]; memset(sw_info, 0, sizeof(sw_info)); if (smp_query_via(sw_info, &sw_port_id, IB_ATTR_PORT_INFO, 0, mad_timeout, mad_port) == NULL) { ERROR("cannot query port info: %m\n"); goto out; } int sw_lid, sw_port; mad_decode_field(sw_info, IB_PORT_LID_F, &sw_lid); mad_decode_field(sw_info, IB_PORT_LOCAL_PORT_F, &sw_port); printf("IB_ATTR_PORT_INFO(drpath.p = {0, 1}): switch_lid %d, switch_local_port %d\n", sw_lid, sw_port); sw_port_id.lid = sw_lid; uint8_t sw_pma[1024]; memset(sw_pma, 0, sizeof(sw_pma)); if (pma_query_via(sw_pma, &sw_port_id, sw_port, mad_timeout, IB_GSI_PORT_COUNTERS_EXT, mad_port) == NULL) { ERROR("cannot query performance counters of switch LID %d, port %d: %m\n", sw_lid, sw_port); goto out; } uint64_t sw_rx_bytes, sw_rx_packets, sw_tx_bytes, sw_tx_packets; mad_decode_field(sw_pma, IB_PC_EXT_RCV_BYTES_F, &sw_rx_bytes); mad_decode_field(sw_pma, IB_PC_EXT_RCV_PKTS_F, &sw_rx_packets); mad_decode_field(sw_pma, IB_PC_EXT_XMT_BYTES_F, &sw_tx_bytes); mad_decode_field(sw_pma, IB_PC_EXT_XMT_PKTS_F, &sw_tx_packets); TRACE("sw_rx_bytes %lu, sw_rx_packets %lu, sw_tx_bytes %lu, sw_tx_packets %lu\n", sw_rx_bytes, sw_rx_packets, sw_tx_bytes, sw_tx_packets); /* The transposition of tx and rx is intentional: the switch port receives what we send, and conversely. 
*/ stats_set(stats, "rx_bytes", sw_tx_bytes); stats_set(stats, "rx_packets", sw_tx_packets); stats_set(stats, "tx_bytes", sw_rx_bytes); stats_set(stats, "tx_packets", sw_rx_packets); out: if (mad_port != NULL) mad_rpc_close_port(mad_port); }