/* Handle one async event on the umad port and refresh the cached port data. */
static void update_umad_port_cache(struct oib_port *port)
{
    int rc;
    struct ibv_async_event event;

    /* If no event could be retrieved, do not ack or inspect an uninitialized struct. */
    if (ibv_get_async_event(port->verbs_ctx, &event))
        return;
    ibv_ack_async_event(&event);

    switch (event.event_type) {
    case IBV_EVENT_PORT_ACTIVE:
    case IBV_EVENT_PORT_ERR:
    case IBV_EVENT_LID_CHANGE:
    case IBV_EVENT_PKEY_CHANGE:
        rc = cache_port_details(port);
        if (rc != 0)
            OUTPUT_ERROR("umad port cache data invalid!\n");
        break;

    case IBV_EVENT_SM_CHANGE:
    case IBV_EVENT_CLIENT_REREGISTER:
        if (reregister_traps(port))
            OUTPUT_ERROR("failed to reregister traps.\n");
        rc = cache_port_details(port);
        if (rc != 0)
            OUTPUT_ERROR("umad port cache data invalid!\n");
        break;

    case IBV_EVENT_CQ_ERR:
        OUTPUT_ERROR("got IBV_EVENT_CQ_ERR\n");
        break;

    default:
        break;
    }
}
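The body of cache_port_details() is not shown above. As a rough illustration of what such a refresh typically does after a port, LID, or PKEY change event, here is a minimal hypothetical sketch; the helper name refresh_port_cache and the struct my_port_cache are illustrative and not part of the project above.

/* Hypothetical sketch: re-read port attributes and GID 0 after a port event. */
#include <stdint.h>
#include <infiniband/verbs.h>

struct my_port_cache {
    uint16_t lid;
    enum ibv_port_state state;
    union ibv_gid gid;
};

static int refresh_port_cache(struct ibv_context *ctx, uint8_t port_num,
                              struct my_port_cache *cache)
{
    struct ibv_port_attr attr;

    if (ibv_query_port(ctx, port_num, &attr))
        return -1;
    if (ibv_query_gid(ctx, port_num, 0, &cache->gid))
        return -1;

    cache->lid = attr.lid;
    cache->state = attr.state;
    return 0;
}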
/**
 * Link status handler.
 *
 * @param priv
 *   Pointer to private structure.
 * @param dev
 *   Pointer to the rte_eth_dev structure.
 *
 * @return
 *   Nonzero if the callback process can be called immediately.
 */
static int
priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
{
    struct ibv_async_event event;
    int port_change = 0;
    int ret = 0;

    /* Read all messages and acknowledge them. */
    for (;;) {
        if (ibv_get_async_event(priv->ctx, &event))
            break;

        if (event.event_type == IBV_EVENT_PORT_ACTIVE ||
            event.event_type == IBV_EVENT_PORT_ERR)
            port_change = 1;
        else
            DEBUG("event type %d on port %d not handled",
                  event.event_type, event.element.port_num);
        ibv_ack_async_event(&event);
    }
    if (port_change ^ priv->pending_alarm) {
        struct rte_eth_link *link = &dev->data->dev_link;

        priv->pending_alarm = 0;
        mlx5_link_update_unlocked(dev, 0);
        if (((link->link_speed == 0) && link->link_status) ||
            ((link->link_speed != 0) && !link->link_status)) {
            /* Inconsistent status, check again later. */
            priv->pending_alarm = 1;
            rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
                              mlx5_dev_link_status_handler,
                              dev);
        } else
            ret = 1;
    }
    return ret;
}
/* libibverbs 1.0 compatibility wrapper: fetch an async event and translate the
 * affiliated object pointer back through the stored context pointer. */
int __ibv_get_async_event_1_0(struct ibv_context_1_0 *context,
                              struct ibv_async_event *event)
{
    int ret;

    ret = ibv_get_async_event(context->real_context, event);
    if (ret)
        return ret;

    switch (event->event_type) {
    case IBV_EVENT_CQ_ERR:
        event->element.cq = event->element.cq->cq_context;
        break;

    case IBV_EVENT_QP_FATAL:
    case IBV_EVENT_QP_REQ_ERR:
    case IBV_EVENT_QP_ACCESS_ERR:
    case IBV_EVENT_COMM_EST:
    case IBV_EVENT_SQ_DRAINED:
    case IBV_EVENT_PATH_MIG:
    case IBV_EVENT_PATH_MIG_ERR:
    case IBV_EVENT_QP_LAST_WQE_REACHED:
        event->element.qp = event->element.qp->qp_context;
        break;

    case IBV_EVENT_SRQ_ERR:
    case IBV_EVENT_SRQ_LIMIT_REACHED:
        event->element.srq = event->element.srq->srq_context;
        break;

    default:
        break;
    }

    return ret;
}
/* Function handles async device events */
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index,
                                    opal_list_t *ignore_qp_err_list)
{
    int j;
    mca_btl_openib_device_t *device = NULL;
    struct ibv_async_event event;
    bool xrc_event = false;
    int event_type;

    /* We need to find the correct device and process this event */
    for (j = 0; j < mca_btl_openib_component.ib_num_btls; j++) {
        if (mca_btl_openib_component.openib_btls[j]->device->ib_dev_context->async_fd ==
            devices_poll->async_pollfd[index].fd) {
            device = mca_btl_openib_component.openib_btls[j]->device;
            break;
        }
    }
    if (NULL != device) {
        if (ibv_get_async_event((struct ibv_context *)device->ib_dev_context, &event) < 0) {
            if (EWOULDBLOCK == errno) {
                /* No event found? It was already handled by someone else. */
                return OPAL_SUCCESS;
            } else {
                BTL_ERROR(("Failed to get async event"));
                return OPAL_ERROR;
            }
        }
        event_type = event.event_type;
#if HAVE_XRC
        /* Is it an XRC event? */
        if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
            xrc_event = true;
            /* Clear the XRC flag bit and handle the event as usual */
            event_type ^= IBV_XRC_QP_EVENT_FLAG;
        }
#endif
        switch (event_type) {
        case IBV_EVENT_PATH_MIG:
            BTL_ERROR(("Alternative path migration event reported"));
            if (APM_ENABLED) {
                BTL_ERROR(("Trying to find additional path..."));
                if (!xrc_event)
                    mca_btl_openib_load_apm(event.element.qp,
                                            qp2endpoint(event.element.qp, device));
#if HAVE_XRC
                else
                    mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
                                                    xrc_qp2endpoint(event.element.xrc_qp_num, device));
#endif
            }
            break;
        case IBV_EVENT_DEVICE_FATAL:
            /* Set the flag to fatal */
            device->got_fatal_event = true;
            /* It is not critical to protect the counter */
            OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
            /* fall through */
        case IBV_EVENT_CQ_ERR:
        case IBV_EVENT_QP_FATAL:
            if (event_type == IBV_EVENT_QP_FATAL) {
                opal_list_item_t *item;
                mca_btl_openib_qp_list *qp_item;
                bool in_ignore_list = false;

                BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp));

                /* Look through the ignore list */
                for (item = opal_list_get_first(ignore_qp_err_list);
                     item != opal_list_get_end(ignore_qp_err_list);
                     item = opal_list_get_next(item)) {
                    qp_item = (mca_btl_openib_qp_list *)item;
                    if (qp_item->qp == event.element.qp) {
                        BTL_VERBOSE(("QP %p is in error ignore list",
                                     (void *)event.element.qp));
                        in_ignore_list = true;
                        break;
                    }
                }
                if (in_ignore_list)
                    break;
            }
            /* fall through */
        case IBV_EVENT_QP_REQ_ERR:
        case IBV_EVENT_QP_ACCESS_ERR:
        case IBV_EVENT_PATH_MIG_ERR:
        case IBV_EVENT_SRQ_ERR:
            opal_show_help("help-mpi-btl-openib.txt", "of error event",
                           true, opal_proc_local_get()->proc_hostname, (int)getpid(),
                           event_type,
                           openib_event_to_str((enum ibv_event_type)event_type),
                           xrc_event ? "true" : "false");
            break;
        case IBV_EVENT_PORT_ERR:
            opal_show_help("help-mpi-btl-openib.txt", "of error event",
                           true, opal_proc_local_get()->proc_hostname, (int)getpid(),
                           event_type,
                           openib_event_to_str((enum ibv_event_type)event_type),
                           xrc_event ? "true" : "false");
            /* Set the flag to indicate a port error */
            device->got_port_event = true;
            OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
            break;
        case IBV_EVENT_COMM_EST:
        case IBV_EVENT_PORT_ACTIVE:
        case IBV_EVENT_SQ_DRAINED:
        case IBV_EVENT_LID_CHANGE:
        case IBV_EVENT_PKEY_CHANGE:
        case IBV_EVENT_SM_CHANGE:
        case IBV_EVENT_QP_LAST_WQE_REACHED:
#if HAVE_DECL_IBV_EVENT_CLIENT_REREGISTER
        case IBV_EVENT_CLIENT_REREGISTER:
#endif
            break;
        /* This event is signaled when the number of preposted receive WQEs
         * drops below the predefined threshold (srq_limit) */
        case IBV_EVENT_SRQ_LIMIT_REACHED:
            if (OPAL_SUCCESS != btl_openib_async_srq_limit_event(event.element.srq)) {
                return OPAL_ERROR;
            }
            break;
        default:
            opal_show_help("help-mpi-btl-openib.txt", "of unknown event",
                           true, opal_proc_local_get()->proc_hostname, (int)getpid(),
                           event_type, xrc_event ? "true" : "false");
        }
        ibv_ack_async_event(&event);
    } else {
        /* If (device == NULL), we failed to locate the device.
         * This should never happen... */
        BTL_ERROR(("Failed to find device with FD %d. "
                   "Fatal error, stopping async event thread",
                   devices_poll->async_pollfd[index].fd));
        return OPAL_ERROR;
    }

    return OPAL_SUCCESS;
}
/* Function handles async device events (older variant without the QP-error ignore list) */
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index)
{
    int j;
    mca_btl_openib_device_t *device = NULL;
    struct ibv_async_event event;
    bool xrc_event = false;
    int event_type;

    /* We need to find the correct device and process this event */
    for (j = 0; j < mca_btl_openib_component.ib_num_btls; j++) {
        if (mca_btl_openib_component.openib_btls[j]->device->ib_dev_context->async_fd ==
            devices_poll->async_pollfd[index].fd) {
            device = mca_btl_openib_component.openib_btls[j]->device;
            break;
        }
    }
    if (NULL != device) {
        if (ibv_get_async_event((struct ibv_context *)device->ib_dev_context, &event) < 0) {
            if (EWOULDBLOCK == errno) {
                /* No event found? It was already handled by someone else. */
                return OMPI_SUCCESS;
            } else {
                BTL_ERROR(("Failed to get async event"));
                return OMPI_ERROR;
            }
        }
        event_type = event.event_type;
#if HAVE_XRC
        /* Is it an XRC event? */
        if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
            xrc_event = true;
            /* Clear the XRC flag bit and handle the event as usual */
            event_type ^= IBV_XRC_QP_EVENT_FLAG;
        }
#endif
        switch (event_type) {
        case IBV_EVENT_PATH_MIG:
            BTL_ERROR(("Alternative path migration event reported"));
            if (APM_ENABLED) {
                BTL_ERROR(("Trying to find additional path..."));
                if (!xrc_event)
                    mca_btl_openib_load_apm(event.element.qp,
                                            qp2endpoint(event.element.qp, device));
#if HAVE_XRC
                else
                    mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
                                                    xrc_qp2endpoint(event.element.xrc_qp_num, device));
#endif
            }
            break;
        case IBV_EVENT_DEVICE_FATAL:
            /* Set the flag to fatal */
            device->got_fatal_event = true;
            /* It is not critical to protect the counter */
            OPAL_THREAD_ADD32(&mca_btl_openib_component.fatal_counter, 1);
            /* fall through */
        case IBV_EVENT_CQ_ERR:
        case IBV_EVENT_QP_FATAL:
        case IBV_EVENT_QP_REQ_ERR:
        case IBV_EVENT_QP_ACCESS_ERR:
        case IBV_EVENT_PATH_MIG_ERR:
        case IBV_EVENT_SRQ_ERR:
        case IBV_EVENT_PORT_ERR:
            orte_show_help("help-mpi-btl-openib.txt", "of error event",
                           true, orte_process_info.nodename, orte_process_info.pid,
                           event.event_type,
                           openib_event_to_str(event.event_type),
                           xrc_event ? "true" : "false");
            break;
        case IBV_EVENT_COMM_EST:
        case IBV_EVENT_PORT_ACTIVE:
        case IBV_EVENT_SQ_DRAINED:
        case IBV_EVENT_LID_CHANGE:
        case IBV_EVENT_PKEY_CHANGE:
        case IBV_EVENT_SM_CHANGE:
        case IBV_EVENT_QP_LAST_WQE_REACHED:
#if HAVE_DECL_IBV_EVENT_CLIENT_REREGISTER
        case IBV_EVENT_CLIENT_REREGISTER:
#endif
        case IBV_EVENT_SRQ_LIMIT_REACHED:
            break;
        default:
            orte_show_help("help-mpi-btl-openib.txt", "of unknown event",
                           true, orte_process_info.nodename, orte_process_info.pid,
                           event.event_type, xrc_event ? "true" : "false");
        }
        ibv_ack_async_event(&event);
    } else {
        /* If (device == NULL), we failed to locate the device.
         * This should never happen... */
        BTL_ERROR(("Failed to find device with FD %d. "
                   "Fatal error, stopping async event thread",
                   devices_poll->async_pollfd[index].fd));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
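The EWOULDBLOCK check in the two handlers above only works if the device's async_fd has been switched to non-blocking mode; by default ibv_get_async_event() blocks until an event arrives. The following is a minimal sketch of that setup, not taken from any of the projects above (the helper name wait_and_drain_async_events and the timeout parameter are illustrative): make the fd non-blocking with fcntl(), wait with poll(), then drain and acknowledge every pending event.

/* Minimal sketch: non-blocking drain of the async event queue. */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <infiniband/verbs.h>

static int wait_and_drain_async_events(struct ibv_context *ctx, int timeout_ms)
{
    struct ibv_async_event event;
    struct pollfd pfd = { .fd = ctx->async_fd, .events = POLLIN };
    int flags = fcntl(ctx->async_fd, F_GETFL);

    /* Switch the async event queue to non-blocking mode. */
    if (flags < 0 || fcntl(ctx->async_fd, F_SETFL, flags | O_NONBLOCK) < 0)
        return -1;

    /* Wait until at least one async event is pending (or the timeout expires). */
    if (poll(&pfd, 1, timeout_ms) <= 0)
        return -1;

    /* Drain every pending event; with O_NONBLOCK the call fails with
     * EAGAIN/EWOULDBLOCK once the queue is empty instead of blocking. */
    while (ibv_get_async_event(ctx, &event) == 0) {
        fprintf(stderr, "async event: %s\n", ibv_event_type_str(event.event_type));
        ibv_ack_async_event(&event);
    }
    return (errno == EAGAIN || errno == EWOULDBLOCK) ? 0 : -1;
}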
/* Thin wrapper that forwards to ibv_get_async_event(); part of the mlx5 PMD's
 * glue layer for calling libibverbs through a function table. */
static int
mlx5_glue_get_async_event(struct ibv_context *context,
                          struct ibv_async_event *event)
{
    return ibv_get_async_event(context, event);
}
int main(int argc, char *argv[])
{
    char *device_name = NULL;
    struct ibv_async_event event;
    struct ibv_context *ctx;
    int ret = 0;

    /* parse command line options */
    while (1) {
        int c;

        c = getopt(argc, argv, "d:h");
        if (c == -1)
            break;

        switch (c) {
        case 'd':
            device_name = strdup(optarg);
            if (!device_name) {
                fprintf(stderr, "Error, failed to allocate memory for the device name\n");
                return -1;
            }
            break;

        case 'h':
            usage(argv[0]);
            exit(1);

        default:
            fprintf(stderr, "Bad command line was used\n\n");
            usage(argv[0]);
            exit(1);
        }
    }

    if (!device_name) {
        fprintf(stderr, "Error, the device name is mandatory\n");
        return -1;
    }

    ctx = get_device_context(device_name);
    if (!ctx) {
        fprintf(stderr, "Error, the context of the device name '%s' could not be opened\n",
                device_name);
        free(device_name);
        return -1;
    }

    printf("Listening on events for the device '%s'\n", device_name);

    while (1) {
        /* wait for the next async event */
        ret = ibv_get_async_event(ctx, &event);
        if (ret) {
            fprintf(stderr, "Error, ibv_get_async_event() failed\n");
            goto out;
        }

        /* print the event */
        print_async_event(ctx, &event);

        /* ack the event */
        ibv_ack_async_event(&event);
    }

    ret = 0;

out:
    if (ibv_close_device(ctx)) {
        fprintf(stderr, "Error, failed to close the context of the device '%s'\n",
                device_name);
        return -1;
    }
    printf("The context of the device name '%s' was closed\n", device_name);
    free(device_name);

    return ret;
}
void *async_event_thread(void *ptr)
{
    struct ibv_async_event event;
    struct async_event_thread_context_t *aet_ctx =
        (struct async_event_thread_context_t *)ptr;
    struct ibv_context *ibv_ctx = aet_ctx->ibv_ctx;
    int rc;

    DEBUG("async event thread started");

    while (1) {
        rc = ibv_get_async_event(ibv_ctx, &event);
        if (rc) {
            ERROR("ibv_get_async_event failed");
            exit(1);
        }
        ibv_ack_async_event(&event);

        switch (event.event_type) {
        case IBV_EVENT_QP_FATAL:
        case IBV_EVENT_QP_REQ_ERR:
        case IBV_EVENT_QP_ACCESS_ERR:
        case IBV_EVENT_COMM_EST:
        case IBV_EVENT_SQ_DRAINED:
        case IBV_EVENT_PATH_MIG:
        case IBV_EVENT_PATH_MIG_ERR:
        case IBV_EVENT_QP_LAST_WQE_REACHED:
            DEBUG("Got QP async event. Type: %s(0x%x), QP handle: %p",
                  ibv_event_type_str(event.event_type),
                  event.event_type, event.element.qp);
            break;
        case IBV_EVENT_CQ_ERR:
            DEBUG("Got CQ async event. Type: %s(0x%x), CQ handle: %p",
                  ibv_event_type_str(event.event_type),
                  event.event_type, event.element.cq);
            break;
        case IBV_EVENT_SRQ_ERR:
        case IBV_EVENT_SRQ_LIMIT_REACHED:
            DEBUG("Got SRQ async event. Type: %s(0x%x), SRQ handle: %p",
                  ibv_event_type_str(event.event_type),
                  event.event_type, event.element.srq);
            break;
        case IBV_EVENT_DEVICE_FATAL:
            DEBUG("Got CA async event. Type: %s(0x%x)",
                  ibv_event_type_str(event.event_type), event.event_type);
            break;
        case IBV_EVENT_PORT_ACTIVE:
        case IBV_EVENT_PORT_ERR:
        case IBV_EVENT_LID_CHANGE:
        case IBV_EVENT_PKEY_CHANGE:
        case IBV_EVENT_SM_CHANGE:
            DEBUG("Got Port async event. Type: %s(0x%x), Port number: %d",
                  ibv_event_type_str(event.event_type),
                  event.event_type, event.element.port_num);
            break;
        default:
            DEBUG("Got unexpected async event. Type: %s(0x%x)",
                  ibv_event_type_str(event.event_type), event.event_type);
        }

        if (event.event_type < MAX_ASYNC_EVENT_VALUE) {
            continue;
#if 0
            if (async_thread_data_p->expected_event_arr[event.event_type]) {
                continue;
            }
#endif
        } else {
            ERROR("The async event value %u is not supported by test",
                  event.event_type);
        }

        return NULL;
    }
}
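All of the examples above acknowledge every retrieved event with ibv_ack_async_event(). That is not optional: libibverbs documents that destroying the affiliated object (ibv_destroy_qp(), ibv_destroy_cq(), ibv_destroy_srq()) waits until all of its outstanding events have been acknowledged. The sketch below illustrates that rule; the helper name handle_one_async_event is hypothetical.

/* Sketch: always ack, even for events you ignore, or a later destroy call on
 * the affiliated object will block forever waiting for the acknowledgement. */
#include <stdio.h>
#include <infiniband/verbs.h>

static int handle_one_async_event(struct ibv_context *ctx)
{
    struct ibv_async_event event;

    if (ibv_get_async_event(ctx, &event))
        return -1;

    if (event.event_type == IBV_EVENT_QP_LAST_WQE_REACHED) {
        /* A QP-affiliated event: event.element.qp is valid here. */
        fprintf(stderr, "last WQE reached on QP %p\n", (void *)event.element.qp);
    }

    /* Acknowledge unconditionally before any ibv_destroy_*() on the object. */
    ibv_ack_async_event(&event);
    return 0;
}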