Beispiel #1
0
static void update_umad_port_cache(struct oib_port *port)
{
    int rc;
    struct ibv_async_event event;

    ibv_get_async_event(port->verbs_ctx, &event);
    ibv_ack_async_event(&event);
    switch (event.event_type) {
        case IBV_EVENT_PORT_ACTIVE:
        case IBV_EVENT_PORT_ERR:
        case IBV_EVENT_LID_CHANGE:
        case IBV_EVENT_PKEY_CHANGE:
            rc = cache_port_details(port);
            if (rc != 0)
                OUTPUT_ERROR ("umad port cache data invalid!\n");
            break;

        case IBV_EVENT_SM_CHANGE:
        case IBV_EVENT_CLIENT_REREGISTER:
            if (reregister_traps(port))
                OUTPUT_ERROR("failed to reregister traps.\n");
            rc = cache_port_details(port);
            if (rc != 0)
                OUTPUT_ERROR ("umad port cache data invalid!\n");
            break;
        case IBV_EVENT_CQ_ERR:
            OUTPUT_ERROR("got IBV_EVENT_CQ_ERR\n");
            break;
        default:
            break;
    }
}
Beispiel #2
0
/**
 * Link status handler.
 *
 * @param priv
 *   Pointer to private structure.
 * @param dev
 *   Pointer to the rte_eth_dev structure.
 *
 * @return
 *   Nonzero if the callback process can be called immediately.
 */
static int
priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
{
	struct ibv_async_event event;
	int port_change = 0;
	int ret = 0;

	/* Read all message and acknowledge them. */
	for (;;) {
		if (ibv_get_async_event(priv->ctx, &event))
			break;

		if (event.event_type == IBV_EVENT_PORT_ACTIVE ||
		    event.event_type == IBV_EVENT_PORT_ERR)
			port_change = 1;
		else
			DEBUG("event type %d on port %d not handled",
			      event.event_type, event.element.port_num);
		ibv_ack_async_event(&event);
	}

	if (port_change ^ priv->pending_alarm) {
		struct rte_eth_link *link = &dev->data->dev_link;

		priv->pending_alarm = 0;
		mlx5_link_update_unlocked(dev, 0);
		if (((link->link_speed == 0) && link->link_status) ||
		    ((link->link_speed != 0) && !link->link_status)) {
			/* Inconsistent status, check again later. */
			priv->pending_alarm = 1;
			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
					  mlx5_dev_link_status_handler,
					  dev);
		} else
			ret = 1;
	}
	return ret;
}
Beispiel #3
0
int __ibv_get_async_event_1_0(struct ibv_context_1_0 *context,
			      struct ibv_async_event *event)
{
	int ret;

	ret = ibv_get_async_event(context->real_context, event);
	if (ret)
		return ret;

	switch (event->event_type) {
	case IBV_EVENT_CQ_ERR:
		event->element.cq = event->element.cq->cq_context;
		break;

	case IBV_EVENT_QP_FATAL:
	case IBV_EVENT_QP_REQ_ERR:
	case IBV_EVENT_QP_ACCESS_ERR:
	case IBV_EVENT_COMM_EST:
	case IBV_EVENT_SQ_DRAINED:
	case IBV_EVENT_PATH_MIG:
	case IBV_EVENT_PATH_MIG_ERR:
	case IBV_EVENT_QP_LAST_WQE_REACHED:
		event->element.qp = event->element.qp->qp_context;
		break;

	case IBV_EVENT_SRQ_ERR:
	case IBV_EVENT_SRQ_LIMIT_REACHED:
		event->element.srq = event->element.srq->srq_context;
		break;

	default:
		break;
	}

	return ret;
}
/* Function handle async device events */
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index,
                                    opal_list_t *ignore_qp_err_list)
{
    int j;
    mca_btl_openib_device_t *device = NULL;
    struct ibv_async_event event;
    bool xrc_event = false;
    int event_type;

    /* We need to find correct device and process this event */
    for (j=0; j < mca_btl_openib_component.ib_num_btls; j++) {
        if (mca_btl_openib_component.openib_btls[j]->device->ib_dev_context->async_fd ==
                devices_poll->async_pollfd[index].fd ) {
            device = mca_btl_openib_component.openib_btls[j]->device;
            break;
        }
    }
    if (NULL != device) {
        if (ibv_get_async_event((struct ibv_context *)device->ib_dev_context,&event) < 0) {
            if (EWOULDBLOCK == errno) {
                /* No event found ?
                 * It was handled by somebody other */
                return OPAL_SUCCESS;
            } else {
                BTL_ERROR(("Failed to get async event"));
                return OPAL_ERROR;
            }
        }

        event_type = event.event_type;
#if HAVE_XRC
        /* is it XRC event ?*/
        if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
            xrc_event = true;
            /* Clean the bitnd handel as usual */
            event_type ^= IBV_XRC_QP_EVENT_FLAG;
        }
#endif
        switch(event_type) {
            case IBV_EVENT_PATH_MIG:
                BTL_ERROR(("Alternative path migration event reported"));
                if (APM_ENABLED) {
                    BTL_ERROR(("Trying to find additional path..."));
                    if (!xrc_event)
                        mca_btl_openib_load_apm(event.element.qp,
                                qp2endpoint(event.element.qp, device));
#if HAVE_XRC
                    else
                        mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
                                xrc_qp2endpoint(event.element.xrc_qp_num, device));
#endif
                }
                break;
            case IBV_EVENT_DEVICE_FATAL:
                /* Set the flag to fatal */
                device->got_fatal_event = true;
                /* It is not critical to protect the counter */
                OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
            case IBV_EVENT_CQ_ERR:
            case IBV_EVENT_QP_FATAL:
              if (event_type == IBV_EVENT_QP_FATAL) {
                  opal_list_item_t *item;
                  mca_btl_openib_qp_list *qp_item;
                  bool in_ignore_list = false;

                  BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp));

                  /* look through ignore list */
                  for (item = opal_list_get_first(ignore_qp_err_list);
                       item != opal_list_get_end(ignore_qp_err_list);
                       item = opal_list_get_next(item)) {
                      qp_item = (mca_btl_openib_qp_list *)item;
                      if (qp_item->qp == event.element.qp) {
                          BTL_VERBOSE(("QP %p is in error ignore list",
                                       (void *)event.element.qp));
                          in_ignore_list = true;
                          break;
                      }
                  }
                  if (in_ignore_list)
                      break;
              }

            case IBV_EVENT_QP_REQ_ERR:
            case IBV_EVENT_QP_ACCESS_ERR:
            case IBV_EVENT_PATH_MIG_ERR:
            case IBV_EVENT_SRQ_ERR:
                opal_show_help("help-mpi-btl-openib.txt", "of error event",
                    true,opal_proc_local_get()->proc_hostname, (int)getpid(),
                    event_type,
                    openib_event_to_str((enum ibv_event_type)event_type),
                    xrc_event ? "true" : "false");
                break;
            case IBV_EVENT_PORT_ERR:
                opal_show_help("help-mpi-btl-openib.txt", "of error event",
                    true,opal_proc_local_get()->proc_hostname, (int)getpid(),
                    event_type,
                    openib_event_to_str((enum ibv_event_type)event_type),
                    xrc_event ? "true" : "false");
                /* Set the flag to indicate port error */
                device->got_port_event = true;
                OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
                break;
            case IBV_EVENT_COMM_EST:
            case IBV_EVENT_PORT_ACTIVE:
            case IBV_EVENT_SQ_DRAINED:
            case IBV_EVENT_LID_CHANGE:
            case IBV_EVENT_PKEY_CHANGE:
            case IBV_EVENT_SM_CHANGE:
            case IBV_EVENT_QP_LAST_WQE_REACHED:
#if HAVE_DECL_IBV_EVENT_CLIENT_REREGISTER
            case IBV_EVENT_CLIENT_REREGISTER:
#endif
                break;
            /* The event is signaled when number of prepost receive WQEs is going
                                            under predefined threshold - srq_limit */
            case IBV_EVENT_SRQ_LIMIT_REACHED:
                if(OPAL_SUCCESS !=
                         btl_openib_async_srq_limit_event(event.element.srq)) {
                    return OPAL_ERROR;
                }

                break;
            default:
                opal_show_help("help-mpi-btl-openib.txt", "of unknown event",
                        true,opal_proc_local_get()->proc_hostname, (int)getpid(),
                        event_type, xrc_event ? "true" : "false");
        }
        ibv_ack_async_event(&event);
    } else {
        /* if (device == NULL), then failed to locate the device!
           This should never happen... */
        BTL_ERROR(("Failed to find device with FD %d.  "
                   "Fatal error, stoping asynch event thread",
                   devices_poll->async_pollfd[index].fd));
        return OPAL_ERROR;
    }
    return OPAL_SUCCESS;
}
Beispiel #5
0
/* Function handle async device events */
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index)
{
    int j;
    mca_btl_openib_device_t *device = NULL;
    struct ibv_async_event event;
    bool xrc_event = false;
    int event_type;

    /* We need to find correct device and process this event */
    for (j=0; j < mca_btl_openib_component.ib_num_btls; j++) {
        if (mca_btl_openib_component.openib_btls[j]->device->ib_dev_context->async_fd ==
                devices_poll->async_pollfd[index].fd ) {
            device = mca_btl_openib_component.openib_btls[j]->device;
            break;
        }
    }
    if (NULL != device) {
        if (ibv_get_async_event((struct ibv_context *)device->ib_dev_context,&event) < 0) {
            if (EWOULDBLOCK == errno) {
                /* No event found ?
                 * It was handled by somebody other */
                return OMPI_SUCCESS;
            } else {
                BTL_ERROR(("Failed to get async event"));
                return OMPI_ERROR;
            }
        }

        event_type = event.event_type;
#if HAVE_XRC
        /* is it XRC event ?*/
        if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
            xrc_event = true;
            /* Clean the bitnd handel as usual */
            event_type ^= IBV_XRC_QP_EVENT_FLAG;
        }
#endif
        switch(event_type) {
            case IBV_EVENT_PATH_MIG:
                BTL_ERROR(("Alternative path migration event reported"));
                if (APM_ENABLED) {
                    BTL_ERROR(("Trying to find additional path..."));
                    if (!xrc_event) 
                        mca_btl_openib_load_apm(event.element.qp,
                                qp2endpoint(event.element.qp, device));
#if HAVE_XRC
                    else
                        mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
                                xrc_qp2endpoint(event.element.xrc_qp_num, device));
#endif
                }
                break;
            case IBV_EVENT_DEVICE_FATAL:
                /* Set the flag to fatal */
                device->got_fatal_event = true;
                /* It is not critical to protect the counter */
                OPAL_THREAD_ADD32(&mca_btl_openib_component.fatal_counter, 1);
            case IBV_EVENT_CQ_ERR:
            case IBV_EVENT_QP_FATAL:
            case IBV_EVENT_QP_REQ_ERR:
            case IBV_EVENT_QP_ACCESS_ERR:
            case IBV_EVENT_PATH_MIG_ERR:
            case IBV_EVENT_SRQ_ERR:
            case IBV_EVENT_PORT_ERR:
                orte_show_help("help-mpi-btl-openib.txt", "of error event",
                    true,orte_process_info.nodename, orte_process_info.pid,
                    event.event_type, openib_event_to_str(event.event_type),
                    xrc_event ? "true" : "false");
                break;
            case IBV_EVENT_COMM_EST:
            case IBV_EVENT_PORT_ACTIVE:
            case IBV_EVENT_SQ_DRAINED:
            case IBV_EVENT_LID_CHANGE:
            case IBV_EVENT_PKEY_CHANGE:
            case IBV_EVENT_SM_CHANGE:
            case IBV_EVENT_QP_LAST_WQE_REACHED:
#if HAVE_DECL_IBV_EVENT_CLIENT_REREGISTER
            case IBV_EVENT_CLIENT_REREGISTER:
#endif
            case IBV_EVENT_SRQ_LIMIT_REACHED:
                break;
            default:
                orte_show_help("help-mpi-btl-openib.txt", "of unknown event",
                        true,orte_process_info.nodename, orte_process_info.pid,
                        event.event_type, xrc_event ? "true" : "false");
        }
        ibv_ack_async_event(&event);
    } else {
        /* if (device == NULL), then failed to locate the device!
           This should never happen... */
        BTL_ERROR(("Failed to find device with FD %d.  "
                   "Fatal error, stoping asynch event thread",
                   devices_poll->async_pollfd[index].fd));
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
Beispiel #6
0
static int
mlx5_glue_get_async_event(struct ibv_context *context,
			  struct ibv_async_event *event)
{
	return ibv_get_async_event(context, event);
}
Beispiel #7
0
int main(int argc, char *argv[])
{
	char *device_name = NULL;
	struct ibv_async_event event;
	struct ibv_context *ctx;
	int ret = 0;

	/* parse command line options */
	while (1) {
		int c;

		c = getopt(argc, argv, "d:h");
		if (c == -1)
			break;

		switch (c) {
		case 'd':
			device_name = strdup(optarg);
			if (!device_name) {
				fprintf(stderr, "Error, failed to allocate memory for the device name\n");
				return -1;
			}
			break;

		case 'h':
			usage(argv[0]);
			exit(1);

		default:
			fprintf(stderr, "Bad command line was used\n\n");
			usage(argv[0]);
			exit(1);
		}
	}

	if (!device_name) {
		fprintf(stderr, "Error, the device name is mandatory\n");
		return -1;
	}

	ctx = get_device_context(device_name);
	if (!ctx) {
		fprintf(stderr, "Error, the context of the device name '%s' could not be opened\n", device_name);
		free(device_name);
		return -1;
	}

	printf("Listening on events for the device '%s'\n", device_name);

	while (1) {
		/* wait for the next async event */
		ret = ibv_get_async_event(ctx, &event);
		if (ret) {
			fprintf(stderr, "Error, ibv_get_async_event() failed\n");
			goto out;
		}

		/* print the event */
		print_async_event(ctx, &event);

		/* ack the event */
		ibv_ack_async_event(&event);
	}

	ret = 0;

out:
	if (ibv_close_device(ctx)) {
		fprintf(stderr, "Error, failed to close the context of the device '%s'\n", device_name);
		return -1;
	}

	printf("The context of the device name '%s' was closed\n", device_name);
	free(device_name);

	return ret;
}
Beispiel #8
0
void *async_event_thread(void *ptr)
{
	struct ibv_async_event event;
	struct async_event_thread_context_t *aet_ctx = (struct async_event_thread_context_t *)ptr;
	struct ibv_context *ibv_ctx = aet_ctx->ibv_ctx;
	int rc;

	DEBUG("async event thread started");

	while (1) {
		rc = ibv_get_async_event(ibv_ctx, &event);
		if (rc) {
			ERROR("ibv_get_async_event failed");
			exit(1);
		}

		ibv_ack_async_event(&event);
		switch (event.event_type) {
		case IBV_EVENT_QP_FATAL:
		case IBV_EVENT_QP_REQ_ERR:
		case IBV_EVENT_QP_ACCESS_ERR:
		case IBV_EVENT_COMM_EST:
		case IBV_EVENT_SQ_DRAINED:
		case IBV_EVENT_PATH_MIG:
		case IBV_EVENT_PATH_MIG_ERR:
		case IBV_EVENT_QP_LAST_WQE_REACHED:
			DEBUG("Got QP async event. Type: %s(0x%x), QP handle: %p",
				ibv_event_type_str(event.event_type),
				event.event_type, event.element.qp);
			break;

		case IBV_EVENT_CQ_ERR:
			DEBUG("Got CQ async event. Type: %s(0x%x), CQ handle: %p",
				ibv_event_type_str(event.event_type),
				event.event_type, event.element.cq);
			break;

		case IBV_EVENT_SRQ_ERR:	
		case IBV_EVENT_SRQ_LIMIT_REACHED:
			DEBUG("Got SRQ async event. Type: %s(0x%x), SRQ handle: %p",
				ibv_event_type_str(event.event_type),
				event.event_type, event.element.srq);
			break;

		case IBV_EVENT_DEVICE_FATAL:
			DEBUG("Got CA async event. Type: %s(0x%x)",
				ibv_event_type_str(event.event_type), event.event_type);
			break;

		case IBV_EVENT_PORT_ACTIVE:
		case IBV_EVENT_PORT_ERR:
		case IBV_EVENT_LID_CHANGE:
		case IBV_EVENT_PKEY_CHANGE:
		case IBV_EVENT_SM_CHANGE:
			DEBUG("Got Port async event. Type: %s(0x%x), Port number: %d",
				ibv_event_type_str(event.event_type), event.event_type, event.element.port_num);
			break;

		default:
			DEBUG("Got unexpected async event. Type: %s(0x%x)",
				ibv_event_type_str(event.event_type), event.event_type);
		}

		if (event.event_type < MAX_ASYNC_EVENT_VALUE) {
			continue;
#if 0
			if (async_thread_data_p->expected_event_arr[event.event_type]) {
				continue;
			}
#endif
		} else {
			ERROR("The async event value %u is not supported by test", event.event_type);
		}

		return NULL;
	}
}