static void poll_health(unsigned long data) { struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; struct mlx5_core_health *health = &dev->priv.health; u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { trigger_cmd_completions(dev); mod_timer(&health->timer, get_next_poll_jiffies()); return; } count = ioread32be(health->health_counter); if (count == health->prev) ++health->miss_counter; else health->miss_counter = 0; health->prev = count; if (health->miss_counter == MAX_MISSES) { dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n"); print_health_info(dev); } else { mod_timer(&health->timer, get_next_poll_jiffies()); } if (in_fatal(dev) && !health->sick) { health->sick = true; print_health_info(dev); queue_work(health->wq, &health->work); } }
void mlx5_enter_error_state(struct mlx5_core_dev *dev) { mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock; mlx5_core_err(dev, "start\n"); if (pci_channel_offline(dev->pdev) || in_fatal(dev)) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; trigger_cmd_completions(dev); } mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); mlx5_core_err(dev, "end\n"); unlock: mutex_unlock(&dev->intf_state_mutex); }