Skip to content

Commit

Permalink
net/mlx5: Cancel delayed recovery work when unloading the driver
Browse files Browse the repository at this point in the history
Draining the health workqueue will ignore future health works including
the one that report hardware failure and thus we can't enter error state
Instead cancel the recovery flow and make sure only recovery flow won't
be scheduled.

Fixes: 5e44fca ('net/mlx5: Only cancel recovery work when cleaning up device')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
Mohamad Haj Yahia authored and Saeed Mahameed committed Jun 27, 2017
1 parent 8ce59b1 commit 2a0165a
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 2 deletions.
15 changes: 14 additions & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/health.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ enum {

enum {
MLX5_DROP_NEW_HEALTH_WORK,
MLX5_DROP_NEW_RECOVERY_WORK,
};

static u8 get_nic_state(struct mlx5_core_dev *dev)
Expand Down Expand Up @@ -193,7 +194,7 @@ static void health_care(struct work_struct *work)
mlx5_handle_bad_state(dev);

spin_lock(&health->wq_lock);
if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
schedule_delayed_work(&health->recover_work, recover_delay);
else
dev_err(&dev->pdev->dev,
Expand Down Expand Up @@ -313,6 +314,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
init_timer(&health->timer);
health->sick = 0;
clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
health->health = &dev->iseg->health;
health->health_counter = &dev->iseg->health_counter;

Expand All @@ -335,11 +337,22 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)

spin_lock(&health->wq_lock);
set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
spin_unlock(&health->wq_lock);
cancel_delayed_work_sync(&health->recover_work);
cancel_work_sync(&health->work);
}

void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;

spin_lock(&health->wq_lock);
set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
spin_unlock(&health->wq_lock);
cancel_delayed_work_sync(&dev->priv.health.recover_work);
}

void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
int err = 0;

if (cleanup)
mlx5_drain_health_wq(dev);
mlx5_drain_health_recovery(dev);

mutex_lock(&dev->intf_state_mutex);
if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
Expand Down
1 change: 1 addition & 0 deletions include/linux/mlx5/driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_buf *buf, int node);
int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
Expand Down

0 comments on commit 2a0165a

Please sign in to comment.