net/mlx5: Continue health polling until it is explicitly stopped
author Mohamad Haj Yahia <mohamad@mellanox.com>
Sun, 9 Apr 2017 14:19:37 +0000 (17:19 +0300)
committer Saeed Mahameed <saeedm@mellanox.com>
Sun, 11 Jun 2017 10:10:36 +0000 (13:10 +0300)
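The issue is that when we get an assert we stop polling the health,
and thus we can't enter the error state when we have a real health issue.
Fix this by always re-arming the health timer at the end of poll_health(),
so that polling continues until it is explicitly stopped.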

Fixes: fd76ee4da55a ('net/mlx5_core: Fix internal error detection conditions')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/health.c

index 44f59b1d6f0f27f7bb4f818b11f341af28ba09dc..f27f84ffbc850487557ad0184960d7c872abb160 100644 (file)
@@ -275,10 +275,8 @@ static void poll_health(unsigned long data)
        struct mlx5_core_health *health = &dev->priv.health;
        u32 count;
 
-       if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-               mod_timer(&health->timer, get_next_poll_jiffies());
-               return;
-       }
+       if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+               goto out;
 
        count = ioread32be(health->health_counter);
        if (count == health->prev)
@@ -290,8 +288,6 @@ static void poll_health(unsigned long data)
        if (health->miss_counter == MAX_MISSES) {
                dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
                print_health_info(dev);
-       } else {
-               mod_timer(&health->timer, get_next_poll_jiffies());
        }
 
        if (in_fatal(dev) && !health->sick) {
@@ -305,6 +301,9 @@ static void poll_health(unsigned long data)
                                "new health works are not permitted at this stage\n");
                spin_unlock(&health->wq_lock);
        }
+
+out:
+       mod_timer(&health->timer, get_next_poll_jiffies());
 }
 
 void mlx5_start_health_poll(struct mlx5_core_dev *dev)