scsi: smartpqi: add heartbeat check
author Kevin Barnett <kevin.barnett@microsemi.com>
Wed, 3 May 2017 23:53:11 +0000 (18:53 -0500)
committer Martin K. Petersen <martin.petersen@oracle.com>
Tue, 13 Jun 2017 00:48:02 +0000 (20:48 -0400)
check for controller lockups

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microsemi.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/smartpqi/smartpqi.h
drivers/scsi/smartpqi/smartpqi_init.c
drivers/scsi/smartpqi/smartpqi_sis.c

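diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index 06e2b7152d52a5be81fb74a7cd74b02c164f4024..1ac09e74d8c29fa7ca8050530fda3d5a0e5c92bc 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h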
@@ -490,7 +490,6 @@ struct pqi_raid_error_info {
 #define PQI_EVENT_TYPE_LOGICAL_DEVICE          0x5
 #define PQI_EVENT_TYPE_AIO_STATE_CHANGE                0xfd
 #define PQI_EVENT_TYPE_AIO_CONFIG_CHANGE       0xfe
-#define PQI_EVENT_TYPE_HEARTBEAT               0xff
 
 #pragma pack()
 
@@ -635,6 +634,58 @@ struct pqi_encryption_info {
        u32     encrypt_tweak_upper;
 };
 
+#pragma pack(1)
+
+#define PQI_CONFIG_TABLE_SIGNATURE     "CFGTABLE"
+#define PQI_CONFIG_TABLE_MAX_LENGTH    ((u16)~0)
+
+/* configuration table section IDs */
+#define PQI_CONFIG_TABLE_SECTION_GENERAL_INFO          0
+#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES     1
+#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_ERRATA       2
+#define PQI_CONFIG_TABLE_SECTION_DEBUG                 3
+#define PQI_CONFIG_TABLE_SECTION_HEARTBEAT             4
+
+struct pqi_config_table {
+       u8      signature[8];           /* "CFGTABLE" */
+       __le32  first_section_offset;   /* offset in bytes from the base */
+                                       /* address of this table to the */
+                                       /* first section */
+};
+
+struct pqi_config_table_section_header {
+       __le16  section_id;             /* as defined by the */
+                                       /* PQI_CONFIG_TABLE_SECTION_* */
+                                       /* manifest constants above */
+       __le16  next_section_offset;    /* offset in bytes from base */
+                                       /* address of the table of the */
+                                       /* next section or 0 if last entry */
+};
+
+struct pqi_config_table_general_info {
+       struct pqi_config_table_section_header header;
+       __le32  section_length;         /* size of this section in bytes */
+                                       /* including the section header */
+       __le32  max_outstanding_requests;       /* max. outstanding */
+                                               /* commands supported by */
+                                               /* the controller */
+       __le32  max_sg_size;            /* max. transfer size of a single */
+                                       /* command */
+       __le32  max_sg_per_request;     /* max. number of scatter-gather */
+                                       /* entries supported in a single */
+                                       /* command */
+};
+
+struct pqi_config_table_debug {
+       struct pqi_config_table_section_header header;
+       __le32  scratchpad;
+};
+
+struct pqi_config_table_heartbeat {
+       struct pqi_config_table_section_header header;
+       __le32  heartbeat_counter;
+};
+
 #define PQI_MAX_OUTSTANDING_REQUESTS   ((u32)~0)
 #define PQI_MAX_TRANSFER_SIZE          (4 * 1024U * 1024U)
 
@@ -645,8 +696,6 @@ struct pqi_encryption_info {
 #define PQI_HBA_BUS                    2
 #define PQI_MAX_BUS                    PQI_HBA_BUS
 
-#pragma pack(1)
-
 struct report_lun_header {
        __be32  list_length;
        u8      extended_response;
@@ -870,7 +919,6 @@ struct pqi_io_request {
        struct list_head request_list_entry;
 };
 
-#define PQI_EVENT_HEARTBEAT            0
 #define PQI_NUM_SUPPORTED_EVENTS       6
 
 struct pqi_event {
@@ -943,7 +991,6 @@ struct pqi_ctrl_info {
        u8              inbound_spanning_supported : 1;
        u8              outbound_spanning_supported : 1;
        u8              pqi_mode_enabled : 1;
-       u8              heartbeat_timer_started : 1;
        u8              update_time_worker_scheduled : 1;
 
        struct list_head scsi_device_list;
@@ -963,7 +1010,8 @@ struct pqi_ctrl_info {
 
        atomic_t        num_interrupts;
        int             previous_num_interrupts;
-       unsigned int    num_heartbeats_requested;
+       u32             previous_heartbeat_count;
+       __le32 __iomem  *heartbeat_counter;
        struct timer_list heartbeat_timer;
 
        struct semaphore sync_request_sem;
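diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 7af4add1627608aeeb54f96cc4227b5eea7b7500..de33942860b3da3858c1f5cb046fdaaabccb43b9 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c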
@@ -267,6 +267,14 @@ static inline void pqi_cancel_rescan_worker(struct pqi_ctrl_info *ctrl_info)
        cancel_delayed_work_sync(&ctrl_info->rescan_work);
 }
 
+static inline u32 pqi_read_heartbeat_counter(struct pqi_ctrl_info *ctrl_info)
+{
+       if (!ctrl_info->heartbeat_counter)
+               return 0;
+
+       return readl(ctrl_info->heartbeat_counter);
+}
+
 static int pqi_map_single(struct pci_dev *pci_dev,
        struct pqi_sg_descriptor *sg_descriptor, void *buffer,
        size_t buffer_length, int data_direction)
@@ -2708,23 +2716,18 @@ static inline unsigned int pqi_num_elements_free(unsigned int pi,
        return elements_in_queue - num_elements_used - 1;
 }
 
-#define PQI_EVENT_ACK_TIMEOUT  30
-
-static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
+static void pqi_send_event_ack(struct pqi_ctrl_info *ctrl_info,
        struct pqi_event_acknowledge_request *iu, size_t iu_length)
 {
        pqi_index_t iq_pi;
        pqi_index_t iq_ci;
        unsigned long flags;
        void *next_element;
-       unsigned long timeout;
        struct pqi_queue_group *queue_group;
 
        queue_group = &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP];
        put_unaligned_le16(queue_group->oq_id, &iu->header.response_queue_id);
 
-       timeout = (PQI_EVENT_ACK_TIMEOUT * HZ) + jiffies;
-
        while (1) {
                spin_lock_irqsave(&queue_group->submit_lock[RAID_PATH], flags);
 
@@ -2738,11 +2741,8 @@ static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
                spin_unlock_irqrestore(
                        &queue_group->submit_lock[RAID_PATH], flags);
 
-               if (time_after(jiffies, timeout)) {
-                       dev_err(&ctrl_info->pci_dev->dev,
-                               "sending event acknowledge timed out\n");
+               if (pqi_ctrl_offline(ctrl_info))
                        return;
-               }
        }
 
        next_element = queue_group->iq_element_array[RAID_PATH] +
@@ -2751,7 +2751,6 @@ static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
        memcpy(next_element, iu, iu_length);
 
        iq_pi = (iq_pi + 1) % ctrl_info->num_elements_per_iq;
-
        queue_group->iq_pi_copy[RAID_PATH] = iq_pi;
 
        /*
@@ -2777,7 +2776,7 @@ static void pqi_acknowledge_event(struct pqi_ctrl_info *ctrl_info,
        request.event_id = event->event_id;
        request.additional_event_id = event->additional_event_id;
 
-       pqi_start_event_ack(ctrl_info, &request, sizeof(request));
+       pqi_send_event_ack(ctrl_info, &request, sizeof(request));
 }
 
 static void pqi_event_worker(struct work_struct *work)
@@ -2785,7 +2784,6 @@ static void pqi_event_worker(struct work_struct *work)
        unsigned int i;
        struct pqi_ctrl_info *ctrl_info;
        struct pqi_event *event;
-       bool got_non_heartbeat_event = false;
 
        ctrl_info = container_of(work, struct pqi_ctrl_info, event_work);
 
@@ -2797,8 +2795,6 @@ static void pqi_event_worker(struct work_struct *work)
                if (event->pending) {
                        event->pending = false;
                        pqi_acknowledge_event(ctrl_info, event);
-                       if (i != PQI_EVENT_TYPE_HEARTBEAT)
-                               got_non_heartbeat_event = true;
                }
                event++;
        }
@@ -2848,57 +2844,58 @@ static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
        }
 }
 
-#define PQI_HEARTBEAT_TIMER_INTERVAL   (5 * HZ)
-#define PQI_MAX_HEARTBEAT_REQUESTS     5
+#define PQI_HEARTBEAT_TIMER_INTERVAL   (10 * HZ)
 
 static void pqi_heartbeat_timer_handler(unsigned long data)
 {
        int num_interrupts;
+       u32 heartbeat_count;
        struct pqi_ctrl_info *ctrl_info = (struct pqi_ctrl_info *)data;
 
-       if (!ctrl_info->heartbeat_timer_started)
+       pqi_check_ctrl_health(ctrl_info);
+       if (pqi_ctrl_offline(ctrl_info))
                return;
 
        num_interrupts = atomic_read(&ctrl_info->num_interrupts);
+       heartbeat_count = pqi_read_heartbeat_counter(ctrl_info);
 
        if (num_interrupts == ctrl_info->previous_num_interrupts) {
-               ctrl_info->num_heartbeats_requested++;
-               if (ctrl_info->num_heartbeats_requested >
-                       PQI_MAX_HEARTBEAT_REQUESTS) {
+               if (heartbeat_count == ctrl_info->previous_heartbeat_count) {
+                       dev_err(&ctrl_info->pci_dev->dev,
+                               "no heartbeat detected - last heartbeat count: %u\n",
+                               heartbeat_count);
                        pqi_take_ctrl_offline(ctrl_info);
                        return;
                }
-               ctrl_info->events[PQI_EVENT_HEARTBEAT].pending = true;
-               schedule_work(&ctrl_info->event_work);
        } else {
-               ctrl_info->num_heartbeats_requested = 0;
+               ctrl_info->previous_num_interrupts = num_interrupts;
        }
 
-       ctrl_info->previous_num_interrupts = num_interrupts;
+       ctrl_info->previous_heartbeat_count = heartbeat_count;
        mod_timer(&ctrl_info->heartbeat_timer,
                jiffies + PQI_HEARTBEAT_TIMER_INTERVAL);
 }
 
 static void pqi_start_heartbeat_timer(struct pqi_ctrl_info *ctrl_info)
 {
+       if (!ctrl_info->heartbeat_counter)
+               return;
+
        ctrl_info->previous_num_interrupts =
                atomic_read(&ctrl_info->num_interrupts);
+       ctrl_info->previous_heartbeat_count =
+               pqi_read_heartbeat_counter(ctrl_info);
 
-       init_timer(&ctrl_info->heartbeat_timer);
        ctrl_info->heartbeat_timer.expires =
                jiffies + PQI_HEARTBEAT_TIMER_INTERVAL;
        ctrl_info->heartbeat_timer.data = (unsigned long)ctrl_info;
        ctrl_info->heartbeat_timer.function = pqi_heartbeat_timer_handler;
-       ctrl_info->heartbeat_timer_started = true;
        add_timer(&ctrl_info->heartbeat_timer);
 }
 
 static inline void pqi_stop_heartbeat_timer(struct pqi_ctrl_info *ctrl_info)
 {
-       if (ctrl_info->heartbeat_timer_started) {
-               ctrl_info->heartbeat_timer_started = false;
-               del_timer_sync(&ctrl_info->heartbeat_timer);
-       }
+       del_timer_sync(&ctrl_info->heartbeat_timer);
 }
 
 static inline int pqi_event_type_to_event_index(unsigned int event_type)
@@ -2925,12 +2922,10 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
        struct pqi_event_queue *event_queue;
        struct pqi_event_response *response;
        struct pqi_event *event;
-       bool need_delayed_work;
        int event_index;
 
        event_queue = &ctrl_info->event_queue;
        num_events = 0;
-       need_delayed_work = false;
        oq_ci = event_queue->oq_ci_copy;
 
        while (1) {
@@ -2953,10 +2948,6 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
                                event->event_id = response->event_id;
                                event->additional_event_id =
                                        response->additional_event_id;
-                               if (event_index != PQI_EVENT_TYPE_HEARTBEAT) {
-                                       event->pending = true;
-                                       need_delayed_work = true;
-                               }
                        }
                }
 
@@ -2966,9 +2957,7 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
        if (num_events) {
                event_queue->oq_ci_copy = oq_ci;
                writel(oq_ci, event_queue->oq_ci);
-
-               if (need_delayed_work)
-                       schedule_work(&ctrl_info->event_work);
+               schedule_work(&ctrl_info->event_work);
        }
 
        return num_events;
@@ -3220,7 +3209,7 @@ static int pqi_alloc_operational_queues(struct pqi_ctrl_info *ctrl_info)
 
        if (!ctrl_info->queue_memory_base) {
                dev_err(&ctrl_info->pci_dev->dev,
-                       "failed to allocate memory for PQI admin queues\n");
+                       "unable to allocate memory for PQI admin queues\n");
                return -ENOMEM;
        }
 
@@ -5672,6 +5661,55 @@ out:
        return rc;
 }
 
+static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
+{
+       u32 table_length;
+       u32 section_offset;
+       void __iomem *table_iomem_addr;
+       struct pqi_config_table *config_table;
+       struct pqi_config_table_section_header *section;
+
+       table_length = ctrl_info->config_table_length;
+
+       config_table = kmalloc(table_length, GFP_KERNEL);
+       if (!config_table) {
+               dev_err(&ctrl_info->pci_dev->dev,
+                       "unable to allocate memory for PQI configuration table\n");
+               return -ENOMEM;
+       }
+
+       /*
+        * Copy the config table contents from I/O memory space into the
+        * temporary buffer.
+        */
+       table_iomem_addr = ctrl_info->iomem_base +
+               ctrl_info->config_table_offset;
+       memcpy_fromio(config_table, table_iomem_addr, table_length);
+
+       section_offset =
+               get_unaligned_le32(&config_table->first_section_offset);
+
+       while (section_offset) {
+               section = (void *)config_table + section_offset;
+
+               switch (get_unaligned_le16(&section->section_id)) {
+               case PQI_CONFIG_TABLE_SECTION_HEARTBEAT:
+                       ctrl_info->heartbeat_counter = table_iomem_addr +
+                               section_offset +
+                               offsetof(struct pqi_config_table_heartbeat,
+                                       heartbeat_counter);
+                       break;
+               }
+
+               section_offset =
+                       get_unaligned_le16(&section->next_section_offset);
+       }
+
+       kfree(config_table);
+
+       return 0;
+}
+
 /* Switches the controller from PQI mode back into SIS mode. */
 
 static int pqi_revert_to_sis_mode(struct pqi_ctrl_info *ctrl_info)
@@ -5783,6 +5821,10 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
        ctrl_info->pqi_mode_enabled = true;
        pqi_save_ctrl_mode(ctrl_info, PQI_MODE);
 
+       rc = pqi_process_config_table(ctrl_info);
+       if (rc)
+               return rc;
+
        rc = pqi_alloc_admin_queues(ctrl_info);
        if (rc) {
                dev_err(&ctrl_info->pci_dev->dev,
@@ -6091,6 +6133,8 @@ static struct pqi_ctrl_info *pqi_alloc_ctrl_info(int numa_node)
        INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker);
        INIT_DELAYED_WORK(&ctrl_info->update_time_work, pqi_update_time_worker);
 
+       init_timer(&ctrl_info->heartbeat_timer);
+
        sema_init(&ctrl_info->sync_request_sem,
                PQI_RESERVED_IO_SLOTS_SYNCHRONOUS_REQUESTS);
        init_waitqueue_head(&ctrl_info->block_requests_wait);
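diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c
index 12853fd7555025de7b70049587011f246a8550b2..3155bda88550a74f6de9b9ce1ebbf82028e87fe4 100644
--- a/drivers/scsi/smartpqi/smartpqi_sis.c
+++ b/drivers/scsi/smartpqi/smartpqi_sis.c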
@@ -422,6 +422,10 @@ void sis_soft_reset(struct pqi_ctrl_info *ctrl_info)
 
 void sis_shutdown_ctrl(struct pqi_ctrl_info *ctrl_info)
 {
+       if (readl(&ctrl_info->registers->sis_firmware_status) &
+               SIS_CTRL_KERNEL_PANIC)
+               return;
+
        writel(SIS_TRIGGER_SHUTDOWN,
                &ctrl_info->registers->sis_host_to_ctrl_doorbell);
 }