atari_scsi: Fix atari_scsi deadlocks on Falcon
authorFinn Thain <fthain@telegraphics.com.au>
Wed, 12 Nov 2014 05:12:08 +0000 (16:12 +1100)
committerChristoph Hellwig <hch@lst.de>
Thu, 20 Nov 2014 08:11:13 +0000 (09:11 +0100)
Don't disable irqs when waiting for the ST DMA "lock"; its release may
require an interrupt.

Introduce stdma_try_lock() for use in soft irq context. atari_scsi now tells
the SCSI mid-layer to defer queueing a command if the ST DMA lock is not
available, as per Michael's patch:
http://marc.info/?l=linux-m68k&m=139095335824863&w=2

The falcon_got_lock variable is race prone: we can't disable IRQs while
waiting to acquire the lock, so after acquiring it there must be some
interval during which falcon_got_lock remains false. Introduce
stdma_is_locked_by() to replace falcon_got_lock.

The falcon_got_lock tests in the EH handlers are incorrect these days. It
can happen that an EH handler is called after a command completes normally.
Remove these checks along with falcon_got_lock.

Also remove the complicated and racy fairness wait queues. If fairness is an
issue (when SCSI competes with IDE for the ST DMA interrupt), the solution
is likely to be a lower value for host->can_queue.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Tested-by: Michael Schmitz <schmitzmic@gmail.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
arch/m68k/atari/stdma.c
arch/m68k/include/asm/atari_stdma.h
drivers/scsi/atari_NCR5380.c
drivers/scsi/atari_scsi.c

index ddbf43ca8858012470d7f20665e89e85f5a045f3..e5a66596b116dc7642700aa62638318c73cca135 100644 (file)
@@ -59,6 +59,31 @@ static irqreturn_t stdma_int (int irq, void *dummy);
 /************************* End of Prototypes **************************/
 
 
+/**
+ * stdma_try_lock - attempt to acquire ST DMA interrupt "lock"
+ * @handler: interrupt handler to use after acquisition
+ *
+ * Returns !0 if lock was acquired; otherwise 0.
+ */
+
+int stdma_try_lock(irq_handler_t handler, void *data)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       if (stdma_locked) {
+               local_irq_restore(flags);
+               return 0;
+       }
+
+       stdma_locked   = 1;
+       stdma_isr      = handler;
+       stdma_isr_data = data;
+       local_irq_restore(flags);
+       return 1;
+}
+EXPORT_SYMBOL(stdma_try_lock);
+
 
 /*
  * Function: void stdma_lock( isrfunc isr, void *data )
@@ -78,19 +103,10 @@ static irqreturn_t stdma_int (int irq, void *dummy);
 
 void stdma_lock(irq_handler_t handler, void *data)
 {
-       unsigned long flags;
-
-       local_irq_save(flags);          /* protect lock */
-
        /* Since the DMA is used for file system purposes, we
         have to sleep uninterruptible (there may be locked
         buffers) */
-       wait_event(stdma_wait, !stdma_locked);
-
-       stdma_locked   = 1;
-       stdma_isr      = handler;
-       stdma_isr_data = data;
-       local_irq_restore(flags);
+       wait_event(stdma_wait, stdma_try_lock(handler, data));
 }
 EXPORT_SYMBOL(stdma_lock);
 
@@ -122,22 +138,25 @@ void stdma_release(void)
 EXPORT_SYMBOL(stdma_release);
 
 
-/*
- * Function: int stdma_others_waiting( void )
- *
- * Purpose: Check if someone waits for the ST-DMA lock.
- *
- * Inputs: none
- *
- * Returns: 0 if no one is waiting, != 0 otherwise
+/**
+ * stdma_is_locked_by - allow lock holder to check whether it needs to release.
+ * @handler: interrupt handler previously used to acquire lock.
  *
+ * Returns !0 if locked for the given handler; 0 otherwise.
  */
 
-int stdma_others_waiting(void)
+int stdma_is_locked_by(irq_handler_t handler)
 {
-       return waitqueue_active(&stdma_wait);
+       unsigned long flags;
+       int result;
+
+       local_irq_save(flags);
+       result = stdma_locked && (stdma_isr == handler);
+       local_irq_restore(flags);
+
+       return result;
 }
-EXPORT_SYMBOL(stdma_others_waiting);
+EXPORT_SYMBOL(stdma_is_locked_by);
 
 
 /*
index 8e389b7fa70c88ff3db422de4bfa7c2306d098ce..d24e34d870dc4c4a25bc450e3b218c298ce6cbed 100644 (file)
@@ -8,11 +8,11 @@
 
 /***************************** Prototypes *****************************/
 
+int stdma_try_lock(irq_handler_t, void *);
 void stdma_lock(irq_handler_t handler, void *data);
 void stdma_release( void );
-int stdma_others_waiting( void );
 int stdma_islocked( void );
-void *stdma_locked_by( void );
+int stdma_is_locked_by(irq_handler_t);
 void stdma_init( void );
 
 /************************* End of Prototypes **************************/
index 4eeb9684977b2adfeea289cd56ca862bdb3ec106..7a6f90ce0316066abc0abda7df4ffa38b17fb161 100644 (file)
@@ -879,10 +879,10 @@ static void NCR5380_exit(struct Scsi_Host *instance)
  *
  */
 
-static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd,
-                                     void (*done)(struct scsi_cmnd *))
+static int NCR5380_queue_command(struct Scsi_Host *instance,
+                                 struct scsi_cmnd *cmd)
 {
-       SETUP_HOSTDATA(cmd->device->host);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        struct scsi_cmnd *tmp;
        unsigned long flags;
 
@@ -893,7 +893,7 @@ static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd,
                printk(KERN_NOTICE "scsi%d: WRITE attempted with NO_WRITE debugging flag set\n",
                       H_NO(cmd));
                cmd->result = (DID_ERROR << 16);
-               done(cmd);
+               cmd->scsi_done(cmd);
                return 0;
        }
 #endif /* (NDEBUG & NDEBUG_NO_WRITE) */
@@ -904,8 +904,6 @@ static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd,
         */
 
        SET_NEXT(cmd, NULL);
-       cmd->scsi_done = done;
-
        cmd->result = 0;
 
        /*
@@ -915,7 +913,6 @@ static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd,
         * sense data is only guaranteed to be valid while the condition exists.
         */
 
-       local_irq_save(flags);
        /* ++guenther: now that the issue queue is being set up, we can lock ST-DMA.
         * Otherwise a running NCR5380_main may steal the lock.
         * Lock before actually inserting due to fairness reasons explained in
@@ -928,11 +925,11 @@ static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd,
         * because also a timer int can trigger an abort or reset, which would
         * alter queues and touch the lock.
         */
-       if (!IS_A_TT()) {
-               /* perhaps stop command timer here */
-               falcon_get_lock();
-               /* perhaps restart command timer here */
-       }
+       if (!falcon_get_lock())
+               return SCSI_MLQUEUE_HOST_BUSY;
+
+       local_irq_save(flags);
+
        if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
                LIST(cmd, hostdata->issue_queue);
                SET_NEXT(cmd, hostdata->issue_queue);
@@ -956,15 +953,13 @@ static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd,
         * If we're not in an interrupt, we can call NCR5380_main()
         * unconditionally, because it cannot be already running.
         */
-       if (in_interrupt() || ((flags >> 8) & 7) >= 6)
+       if (in_interrupt() || irqs_disabled())
                queue_main();
        else
                NCR5380_main(NULL);
        return 0;
 }
 
-static DEF_SCSI_QCMD(NCR5380_queue_command)
-
 /*
  * Function : NCR5380_main (void)
  *
@@ -2554,10 +2549,6 @@ int NCR5380_abort(struct scsi_cmnd *cmd)
 
        local_irq_save(flags);
 
-       if (!IS_A_TT() && !falcon_got_lock)
-               printk(KERN_ERR "scsi%d: !!BINGO!! Falcon has no lock in NCR5380_abort\n",
-                      HOSTNO);
-
        dprintk(NDEBUG_ABORT, "scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
                    NCR5380_read(BUS_AND_STATUS_REG),
                    NCR5380_read(STATUS_REG));
@@ -2756,10 +2747,6 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
        struct scsi_cmnd *connected, *disconnected_queue;
 #endif
 
-       if (!IS_A_TT() && !falcon_got_lock)
-               printk(KERN_ERR "scsi%d: !!BINGO!! Falcon has no lock in NCR5380_reset\n",
-                      H_NO(cmd));
-
        NCR5380_print_status(cmd->device->host);
 
        /* get in phase */
index 48fabebdbbb00601622698fed858b56b15bd339f..b2e86d0630ce98fdbdd4f60ada4e9e61fcaa9215 100644 (file)
@@ -184,7 +184,7 @@ static void atari_scsi_fetch_restbytes(void);
 static irqreturn_t scsi_tt_intr(int irq, void *dummy);
 static irqreturn_t scsi_falcon_intr(int irq, void *dummy);
 static void falcon_release_lock_if_possible(struct NCR5380_hostdata *hostdata);
-static void falcon_get_lock(void);
+static int falcon_get_lock(void);
 #ifdef CONFIG_ATARI_SCSI_RESET_BOOT
 static void atari_scsi_reset_boot(void);
 #endif
@@ -473,17 +473,10 @@ static void atari_scsi_fetch_restbytes(void)
 #endif /* REAL_DMA */
 
 
-static int falcon_got_lock = 0;
-static DECLARE_WAIT_QUEUE_HEAD(falcon_fairness_wait);
-static int falcon_trying_lock = 0;
-static DECLARE_WAIT_QUEUE_HEAD(falcon_try_wait);
 static int falcon_dont_release = 0;
 
 /* This function releases the lock on the DMA chip if there is no
- * connected command and the disconnected queue is empty. On
- * releasing, instances of falcon_get_lock are awoken, that put
- * themselves to sleep for fairness. They can now try to get the lock
- * again (but others waiting longer more probably will win).
+ * connected command and the disconnected queue is empty.
  */
 
 static void falcon_release_lock_if_possible(struct NCR5380_hostdata *hostdata)
@@ -495,20 +488,12 @@ static void falcon_release_lock_if_possible(struct NCR5380_hostdata *hostdata)
 
        local_irq_save(flags);
 
-       if (falcon_got_lock && !hostdata->disconnected_queue &&
-           !hostdata->issue_queue && !hostdata->connected) {
-
-               if (falcon_dont_release) {
-#if 0
-                       printk("WARNING: Lock release not allowed. Ignored\n");
-#endif
-                       local_irq_restore(flags);
-                       return;
-               }
-               falcon_got_lock = 0;
+       if (!hostdata->disconnected_queue &&
+           !hostdata->issue_queue &&
+           !hostdata->connected &&
+           !falcon_dont_release &&
+           stdma_is_locked_by(scsi_falcon_intr))
                stdma_release();
-               wake_up(&falcon_fairness_wait);
-       }
 
        local_irq_restore(flags);
 }
@@ -517,51 +502,19 @@ static void falcon_release_lock_if_possible(struct NCR5380_hostdata *hostdata)
  * If the DMA isn't locked already for SCSI, it tries to lock it by
  * calling stdma_lock(). But if the DMA is locked by the SCSI code and
  * there are other drivers waiting for the chip, we do not issue the
- * command immediately but wait on 'falcon_fairness_queue'. We will be
- * waked up when the DMA is unlocked by some SCSI interrupt. After that
- * we try to get the lock again.
- * But we must be prepared that more than one instance of
- * falcon_get_lock() is waiting on the fairness queue. They should not
- * try all at once to call stdma_lock(), one is enough! For that, the
- * first one sets 'falcon_trying_lock', others that see that variable
- * set wait on the queue 'falcon_try_wait'.
- * Complicated, complicated.... Sigh...
+ * command immediately but tell the SCSI mid-layer to defer.
  */
 
-static void falcon_get_lock(void)
+static int falcon_get_lock(void)
 {
-       unsigned long flags;
-
        if (IS_A_TT())
-               return;
+               return 1;
 
-       local_irq_save(flags);
+       if (in_interrupt())
+               return stdma_try_lock(scsi_falcon_intr, NULL);
 
-       wait_event_cmd(falcon_fairness_wait,
-               in_interrupt() || !falcon_got_lock || !stdma_others_waiting(),
-               local_irq_restore(flags),
-               local_irq_save(flags));
-
-       while (!falcon_got_lock) {
-               if (in_irq())
-                       panic("Falcon SCSI hasn't ST-DMA lock in interrupt");
-               if (!falcon_trying_lock) {
-                       falcon_trying_lock = 1;
-                       stdma_lock(scsi_falcon_intr, NULL);
-                       falcon_got_lock = 1;
-                       falcon_trying_lock = 0;
-                       wake_up(&falcon_try_wait);
-               } else {
-                       wait_event_cmd(falcon_try_wait,
-                               falcon_got_lock && !falcon_trying_lock,
-                               local_irq_restore(flags),
-                               local_irq_save(flags));
-               }
-       }
-
-       local_irq_restore(flags);
-       if (!falcon_got_lock)
-               panic("Falcon SCSI: someone stole the lock :-(\n");
+       stdma_lock(scsi_falcon_intr, NULL);
+       return 1;
 }