Revert "semaphore: fix"
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 11 May 2008 03:43:22 +0000 (20:43 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 11 May 2008 03:43:22 +0000 (20:43 -0700)
This reverts commit bf726eab3711cf192405d21688a4b21e07b6188a, as it has
been reported to cause a regression with processes stuck in __down(),
apparently because some missing wakeup.

Quoth Sven Wegener:
 "I'm currently investigating a regression that has showed up with my
  last git pull yesterday.  Bisecting the commits showed bf726e
  "semaphore: fix" to be the culprit, reverting it fixed the issue.

  Symptoms: During heavy filesystem usage (e.g.  a kernel compile) I get
  several compiler processes in uninterruptible sleep, blocking all i/o
  on the filesystem.  System is an Intel Core 2 Quad running a 64bit
  kernel and userspace.  Filesystem is xfs on top of lvm.  See below for
  the output of sysrq-w."

See

http://lkml.org/lkml/2008/5/10/45

for full report.

In the meantime, we can just fix the BKL performance regression by
reverting back to the good old BKL spinlock implementation instead,
since any sleeping lock will generally perform badly, especially if it
tries to be fair.

Reported-by: Sven Wegener <sven.wegener@stealer.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
kernel/semaphore.c

index 5e41217239e89f3fa203f9f73e4476e0985ecd79..5c2942e768cdd44371ae305d5870abfe7428732c 100644 (file)
@@ -54,9 +54,10 @@ void down(struct semaphore *sem)
        unsigned long flags;
 
        spin_lock_irqsave(&sem->lock, flags);
-       if (unlikely(!sem->count))
+       if (likely(sem->count > 0))
+               sem->count--;
+       else
                __down(sem);
-       sem->count--;
        spin_unlock_irqrestore(&sem->lock, flags);
 }
 EXPORT_SYMBOL(down);
@@ -76,10 +77,10 @@ int down_interruptible(struct semaphore *sem)
        int result = 0;
 
        spin_lock_irqsave(&sem->lock, flags);
-       if (unlikely(!sem->count))
-               result = __down_interruptible(sem);
-       if (!result)
+       if (likely(sem->count > 0))
                sem->count--;
+       else
+               result = __down_interruptible(sem);
        spin_unlock_irqrestore(&sem->lock, flags);
 
        return result;
@@ -102,10 +103,10 @@ int down_killable(struct semaphore *sem)
        int result = 0;
 
        spin_lock_irqsave(&sem->lock, flags);
-       if (unlikely(!sem->count))
-               result = __down_killable(sem);
-       if (!result)
+       if (likely(sem->count > 0))
                sem->count--;
+       else
+               result = __down_killable(sem);
        spin_unlock_irqrestore(&sem->lock, flags);
 
        return result;
@@ -156,10 +157,10 @@ int down_timeout(struct semaphore *sem, long jiffies)
        int result = 0;
 
        spin_lock_irqsave(&sem->lock, flags);
-       if (unlikely(!sem->count))
-               result = __down_timeout(sem, jiffies);
-       if (!result)
+       if (likely(sem->count > 0))
                sem->count--;
+       else
+               result = __down_timeout(sem, jiffies);
        spin_unlock_irqrestore(&sem->lock, flags);
 
        return result;
@@ -178,8 +179,9 @@ void up(struct semaphore *sem)
        unsigned long flags;
 
        spin_lock_irqsave(&sem->lock, flags);
-       sem->count++;
-       if (unlikely(!list_empty(&sem->wait_list)))
+       if (likely(list_empty(&sem->wait_list)))
+               sem->count++;
+       else
                __up(sem);
        spin_unlock_irqrestore(&sem->lock, flags);
 }
@@ -190,6 +192,7 @@ EXPORT_SYMBOL(up);
 struct semaphore_waiter {
        struct list_head list;
        struct task_struct *task;
+       int up;
 };
 
 /*
@@ -202,34 +205,33 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
 {
        struct task_struct *task = current;
        struct semaphore_waiter waiter;
-       int ret = 0;
 
-       waiter.task = task;
        list_add_tail(&waiter.list, &sem->wait_list);
+       waiter.task = task;
+       waiter.up = 0;
 
        for (;;) {
-               if (state == TASK_INTERRUPTIBLE && signal_pending(task)) {
-                       ret = -EINTR;
-                       break;
-               }
-               if (state == TASK_KILLABLE && fatal_signal_pending(task)) {
-                       ret = -EINTR;
-                       break;
-               }
-               if (timeout <= 0) {
-                       ret = -ETIME;
-                       break;
-               }
+               if (state == TASK_INTERRUPTIBLE && signal_pending(task))
+                       goto interrupted;
+               if (state == TASK_KILLABLE && fatal_signal_pending(task))
+                       goto interrupted;
+               if (timeout <= 0)
+                       goto timed_out;
                __set_task_state(task, state);
                spin_unlock_irq(&sem->lock);
                timeout = schedule_timeout(timeout);
                spin_lock_irq(&sem->lock);
-               if (sem->count > 0)
-                       break;
+               if (waiter.up)
+                       return 0;
        }
 
+ timed_out:
+       list_del(&waiter.list);
+       return -ETIME;
+
+ interrupted:
        list_del(&waiter.list);
-       return ret;
+       return -EINTR;
 }
 
 static noinline void __sched __down(struct semaphore *sem)
@@ -256,5 +258,7 @@ static noinline void __sched __up(struct semaphore *sem)
 {
        struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
                                                struct semaphore_waiter, list);
+       list_del(&waiter->list);
+       waiter->up = 1;
        wake_up_process(waiter->task);
 }