workqueue: assorted backported fixes for mt8127/android_kernel_alcatel_ttab (clear-PENDING barrier, rescuer/pwq release race, cancel_work_sync() on PREEMPT_NONE, sysfs uevent, NUMA cpumask init)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 45c24d723448197ce13f867bfa6d69bcc9c0ecfd..66972ac0c6c0b60c869bec2d19a21eefcb4a6f6b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -606,6 +606,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
         */
        smp_wmb();
        set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
+       /*
+        * The following mb guarantees that previous clear of a PENDING bit
+        * will not be reordered with any speculative LOADS or STORES from
+        * work->current_func, which is executed afterwards.  This possible
+        * reordering can lead to a missed execution on an attempt to queue
+        * the same @work.  E.g. consider this case:
+        *
+        *   CPU#0                         CPU#1
+        *   ----------------------------  --------------------------------
+        *
+        * 1  STORE event_indicated
+        * 2  queue_work_on() {
+        * 3    test_and_set_bit(PENDING)
+        * 4 }                             set_..._and_clear_pending() {
+        * 5                                 set_work_data() # clear bit
+        * 6                                 smp_mb()
+        * 7                               work->current_func() {
+        * 8                                  LOAD event_indicated
+        *                                 }
+        *
+        * Without an explicit full barrier, the speculative LOAD on line 8
+        * can be executed before CPU#0 does the STORE on line 1.  If that
+        * happens, CPU#0 observes that the PENDING bit is still set and a
+        * new execution of the @work is not queued, in the hope that CPU#1
+        * will eventually finish the queued @work.  Meanwhile CPU#1 does
+        * not see that event_indicated is set, because the speculative
+        * LOAD was executed before the actual STORE.
+        */
+       smp_mb();
 }
 
 static void clear_work_data(struct work_struct *work)
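(For context, the pattern the new smp_mb() protects looks roughly like the
sketch below: a producer publishes a flag and then queues the work, and the
work function reads that flag.  event_indicated comes from the comment in the
hunk above; the surrounding function and variable names are hypothetical,
illustrative driver code and are not part of this patch.)

	#include <linux/printk.h>
	#include <linux/workqueue.h>

	static bool event_indicated;

	static void example_fn(struct work_struct *work)
	{
		/*
		 * The smp_mb() added above sits between the clearing of
		 * PENDING and this call, so a producer that lost the
		 * PENDING race still has its event_indicated store
		 * observed here.
		 */
		if (event_indicated)
			pr_info("event handled\n");
	}
	static DECLARE_WORK(example_work, example_fn);

	static void example_producer(void)
	{
		event_indicated = true;		/* line 1 in the comment */

		/*
		 * If PENDING is already set, queue_work() returns false and
		 * we rely on the in-flight execution to see event_indicated;
		 * that reliance is what the new smp_mb() makes safe.
		 */
		queue_work(system_wq, &example_work);
	}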
@@ -1881,6 +1910,12 @@ static void send_mayday(struct work_struct *work)
 
        /* mayday mayday mayday */
        if (list_empty(&pwq->mayday_node)) {
+               /*
+                * If @pwq is for an unbound wq, its base ref may be put at
+                * any time due to an attribute change.  Pin @pwq until the
+                * rescuer is done with it.
+                */
+               get_pwq(pwq);
                list_add_tail(&pwq->mayday_node, &wq->maydays);
                wake_up_process(wq->rescuer->task);
        }
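(The matching put_pwq() appears in the rescuer_thread() hunk further down.
The hand-off pattern being applied here, sketched with a generic kref-counted
object and hypothetical names rather than the real pool_workqueue refcounting,
is roughly:)

	#include <linux/kernel.h>
	#include <linux/kref.h>
	#include <linux/list.h>
	#include <linux/slab.h>

	/* Hypothetical stand-in for a pool_workqueue. */
	struct item {
		struct kref		ref;
		struct list_head	node;
	};

	static void item_release(struct kref *ref)
	{
		kfree(container_of(ref, struct item, ref));
	}

	/* Producer (cf. send_mayday): pin before publishing to another thread. */
	static void publish(struct item *it, struct list_head *shared)
	{
		kref_get(&it->ref);
		list_add_tail(&it->node, shared);
	}

	/* Consumer (cf. the rescuer): drop the pin only after processing. */
	static void consume(struct item *it)
	{
		/* ... use *it ... */
		kref_put(&it->ref, item_release);
	}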
@@ -1928,17 +1963,13 @@ static void pool_mayday_timeout(unsigned long __pool)
  * spin_lock_irq(pool->lock) which may be released and regrabbed
  * multiple times.  Does GFP_KERNEL allocations.  Called only from
  * manager.
- *
- * RETURNS:
- * %false if no action was taken and pool->lock stayed locked, %true
- * otherwise.
  */
-static bool maybe_create_worker(struct worker_pool *pool)
+static void maybe_create_worker(struct worker_pool *pool)
 __releases(&pool->lock)
 __acquires(&pool->lock)
 {
        if (!need_to_create_worker(pool))
-               return false;
+               return;
 restart:
        spin_unlock_irq(&pool->lock);
 
@@ -1955,7 +1986,7 @@ restart:
                        start_worker(worker);
                        if (WARN_ON_ONCE(need_to_create_worker(pool)))
                                goto restart;
-                       return true;
+                       return;
                }
 
                if (!need_to_create_worker(pool))
@@ -1972,7 +2003,7 @@ restart:
        spin_lock_irq(&pool->lock);
        if (need_to_create_worker(pool))
                goto restart;
-       return true;
+       return;
 }
 
 /**
@@ -1985,15 +2016,9 @@ restart:
  * LOCKING:
  * spin_lock_irq(pool->lock) which may be released and regrabbed
  * multiple times.  Called only from manager.
- *
- * RETURNS:
- * %false if no action was taken and pool->lock stayed locked, %true
- * otherwise.
  */
-static bool maybe_destroy_workers(struct worker_pool *pool)
+static void maybe_destroy_workers(struct worker_pool *pool)
 {
-       bool ret = false;
-
        while (too_many_workers(pool)) {
                struct worker *worker;
                unsigned long expires;
@@ -2007,10 +2032,7 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
                }
 
                destroy_worker(worker);
-               ret = true;
        }
-
-       return ret;
 }
 
 /**
@@ -2030,13 +2052,14 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
  * multiple times.  Does GFP_KERNEL allocations.
  *
  * RETURNS:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
- * multiple times.  Does GFP_KERNEL allocations.
+ * %false if the pool doesn't need management and the caller can safely
+ * start processing works, %true if management function was performed and
+ * the conditions that the caller verified before calling the function may
+ * no longer be true.
  */
 static bool manage_workers(struct worker *worker)
 {
        struct worker_pool *pool = worker->pool;
-       bool ret = false;
 
        /*
         * Managership is governed by two mutexes - manager_arb and
@@ -2060,7 +2083,7 @@ static bool manage_workers(struct worker *worker)
         * manager_mutex.
         */
        if (!mutex_trylock(&pool->manager_arb))
-               return ret;
+               return false;
 
        /*
         * With manager arbitration won, manager_mutex would be free in
@@ -2070,7 +2093,6 @@ static bool manage_workers(struct worker *worker)
                spin_unlock_irq(&pool->lock);
                mutex_lock(&pool->manager_mutex);
                spin_lock_irq(&pool->lock);
-               ret = true;
        }
 
        pool->flags &= ~POOL_MANAGE_WORKERS;
@@ -2079,12 +2101,12 @@ static bool manage_workers(struct worker *worker)
         * Destroy and then create so that may_start_working() is true
         * on return.
         */
-       ret |= maybe_destroy_workers(pool);
-       ret |= maybe_create_worker(pool);
+       maybe_destroy_workers(pool);
+       maybe_create_worker(pool);
 
        mutex_unlock(&pool->manager_mutex);
        mutex_unlock(&pool->manager_arb);
-       return ret;
+       return true;
 }
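(For reference, a simplified sketch of how the %true return is meant to be
consumed, modeled on the caller loop in worker_thread() elsewhere in this
file; this is an illustration, not part of this hunk:)

	recheck:
		/* no more worker needed? */
		if (!need_more_worker(pool))
			goto sleep;

		/* management ran: the checks above may no longer hold */
		if (unlikely(!may_start_working(pool)) && manage_workers(worker))
			goto recheck;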
 
 /**
@@ -2356,6 +2378,7 @@ static int rescuer_thread(void *__rescuer)
        struct worker *rescuer = __rescuer;
        struct workqueue_struct *wq = rescuer->rescue_wq;
        struct list_head *scheduled = &rescuer->scheduled;
+       bool should_stop;
 
        set_user_nice(current, RESCUER_NICE_LEVEL);
 
@@ -2367,11 +2390,15 @@ static int rescuer_thread(void *__rescuer)
 repeat:
        set_current_state(TASK_INTERRUPTIBLE);
 
-       if (kthread_should_stop()) {
-               __set_current_state(TASK_RUNNING);
-               rescuer->task->flags &= ~PF_WQ_WORKER;
-               return 0;
-       }
+       /*
+        * By the time the rescuer is requested to stop, the workqueue
+        * shouldn't have any work pending, but @wq->maydays may still have
+        * pwq(s) queued.  This can happen when non-rescuer workers consume
+        * all the work items before the rescuer gets to them.  Go through
+        * @wq->maydays processing before acting on should_stop so that the
+        * list is always empty on exit.
+        */
+       should_stop = kthread_should_stop();
 
        /* see whether any pwq is asking for help */
        spin_lock_irq(&wq_mayday_lock);
@@ -2402,6 +2429,12 @@ repeat:
 
                process_scheduled_works(rescuer);
 
+               /*
+                * Put the reference grabbed by send_mayday().  @pool won't
+                * go away while we're holding its lock.
+                */
+               put_pwq(pwq);
+
                /*
                 * Leave this pool.  If keep_working() is %true, notify a
                 * regular worker; otherwise, we end up with 0 concurrency
@@ -2417,6 +2450,12 @@ repeat:
 
        spin_unlock_irq(&wq_mayday_lock);
 
+       if (should_stop) {
+               __set_current_state(TASK_RUNNING);
+               rescuer->task->flags &= ~PF_WQ_WORKER;
+               return 0;
+       }
+
        /* rescuers should never participate in concurrency management */
        WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
        schedule();
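(Taken together, the rescuer_thread() hunks above give its main loop the
following shape, shown here as a simplified sketch: sample
kthread_should_stop() first, drain @wq->maydays completely, and only then act
on the sampled value.)

	repeat:
		set_current_state(TASK_INTERRUPTIBLE);
		should_stop = kthread_should_stop();	/* sample, don't act yet */

		/* ... walk wq->maydays and process every queued pwq ... */

		if (should_stop) {			/* act only after the drain */
			__set_current_state(TASK_RUNNING);
			rescuer->task->flags &= ~PF_WQ_WORKER;
			return 0;
		}

		/* rescuers never participate in concurrency management */
		schedule();
		goto repeat;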
@@ -2851,19 +2890,57 @@ bool flush_work(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
+struct cwt_wait {
+       wait_queue_t            wait;
+       struct work_struct      *work;
+};
+
+static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+       struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+       if (cwait->work != key)
+               return 0;
+       return autoremove_wake_function(wait, mode, sync, key);
+}
+
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+       static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
        unsigned long flags;
        int ret;
 
        do {
                ret = try_to_grab_pending(work, is_dwork, &flags);
                /*
-                * If someone else is canceling, wait for the same event it
-                * would be waiting for before retrying.
+                * If someone else is already canceling, wait for it to
+                * finish.  flush_work() doesn't work for PREEMPT_NONE
+                * because we may get scheduled between @work's completion
+                * and the other canceling task resuming and clearing
+                * CANCELING - flush_work() will return false immediately
+                * as @work is no longer busy, try_to_grab_pending() will
+                * return -ENOENT as @work is still being canceled and the
+                * other canceling task won't be able to clear CANCELING as
+                * we're hogging the CPU.
+                *
+                * Let's wait for completion using a waitqueue.  As this
+                * may lead to the thundering herd problem, use a custom
+                * wake function which matches @work along with exclusive
+                * wait and wakeup.
                 */
-               if (unlikely(ret == -ENOENT))
-                       flush_work(work);
+               if (unlikely(ret == -ENOENT)) {
+                       struct cwt_wait cwait;
+
+                       init_wait(&cwait.wait);
+                       cwait.wait.func = cwt_wakefn;
+                       cwait.work = work;
+
+                       prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
+                                                 TASK_UNINTERRUPTIBLE);
+                       if (work_is_canceling(work))
+                               schedule();
+                       finish_wait(&cancel_waitq, &cwait.wait);
+               }
        } while (unlikely(ret < 0));
 
        /* tell other tasks trying to grab @work to back off */
@@ -2872,6 +2949,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 
        flush_work(work);
        clear_work_data(work);
+
+       /*
+        * Paired with prepare_to_wait() above so that either
+        * waitqueue_active() is visible here or !work_is_canceling() is
+        * visible there.
+        */
+       smp_mb();
+       if (waitqueue_active(&cancel_waitq))
+               __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
+
        return ret;
 }
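(A minimal sketch of the situation this guards against, with hypothetical
driver names: two tasks concurrently calling cancel_work_sync() on the same
work item on a PREEMPT_NONE kernel.  The loser of the race to set CANCELING
now sleeps on cancel_waitq instead of spinning in the try_to_grab_pending()
retry loop.)

	#include <linux/workqueue.h>

	static void my_fn(struct work_struct *work)
	{
		/* ... */
	}
	static DECLARE_WORK(my_work, my_fn);

	/*
	 * Hypothetical teardown path that may be entered by two tasks at
	 * once (e.g. module unload racing with device removal).  Both end
	 * up in __cancel_work_timer(); only one wins the CANCELING bit,
	 * the other waits on cancel_waitq until the winner finishes.
	 */
	static void my_teardown(void)
	{
		cancel_work_sync(&my_work);
	}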
 
@@ -3350,6 +3437,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
                }
        }
 
+       dev_set_uevent_suppress(&wq_dev->dev, false);
        kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
        return 0;
 }
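(Earlier in workqueue_sysfs_register(), outside this hunk, uevents are
suppressed with dev_set_uevent_suppress(&wq_dev->dev, true) before the device
is registered, so userspace only hears about the device once its unbound
attributes exist; the added line lifts that suppression so the KOBJ_ADD below
is actually delivered.  The general pattern, in illustrative form with a
hypothetical helper:)

	#include <linux/device.h>

	/* Illustrative: register a device quietly, announce it when ready. */
	static int register_quietly(struct device *dev)
	{
		int ret;

		dev_set_uevent_suppress(dev, true);	/* hold back KOBJ_ADD */
		ret = device_register(dev);
		if (ret)
			return ret;

		/* ... create the extra attributes userspace expects ... */

		dev_set_uevent_suppress(dev, false);	/* re-enable uevents */
		kobject_uevent(&dev->kobj, KOBJ_ADD);	/* announce complete device */
		return 0;
	}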
@@ -4944,7 +5032,7 @@ static void __init wq_numa_init(void)
        BUG_ON(!tbl);
 
        for_each_node(node)
-               BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+               BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
                                node_online(node) ? node : NUMA_NO_NODE));
 
        for_each_possible_cpu(cpu) {