epoll: use RCU to protect wakeup_source in epitem
author: Eric Wong <normalperson@yhbt.net>
Tue, 30 Apr 2013 22:27:39 +0000 (15:27 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Wed, 1 May 2013 00:04:04 +0000 (17:04 -0700)
This prevents wakeup_source destruction when a user hits the item with
EPOLL_CTL_MOD while ep_poll_callback is running.

Tested with CONFIG_SPARSE_RCU_POINTER=y and "make fs/eventpoll.o C=2"

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: NeilBrown <neilb@suse.de>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/eventpoll.c

index 0e5eda068520afa3bace1209fc04790cdfe350c7..a3acf936c72af92e7316be81a3f5b4900a1fd518 100644 (file)
@@ -160,7 +160,7 @@ struct epitem {
        struct list_head fllink;
 
        /* wakeup_source used when EPOLLWAKEUP is set */
-       struct wakeup_source *ws;
+       struct wakeup_source __rcu *ws;
 
        /* The structure that describe the interested events and the source fd */
        struct epoll_event event;
@@ -538,6 +538,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
        }
 }
 
+/* call only when ep->mtx is held */
+static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi)
+{
+       return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx));
+}
+
+/* call only when ep->mtx is held */
+static inline void ep_pm_stay_awake(struct epitem *epi)
+{
+       struct wakeup_source *ws = ep_wakeup_source(epi);
+
+       if (ws)
+               __pm_stay_awake(ws);
+}
+
+static inline bool ep_has_wakeup_source(struct epitem *epi)
+{
+       return rcu_access_pointer(epi->ws) ? true : false;
+}
+
+/* call when ep->mtx cannot be held (ep_poll_callback) */
+static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
+{
+       struct wakeup_source *ws;
+
+       rcu_read_lock();
+       ws = rcu_dereference(epi->ws);
+       if (ws)
+               __pm_stay_awake(ws);
+       rcu_read_unlock();
+}
+
 /**
  * ep_scan_ready_list - Scans the ready list in a way that makes possible for
  *                      the scan code, to call f_op->poll(). Also allows for
@@ -601,7 +633,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
                 */
                if (!ep_is_linked(&epi->rdllink)) {
                        list_add_tail(&epi->rdllink, &ep->rdllist);
-                       __pm_stay_awake(epi->ws);
+                       ep_pm_stay_awake(epi);
                }
        }
        /*
@@ -670,7 +702,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
                list_del_init(&epi->rdllink);
        spin_unlock_irqrestore(&ep->lock, flags);
 
-       wakeup_source_unregister(epi->ws);
+       wakeup_source_unregister(ep_wakeup_source(epi));
 
        /* At this point it is safe to free the eventpoll item */
        kmem_cache_free(epi_cache, epi);
@@ -754,7 +786,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
                         * callback, but it's not actually ready, as far as
                         * caller requested events goes. We can remove it here.
                         */
-                       __pm_relax(epi->ws);
+                       __pm_relax(ep_wakeup_source(epi));
                        list_del_init(&epi->rdllink);
                }
        }
@@ -986,7 +1018,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
        /* If this file is already in the ready list we exit soon */
        if (!ep_is_linked(&epi->rdllink)) {
                list_add_tail(&epi->rdllink, &ep->rdllist);
-               __pm_stay_awake(epi->ws);
+               ep_pm_stay_awake_rcu(epi);
        }
 
        /*
@@ -1148,6 +1180,7 @@ static int reverse_path_check(void)
 static int ep_create_wakeup_source(struct epitem *epi)
 {
        const char *name;
+       struct wakeup_source *ws;
 
        if (!epi->ep->ws) {
                epi->ep->ws = wakeup_source_register("eventpoll");
@@ -1156,17 +1189,29 @@ static int ep_create_wakeup_source(struct epitem *epi)
        }
 
        name = epi->ffd.file->f_path.dentry->d_name.name;
-       epi->ws = wakeup_source_register(name);
-       if (!epi->ws)
+       ws = wakeup_source_register(name);
+
+       if (!ws)
                return -ENOMEM;
+       rcu_assign_pointer(epi->ws, ws);
 
        return 0;
 }
 
-static void ep_destroy_wakeup_source(struct epitem *epi)
+/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */
+static noinline void ep_destroy_wakeup_source(struct epitem *epi)
 {
-       wakeup_source_unregister(epi->ws);
-       epi->ws = NULL;
+       struct wakeup_source *ws = ep_wakeup_source(epi);
+
+       rcu_assign_pointer(epi->ws, NULL);
+
+       /*
+        * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is
+        * used internally by wakeup_source_remove, too (called by
+        * wakeup_source_unregister), so we cannot use call_rcu
+        */
+       synchronize_rcu();
+       wakeup_source_unregister(ws);
 }
 
 /*
@@ -1201,7 +1246,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
                if (error)
                        goto error_create_wakeup_source;
        } else {
-               epi->ws = NULL;
+               RCU_INIT_POINTER(epi->ws, NULL);
        }
 
        /* Initialize the poll table using the queue callback */
@@ -1249,7 +1294,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
        /* If the file is already "ready" we drop it inside the ready list */
        if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
                list_add_tail(&epi->rdllink, &ep->rdllist);
-               __pm_stay_awake(epi->ws);
+               ep_pm_stay_awake(epi);
 
                /* Notify waiting tasks that events are available */
                if (waitqueue_active(&ep->wq))
@@ -1290,7 +1335,7 @@ error_unregister:
                list_del_init(&epi->rdllink);
        spin_unlock_irqrestore(&ep->lock, flags);
 
-       wakeup_source_unregister(epi->ws);
+       wakeup_source_unregister(ep_wakeup_source(epi));
 
 error_create_wakeup_source:
        kmem_cache_free(epi_cache, epi);
@@ -1319,9 +1364,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
        pt._key = event->events;
        epi->event.data = event->data; /* protected by mtx */
        if (epi->event.events & EPOLLWAKEUP) {
-               if (!epi->ws)
+               if (!ep_has_wakeup_source(epi))
                        ep_create_wakeup_source(epi);
-       } else if (epi->ws) {
+       } else if (ep_has_wakeup_source(epi)) {
                ep_destroy_wakeup_source(epi);
        }
 
@@ -1359,7 +1404,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
                spin_lock_irq(&ep->lock);
                if (!ep_is_linked(&epi->rdllink)) {
                        list_add_tail(&epi->rdllink, &ep->rdllist);
-                       __pm_stay_awake(epi->ws);
+                       ep_pm_stay_awake(epi);
 
                        /* Notify waiting tasks that events are available */
                        if (waitqueue_active(&ep->wq))
@@ -1385,6 +1430,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
        unsigned int revents;
        struct epitem *epi;
        struct epoll_event __user *uevent;
+       struct wakeup_source *ws;
        poll_table pt;
 
        init_poll_funcptr(&pt, NULL);
@@ -1407,9 +1453,13 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
                 * instead, but then epi->ws would temporarily be out of sync
                 * with ep_is_linked().
                 */
-               if (epi->ws && epi->ws->active)
-                       __pm_stay_awake(ep->ws);
-               __pm_relax(epi->ws);
+               ws = ep_wakeup_source(epi);
+               if (ws) {
+                       if (ws->active)
+                               __pm_stay_awake(ep->ws);
+                       __pm_relax(ws);
+               }
+
                list_del_init(&epi->rdllink);
 
                pt._key = epi->event.events;
@@ -1426,7 +1476,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
                        if (__put_user(revents, &uevent->events) ||
                            __put_user(epi->event.data, &uevent->data)) {
                                list_add(&epi->rdllink, head);
-                               __pm_stay_awake(epi->ws);
+                               ep_pm_stay_awake(epi);
                                return eventcnt ? eventcnt : -EFAULT;
                        }
                        eventcnt++;
@@ -1446,7 +1496,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
                                 * poll callback will queue them in ep->ovflist.
                                 */
                                list_add_tail(&epi->rdllink, &ep->rdllist);
-                               __pm_stay_awake(epi->ws);
+                               ep_pm_stay_awake(epi);
                        }
                }
        }