net_sched: fix estimator lock selection for mq child qdiscs
authorPatrick McHardy <kaber@trash.net>
Thu, 10 Sep 2009 01:11:23 +0000 (18:11 -0700)
committerDavid S. Miller <davem@davemloft.net>
Thu, 10 Sep 2009 01:11:23 +0000 (18:11 -0700)
When new child qdiscs are attached to the mq qdisc, they are actually
attached as root qdiscs to the device queues. The lock selection for
new estimators incorrectly picks the root lock of the existing and
to be replaced qdisc, which results in a use-after-free once the old
qdisc has been destroyed.

Mark mq qdisc instances with a new flag and treat qdiscs attached to
mq as children similar to regular root qdiscs.

Additionally prevent estimators from being attached to the mq qdisc
itself since it only updates its byte and packet counters during dumps.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/sch_generic.h
net/sched/sch_api.c
net/sched/sch_mq.c

index 9c69585a1be80cc6e742c6aa0603e65c0e668111..88eb9de095de21006ed26fd84db96ca7f58c3b38 100644 (file)
@@ -46,6 +46,7 @@ struct Qdisc
 #define TCQ_F_THROTTLED                2
 #define TCQ_F_INGRESS          4
 #define TCQ_F_CAN_BYPASS       8
+#define TCQ_F_MQROOT           16
 #define TCQ_F_WARN_NONWC       (1 << 16)
        int                     padded;
        struct Qdisc_ops        *ops;
index 2a78d54101540f10562dbba2332271038f379694..3af106140f3577c2ec5119b7102fafae1d54926a 100644 (file)
@@ -733,7 +733,8 @@ static struct lock_class_key qdisc_rx_lock;
 
 static struct Qdisc *
 qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
-            u32 parent, u32 handle, struct nlattr **tca, int *errp)
+            struct Qdisc *p, u32 parent, u32 handle,
+            struct nlattr **tca, int *errp)
 {
        int err;
        struct nlattr *kind = tca[TCA_KIND];
@@ -810,24 +811,21 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
                if (tca[TCA_RATE]) {
                        spinlock_t *root_lock;
 
+                       err = -EOPNOTSUPP;
+                       if (sch->flags & TCQ_F_MQROOT)
+                               goto err_out4;
+
                        if ((sch->parent != TC_H_ROOT) &&
-                           !(sch->flags & TCQ_F_INGRESS))
+                           !(sch->flags & TCQ_F_INGRESS) &&
+                           (!p || !(p->flags & TCQ_F_MQROOT)))
                                root_lock = qdisc_root_sleeping_lock(sch);
                        else
                                root_lock = qdisc_lock(sch);
 
                        err = gen_new_estimator(&sch->bstats, &sch->rate_est,
                                                root_lock, tca[TCA_RATE]);
-                       if (err) {
-                               /*
-                                * Any broken qdiscs that would require
-                                * a ops->reset() here? The qdisc was never
-                                * in action so it shouldn't be necessary.
-                                */
-                               if (ops->destroy)
-                                       ops->destroy(sch);
-                               goto err_out3;
-                       }
+                       if (err)
+                               goto err_out4;
                }
 
                qdisc_list_add(sch);
@@ -843,6 +841,15 @@ err_out2:
 err_out:
        *errp = err;
        return NULL;
+
+err_out4:
+       /*
+        * Any broken qdiscs that would require a ops->reset() here?
+        * The qdisc was never in action so it shouldn't be necessary.
+        */
+       if (ops->destroy)
+               ops->destroy(sch);
+       goto err_out3;
 }
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
@@ -867,13 +874,16 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
        qdisc_put_stab(sch->stab);
        sch->stab = stab;
 
-       if (tca[TCA_RATE])
+       if (tca[TCA_RATE]) {
                /* NB: ignores errors from replace_estimator
                   because change can't be undone. */
+               if (sch->flags & TCQ_F_MQROOT)
+                       goto out;
                gen_replace_estimator(&sch->bstats, &sch->rate_est,
                                            qdisc_root_sleeping_lock(sch),
                                            tca[TCA_RATE]);
-
+       }
+out:
        return 0;
 }
 
@@ -1097,7 +1107,7 @@ create_n_graft:
        if (!(n->nlmsg_flags&NLM_F_CREATE))
                return -ENOENT;
        if (clid == TC_H_INGRESS)
-               q = qdisc_create(dev, &dev->rx_queue,
+               q = qdisc_create(dev, &dev->rx_queue, p,
                                 tcm->tcm_parent, tcm->tcm_parent,
                                 tca, &err);
        else {
@@ -1106,7 +1116,7 @@ create_n_graft:
                if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
                        ntx = p->ops->cl_ops->select_queue(p, tcm);
 
-               q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx),
+               q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), p,
                                 tcm->tcm_parent, tcm->tcm_handle,
                                 tca, &err);
        }
index c84dec9c8c7d916845dd3b751ee03729dfc66d24..dd5ee022f1f7c1d76b28b4fd76028880db6ecaa5 100644 (file)
@@ -64,6 +64,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
                priv->qdiscs[ntx] = qdisc;
        }
 
+       sch->flags |= TCQ_F_MQROOT;
        return 0;
 
 err: