net: convert BUG_TRAP to generic WARN_ON
/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed,
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.


	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me, why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong
	because of internal device queueing, but on a permanently loaded
	link it is true. Moreover, combined with the clock integrator,
	this scheme looks very close to an ideal solution. */
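
/*	A worked instance of the bound above (added illustration, not part of
	the original text): take k = 2 classes with r_1 = r_2 = 0.25 (so
	r = 0.5) and per-round allotment W*r_i = MTU/2, i.e. W = 2*MTU.
	Then [MTU/(W*r_i)] = 2 and W*r = MTU, so

	delay_i <= (2*MTU + MTU + 2*MTU)/B = 5*MTU/B
 */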

struct cbq_sched_data;


struct cbq_class
{
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};

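/* Note added for clarity: "L2T" reads "length to time". qdisc_l2t() looks
 * the packet length up in the class's rate table and returns the time, in
 * psched ticks, that a packet of that length occupies the link at the
 * class's configured rate.
 */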
#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)


static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
   so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
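
/* Illustrative configuration (added sketch; handles and numbers are
   hypothetical, see tc-cbq(8) for the exact option syntax). The root acts
   as the split node, and a child advertises via defmap which logical
   priorities it catches without needing an explicit filter:

	tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit \
		avpkt 1000 cell 8
	tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
		rate 30Mbit allot 1514 prio 5 avpkt 1000 bounded \
		split 1:0 defmap c0

   Packets whose TC_PRIO_* value hits a set bit of defmap (here 0xc0, i.e.
   priorities 6 and 7) resolve to class 1:1 in Step 2+n below.
 */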

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 * apply agency specific classifier.
		 * Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of active class list
   of its priority band.
 */
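
/* Note added for clarity: q->active[prio] always points at the TAIL of a
   circular, singly linked list of backlogged classes, so tail->next_alive
   is the head; the new class is linked in behind the old tail and becomes
   the new tail.
 */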

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	sch->qstats.drops++;
	cbq_mark_toplevel(q, cl);
	cl->qstats.drops++;
	return ret;
}

static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->qstats.drops++;
	cl->qstats.drops++;
	return ret;
}

/* Overlimit actions */
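
/* Overview (added note): a class's overlimit action is selected with
   TCA_CBQ_OVL_STRATEGY in cbq_set_overlimit() below. Classic sleeps the
   class for offtime, rclassic penalizes a class in the borrowing chain
   instead, delay arms an hrtimer until the class goes underlimit, lowprio
   demotes the class to its priority2 band, and drop discards one queued
   packet.
 */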

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this place; apparently
		   they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
						  expires)) > 0)
				q->delay_timer.expires = expires;
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(sch);
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (qdisc_enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes += qdisc_pkt_len(skb);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
   It is a mission-critical procedure.

   We "regenerate" the toplevel cutoff, if the transmitting class
   has backlog and it is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

			/* true_avgidle := (1-W)*true_avgidle + W*idle,
			   where W=2^{-ewma_log}. But cl->avgidle is scaled:
			   cl->avgidle == true_avgidle/W,
			   hence:
			 */
			avgidle += idle - (avgidle>>cl->ewma_log);
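			/* Added numeric check: with ewma_log = 5 (W = 1/32),
			   the true update a' = (31/32)*a + (1/32)*idle, in
			   the scaled variable s = a/W = 32*a, becomes
			   s' = 31*a + idle = s - (s >> 5) + idle, which is
			   exactly the statement above. */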
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			   will be allowed to send.
			   It will occur, when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to the undertime virtual clock the time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already passed
			   by the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}

static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Now the overlimit
		   action is generated for not bounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. Particularly,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

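/* Note added for clarity: the function below performs weighted round robin
   over the circular active list of one priority band. A class spends down
   cl->deficit (in bytes) as it transmits; once the deficit is exhausted it
   is recharged with cl->quantum and the round moves to the next class, so
   long-run throughput stays proportional to quantum even when quanta are
   smaller than the MTU.
 */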
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding expected packet transmission time.
		   If real time is greater, we warp artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
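		/* Equivalently (added note): the lines below advance q->now
		   by max(incr, incr2), i.e. by at least the modelled
		   transmission time of the previous packet, and by the full
		   wall-clock delta when that is larger. */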
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; that is unavoidable with toplevel sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
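			/* Note added for clarity: q->quanta[prio] is the sum
			   of the weights in this band (see cbq_addprio), so
			   the expression above amounts to
			   quantum = allot * weight / (average weight of the
			   band). */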
			if (cl->quantum <= 0 || cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}

static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};
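
/* Note added for clarity: nla_parse_nested() validates attributes against
 * this policy table and rejects any attribute shorter than the declared
 * struct size, so the cbq_set_*() helpers can dereference nla_data()
 * without further length checks.
 */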

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					    &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (cl) {
		if (new == NULL) {
			new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
						&pfifo_qdisc_ops,
						cl->common.classid);
			if (new == NULL)
				return -ENOBUFS;
		} else {
#ifdef CONFIG_NET_CLS_ACT
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = xchg(&cl->q, new);
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl ? cl->q : NULL;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}

static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}

static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		if (tca[TCA_RATE])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      qdisc_root_lock(sch),
					      tca[TCA_RATE]);
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	if (tca[TCA_RATE])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  qdisc_root_lock(sch), tca[TCA_RATE]);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");