/*
 * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/flow_keys.h>

/* Stochastic Fairness Queuing algorithm.
   =======================================

   Source:
   Paul E. McKenney "Stochastic Fairness Queuing",
   IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.

   Paul E. McKenney "Stochastic Fairness Queuing",
   "Interworking: Research and Experience", v.2, 1991, p.113-131.


   See also:
   M. Shreedhar and George Varghese "Efficient Fair
   Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


   This is not the thing that is usually called (W)FQ nowadays.
   It does not use any timestamp mechanism, but instead
   processes queues in round-robin order.

   ADVANTAGE:

   - It is very cheap. Both CPU and memory requirements are minimal.

   DRAWBACKS:

   - "Stochastic" -> It is not 100% fair.
   When hash collisions occur, several flows are considered as one.

   - "Round-robin" -> It introduces larger delays than virtual clock
   based schemes, and should not be used for isolating interactive
   traffic from non-interactive. This means that this scheduler
   should be used as a leaf of CBQ or P3, which put interactive
   traffic into a higher priority band.

   We still need true WFQ for the top-level CSZ, but using WFQ
   for the best-effort traffic is absolutely pointless:
   SFQ is superior for this purpose.

   IMPLEMENTATION:
   This implementation limits:
   - maximal queue length per flow to 127 packets;
   - max mtu to 2^18-1;
   - max 65408 flows;
   - number of hash buckets to 65536.

   It is easy to increase these values, but not in flight.  */
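
/* Example configuration (an illustrative sketch only: the interface name
 * and all numbers below are assumptions, not values taken from this file):
 *
 *	tc qdisc add dev eth0 root sfq perturb 10
 *	tc qdisc change dev eth0 root sfq limit 3000 depth 64 headdrop \
 *		flows 512 divisor 2048
 *
 * 'perturb 10' rehashes flows every 10 seconds (see sfq_perturbation()),
 * and the second form exercises the v1 parameters (depth, headdrop,
 * flows, divisor) parsed in sfq_change() below.
 */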

#define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
#define SFQ_DEFAULT_FLOWS	128
#define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
#define SFQ_EMPTY_SLOT		0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024

/* We use 16 bits to store allot, and want to handle packets up to 64K.
 * Scale allot by 8 (1<<3) so that no overflow occurs.
 */
#define SFQ_ALLOT_SHIFT		3
#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
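
/* Worked example of the scaling: for a standard Ethernet device,
 * sfq_init() picks quantum = psched_mtu() = 1514 bytes, so
 * scaled_quantum = SFQ_ALLOT_SIZE(1514) = DIV_ROUND_UP(1514, 8) = 190.
 * A maximal 64K packet costs DIV_ROUND_UP(65535, 8) = 8192 scaled units,
 * well within a signed 16-bit allot (max 32767).
 */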

/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;

/*
 * We don't use pointers to save space.
 * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
 * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
 * are 'pointers' to dep[] array
 */
struct sfq_head {
	sfq_index	next;
	sfq_index	prev;
};

struct sfq_slot {
	struct sk_buff	*skblist_next;
	struct sk_buff	*skblist_prev;
	sfq_index	qlen;		/* number of skbs in skblist */
	sfq_index	next;		/* next slot in sfq RR chain */
	struct sfq_head	dep;		/* anchor in dep[] chains */
	unsigned short	hash;		/* hash value (index in ht[]) */
	short		allot;		/* credit for this slot */
};

struct sfq_sched_data {
/* frequently used fields */
	int		limit;		/* limit of total number of packets in this qdisc */
	unsigned int	divisor;	/* number of slots in hash table */
	unsigned int	maxflows;	/* number of flows in flows array */
	int		headdrop;
	int		maxdepth;	/* limit of packets per flow */

	u32		perturbation;
	struct tcf_proto *filter_list;
	sfq_index	cur_depth;	/* depth of longest slot */
	unsigned short	scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
	struct sfq_slot	*tail;		/* current slot in round */
	sfq_index	*ht;		/* Hash table ('divisor' slots) */
	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */

	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
	/* Linked lists of slots, indexed by depth
	 * dep[0] : list of unused flows
	 * dep[1] : list of flows with 1 packet
	 * dep[X] : list of flows with X packets
	 */

	int		perturb_period;
	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
	struct timer_list perturb_timer;
};

/*
 * sfq_head are either in a sfq_slot or in dep[] array
 */
static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{
	if (val < SFQ_MAX_FLOWS)
		return &q->slots[val].dep;
	return &q->dep[val - SFQ_MAX_FLOWS];
}
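
/* Index mapping example: with SFQ_MAX_FLOWS = 65408, sfq_dep_head(q, 5)
 * returns &q->slots[5].dep (an anchor embedded in flow slot 5), while
 * sfq_dep_head(q, 65410) returns &q->dep[2], the head of the list of
 * flows currently holding exactly 2 packets.
 */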

/*
 * In order to be able to quickly rehash our queue when timer changes
 * q->perturbation, we store flow_keys in skb->cb[]
 */
struct sfq_skb_cb {
	struct flow_keys	keys;
};

static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(skb->cb) <
		     sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
}

static unsigned int sfq_hash(const struct sfq_sched_data *q,
			     const struct sk_buff *skb)
{
	const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
	unsigned int hash;

	hash = jhash_3words((__force u32)keys->dst,
			    (__force u32)keys->src ^ keys->ip_proto,
			    (__force u32)keys->ports, q->perturbation);
	return hash & (q->divisor - 1);
}
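
/* The mask above relies on q->divisor being a power of two: sfq_change()
 * rejects any divisor that is not a power of 2 (or exceeds 65536), so
 * 'hash % divisor' can be computed as the cheaper 'hash & (divisor - 1)'.
 */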

static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				 int *qerr)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= q->divisor)
		return TC_H_MIN(skb->priority);

	if (!q->filter_list) {
		skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
		return sfq_hash(q, skb) + 1;
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= q->divisor)
			return TC_H_MIN(res.classid);
	}
	return 0;
}

/*
 * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
 */
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	struct sfq_slot *slot = &q->slots[x];
	int qlen = slot->qlen;

	p = qlen + SFQ_MAX_FLOWS;
	n = q->dep[qlen].next;

	slot->dep.next = n;
	slot->dep.prev = p;

	q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
	sfq_dep_head(q, n)->prev = x;
}

#define sfq_unlink(q, x, n, p)			\
	n = q->slots[x].dep.next;		\
	p = q->slots[x].dep.prev;		\
	sfq_dep_head(q, p)->next = n;		\
	sfq_dep_head(q, n)->prev = p
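
/* Note: sfq_unlink() deliberately expands to several statements and is
 * not wrapped in do { } while (0); it must only be used where a
 * multi-statement expansion is safe, never as the unbraced body of an
 * if/else.
 */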


static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	sfq_unlink(q, x, n, p);

	d = q->slots[x].qlen--;
	if (n == p && q->cur_depth == d)
		q->cur_depth--;
	sfq_link(q, x);
}

static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	sfq_unlink(q, x, n, p);

	d = ++q->slots[x].qlen;
	if (q->cur_depth < d)
		q->cur_depth = d;
	sfq_link(q, x);
}

/* helper functions : might be changed when/if skb use a standard list_head */

/* remove one skb from tail of slot queue */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
	struct sk_buff *skb = slot->skblist_prev;

	slot->skblist_prev = skb->prev;
	skb->prev->next = (struct sk_buff *)slot;
	skb->next = skb->prev = NULL;
	return skb;
}

/* remove one skb from head of slot queue */
static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
{
	struct sk_buff *skb = slot->skblist_next;

	slot->skblist_next = skb->next;
	skb->next->prev = (struct sk_buff *)slot;
	skb->next = skb->prev = NULL;
	return skb;
}

static inline void slot_queue_init(struct sfq_slot *slot)
{
	memset(slot, 0, sizeof(*slot));
	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
}

/* add skb to slot queue (tail add) */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
	skb->prev = slot->skblist_prev;
	skb->next = (struct sk_buff *)slot;
	slot->skblist_prev->next = skb;
	slot->skblist_prev = skb;
}

#define slot_queue_walk(slot, skb)		\
	for (skb = slot->skblist_next;		\
	     skb != (struct sk_buff *)slot;	\
	     skb = skb->next)
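
/* Usage sketch for slot_queue_walk() (mirrors sfq_dump_class_stats()):
 *
 *	struct sk_buff *skb;
 *	unsigned int backlog = 0;
 *
 *	slot_queue_walk(slot, skb)
 *		backlog += qdisc_pkt_len(skb);
 *
 * The slot itself acts as the list sentinel, so the walk terminates when
 * the cursor points back at the slot.
 */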

static unsigned int sfq_drop(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index x, d = q->cur_depth;
	struct sk_buff *skb;
	unsigned int len;
	struct sfq_slot *slot;

	/* Queue is full! Find the longest slot and drop tail packet from it */
	if (d > 1) {
		x = q->dep[d].next;
		slot = &q->slots[x];
drop:
		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
		len = qdisc_pkt_len(skb);
		sfq_dec(q, x);
		kfree_skb(skb);
		sch->q.qlen--;
		sch->qstats.drops++;
		sch->qstats.backlog -= len;
		return len;
	}

	if (d == 1) {
		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
		x = q->tail->next;
		slot = &q->slots[x];
		q->tail->next = slot->next;
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		goto drop;
	}

	return 0;
}

static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int hash;
	sfq_index x, qlen;
	struct sfq_slot *slot;
	int uninitialized_var(ret);

	hash = sfq_classify(skb, sch, &ret);
	if (hash == 0) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}
	hash--;

	x = q->ht[hash];
	slot = &q->slots[x];
	if (x == SFQ_EMPTY_SLOT) {
		x = q->dep[0].next; /* get a free slot */
		if (x >= SFQ_MAX_FLOWS)
			return qdisc_drop(skb, sch);
		q->ht[hash] = x;
		slot = &q->slots[x];
		slot->hash = hash;
	}

	if (slot->qlen >= q->maxdepth) {
		struct sk_buff *head;

		if (!q->headdrop)
			return qdisc_drop(skb, sch);

		head = slot_dequeue_head(slot);
		sch->qstats.backlog -= qdisc_pkt_len(head);
		qdisc_drop(head, sch);

		sch->qstats.backlog += qdisc_pkt_len(skb);
		slot_queue_add(slot, skb);
		return NET_XMIT_CN;
	}

	sch->qstats.backlog += qdisc_pkt_len(skb);
	slot_queue_add(slot, skb);
	sfq_inc(q, x);
	if (slot->qlen == 1) {		/* The flow is new */
		if (q->tail == NULL) {	/* It is the first flow */
			slot->next = x;
			q->tail = slot;
		} else {
			slot->next = q->tail->next;
			q->tail->next = x;
		}
		slot->allot = q->scaled_quantum;
	}
	if (++sch->q.qlen <= q->limit)
		return NET_XMIT_SUCCESS;

	qlen = slot->qlen;
	sfq_drop(sch);
	/* Return Congestion Notification only if we dropped a packet
	 * from this flow.
	 */
	if (qlen != slot->qlen)
		return NET_XMIT_CN;

	/* As we dropped a packet, better let upper stack know this */
	qdisc_tree_decrease_qlen(sch, 1);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	sfq_index a, next_a;
	struct sfq_slot *slot;

	/* No active slots */
	if (q->tail == NULL)
		return NULL;

next_slot:
	a = q->tail->next;
	slot = &q->slots[a];
	if (slot->allot <= 0) {
		q->tail = slot;
		slot->allot += q->scaled_quantum;
		goto next_slot;
	}
	skb = slot_dequeue_head(slot);
	sfq_dec(q, a);
	qdisc_bstats_update(sch, skb);
	sch->q.qlen--;
	sch->qstats.backlog -= qdisc_pkt_len(skb);

	/* Is the slot empty? */
	if (slot->qlen == 0) {
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		next_a = slot->next;
		if (a == next_a) {
			q->tail = NULL; /* no more active slots */
			return skb;
		}
		q->tail->next = next_a;
	} else {
		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
	}
	return skb;
}
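
/* Round-robin accounting sketch (numbers are illustrative): with
 * quantum = 1514 (scaled_quantum = 190), dequeuing one 1514-byte packet
 * costs the slot SFQ_ALLOT_SIZE(1514) = 190 credits, i.e. one full
 * quantum. Once allot drops to zero or below, the slot becomes the new
 * tail, is re-credited with scaled_quantum, and the round moves on to
 * the next slot in the chain.
 */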

static void
sfq_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = sfq_dequeue(sch)) != NULL)
		kfree_skb(skb);
}

/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to change
 * counters.
 */
static void sfq_rehash(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	int i;
	struct sfq_slot *slot;
	struct sk_buff_head list;
	int dropped = 0;

	__skb_queue_head_init(&list);

	for (i = 0; i < q->maxflows; i++) {
		slot = &q->slots[i];
		if (!slot->qlen)
			continue;
		while (slot->qlen) {
			skb = slot_dequeue_head(slot);
			sfq_dec(q, i);
			__skb_queue_tail(&list, skb);
		}
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
	}
	q->tail = NULL;

	while ((skb = __skb_dequeue(&list)) != NULL) {
		unsigned int hash = sfq_hash(q, skb);
		sfq_index x = q->ht[hash];

		slot = &q->slots[x];
		if (x == SFQ_EMPTY_SLOT) {
			x = q->dep[0].next; /* get a free slot */
			if (x >= SFQ_MAX_FLOWS) {
drop:				sch->qstats.backlog -= qdisc_pkt_len(skb);
				kfree_skb(skb);
				dropped++;
				continue;
			}
			q->ht[hash] = x;
			slot = &q->slots[x];
			slot->hash = hash;
		}
		if (slot->qlen >= q->maxdepth)
			goto drop;
		slot_queue_add(slot, skb);
		sfq_inc(q, x);
		if (slot->qlen == 1) {		/* The flow is new */
			if (q->tail == NULL) {	/* It is the first flow */
				slot->next = x;
			} else {
				slot->next = q->tail->next;
				q->tail->next = x;
			}
			q->tail = slot;
			slot->allot = q->scaled_quantum;
		}
	}
	sch->q.qlen -= dropped;
	qdisc_tree_decrease_qlen(sch, dropped);
}

static void sfq_perturbation(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct sfq_sched_data *q = qdisc_priv(sch);
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	q->perturbation = net_random();
	if (!q->filter_list && q->tail)
		sfq_rehash(sch);
	spin_unlock(root_lock);

	if (q->perturb_period)
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tc_sfq_qopt *ctl = nla_data(opt);
	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
	unsigned int qlen;

	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
		return -EINVAL;
	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
		ctl_v1 = nla_data(opt);
	if (ctl->divisor &&
	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
		return -EINVAL;

	sch_tree_lock(sch);
	if (ctl->quantum) {
		q->quantum = ctl->quantum;
		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
	}
	q->perturb_period = ctl->perturb_period * HZ;
	if (ctl->flows)
		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
	if (ctl->divisor) {
		q->divisor = ctl->divisor;
		q->maxflows = min_t(u32, q->maxflows, q->divisor);
	}
	if (ctl_v1) {
		if (ctl_v1->depth)
			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
		q->headdrop = ctl_v1->headdrop;
	}
	if (ctl->limit) {
		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
		q->maxflows = min_t(u32, q->maxflows, q->limit);
	}

	qlen = sch->q.qlen;
	while (sch->q.qlen > q->limit)
		sfq_drop(sch);
	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);

	del_timer(&q->perturb_timer);
	if (q->perturb_period) {
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
		q->perturbation = net_random();
	}
	sch_tree_unlock(sch);
	return 0;
}

static void *sfq_alloc(size_t sz)
{
	void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);

	if (!ptr)
		ptr = vmalloc(sz);
	return ptr;
}

static void sfq_free(void *addr)
{
	if (addr) {
		if (is_vmalloc_addr(addr))
			vfree(addr);
		else
			kfree(addr);
	}
}

static void sfq_destroy(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	q->perturb_period = 0;
	del_timer_sync(&q->perturb_timer);
	sfq_free(q->ht);
	sfq_free(q->slots);
}

static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	int i;

	q->perturb_timer.function = sfq_perturbation;
	q->perturb_timer.data = (unsigned long)sch;
	init_timer_deferrable(&q->perturb_timer);

	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
		q->dep[i].next = i + SFQ_MAX_FLOWS;
		q->dep[i].prev = i + SFQ_MAX_FLOWS;
	}

	q->limit = SFQ_MAX_DEPTH;
	q->maxdepth = SFQ_MAX_DEPTH;
	q->cur_depth = 0;
	q->tail = NULL;
	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
	q->maxflows = SFQ_DEFAULT_FLOWS;
	q->quantum = psched_mtu(qdisc_dev(sch));
	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
	q->perturb_period = 0;
	q->perturbation = net_random();

	if (opt) {
		int err = sfq_change(sch, opt);
		if (err)
			return err;
	}

	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
	if (!q->ht || !q->slots) {
		sfq_destroy(sch);
		return -ENOMEM;
	}
	for (i = 0; i < q->divisor; i++)
		q->ht[i] = SFQ_EMPTY_SLOT;

	for (i = 0; i < q->maxflows; i++) {
		slot_queue_init(&q->slots[i]);
		sfq_link(q, i);
	}
	if (q->limit >= 1)
		sch->flags |= TCQ_F_CAN_BYPASS;
	else
		sch->flags &= ~TCQ_F_CAN_BYPASS;
	return 0;
}

static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_sfq_qopt_v1 opt;

	memset(&opt, 0, sizeof(opt));
	opt.v0.quantum	= q->quantum;
	opt.v0.perturb_period = q->perturb_period / HZ;
	opt.v0.limit	= q->limit;
	opt.v0.divisor	= q->divisor;
	opt.v0.flows	= q->maxflows;
	opt.depth	= q->maxdepth;
	opt.headdrop	= q->headdrop;

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}

static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}

static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
			      u32 classid)
{
	/* we cannot bypass queue discipline anymore */
	sch->flags &= ~TCQ_F_CAN_BYPASS;
	return 0;
}

static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}

static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return &q->filter_list;
}

static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}

static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				struct gnet_dump *d)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index idx = q->ht[cl - 1];
	struct gnet_stats_queue qs = { 0 };
	struct tc_sfq_xstats xstats = { 0 };
	struct sk_buff *skb;

	if (idx != SFQ_EMPTY_SLOT) {
		const struct sfq_slot *slot = &q->slots[idx];

		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
		qs.qlen = slot->qlen;
		slot_queue_walk(slot, skb)
			qs.backlog += qdisc_pkt_len(skb);
	}
	if (gnet_stats_copy_queue(d, &qs) < 0)
		return -1;
	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->divisor; i++) {
		if (q->ht[i] == SFQ_EMPTY_SLOT ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static const struct Qdisc_class_ops sfq_class_ops = {
	.leaf		=	sfq_leaf,
	.get		=	sfq_get,
	.put		=	sfq_put,
	.tcf_chain	=	sfq_find_tcf,
	.bind_tcf	=	sfq_bind,
	.unbind_tcf	=	sfq_put,
	.dump		=	sfq_dump_class,
	.dump_stats	=	sfq_dump_class_stats,
	.walk		=	sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
	.cl_ops		=	&sfq_class_ops,
	.id		=	"sfq",
	.priv_size	=	sizeof(struct sfq_sched_data),
	.enqueue	=	sfq_enqueue,
	.dequeue	=	sfq_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	sfq_drop,
	.init		=	sfq_init,
	.reset		=	sfq_reset,
	.destroy	=	sfq_destroy,
	.change		=	NULL,
	.dump		=	sfq_dump,
	.owner		=	THIS_MODULE,
};

static int __init sfq_module_init(void)
{
	return register_qdisc(&sfq_qdisc_ops);
}

static void __exit sfq_module_exit(void)
{
	unregister_qdisc(&sfq_qdisc_ops);
}

module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");
825MODULE_LICENSE("GPL");