/*
 * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Stochastic Fairness Queuing algorithm.
	=======================================

	Source:
	Paul E. McKenney "Stochastic Fairness Queuing",
	IEEE INFOCOM'90 Proceedings, San Francisco, 1990.

	Paul E. McKenney "Stochastic Fairness Queuing",
	"Interworking: Research and Experience", v.2, 1991, p.113-131.


	See also:
	M. Shreedhar and George Varghese "Efficient Fair
	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


	This is not the thing that is usually called (W)FQ nowadays.
	It does not use any timestamp mechanism, but instead
	processes queues in round-robin order.

	ADVANTAGE:

	- It is very cheap. Both CPU and memory requirements are minimal.

	DRAWBACKS:

	- "Stochastic" -> It is not 100% fair.
	When hash collisions occur, several flows are considered as one.

	- "Round-robin" -> It introduces larger delays than virtual clock
	based schemes, and should not be used for isolating interactive
	traffic from non-interactive. This means that this scheduler
	should be used as a leaf of CBQ or P3, which put interactive traffic
	into the higher priority band.

	We still need true WFQ for the top level CSZ, but using WFQ
	for the best effort traffic is absolutely pointless:
	SFQ is superior for this purpose.

	IMPLEMENTATION:
	This implementation limits the maximal queue length to 128;
	the maximal mtu to 2^15-1; and the number of hash buckets to 1024.
	The only goal of these restrictions was that all data
	fit into one 4K page :-). Struct sfq_sched_data is
	organized in an anti-cache manner: all the data for a bucket
	are scattered over different locations. This is not good,
	but it allowed me to put it into 4K.

	It is easy to increase these values, but not in flight.  */

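/* A minimal usage sketch (not from the original author): SFQ is typically
 * attached as a leaf of a priority scheduler, as recommended above.  The
 * device name is an example and exact option names depend on the installed
 * iproute2 version:
 *
 *	tc qdisc add dev eth0 root handle 1: prio
 *	tc qdisc add dev eth0 parent 1:3 handle 30: sfq perturb 10
 *
 * "perturb 10" re-seeds the hash every 10 seconds (see sfq_perturbation()
 * below) so that flows which collided in a bucket do not stay merged
 * forever.
 */
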
#define SFQ_DEPTH		128
#define SFQ_HASH_DIVISOR	1024

/* This type should contain at least SFQ_DEPTH*2 values */
typedef unsigned char sfq_index;

struct sfq_head
{
	sfq_index	next;
	sfq_index	prev;
};

struct sfq_sched_data
{
/* Parameters */
	int		perturb_period;
	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
	int		limit;

/* Variables */
	struct tcf_proto *filter_list;
	struct timer_list perturb_timer;
	u32		perturbation;
	sfq_index	tail;		/* Index of current slot in round */
	sfq_index	max_depth;	/* Maximal depth */

	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
	struct sk_buff_head	qs[SFQ_DEPTH];	/* Slot queue */
	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
};

static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
{
	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
}

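/* Compute the hash bucket for a packet: for IPv4 and IPv6, fold the
 * addresses and the next-protocol byte, and mix in the first 32 bits
 * past the network header (normally the transport ports) when the
 * protocol is known to carry them and enough of the packet is linear.
 * Anything else falls back to (dst_entry, protocol, socket).
 */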
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
	u32 h, h2;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
	{
		const struct iphdr *iph;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ip_hdr(skb);
		h = (__force u32)iph->daddr;
		h2 = (__force u32)iph->saddr ^ iph->protocol;
		if (!(iph->frag_off & htons(IP_MF | IP_OFFSET)) &&
		    (iph->protocol == IPPROTO_TCP ||
		     iph->protocol == IPPROTO_UDP ||
		     iph->protocol == IPPROTO_UDPLITE ||
		     iph->protocol == IPPROTO_SCTP ||
		     iph->protocol == IPPROTO_DCCP ||
		     iph->protocol == IPPROTO_ESP) &&
		    pskb_network_may_pull(skb, iph->ihl * 4 + 4))
			h2 ^= *(((u32 *)iph) + iph->ihl);
		break;
	}
	case htons(ETH_P_IPV6):
	{
		struct ipv6hdr *iph;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ipv6_hdr(skb);
		h = (__force u32)iph->daddr.s6_addr32[3];
		h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
		if ((iph->nexthdr == IPPROTO_TCP ||
		     iph->nexthdr == IPPROTO_UDP ||
		     iph->nexthdr == IPPROTO_UDPLITE ||
		     iph->nexthdr == IPPROTO_SCTP ||
		     iph->nexthdr == IPPROTO_DCCP ||
		     iph->nexthdr == IPPROTO_ESP) &&
		    pskb_network_may_pull(skb, sizeof(*iph) + 4))
			h2 ^= *(u32 *)&iph[1];
		break;
	}
	default:
err:
		h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
		h2 = (unsigned long)skb->sk;
	}

	return sfq_fold_hash(q, h, h2);
}

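/* Return a bucket number in 1..SFQ_HASH_DIVISOR for the packet, or 0 to
 * signal a drop.  skb->priority may address a bucket of this qdisc
 * directly; otherwise, if classifiers are attached, they choose the
 * bucket (unmatched packets are dropped), and without classifiers the
 * stochastic hash above is used.
 */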
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				 int *qerr)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
		return TC_H_MIN(skb->priority);

	if (!q->filter_list)
		return sfq_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
			return TC_H_MIN(res.classid);
	}
	return 0;
}

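/* Slot depth bookkeeping.  dep[] holds SFQ_DEPTH doubly-linked lists of
 * slots keyed by queue length: entries SFQ_DEPTH..2*SFQ_DEPTH-1 are the
 * list heads for depths 0..SFQ_DEPTH-1, and entries 0..SFQ_DEPTH-1 belong
 * to the slots themselves.  sfq_link() files slot x under its current
 * length; sfq_dec()/sfq_inc() refile it after a dequeue/enqueue and
 * maintain q->max_depth, which lets sfq_drop() find a longest slot in
 * O(1).
 */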
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d = q->qs[x].qlen + SFQ_DEPTH;

	p = d;
	n = q->dep[d].next;
	q->dep[x].next = n;
	q->dep[x].prev = p;
	q->dep[p].next = q->dep[n].prev = x;
}

static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;

	n = q->dep[x].next;
	p = q->dep[x].prev;
	q->dep[p].next = n;
	q->dep[n].prev = p;

	if (n == p && q->max_depth == q->qs[x].qlen + 1)
		q->max_depth--;

	sfq_link(q, x);
}

static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	n = q->dep[x].next;
	p = q->dep[x].prev;
	q->dep[p].next = n;
	q->dep[n].prev = p;
	d = q->qs[x].qlen;
	if (q->max_depth < d)
		q->max_depth = d;

	sfq_link(q, x);
}

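/* Drop one packet and return its length, or 0 if nothing can be dropped.
 * Packets are dropped from the tail of a longest slot; when every active
 * slot holds exactly one packet, the slot at the head of the round-robin
 * ring is emptied and unlinked instead.
 */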
static unsigned int sfq_drop(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index d = q->max_depth;
	struct sk_buff *skb;
	unsigned int len;

	/* Queue is full! Find the longest slot and
	   drop a packet from it */

	if (d > 1) {
		sfq_index x = q->dep[d + SFQ_DEPTH].next;
		skb = q->qs[x].prev;
		len = qdisc_pkt_len(skb);
		__skb_unlink(skb, &q->qs[x]);
		kfree_skb(skb);
		sfq_dec(q, x);
		sch->q.qlen--;
		sch->qstats.drops++;
		sch->qstats.backlog -= len;
		return len;
	}

	if (d == 1) {
		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
		d = q->next[q->tail];
		q->next[q->tail] = q->next[d];
		q->allot[q->next[d]] += q->quantum;
		skb = q->qs[d].prev;
		len = qdisc_pkt_len(skb);
		__skb_unlink(skb, &q->qs[d]);
		kfree_skb(skb);
		sfq_dec(q, d);
		sch->q.qlen--;
		q->ht[q->hash[d]] = SFQ_DEPTH;
		sch->qstats.drops++;
		sch->qstats.backlog -= len;
		return len;
	}

	return 0;
}

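/* Enqueue: classify the packet to a hash bucket and append it to that
 * bucket's slot.  ht[hash] == SFQ_DEPTH means the bucket has no slot yet,
 * so a free one is taken from the head of the depth-0 list (with the
 * limit capped at SFQ_DEPTH - 1, a free slot always exists).  A new flow
 * is linked into the round-robin ring behind the current tail.
 */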
static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int hash;
	sfq_index x;
	int uninitialized_var(ret);

	hash = sfq_classify(skb, sch, &ret);
	if (hash == 0) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}
	hash--;

	x = q->ht[hash];
	if (x == SFQ_DEPTH) {
		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
		q->hash[x] = hash;
	}

	/* If the selected queue has length q->limit, this means that
	 * all the other queues are empty and that we do simple tail drop,
	 * i.e. drop _this_ packet.
	 */
	if (q->qs[x].qlen >= q->limit)
		return qdisc_drop(skb, sch);

	sch->qstats.backlog += qdisc_pkt_len(skb);
	__skb_queue_tail(&q->qs[x], skb);
	sfq_inc(q, x);
	if (q->qs[x].qlen == 1) {		/* The flow is new */
		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
			q->tail = x;
			q->next[x] = x;
			q->allot[x] = q->quantum;
		} else {
			q->next[x] = q->next[q->tail];
			q->next[q->tail] = x;
			q->tail = x;
		}
	}
	if (++sch->q.qlen <= q->limit) {
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}

	sfq_drop(sch);
	return NET_XMIT_CN;
}

static struct sk_buff *
sfq_peek(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index a;

	/* No active slots */
	if (q->tail == SFQ_DEPTH)
		return NULL;

	a = q->next[q->tail];
	return skb_peek(&q->qs[a]);
}

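/* Dequeue one packet, deficit-round-robin style: serve the slot after the
 * current tail, charging the packet length against its allotment.  An
 * emptied slot is unlinked from the ring and its hash bucket released;
 * a slot whose allotment runs out becomes the new tail, and the slot
 * taking over the turn is topped up with one quantum.
 */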
static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	sfq_index a, old_a;

	/* No active slots */
	if (q->tail == SFQ_DEPTH)
		return NULL;

	a = old_a = q->next[q->tail];

	/* Grab packet */
	skb = __skb_dequeue(&q->qs[a]);
	sfq_dec(q, a);
	sch->q.qlen--;
	sch->qstats.backlog -= qdisc_pkt_len(skb);

	/* Is the slot empty? */
	if (q->qs[a].qlen == 0) {
		q->ht[q->hash[a]] = SFQ_DEPTH;
		a = q->next[a];
		if (a == old_a) {
			q->tail = SFQ_DEPTH;
			return skb;
		}
		q->next[q->tail] = a;
		q->allot[a] += q->quantum;
	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
		q->tail = a;
		a = q->next[a];
		q->allot[a] += q->quantum;
	}
	return skb;
}

static void
sfq_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = sfq_dequeue(sch)) != NULL)
		kfree_skb(skb);
}

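/* Timer callback: pick a new random perturbation for the hash so that
 * flows which happened to collide get a chance to separate, then re-arm
 * the timer if periodic perturbation is configured.
 */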
static void sfq_perturbation(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct sfq_sched_data *q = qdisc_priv(sch);

	q->perturbation = net_random();

	if (q->perturb_period)
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

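/* Apply a tc_sfq_qopt configuration under the tree lock: the quantum
 * defaults to the device MTU, the limit is clamped to SFQ_DEPTH - 1,
 * packets above the new limit are dropped immediately, and the
 * perturbation timer is restarted.
 */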
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tc_sfq_qopt *ctl = nla_data(opt);
	unsigned int qlen;

	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
		return -EINVAL;

	sch_tree_lock(sch);
	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
	q->perturb_period = ctl->perturb_period * HZ;
	if (ctl->limit)
		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);

	qlen = sch->q.qlen;
	while (sch->q.qlen > q->limit)
		sfq_drop(sch);
	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);

	del_timer(&q->perturb_timer);
	if (q->perturb_period) {
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
		q->perturbation = net_random();
	}
	sch_tree_unlock(sch);
	return 0;
}

static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	int i;

	q->perturb_timer.function = sfq_perturbation;
	q->perturb_timer.data = (unsigned long)sch;
	init_timer_deferrable(&q->perturb_timer);

	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
		q->ht[i] = SFQ_DEPTH;

	for (i = 0; i < SFQ_DEPTH; i++) {
		skb_queue_head_init(&q->qs[i]);
		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
	}

	q->limit = SFQ_DEPTH - 1;
	q->max_depth = 0;
	q->tail = SFQ_DEPTH;
	if (opt == NULL) {
		q->quantum = psched_mtu(qdisc_dev(sch));
		q->perturb_period = 0;
		q->perturbation = net_random();
	} else {
		int err = sfq_change(sch, opt);
		if (err)
			return err;
	}

	for (i = 0; i < SFQ_DEPTH; i++)
		sfq_link(q, i);
	return 0;
}

static void sfq_destroy(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	q->perturb_period = 0;
	del_timer_sync(&q->perturb_timer);
}

static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_sfq_qopt opt;

	opt.quantum = q->quantum;
	opt.perturb_period = q->perturb_period / HZ;

	opt.limit = q->limit;
	opt.divisor = SFQ_HASH_DIVISOR;
	opt.flows = q->limit;

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}

static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
			      u32 classid)
{
	return 0;
}

static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}

static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return &q->filter_list;
}

static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}

static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				struct gnet_dump *d)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index idx = q->ht[cl - 1];
	struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };

	if (gnet_stats_copy_queue(d, &qs) < 0)
		return -1;
	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
		if (q->ht[i] == SFQ_DEPTH ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static const struct Qdisc_class_ops sfq_class_ops = {
	.get		=	sfq_get,
	.put		=	sfq_put,
	.tcf_chain	=	sfq_find_tcf,
	.bind_tcf	=	sfq_bind,
	.unbind_tcf	=	sfq_put,
	.dump		=	sfq_dump_class,
	.dump_stats	=	sfq_dump_class_stats,
	.walk		=	sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
	.cl_ops		=	&sfq_class_ops,
	.id		=	"sfq",
	.priv_size	=	sizeof(struct sfq_sched_data),
	.enqueue	=	sfq_enqueue,
	.dequeue	=	sfq_dequeue,
	.peek		=	sfq_peek,
	.drop		=	sfq_drop,
	.init		=	sfq_init,
	.reset		=	sfq_reset,
	.destroy	=	sfq_destroy,
	.change		=	NULL,
	.dump		=	sfq_dump,
	.owner		=	THIS_MODULE,
};

static int __init sfq_module_init(void)
{
	return register_qdisc(&sfq_qdisc_ops);
}
static void __exit sfq_module_exit(void)
{
	unregister_qdisc(&sfq_qdisc_ops);
}
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");