Pull align-sig-frame into release branch
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / dccp / proto.c
1 /*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/ip.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37 #include <linux/dccp.h>
38
39 #include "ccid.h"
40 #include "dccp.h"
41
/* Per-CPU DCCP SNMP (MIB) counters; the two percpu copies are allocated
 * in init_dccp_v4_mibs(). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

/* Count of DCCP sockets closed by userspace but still held by the protocol. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

/* IPv4 hooks: entry points for incoming DCCP packets and ICMP errors. */
static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
};
50
51 const char *dccp_packet_name(const int type)
52 {
53 static const char *dccp_packet_names[] = {
54 [DCCP_PKT_REQUEST] = "REQUEST",
55 [DCCP_PKT_RESPONSE] = "RESPONSE",
56 [DCCP_PKT_DATA] = "DATA",
57 [DCCP_PKT_ACK] = "ACK",
58 [DCCP_PKT_DATAACK] = "DATAACK",
59 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60 [DCCP_PKT_CLOSE] = "CLOSE",
61 [DCCP_PKT_RESET] = "RESET",
62 [DCCP_PKT_SYNC] = "SYNC",
63 [DCCP_PKT_SYNCACK] = "SYNCACK",
64 };
65
66 if (type >= DCCP_NR_PKT_TYPES)
67 return "INVALID";
68 else
69 return dccp_packet_names[type];
70 }
71
72 EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74 const char *dccp_state_name(const int state)
75 {
76 static char *dccp_state_names[] = {
77 [DCCP_OPEN] = "OPEN",
78 [DCCP_REQUESTING] = "REQUESTING",
79 [DCCP_PARTOPEN] = "PARTOPEN",
80 [DCCP_LISTEN] = "LISTEN",
81 [DCCP_RESPOND] = "RESPOND",
82 [DCCP_CLOSING] = "CLOSING",
83 [DCCP_TIME_WAIT] = "TIME_WAIT",
84 [DCCP_CLOSED] = "CLOSED",
85 };
86
87 if (state >= DCCP_MAX_STATES)
88 return "INVALID STATE!";
89 else
90 return dccp_state_names[state];
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_state_name);
94
95 static inline int dccp_listen_start(struct sock *sk)
96 {
97 struct dccp_sock *dp = dccp_sk(sk);
98
99 dp->dccps_role = DCCP_ROLE_LISTEN;
100 /*
101 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
102 * before calling listen()
103 */
104 if (dccp_service_not_initialized(sk))
105 return -EPROTO;
106 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
107 }
108
/*
 * Abort a connection (the ABORT function of RFC 793): force the socket
 * to DCCP_CLOSED, drop all queued data, clear timers and cached routing
 * state so the socket can be reused.  Always returns 0 at present.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
		/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Forget the source address unless the user explicitly bound it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A socket still bound to a local port must hold its bind bucket. */
	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
150
/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? (i.e. past the REQUESTING/RESPOND handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
206
/* No DCCP-specific ioctls are implemented yet; always -ENOIOCTLCMD. */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}
212
/*
 * Install the service code and an optional list of additional service
 * codes.  The user buffer layout is one u32 (the primary @service,
 * already fetched by the caller) followed by (optlen/4 - 1) further u32s.
 */
static int dccp_setsockopt_service(struct sock *sk, const u32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		/* kmalloc(optlen) relies on struct dccp_service_list being
		 * a counter followed by a trailing u32 array — presumably a
		 * flexible/length-1 array member; see the struct definition.
		 */
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		/* NOTE(review): -EFAULT is also returned when the list merely
		 * contains DCCP_SERVICE_INVALID_VALUE; -EINVAL would arguably
		 * fit that case better — confirm before changing ABI.
		 */
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	/* Swap in the new list under the socket lock; free the old one. */
	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
247
248 int dccp_setsockopt(struct sock *sk, int level, int optname,
249 char __user *optval, int optlen)
250 {
251 struct dccp_sock *dp;
252 int err;
253 int val;
254
255 if (level != SOL_DCCP)
256 return ip_setsockopt(sk, level, optname, optval, optlen);
257
258 if (optlen < sizeof(int))
259 return -EINVAL;
260
261 if (get_user(val, (int __user *)optval))
262 return -EFAULT;
263
264 if (optname == DCCP_SOCKOPT_SERVICE)
265 return dccp_setsockopt_service(sk, val, optval, optlen);
266
267 lock_sock(sk);
268 dp = dccp_sk(sk);
269 err = 0;
270
271 switch (optname) {
272 case DCCP_SOCKOPT_PACKET_SIZE:
273 dp->dccps_packet_size = val;
274 break;
275 default:
276 err = -ENOPROTOOPT;
277 break;
278 }
279
280 release_sock(sk);
281 return err;
282 }
283
/*
 * Copy the service code plus the optional service list to userspace.
 * Reply layout: one u32 (primary service code) followed by
 * sl->dccpsl_nr additional u32s; *optlen is set to the total size.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
				   u32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	/* The user buffer must hold the entire reply or we copy nothing. */
	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}
314
315 int dccp_getsockopt(struct sock *sk, int level, int optname,
316 char __user *optval, int __user *optlen)
317 {
318 struct dccp_sock *dp;
319 int val, len;
320
321 if (level != SOL_DCCP)
322 return ip_getsockopt(sk, level, optname, optval, optlen);
323
324 if (get_user(len, optlen))
325 return -EFAULT;
326
327 if (len < sizeof(int))
328 return -EINVAL;
329
330 dp = dccp_sk(sk);
331
332 switch (optname) {
333 case DCCP_SOCKOPT_PACKET_SIZE:
334 val = dp->dccps_packet_size;
335 len = sizeof(dp->dccps_packet_size);
336 break;
337 case DCCP_SOCKOPT_SERVICE:
338 return dccp_getsockopt_service(sk, len,
339 (u32 __user *)optval, optlen);
340 case 128 ... 191:
341 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
342 len, (u32 __user *)optval, optlen);
343 case 192 ... 255:
344 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
345 len, (u32 __user *)optval, optlen);
346 default:
347 return -ENOPROTOOPT;
348 }
349
350 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
351 return -EFAULT;
352
353 return 0;
354 }
355
/*
 * Send one DCCP data packet.  DCCP preserves packet boundaries, so each
 * call produces at most one packet and @len may not exceed the cached
 * MSS.  Returns the number of bytes queued or a negative errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the socket lock across the possibly-blocking allocation. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 * Current plan however is to _use_ sk_write_queue with
	 * an algorithm similar to tcp_sendmsg, where the main difference
	 * is that in DCCP we have to respect packet boundaries, so
	 * no coalescing of skbs.
	 *
	 * This bug was _quickly_ found & fixed by just looking at an OSTRA
	 * generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
411
/*
 * Receive one DCCP data packet.  Packet boundaries are preserved: at most
 * one queued skb is consumed per call, a short skb leaves the rest of the
 * user buffer unused, and a long one sets MSG_TRUNC.  Returns the number
 * of bytes copied, 0 on RESET/CLOSE or shutdown, or a negative errno.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		/* RESET/CLOSE act like an EOF marker (return 0). */
		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		/* Any other packet type: discard it and look at the next. */
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Sleep until data arrives or the timeout expires. */
		sk_wait_data(sk, &timeo);
		continue;
found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
508
509 static int inet_dccp_listen(struct socket *sock, int backlog)
510 {
511 struct sock *sk = sock->sk;
512 unsigned char old_state;
513 int err;
514
515 lock_sock(sk);
516
517 err = -EINVAL;
518 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
519 goto out;
520
521 old_state = sk->sk_state;
522 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
523 goto out;
524
525 /* Really, if the socket is already in listen state
526 * we can only allow the backlog to be adjusted.
527 */
528 if (old_state != DCCP_LISTEN) {
529 /*
530 * FIXME: here it probably should be sk->sk_prot->listen_start
531 * see tcp_listen_start
532 */
533 err = dccp_listen_start(sk);
534 if (err)
535 goto out;
536 }
537 sk->sk_max_ack_backlog = backlog;
538 err = 0;
539
540 out:
541 release_sock(sk);
542 return err;
543 }
544
/*
 * Close-time state transition table, indexed by the current socket state.
 * An entry with DCCP_ACTION_FIN ORed in means a closing packet must be
 * sent before the transition; see dccp_close_state().
 */
static const unsigned char dccp_new_state[] = {
	/* current state:	 new state:	 action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
557
558 static int dccp_close_state(struct sock *sk)
559 {
560 const int next = dccp_new_state[sk->sk_state];
561 const int ns = next & DCCP_STATE_MASK;
562
563 if (ns != sk->sk_state)
564 dccp_set_state(sk, ns);
565
566 return next & DCCP_ACTION_FIN;
567 }
568
/*
 * Active close of a DCCP socket (see "8.3. Termination" in the spec):
 * flush unread data, send CLOSE/CLOSEREQ if the state machine requires
 * it, then orphan the socket and let the protocol finish tearing down.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
649
/* FIXME: shutdown(2) is not implemented for DCCP yet; this is a stub. */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
654
/* struct socket layer operations for PF_INET DCCP sockets; mostly shared
 * inet/sock_common helpers plus the DCCP-specific poll and listen. */
static struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll		= dccp_poll,
	.ioctl		= inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
677
extern struct net_proto_family inet_family_ops;

/* Registration entry tying (SOCK_DCCP, IPPROTO_DCCP) socket creation to
 * dccp_v4_prot and inet_dccp_ops; registered in dccp_init(). */
static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v4_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,
	.no_check	= 0,
	.flags		= 0,
};
689
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at initialization time.
 */
struct socket *dccp_ctl_socket;

/* __initdata: the message is only needed while dccp_ctl_sock_init() runs. */
static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";
699
700 static int __init dccp_ctl_sock_init(void)
701 {
702 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
703 &dccp_ctl_socket);
704 if (rc < 0)
705 printk(dccp_ctl_socket_err_msg);
706 else {
707 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
708 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
709
710 /* Unhash it so that IP input processing does not even
711 * see it, we do not wish this socket to see incoming
712 * packets.
713 */
714 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
715 }
716
717 return rc;
718 }
719
720 #ifdef CONFIG_IP_DCCP_UNLOAD_HACK
721 void dccp_ctl_sock_exit(void)
722 {
723 if (dccp_ctl_socket != NULL) {
724 sock_release(dccp_ctl_socket);
725 dccp_ctl_socket = NULL;
726 }
727 }
728
729 EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
730 #endif
731
732 static int __init init_dccp_v4_mibs(void)
733 {
734 int rc = -ENOMEM;
735
736 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
737 if (dccp_statistics[0] == NULL)
738 goto out;
739
740 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
741 if (dccp_statistics[1] == NULL)
742 goto out_free_one;
743
744 rc = 0;
745 out:
746 return rc;
747 out_free_one:
748 free_percpu(dccp_statistics[0]);
749 dccp_statistics[0] = NULL;
750 goto out;
751
752 }
753
/* Optional boot/module parameter overriding the size computed for the
 * established-connections hash table in dccp_init(). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Non-zero enables debug output — presumably consulted by
 * dccp_pr_debug(); see dccp.h. */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif
763
/*
 * Module init: register the DCCP proto, size and allocate the established
 * and bind hash tables, set up the MIB counters, hook into IPv4 and
 * create the OOTB control socket.  Unwinds everything via the goto
 * ladder on any failure.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = proto_register(&dccp_v4_prot, 1);

	if (rc)
		goto out;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_proto_unregister;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Retry with smaller orders until the page allocation succeeds. */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		/* Round ehash_size down to a power of two. */
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
		       "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	/* Initialize 2 * ehash_size buckets — the allocation is twice the
	 * nominal size (presumably the second half is for TIME_WAIT
	 * sockets, as in TCP of this era — confirm against inet_hashinfo).
	 */
	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;

	/* Allocate the bind hash, capping it at 64K buckets. */
	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	if (init_dccp_v4_mibs())
		goto out_free_dccp_bhash;

	rc = -EAGAIN;
	if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
		goto out_free_dccp_v4_mibs;

	inet_register_protosw(&dccp_v4_protosw);

	rc = dccp_ctl_sock_init();
	if (rc)
		goto out_unregister_protosw;
out:
	return rc;
out_unregister_protosw:
	inet_unregister_protosw(&dccp_v4_protosw);
	inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
	proto_unregister(&dccp_v4_prot);
	goto out;
}
874
static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

/* Module unload: tear everything down in reverse order of dccp_init(). */
static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_v4_prot);
}
896
module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly.  Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");