[INET_TWSK]: Introduce inet_twsk_alloc
net/core/sock.c

/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo	:	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

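/*
 * Worked example (illustrative only; sizeof(struct sk_buff) is arch- and
 * config-dependent, here assumed to be roughly 256 bytes):
 *
 *	_SK_MEM_OVERHEAD ~= 256 + 256       = 512 bytes
 *	SK_WMEM_MAX      ~= 512 * 256       = 131072 bytes (128 KiB)
 *
 * i.e. the default buffer limits are sized so that about 256 packets,
 * bookkeeping overhead included, fit per socket, whatever the
 * platform's sk_buff size happens to be.
 */
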
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
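
/*
 * Worked example of the conversion above (assuming HZ = 100, so one
 * jiffy is 10 ms): a user timeout of { .tv_sec = 2, .tv_usec = 500000 }
 * becomes
 *
 *	2 * 100 + (500000 + 9999) / 10000 = 200 + 50 = 250 jiffies
 *
 * The microsecond part is rounded up to the next jiffy (tv_usec = 1
 * still yields one jiffy), so a non-zero timeout is never truncated
 * to zero.
 */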

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}


/*
 * This is meant for all protocols to use and covers goings on
 * at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	if (optname == SO_DONTLINGER) {
		lock_sock(sk);
		sock_reset_flag(sk, SOCK_LINGER);
		release_sock(sk);
		return 0;
	}
#endif

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else if (valbool)
			sock_set_flag(sk, SOCK_DBG);
		else
			sock_reset_flag(sk, SOCK_DBG);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		if (valbool)
			sock_set_flag(sk, SOCK_LOCALROUTE);
		else
			sock_reset_flag(sk, SOCK_LOCALROUTE);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this; BSD doesn't, and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;

		/*
		 *	Wake up sending tasks if we
		 *	upped the value.
		 */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this; BSD doesn't, and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/* FIXME: is this lower bound the right one? */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
		if (valbool) {
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk);
		} else
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

#ifdef CONFIG_NETDEVICES
	case SO_BINDTODEVICE:
	{
		char devname[IFNAMSIZ];

		/* Sorry... */
		if (!capable(CAP_NET_RAW)) {
			ret = -EPERM;
			break;
		}

		/* Bind this socket to a particular device like "eth0",
		 * as specified in the passed interface name. If the
		 * name is "" or the option length is zero the socket
		 * is not bound.
		 */

		if (!valbool) {
			sk->sk_bound_dev_if = 0;
		} else {
			if (optlen > IFNAMSIZ)
				optlen = IFNAMSIZ;
			if (copy_from_user(devname, optval, optlen)) {
				ret = -EFAULT;
				break;
			}

			/* Remove any cached route for this socket. */
			sk_dst_reset(sk);

			if (devname[0] == '\0') {
				sk->sk_bound_dev_if = 0;
			} else {
				struct net_device *dev = dev_get_by_name(devname);
				if (!dev) {
					ret = -ENODEV;
					break;
				}
				sk->sk_bound_dev_if = dev->ifindex;
				dev_put(dev);
			}
		}
		break;
	}
#endif


	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		spin_lock_bh(&sk->sk_lock.slock);
		filter = sk->sk_filter;
		if (filter) {
			sk->sk_filter = NULL;
			spin_unlock_bh(&sk->sk_lock.slock);
			sk_filter_release(sk, filter);
			break;
		}
		spin_unlock_bh(&sk->sk_lock.slock);
		ret = -ENONET;
		break;

	/* We implement SO_SNDLOWAT etc. to
	   not be settable (1003.1g 5.3) */
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
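
/*
 * Illustrative user-space sketch (not part of this file) of what the
 * SO_SNDBUF path above means for applications: the kernel clamps the
 * request to sysctl_wmem_max and stores double the value to leave room
 * for sk_buff bookkeeping overhead, so getsockopt() reads back roughly
 * twice what was set:
 *
 *	int val = 65536, out; socklen_t len = sizeof(out);
 *	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &out, &len);
 *	// out == min(val, wmem_max) * 2 (subject to SOCK_MIN_SNDBUF)
 */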


int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = !!sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = !!sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv = sizeof(v.ling);
		v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger = sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP);
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERCRED:
		if (len > sizeof(sk->sk_peercred))
			len = sizeof(sk->sk_peercred);
		if (copy_to_user(optval, &sk->sk_peercred, len))
			return -EFAULT;
		goto lenout;

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec(sock, optval, optlen, len);

	default:
		return -ENOPROTOOPT;
	}
	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
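
/*
 * Illustrative user-space sketch (not part of this file): SO_ERROR is
 * the one option above whose read has a side effect - it returns the
 * pending socket error (the hard error first, then the soft error via
 * xchg()) and clears it, which is how non-blocking connect() results
 * are typically harvested:
 *
 *	int err; socklen_t len = sizeof(err);
 *	// after poll()/select() reports writability on a connecting fd
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *	// err == 0 means the connect succeeded, otherwise the errno value
 */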

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, unsigned int __nocast priority,
		      struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	kmem_cache_t *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority)) {
			if (slab != NULL)
				kmem_cache_free(slab, sk);
			else
				kfree(sk);
			sk = NULL;
		} else
			__module_get(prot->owner);
	}
	return sk;
}

void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = sk->sk_filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->sk_filter = NULL;
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}
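
/*
 * Sketch of typical protocol usage (hypothetical caller, not from this
 * file): a protocol's create routine allocates its sock object through
 * sk_alloc() with its own struct proto, so the object comes from that
 * protocol's slab cache and pins the owning module:
 *
 *	sk = sk_alloc(PF_INET, GFP_KERNEL, &my_proto, 1);
 *	if (!sk)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 *	...
 *	sk_free(sk);	// on failure paths; releases slab obj + module ref
 */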

void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     unsigned int __nocast priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     unsigned int __nocast priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
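
/*
 * Sketch of typical usage (hypothetical caller, not from this file):
 * option memory is charged to sk_omem_alloc, so per-socket ancillary
 * allocations are bounded by sysctl_optmem_max, and the caller must
 * pass the same size back when freeing:
 *
 *	struct my_opts *opt = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *	if (!opt)
 *		return -ENOBUFS;	// over the optmem limit, or OOM
 *	...
 *	sock_kfree_s(sk, opt, optlen);
 */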

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	unsigned int gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, sk->sk_allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
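
/*
 * Sketch of typical datagram sendmsg usage (hypothetical caller, not
 * from this file): the helper blocks until the write allocation fits
 * under sk_sndbuf (or the timeout/signal fires) and returns an skb
 * already charged to the socket via skb_set_owner_w():
 *
 *	skb = sock_alloc_send_skb(sk, len + reserve,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;	// -EAGAIN, -EPIPE, -ERESTARTSYS, ...
 */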

static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					  TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk: sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);

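/*
 * Sketch of a protocol recvmsg loop built on sk_wait_data (hypothetical
 * caller, not from this file); the helper returns non-zero once the
 * receive queue is non-empty, and leaves *timeo reduced by the time
 * actually slept:
 *
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo)
 *			return -EAGAIN;
 *		if (signal_pending(current))
 *			return sock_intr_errno(timeo);
 *		sk_wait_data(sk, &timeo);
 *	}
 */
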
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = { .msg_flags = flags };
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	if (sk->sk_protinfo)
		kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);

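/*
 * Note on the refcount discipline above (illustrative, not from this
 * file): sk_reset_timer() takes a sock reference only when mod_timer()
 * reports the timer was not already pending, and sk_stop_timer() drops
 * one only when it actually deactivated a pending timer, so each
 * pending timer holds exactly one reference on the sock:
 *
 *	sk_reset_timer(sk, &my_timer, jiffies + HZ);	// +1 ref if newly armed
 *	sk_reset_timer(sk, &my_timer, jiffies + 2*HZ);	// re-arm, no extra ref
 *	sk_stop_timer(sk, &my_timer);			// -1 ref if it was pending
 */
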
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec	=	-1L;
	sk->sk_stamp.tv_usec	=	-1L;

	atomic_set(&sk->sk_refcnt, 1);
}

void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock_bh(&sk->sk_lock.slock);
}

EXPORT_SYMBOL(lock_sock);

void fastcall release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
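
/*
 * Sketch of the standard locking pattern (illustrative, not from this
 * file): lock_sock() marks the socket as owned by process context, so
 * softirq receive paths queue packets on sk_backlog instead of touching
 * socket state; release_sock() then replays that backlog through
 * sk_backlog_rcv() before waking the next waiter:
 *
 *	lock_sock(sk);
 *	// mutate socket state safely here
 *	release_sock(sk);	// drains sk_backlog first
 */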

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove it from the hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did the hash table lookup before we unhashed
	 * the socket. They will reach the receive queue and be purged by
	 * the socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
	char *request_sock_slab_name = NULL;
	char *timewait_sock_slab_name;
	int rc = -ENOBUFS;

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			static const char mask[] = "request_sock_%s";

			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
			if (request_sock_slab_name == NULL)
				goto out_free_sock_slab;

			sprintf(request_sock_slab_name, mask, prot->name);
			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL, NULL);

			if (prot->rsk_prot->slab == NULL) {
				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
				       prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_obj_size) {
			static const char mask[] = "tw_sock_%s";

			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);

			if (timewait_sock_slab_name == NULL)
				goto out_free_request_sock_slab;

			sprintf(timewait_sock_slab_name, mask, prot->name);
			prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
							    prot->twsk_obj_size,
							    0, SLAB_HWCACHE_ALIGN,
							    NULL, NULL);
			if (prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	write_unlock(&proto_list_lock);
	rc = 0;
out:
	return rc;
out_free_timewait_sock_slab_name:
	kfree(timewait_sock_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	kfree(request_sock_slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
	goto out;
}

EXPORT_SYMBOL(proto_register);
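
/*
 * Sketch of registration from a protocol module (hypothetical caller,
 * not from this file): filling in twsk_obj_size is what makes
 * proto_register() create the "tw_sock_<name>" slab that the timewait
 * sock allocator introduced by this patch draws from:
 *
 *	static struct proto my_proto = {
 *		.name		= "MYPROTO",
 *		.obj_size	= sizeof(struct my_sock),
 *		.twsk_obj_size	= sizeof(struct my_timewait_sock),
 *		...
 *	};
 *
 *	rc = proto_register(&my_proto, 1);	// 1 => allocate slab caches
 */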

void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		const char *name = kmem_cache_name(prot->rsk_prot->slab);

		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_slab != NULL) {
		const char *name = kmem_cache_name(prot->twsk_slab);

		kmem_cache_destroy(prot->twsk_slab);
		kfree(name);
		prot->twsk_slab = NULL;
	}

	list_del(&prot->node);
	write_unlock(&proto_list_lock);
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_optmem_max);
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif