Merge tag 'v3.10.95' into update
[GitHub/mt8127/android_kernel_alcatel_ttab.git] net/core/sock.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <asm/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#include <net/af_unix.h>


#ifdef CONFIG_INET
#include <net/tcp.h>
#endif
#include <linux/xlog.h>

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

/**
 * sk_ns_capable - General socket capability test
 * @sk: Socket to use a capability on or through
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap in the
 * user namespace @user_ns when the socket was created, and that the
 * current process has it as well.
 */
bool sk_ns_capable(const struct sock *sk,
		   struct user_namespace *user_ns, int cap)
{
	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(sk_ns_capable);

/**
 * sk_capable - Socket global capability test
 * @sk: Socket to use a capability on or through
 * @cap: The global capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created, and that the current process has it in all
 * user namespaces (i.e. in &init_user_ns).
 */
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);

/**
 * sk_net_capable - Network namespace socket capability test
 * @sk: Socket to use a capability on or through
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created, and that the current process has it over the
 * network namespace the socket is a member of.
 */
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);
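
/*
 * Illustrative use only (editorial note, actual callers live elsewhere in
 * the tree): a protocol handler gating an administrative request on the
 * socket's own network namespace would typically check
 *
 *	if (!sk_net_capable(sk, CAP_NET_ADMIN))
 *		return -EPERM;
 */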

#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	struct proto *proto;
	int ret = 0;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry(proto, &proto_list, node) {
		if (proto->init_cgroup) {
			ret = proto->init_cgroup(memcg, ss);
			if (ret)
				goto out;
		}
	}

	mutex_unlock(&proto_list_mutex);
	return ret;
out:
	list_for_each_entry_continue_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
	return ret;
}

void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
{
	struct proto *proto;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
}
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#if defined(CONFIG_MEMCG_KMEM)
struct static_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);
#endif

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
	"sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
	"sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
	"sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
	"sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
	"sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
	"sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
	"sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
	"sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
	"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
	"sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
	"sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
	"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
	"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
	"sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	"slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
	"slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
	"slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
	"slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
	"slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
	"slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
	"slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
	"slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
	"slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
	"slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
	"slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
	"slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
	"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
	"slock-AF_NFC"   , "slock-AF_VSOCK"    , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	"clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
	"clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
	"clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
	"clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
	"clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
	"clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
	"clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
	"clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
	"clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
	"clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
	"clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
	"clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
	"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
	"clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_MAX"
};

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
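
/*
 * Rough worked example (editorial; the numbers vary by arch and config):
 * SKB_TRUESIZE(256) is 256 bytes of payload plus the aligned sizes of
 * struct sk_buff and struct skb_shared_info, roughly 0.8 kB on a typical
 * 64-bit build, so SK_WMEM_MAX comes out on the order of
 * 256 packets * ~0.8 kB = ~200 kB of charged buffer space.
 */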

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = (SK_RMEM_MAX*8);
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);

/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements.
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_key_slow_inc(&memalloc_socks);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);

void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_key_slow_dec(&memalloc_socks);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
	 * it has rmem allocations there is a risk that the user of the
	 * socket cannot make forward progress due to exceeding the rmem
	 * limits. By rights, sk_clear_memalloc() should only be called
	 * on sockets being torn down but warn and reset the accounting if
	 * that assumption breaks.
	 */
	if (WARN_ON(sk->sk_forward_alloc))
		sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);

int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned long pflags = current->flags;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	ret = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);
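
/*
 * Editorial sketch of why the save/restore above is shaped this way:
 * tsk_restore_flags() only puts the PF_MEMALLOC bit back to its value in
 * @pflags, so if an outer context already ran with PF_MEMALLOC set, this
 * helper neither clears it on exit nor double-sets it on entry.
 */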

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
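
/*
 * Worked example (illustrative, assuming HZ == 100): a timeout of
 * { .tv_sec = 1, .tv_usec = 5000 } becomes
 *	1 * 100 + (5000 + 9999) / 10000 = 101 jiffies,
 * i.e. the sub-tick remainder of tv_usec is rounded up to a whole tick.
 */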

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
			warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue. Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
	return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);
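
/*
 * Editorial note on the mutex_acquire()/mutex_release() pair above: the
 * socket "lock" here is really bh_lock_sock() plus the owned flag, so the
 * dep_map annotations simply teach lockdep that the backlog receive ran as
 * if under lock_sock(), using trylock semantics so that no false deadlock
 * is reported for this path.
 */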

void sk_reset_txq(struct sock *sk)
{
	sk_tx_queue_clear(sk);
}
EXPORT_SYMBOL(sk_reset_txq);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_setbindtodevice(struct sock *sk, char __user *optval,
				int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

static int sock_getbindtodevice(struct sock *sk, char __user *optval,
				int __user *optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (sk->sk_bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_user(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (put_user(len, optlen))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_setbindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_REUSEPORT:
		sk->sk_reuseport = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, sysctl_rmem_max);
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead. Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP &&
		    sk->sk_type == SOCK_STREAM)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) ||
		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool) {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
				  val & SOF_TIMESTAMPING_TX_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
				  val & SOF_TIMESTAMPING_TX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
				  val & SOF_TIMESTAMPING_RX_HARDWARE);
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
				  val & SOF_TIMESTAMPING_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
				  val & SOF_TIMESTAMPING_SYS_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
				  val & SOF_TIMESTAMPING_RAW_HARDWARE);
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_LOCK_FILTER:
		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
			ret = -EPERM;
		else
			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_PEEK_OFF:
		if (sock->ops->set_peek_off)
			ret = sock->ops->set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	case SO_SELECT_ERR_QUEUE:
		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
		break;

	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);
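
/*
 * Illustrative userspace view of the SO_RCVBUF doubling above (a sketch,
 * assuming the request is below sysctl_rmem_max):
 *
 *	int val = 65536;
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	socklen_t len = sizeof(val);
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, &len);
 *	// val now reads back as 131072: the kernel stored twice the
 *	// request so that sk_buff overhead does not eat into the data
 *	// budget the application asked for.
 */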

void cred_to_ucred(struct pid *pid, const struct cred *cred,
		   struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = from_kuid_munged(current_ns, cred->euid);
		ucred->gid = from_kgid_munged(current_ns, cred->egid);
	}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_REUSEPORT:
		v.val = sk->sk_reuseport;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = 0;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
			v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	case SO_PEEK_OFF:
		if (!sock->ops->set_peek_off)
			return -EOPNOTSUPP;

		v.val = sk->sk_peek_off;
		break;
	case SO_NOFCS:
		v.val = sock_flag(sk, SOCK_NOFCS);
		break;

	case SO_BINDTODEVICE:
		return sock_getbindtodevice(sk, optval, optlen, len);

	case SO_GET_FILTER:
		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
		if (len < 0)
			return len;

		goto lenout;

	case SO_LOCK_FILTER:
		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
		break;

	case SO_SELECT_ERR_QUEUE:
		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
	unsigned long nulls1, nulls2;

	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
	if (nulls1 > nulls2)
		swap(nulls1, nulls2);

	if (nulls1 != 0)
		memset((char *)sk, 0, nulls1);
	memset((char *)sk + nulls1 + sizeof(void *), 0,
	       nulls2 - nulls1 - sizeof(void *));
	memset((char *)sk + nulls2 + sizeof(void *), 0,
	       size - nulls2 - sizeof(void *));
}
EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
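
/*
 * Sketch of the layout the three memset() calls above assume (offsets are
 * illustrative; the pointer width is sizeof(void *), e.g. 8 on 64-bit):
 *
 *	[0 .......... nulls1)	zeroed
 *	[nulls1, nulls1 + 8)	skc_node.next          - preserved
 *	[........... nulls2)	zeroed
 *	[nulls2, nulls2 + 8)	skc_portaddr_node.next - preserved
 *	[........... size)	zeroed
 *
 * The two 'next' pointers carry the nulls markers that lockless RCU
 * lookups rely on, so they must survive a recycled allocation intact.
 */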

static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			if (prot->clear_sk)
				prot->clear_sk(sk, prot->obj_size);
			else
				sk_prot_clear_nulls(sk, prot->obj_size);
		}
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
void sock_update_classid(struct sock *sk)
{
	u32 classid;

	classid = task_cls_classid(current);
	if (classid != sk->sk_classid)
		sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
#endif

#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
void sock_update_netprioidx(struct sock *sk)
{
	if (in_interrupt())
		return;

	sk->sk_cgrp_prioidx = task_netprioidx(current);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif

/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sock_lock_init(sk);
		sock_net_set(sk, get_net(net));
		atomic_set(&sk->sk_wmem_alloc, 1);

		sock_update_classid(sk);
		sock_update_netprioidx(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);

static void __sk_free(struct sock *sk)
{
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference_check(sk->sk_filter,
				       atomic_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}

void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc and can then tell whether
	 * some packets are still in some tx queue.
	 * If not null, sock_wfree() will call __sk_free(sk) later
	 */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);

/*
 * Last sock_put should drop a reference to sk->sk_net. It has already
 * been dropped in sk_change_net. Taking a reference to a stopping
 * namespace is not an option.
 * Take a reference to a socket to remove it from the hash _alive_ and
 * after that destroy it in the context of init_net.
 */
void sk_release_kernel(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_hold(sk);
	sock_release(sk->sk_socket);
	release_net(sock_net(sk));
	sock_net_set(sk, get_net(&init_net));
	sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);

static void sk_update_clone(const struct sock *sk, struct sock *newsk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		sock_update_memcg(newsk);
}

/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		spin_lock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still raw copy of parent, so invalidate
			 * destructor and make plain sk_free() */
			newsk->sk_destruct = NULL;
			bh_unlock_sock(newsk);
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		sk_update_clone(sk, newsk);

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
		}
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
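
/*
 * Worked example (illustrative): freeing an skb with truesize 4096 first
 * does atomic_sub(4095), which cannot hit zero because this skb's own
 * charge is still counted, so the socket stays pinned while
 * sk_write_space() runs; the final atomic_sub_and_test(1) then performs
 * the potentially-freeing decrement that sk_free() deferred.
 */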

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);

void sock_edemux(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

#ifdef CONFIG_INET
	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_put(inet_twsk(sk));
	else
#endif
		sock_put(sk);
}
EXPORT_SYMBOL(sock_edemux);

kuid_t sock_i_uid(struct sock *sk)
{
	kuid_t uid;

	/* mtk_net: fix kernel bug */
	if (!sk) {
		pr_info("sk == NULL for sock_i_uid\n");
		return GLOBAL_ROOT_UID;
	}

	read_lock_bh(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
	read_unlock_bh(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock_bh(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned int)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
EXPORT_SYMBOL(sock_kfree_s);

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}
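
/*
 * Editorial note on sock_kmalloc() above: charging sk_omem_alloc before
 * the (possibly sleeping) kmalloc() keeps the accounting conservative
 * while the allocation is in flight, and the charge is rolled back on
 * failure. The limit check itself reads sk_omem_alloc without a lock, so
 * it is advisory and may overshoot slightly under concurrency.
 */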

/* debug function */
static int sock_dump_info(struct sock *sk)
{
	if (sk->sk_family == AF_UNIX) {
		struct unix_sock *u = unix_sk(sk);
		struct sock *other = NULL;

		if ((u->path.dentry != NULL) && (u->path.dentry->d_iname != NULL)) {
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: socket-Name:%s \n", u->path.dentry->d_iname);
#endif
		} else {
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: socket Name (NULL)\n");
#endif
		}

		if (sk->sk_socket && SOCK_INODE(sk->sk_socket)) {
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: socket Inode[%lu]\n", SOCK_INODE(sk->sk_socket)->i_ino);
#endif
		}

		other = unix_sk(sk)->peer;
		if (!other) {
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: peer is (NULL) \n");
#endif
		} else {
			if ((((struct unix_sock *)other)->path.dentry != NULL) &&
			    (((struct unix_sock *)other)->path.dentry->d_iname != NULL)) {
#ifdef CONFIG_MTK_NET_LOGGING
				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name:%s \n", ((struct unix_sock *)other)->path.dentry->d_iname);
#endif
			} else {
#ifdef CONFIG_MTK_NET_LOGGING
				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name (NULL) \n");
#endif
			}

			if (other->sk_socket && SOCK_INODE(other->sk_socket)) {
#ifdef CONFIG_MTK_NET_LOGGING
				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Inode [%lu] \n", SOCK_INODE(other->sk_socket)->i_ino);
#endif
			}
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Receive Queue len:%d \n", other->sk_receive_queue.qlen);
#endif
			/* Optionally dump the first 128 bytes of the peer's
			 * receive queue (disabled):
			 *
			 *	struct sk_buff *skb;
			 *	char skbmsg[128];
			 *
			 *	if ((skb = skb_peek_tail(&other->sk_receive_queue)) == NULL) {
			 *		printk(KERN_INFO "sockdbg: Peer Receive Queue is null (warning)\n");
			 *	} else {
			 *		int i = 0, len = 0;
			 *		if ((skb->len != 0) && (skb->data != NULL)) {
			 *			len = (skb->len >= 127) ? 127 : skb->len;
			 *			for (i = 0; i < len; i++)
			 *				sprintf(skbmsg + i, "%x", skb->data[i]);
			 *			skbmsg[len] = '\0';
			 *			printk(KERN_INFO "sockdbg: Peer Receive Queue dump(%d bytes):%s\n", len, skbmsg);
			 *		} else {
			 *			printk(KERN_INFO "sockdbg: Peer Receive skb error\n");
			 *		}
			 *	}
			 */
		}
	}

	return 0;
}

1da177e4
LT
1859/*
1860 * Generic send/receive buffer handlers
1861 */
1862
4cc7f68d
HX
1863struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1864 unsigned long data_len, int noblock,
1865 int *errcode)
1da177e4
LT
1866{
1867 struct sk_buff *skb;
7d877f3b 1868 gfp_t gfp_mask;
1da177e4
LT
1869 long timeo;
1870 int err;
cc9b17ad
JW
1871 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1872
1873 err = -EMSGSIZE;
1874 if (npages > MAX_SKB_FRAGS)
1875 goto failure;
1da177e4
LT
1876
1877 gfp_mask = sk->sk_allocation;
1878 if (gfp_mask & __GFP_WAIT)
1879 gfp_mask |= __GFP_REPEAT;
1880
1881 timeo = sock_sndtimeo(sk, noblock);
1882 while (1) {
1883 err = sock_error(sk);
1884 if (err != 0)
1885 goto failure;
1886
1887 err = -EPIPE;
1888 if (sk->sk_shutdown & SEND_SHUTDOWN)
1889 goto failure;
1890
1891 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
db38c179 1892 skb = alloc_skb(header_len, gfp_mask);
1da177e4 1893 if (skb) {
1da177e4
LT
1894 int i;
1895
1896 /* No pages, we're done... */
1897 if (!data_len)
1898 break;
1899
1da177e4
LT
1900 skb->truesize += data_len;
1901 skb_shinfo(skb)->nr_frags = npages;
1902 for (i = 0; i < npages; i++) {
1903 struct page *page;
1da177e4
LT
1904
1905 page = alloc_pages(sk->sk_allocation, 0);
1906 if (!page) {
1907 err = -ENOBUFS;
1908 skb_shinfo(skb)->nr_frags = i;
1909 kfree_skb(skb);
1910 goto failure;
1911 }
1912
ea2ab693
IC
1913 __skb_fill_page_desc(skb, i,
1914 page, 0,
1915 (data_len >= PAGE_SIZE ?
1916 PAGE_SIZE :
1917 data_len));
1da177e4
LT
1918 data_len -= PAGE_SIZE;
1919 }
1920
1921 /* Full success... */
1922 break;
1923 }
1924 err = -ENOBUFS;
1925 goto failure;
1926 }
1927 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1928 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1929 err = -EAGAIN;
1930 if (!timeo)
1931 goto failure;
1932 if (signal_pending(current))
1933 goto interrupted;
6fa3eb70
S
1934
1935 sock_dump_info(sk);
1936 #ifdef CONFIG_MTK_NET_LOGGING
1937 printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem, timeo =%ld, wmem =%d, snd buf =%d \n",
1938 timeo, atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf);
1939 #endif
1da177e4 1940 timeo = sock_wait_for_wmem(sk, timeo);
6fa3eb70
S
1941 #ifdef CONFIG_MTK_NET_LOGGING
1942 printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem done, header_len=0x%lx, data_len=0x%lx,timeo =%ld \n",
1943 header_len, data_len ,timeo);
1944 #endif
1da177e4
LT
1945 }
1946
1947 skb_set_owner_w(skb, sk);
1948 return skb;
1949
1950interrupted:
1951 err = sock_intr_errno(timeo);
1952failure:
1953 *errcode = err;
1954 return NULL;
1955}
4cc7f68d 1956EXPORT_SYMBOL(sock_alloc_send_pskb);
1da177e4 1957
4ec93edb 1958struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1da177e4
LT
1959 int noblock, int *errcode)
1960{
1961 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1962}
2a91525c 1963EXPORT_SYMBOL(sock_alloc_send_skb);
1da177e4 1964
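/*
 * Editorial sketch (not part of the original file): a typical datagram
 * send path built on sock_alloc_send_skb(). The call sleeps until write
 * memory is available, the send timeout expires (*errcode = -EAGAIN for
 * non-blocking sockets) or a signal arrives. Only sock_alloc_send_skb()
 * and kfree_skb() are real; the wrapper is hypothetical.
 */
static int example_xmit(struct sock *sk, size_t len, int noblock)
{
	int err;
	struct sk_buff *skb = sock_alloc_send_skb(sk, len, noblock, &err);

	if (skb == NULL)
		return err;	/* -EAGAIN, -EPIPE, -ERESTARTSYS, ... */
	/* ... copy payload into skb and hand it to the device layer ... */
	kfree_skb(skb);		/* sketch only: drop instead of sending */
	return 0;
}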
5640f768
ED
1965/* On 32bit arches, an skb frag is limited to 2^15 bytes */
1966#define SKB_FRAG_PAGE_ORDER get_order(32768)
1967
1968bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1969{
1970 int order;
1971
1972 if (pfrag->page) {
1973 if (atomic_read(&pfrag->page->_count) == 1) {
1974 pfrag->offset = 0;
1975 return true;
1976 }
1977 if (pfrag->offset < pfrag->size)
1978 return true;
1979 put_page(pfrag->page);
1980 }
1981
1982 /* We restrict high order allocations to users that can afford to wait */
1983 order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1984
1985 do {
1986 gfp_t gfp = sk->sk_allocation;
1987
1988 if (order)
a9e3d789 1989 gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
5640f768
ED
1990 pfrag->page = alloc_pages(gfp, order);
1991 if (likely(pfrag->page)) {
1992 pfrag->offset = 0;
1993 pfrag->size = PAGE_SIZE << order;
1994 return true;
1995 }
1996 } while (--order >= 0);
1997
1998 sk_enter_memory_pressure(sk);
1999 sk_stream_moderate_sndbuf(sk);
2000 return false;
2001}
2002EXPORT_SYMBOL(sk_page_frag_refill);
2003
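/*
 * Editorial sketch (not part of the original file): the intended calling
 * pattern for sk_page_frag_refill(). The caller copies into the fragment
 * at pfrag->offset and advances it; the refill either recycles the page
 * (refcount back to 1), reuses remaining space, allocates a fresh one,
 * or fails under memory pressure. The wrapper name is hypothetical, and
 * the direct page_address() access assumes a lowmem allocation.
 */
static int example_copy_to_frag(struct sock *sk, const char *data, int len)
{
	struct page_frag *pfrag = sk_page_frag(sk);

	if (!sk_page_frag_refill(sk, pfrag))
		return -ENOBUFS;
	len = min_t(int, len, pfrag->size - pfrag->offset);
	memcpy(page_address(pfrag->page) + pfrag->offset, data, len);
	pfrag->offset += len;
	return len;
}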
1da177e4 2004static void __lock_sock(struct sock *sk)
f39234d6
NK
2005 __releases(&sk->sk_lock.slock)
2006 __acquires(&sk->sk_lock.slock)
1da177e4
LT
2007{
2008 DEFINE_WAIT(wait);
2009
e71a4783 2010 for (;;) {
1da177e4
LT
2011 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2012 TASK_UNINTERRUPTIBLE);
2013 spin_unlock_bh(&sk->sk_lock.slock);
2014 schedule();
2015 spin_lock_bh(&sk->sk_lock.slock);
e71a4783 2016 if (!sock_owned_by_user(sk))
1da177e4
LT
2017 break;
2018 }
2019 finish_wait(&sk->sk_lock.wq, &wait);
2020}
2021
2022static void __release_sock(struct sock *sk)
f39234d6
NK
2023 __releases(&sk->sk_lock.slock)
2024 __acquires(&sk->sk_lock.slock)
1da177e4
LT
2025{
2026 struct sk_buff *skb = sk->sk_backlog.head;
2027
2028 do {
2029 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2030 bh_unlock_sock(sk);
2031
2032 do {
2033 struct sk_buff *next = skb->next;
2034
e4cbb02a 2035 prefetch(next);
7fee226a 2036 WARN_ON_ONCE(skb_dst_is_noref(skb));
1da177e4 2037 skb->next = NULL;
c57943a1 2038 sk_backlog_rcv(sk, skb);
1da177e4
LT
2039
2040 /*
2041 * We are in process context here with softirqs
2042 * disabled, use cond_resched_softirq() to preempt.
2043 * This is safe to do because we've taken the backlog
2044 * queue private:
2045 */
2046 cond_resched_softirq();
2047
2048 skb = next;
2049 } while (skb != NULL);
2050
2051 bh_lock_sock(sk);
e71a4783 2052 } while ((skb = sk->sk_backlog.head) != NULL);
8eae939f
ZY
2053
2054 /*
2055 * Doing the zeroing here guarantees we cannot loop forever
2056 * while a wild producer attempts to flood us.
2057 */
2058 sk->sk_backlog.len = 0;
1da177e4
LT
2059}
2060
2061/**
2062 * sk_wait_data - wait for data to arrive at sk_receive_queue
4dc3b16b
PP
2063 * @sk: sock to wait on
2064 * @timeo: for how long
1da177e4
LT
2065 *
2066 * Now socket state including sk->sk_err is changed only under lock,
2067 * hence we may omit checks after joining the wait queue.
2068 * We check the receive queue before schedule() only as an optimization;
2069 * it is very likely that release_sock() added new data.
2070 */
2071int sk_wait_data(struct sock *sk, long *timeo)
2072{
2073 int rc;
2074 DEFINE_WAIT(wait);
2075
aa395145 2076 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4
LT
2077 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2078 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
2079 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
aa395145 2080 finish_wait(sk_sleep(sk), &wait);
1da177e4
LT
2081 return rc;
2082}
1da177e4
LT
2083EXPORT_SYMBOL(sk_wait_data);
2084
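/*
 * Editorial sketch (not part of the original file): the canonical
 * recvmsg-side loop around sk_wait_data(). The caller holds the socket
 * lock; sk_wait_data() drops it while sleeping (via sk_wait_event) and
 * re-takes it before returning. The wrapper name is hypothetical.
 */
static struct sk_buff *example_recv_wait(struct sock *sk, int noblock, int *err)
{
	long timeo = sock_rcvtimeo(sk, noblock);

	while (skb_queue_empty(&sk->sk_receive_queue)) {
		if (!timeo) {
			*err = -EAGAIN;
			return NULL;
		}
		if (signal_pending(current)) {
			*err = sock_intr_errno(timeo);
			return NULL;
		}
		sk_wait_data(sk, &timeo);
	}
	return skb_dequeue(&sk->sk_receive_queue);
}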
3ab224be
HA
2085/**
2086 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2087 * @sk: socket
2088 * @size: memory size to allocate
2089 * @kind: allocation type
2090 *
2091 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2092 * rmem allocation. This function assumes that protocols which have
2093 * memory_pressure use sk_wmem_queued as write buffer accounting.
2094 */
2095int __sk_mem_schedule(struct sock *sk, int size, int kind)
2096{
2097 struct proto *prot = sk->sk_prot;
2098 int amt = sk_mem_pages(size);
8d987e5c 2099 long allocated;
e1aab161 2100 int parent_status = UNDER_LIMIT;
3ab224be
HA
2101
2102 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
180d8cd9 2103
e1aab161 2104 allocated = sk_memory_allocated_add(sk, amt, &parent_status);
3ab224be
HA
2105
2106 /* Under limit. */
e1aab161
GC
2107 if (parent_status == UNDER_LIMIT &&
2108 allocated <= sk_prot_mem_limits(sk, 0)) {
180d8cd9 2109 sk_leave_memory_pressure(sk);
3ab224be
HA
2110 return 1;
2111 }
2112
e1aab161
GC
2113 /* Under pressure. (we or our parents) */
2114 if ((parent_status > SOFT_LIMIT) ||
2115 allocated > sk_prot_mem_limits(sk, 1))
180d8cd9 2116 sk_enter_memory_pressure(sk);
3ab224be 2117
e1aab161
GC
2118 /* Over hard limit (we or our parents) */
2119 if ((parent_status == OVER_LIMIT) ||
2120 (allocated > sk_prot_mem_limits(sk, 2)))
3ab224be
HA
2121 goto suppress_allocation;
2122
2123 /* guarantee minimum buffer size under pressure */
2124 if (kind == SK_MEM_RECV) {
2125 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2126 return 1;
180d8cd9 2127
3ab224be
HA
2128 } else { /* SK_MEM_SEND */
2129 if (sk->sk_type == SOCK_STREAM) {
2130 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2131 return 1;
2132 } else if (atomic_read(&sk->sk_wmem_alloc) <
2133 prot->sysctl_wmem[0])
2134 return 1;
2135 }
2136
180d8cd9 2137 if (sk_has_memory_pressure(sk)) {
1748376b
ED
2138 int alloc;
2139
180d8cd9 2140 if (!sk_under_memory_pressure(sk))
1748376b 2141 return 1;
180d8cd9
GC
2142 alloc = sk_sockets_allocated_read_positive(sk);
2143 if (sk_prot_mem_limits(sk, 2) > alloc *
3ab224be
HA
2144 sk_mem_pages(sk->sk_wmem_queued +
2145 atomic_read(&sk->sk_rmem_alloc) +
2146 sk->sk_forward_alloc))
2147 return 1;
2148 }
2149
2150suppress_allocation:
2151
2152 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2153 sk_stream_moderate_sndbuf(sk);
2154
2155 /* Fail only if socket is _under_ its sndbuf.
2156 * In this case we cannot block, so that we have to fail.
2157 */
2158 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2159 return 1;
2160 }
2161
3847ce32
SM
2162 trace_sock_exceed_buf_limit(sk, prot, allocated);
2163
3ab224be
HA
2164 /* Alas. Undo changes. */
2165 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
180d8cd9 2166
0e90b31f 2167 sk_memory_allocated_sub(sk, amt);
180d8cd9 2168
3ab224be
HA
2169 return 0;
2170}
3ab224be
HA
2171EXPORT_SYMBOL(__sk_mem_schedule);
2172
2173/**
2174 * __sk_mem_reclaim - reclaim memory_allocated
2175 * @sk: socket
2176 */
2177void __sk_mem_reclaim(struct sock *sk)
2178{
180d8cd9 2179 sk_memory_allocated_sub(sk,
0e90b31f 2180 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
3ab224be
HA
2181 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
2182
180d8cd9
GC
2183 if (sk_under_memory_pressure(sk) &&
2184 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2185 sk_leave_memory_pressure(sk);
3ab224be 2186}
3ab224be
HA
2187EXPORT_SYMBOL(__sk_mem_reclaim);
2188
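/*
 * Editorial sketch (not part of the original file): how the inline
 * wrappers in include/net/sock.h drive the two functions above. Memory
 * is accounted in SK_MEM_QUANTUM (one page) units: sk_forward_alloc is a
 * prepaid per-socket balance that __sk_mem_schedule() tops up and
 * __sk_mem_reclaim() trims back below one quantum. The wrapper name is
 * hypothetical.
 */
static int example_charge_rmem(struct sock *sk, struct sk_buff *skb)
{
	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return -ENOBUFS;	/* over the protocol's memory limits */
	skb_set_owner_r(skb, sk);	/* consumes sk_forward_alloc */
	return 0;
}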
2189
1da177e4
LT
2190/*
2191 * Set of default routines for initialising struct proto_ops when
2192 * the protocol does not support a particular function. In certain
2193 * cases where it makes no sense for a protocol to have a "do nothing"
2194 * function, some default processing is provided.
2195 */
2196
2197int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2198{
2199 return -EOPNOTSUPP;
2200}
2a91525c 2201EXPORT_SYMBOL(sock_no_bind);
1da177e4 2202
4ec93edb 2203int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1da177e4
LT
2204 int len, int flags)
2205{
2206 return -EOPNOTSUPP;
2207}
2a91525c 2208EXPORT_SYMBOL(sock_no_connect);
1da177e4
LT
2209
2210int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2211{
2212 return -EOPNOTSUPP;
2213}
2a91525c 2214EXPORT_SYMBOL(sock_no_socketpair);
1da177e4
LT
2215
2216int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2217{
2218 return -EOPNOTSUPP;
2219}
2a91525c 2220EXPORT_SYMBOL(sock_no_accept);
1da177e4 2221
4ec93edb 2222int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1da177e4
LT
2223 int *len, int peer)
2224{
2225 return -EOPNOTSUPP;
2226}
2a91525c 2227EXPORT_SYMBOL(sock_no_getname);
1da177e4 2228
2a91525c 2229unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
1da177e4
LT
2230{
2231 return 0;
2232}
2a91525c 2233EXPORT_SYMBOL(sock_no_poll);
1da177e4
LT
2234
2235int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2236{
2237 return -EOPNOTSUPP;
2238}
2a91525c 2239EXPORT_SYMBOL(sock_no_ioctl);
1da177e4
LT
2240
2241int sock_no_listen(struct socket *sock, int backlog)
2242{
2243 return -EOPNOTSUPP;
2244}
2a91525c 2245EXPORT_SYMBOL(sock_no_listen);
1da177e4
LT
2246
2247int sock_no_shutdown(struct socket *sock, int how)
2248{
2249 return -EOPNOTSUPP;
2250}
2a91525c 2251EXPORT_SYMBOL(sock_no_shutdown);
1da177e4
LT
2252
2253int sock_no_setsockopt(struct socket *sock, int level, int optname,
b7058842 2254 char __user *optval, unsigned int optlen)
1da177e4
LT
2255{
2256 return -EOPNOTSUPP;
2257}
2a91525c 2258EXPORT_SYMBOL(sock_no_setsockopt);
1da177e4
LT
2259
2260int sock_no_getsockopt(struct socket *sock, int level, int optname,
2261 char __user *optval, int __user *optlen)
2262{
2263 return -EOPNOTSUPP;
2264}
2a91525c 2265EXPORT_SYMBOL(sock_no_getsockopt);
1da177e4
LT
2266
2267int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2268 size_t len)
2269{
2270 return -EOPNOTSUPP;
2271}
2a91525c 2272EXPORT_SYMBOL(sock_no_sendmsg);
1da177e4
LT
2273
2274int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2275 size_t len, int flags)
2276{
2277 return -EOPNOTSUPP;
2278}
2a91525c 2279EXPORT_SYMBOL(sock_no_recvmsg);
1da177e4
LT
2280
2281int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2282{
2283 /* Mirror missing mmap method error code */
2284 return -ENODEV;
2285}
2a91525c 2286EXPORT_SYMBOL(sock_no_mmap);
1da177e4
LT
2287
2288ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2289{
2290 ssize_t res;
2291 struct msghdr msg = {.msg_flags = flags};
2292 struct kvec iov;
2293 char *kaddr = kmap(page);
2294 iov.iov_base = kaddr + offset;
2295 iov.iov_len = size;
2296 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2297 kunmap(page);
2298 return res;
2299}
2a91525c 2300EXPORT_SYMBOL(sock_no_sendpage);
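/*
 * Editorial sketch (not part of the original file): a protocol that only
 * implements part of the socket API can wire the sock_no_*() stubs above
 * into its proto_ops so the unsupported calls fail cleanly with
 * -EOPNOTSUPP. Everything except the sock_no_*() helpers is hypothetical.
 */
static const struct proto_ops example_dgram_ops = {
	.family		= PF_UNSPEC,
	.owner		= THIS_MODULE,
	.bind		= sock_no_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,	/* not a listening protocol */
	.listen		= sock_no_listen,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,	/* emulated via kernel_sendmsg() */
};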
1da177e4
LT
2301
2302/*
2303 * Default Socket Callbacks
2304 */
2305
2306static void sock_def_wakeup(struct sock *sk)
2307{
43815482
ED
2308 struct socket_wq *wq;
2309
2310 rcu_read_lock();
2311 wq = rcu_dereference(sk->sk_wq);
2312 if (wq_has_sleeper(wq))
2313 wake_up_interruptible_all(&wq->wait);
2314 rcu_read_unlock();
1da177e4
LT
2315}
2316
2317static void sock_def_error_report(struct sock *sk)
2318{
43815482
ED
2319 struct socket_wq *wq;
2320
2321 rcu_read_lock();
2322 wq = rcu_dereference(sk->sk_wq);
2323 if (wq_has_sleeper(wq))
2324 wake_up_interruptible_poll(&wq->wait, POLLERR);
8d8ad9d7 2325 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
43815482 2326 rcu_read_unlock();
1da177e4
LT
2327}
2328
2329static void sock_def_readable(struct sock *sk, int len)
2330{
43815482
ED
2331 struct socket_wq *wq;
2332
2333 rcu_read_lock();
2334 wq = rcu_dereference(sk->sk_wq);
2335 if (wq_has_sleeper(wq))
2c6607c6 2336 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
37e5540b 2337 POLLRDNORM | POLLRDBAND);
8d8ad9d7 2338 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
43815482 2339 rcu_read_unlock();
1da177e4
LT
2340}
2341
2342static void sock_def_write_space(struct sock *sk)
2343{
43815482
ED
2344 struct socket_wq *wq;
2345
2346 rcu_read_lock();
1da177e4
LT
2347
2348 /* Do not wake up a writer until he can make "significant"
2349 * progress. --DaveM
2350 */
e71a4783 2351 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
43815482
ED
2352 wq = rcu_dereference(sk->sk_wq);
2353 if (wq_has_sleeper(wq))
2354 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
37e5540b 2355 POLLWRNORM | POLLWRBAND);
1da177e4
LT
2356
2357 /* Should agree with poll, otherwise some programs break */
2358 if (sock_writeable(sk))
8d8ad9d7 2359 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4
LT
2360 }
2361
43815482 2362 rcu_read_unlock();
1da177e4
LT
2363}
2364
2365static void sock_def_destruct(struct sock *sk)
2366{
a51482bd 2367 kfree(sk->sk_protinfo);
1da177e4
LT
2368}
2369
2370void sk_send_sigurg(struct sock *sk)
2371{
2372 if (sk->sk_socket && sk->sk_socket->file)
2373 if (send_sigurg(&sk->sk_socket->file->f_owner))
8d8ad9d7 2374 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
1da177e4 2375}
2a91525c 2376EXPORT_SYMBOL(sk_send_sigurg);
1da177e4
LT
2377
2378void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2379 unsigned long expires)
2380{
2381 if (!mod_timer(timer, expires))
2382 sock_hold(sk);
2383}
1da177e4
LT
2384EXPORT_SYMBOL(sk_reset_timer);
2385
2386void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2387{
25cc4ae9 2388 if (del_timer(timer))
1da177e4
LT
2389 __sock_put(sk);
2390}
1da177e4
LT
2391EXPORT_SYMBOL(sk_stop_timer);
2392
2393void sock_init_data(struct socket *sock, struct sock *sk)
2394{
2395 skb_queue_head_init(&sk->sk_receive_queue);
2396 skb_queue_head_init(&sk->sk_write_queue);
2397 skb_queue_head_init(&sk->sk_error_queue);
97fc2f08
CL
2398#ifdef CONFIG_NET_DMA
2399 skb_queue_head_init(&sk->sk_async_wait_queue);
2400#endif
1da177e4
LT
2401
2402 sk->sk_send_head = NULL;
2403
2404 init_timer(&sk->sk_timer);
4ec93edb 2405
1da177e4
LT
2406 sk->sk_allocation = GFP_KERNEL;
2407 sk->sk_rcvbuf = sysctl_rmem_default;
2408 sk->sk_sndbuf = sysctl_wmem_default;
2409 sk->sk_state = TCP_CLOSE;
972692e0 2410 sk_set_socket(sk, sock);
1da177e4
LT
2411
2412 sock_set_flag(sk, SOCK_ZAPPED);
2413
e71a4783 2414 if (sock) {
1da177e4 2415 sk->sk_type = sock->type;
43815482 2416 sk->sk_wq = sock->wq;
1da177e4
LT
2417 sock->sk = sk;
2418 } else
43815482 2419 sk->sk_wq = NULL;
1da177e4 2420
b6c6712a 2421 spin_lock_init(&sk->sk_dst_lock);
1da177e4 2422 rwlock_init(&sk->sk_callback_lock);
443aef0e
PZ
2423 lockdep_set_class_and_name(&sk->sk_callback_lock,
2424 af_callback_keys + sk->sk_family,
2425 af_family_clock_key_strings[sk->sk_family]);
1da177e4
LT
2426
2427 sk->sk_state_change = sock_def_wakeup;
2428 sk->sk_data_ready = sock_def_readable;
2429 sk->sk_write_space = sock_def_write_space;
2430 sk->sk_error_report = sock_def_error_report;
2431 sk->sk_destruct = sock_def_destruct;
2432
5640f768
ED
2433 sk->sk_frag.page = NULL;
2434 sk->sk_frag.offset = 0;
ef64a54f 2435 sk->sk_peek_off = -1;
1da177e4 2436
109f6e39
EB
2437 sk->sk_peer_pid = NULL;
2438 sk->sk_peer_cred = NULL;
1da177e4
LT
2439 sk->sk_write_pending = 0;
2440 sk->sk_rcvlowat = 1;
2441 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2442 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2443
f37f0afb 2444 sk->sk_stamp = ktime_set(-1L, 0);
1da177e4 2445
5e25ba50 2446 sk->sk_pacing_rate = ~0U;
4dc6dc71
ED
2447 /*
2448 * Before updating sk_refcnt, we must commit prior changes to memory
2449 * (Documentation/RCU/rculist_nulls.txt for details)
2450 */
2451 smp_wmb();
1da177e4 2452 atomic_set(&sk->sk_refcnt, 1);
33c732c3 2453 atomic_set(&sk->sk_drops, 0);
1da177e4 2454}
2a91525c 2455EXPORT_SYMBOL(sock_init_data);
1da177e4 2456
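/*
 * Editorial sketch (not part of the original file): a protocol's create
 * path typically calls sock_init_data() right after sk_alloc() to install
 * the queues, default buffer sizes and the sock_def_*() callbacks above,
 * then overrides the callbacks it cares about. All example_* names are
 * hypothetical.
 */
static struct proto example_proto = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct sock),
};

static void example_destruct(struct sock *sk)
{
	/* free protocol-private state here */
}

static int example_create(struct net *net, struct socket *sock, int protocol)
{
	struct sock *sk = sk_alloc(net, PF_UNSPEC, GFP_KERNEL, &example_proto);

	if (sk == NULL)
		return -ENOBUFS;
	sock_init_data(sock, sk);		/* links sock <-> sk */
	sk->sk_destruct = example_destruct;	/* replaces sock_def_destruct */
	return 0;
}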
b5606c2d 2457void lock_sock_nested(struct sock *sk, int subclass)
1da177e4
LT
2458{
2459 might_sleep();
a5b5bb9a 2460 spin_lock_bh(&sk->sk_lock.slock);
d2e9117c 2461 if (sk->sk_lock.owned)
1da177e4 2462 __lock_sock(sk);
d2e9117c 2463 sk->sk_lock.owned = 1;
a5b5bb9a
IM
2464 spin_unlock(&sk->sk_lock.slock);
2465 /*
2466 * The sk_lock has mutex_lock() semantics here:
2467 */
fcc70d5f 2468 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
a5b5bb9a 2469 local_bh_enable();
1da177e4 2470}
fcc70d5f 2471EXPORT_SYMBOL(lock_sock_nested);
1da177e4 2472
b5606c2d 2473void release_sock(struct sock *sk)
1da177e4 2474{
a5b5bb9a
IM
2475 /*
2476 * The sk_lock has mutex_unlock() semantics:
2477 */
2478 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2479
2480 spin_lock_bh(&sk->sk_lock.slock);
1da177e4
LT
2481 if (sk->sk_backlog.tail)
2482 __release_sock(sk);
46d3ceab 2483
cbbb5a25
ED
2484 /* Warning : release_cb() might need to release sk ownership,
2485 * ie call sock_release_ownership(sk) before us.
2486 */
46d3ceab
ED
2487 if (sk->sk_prot->release_cb)
2488 sk->sk_prot->release_cb(sk);
2489
cbbb5a25 2490 sock_release_ownership(sk);
a5b5bb9a
IM
2491 if (waitqueue_active(&sk->sk_lock.wq))
2492 wake_up(&sk->sk_lock.wq);
2493 spin_unlock_bh(&sk->sk_lock.slock);
1da177e4
LT
2494}
2495EXPORT_SYMBOL(release_sock);
2496
8a74ad60
ED
2497/**
2498 * lock_sock_fast - fast version of lock_sock
2499 * @sk: socket
2500 *
2501 * This version should be used for very small sections, where the process
2502 * won't block. Returns false if the fast path is taken:
2503 * sk_lock.slock locked, owned = 0, BH disabled.
2504 * Returns true if the slow path is taken:
2505 * sk_lock.slock unlocked, owned = 1, BH enabled.
2506 */
2507bool lock_sock_fast(struct sock *sk)
2508{
2509 might_sleep();
2510 spin_lock_bh(&sk->sk_lock.slock);
2511
2512 if (!sk->sk_lock.owned)
2513 /*
2514 * Note : We must disable BH
2515 */
2516 return false;
2517
2518 __lock_sock(sk);
2519 sk->sk_lock.owned = 1;
2520 spin_unlock(&sk->sk_lock.slock);
2521 /*
2522 * The sk_lock has mutex_lock() semantics here:
2523 */
2524 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2525 local_bh_enable();
2526 return true;
2527}
2528EXPORT_SYMBOL(lock_sock_fast);
2529
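/*
 * Editorial sketch (not part of the original file): lock_sock_fast() must
 * be paired with unlock_sock_fast(), forwarding the return value so the
 * matching unlock (spin_unlock_bh() for the fast path, release_sock() for
 * the slow one) is used. The wrapper name is hypothetical.
 */
static int example_read_sk_err(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);
	int err = sk->sk_err;

	unlock_sock_fast(sk, slow);
	return err;
}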
1da177e4 2530int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
4ec93edb 2531{
b7aa0bf7 2532 struct timeval tv;
1da177e4 2533 if (!sock_flag(sk, SOCK_TIMESTAMP))
20d49473 2534 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
b7aa0bf7
ED
2535 tv = ktime_to_timeval(sk->sk_stamp);
2536 if (tv.tv_sec == -1)
1da177e4 2537 return -ENOENT;
b7aa0bf7
ED
2538 if (tv.tv_sec == 0) {
2539 sk->sk_stamp = ktime_get_real();
2540 tv = ktime_to_timeval(sk->sk_stamp);
2541 }
2542 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
4ec93edb 2543}
1da177e4
LT
2544EXPORT_SYMBOL(sock_get_timestamp);
2545
ae40eb1e
ED
2546int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2547{
2548 struct timespec ts;
2549 if (!sock_flag(sk, SOCK_TIMESTAMP))
20d49473 2550 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
ae40eb1e
ED
2551 ts = ktime_to_timespec(sk->sk_stamp);
2552 if (ts.tv_sec == -1)
2553 return -ENOENT;
2554 if (ts.tv_sec == 0) {
2555 sk->sk_stamp = ktime_get_real();
2556 ts = ktime_to_timespec(sk->sk_stamp);
2557 }
2558 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2559}
2560EXPORT_SYMBOL(sock_get_timestampns);
2561
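/*
 * Editorial sketch (not part of the original file): protocols typically
 * expose the two helpers above from their ioctl handler, which is how
 * userspace SIOCGSTAMP/SIOCGSTAMPNS reads the last-packet timestamp.
 * The handler name is hypothetical.
 */
static int example_stamp_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}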
20d49473 2562void sock_enable_timestamp(struct sock *sk, int flag)
4ec93edb 2563{
20d49473 2564 if (!sock_flag(sk, flag)) {
08e29af3
ED
2565 unsigned long previous_flags = sk->sk_flags;
2566
20d49473
PO
2567 sock_set_flag(sk, flag);
2568 /*
2569 * we just set one of the two flags which require net
2570 * time stamping, but time stamping might have been on
2571 * already because of the other one
2572 */
08e29af3 2573 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
20d49473 2574 net_enable_timestamp();
1da177e4
LT
2575 }
2576}
1da177e4
LT
2577
2578/*
2579 * Get a socket option on a socket.
2580 *
2581 * FIX: POSIX 1003.1g is very ambiguous here. It states that
2582 * asynchronous errors should be reported by getsockopt. We assume
2583 * this means if you specify SO_ERROR (otherwise what's the point of it).
2584 */
2585int sock_common_getsockopt(struct socket *sock, int level, int optname,
2586 char __user *optval, int __user *optlen)
2587{
2588 struct sock *sk = sock->sk;
2589
2590 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2591}
1da177e4
LT
2592EXPORT_SYMBOL(sock_common_getsockopt);
2593
3fdadf7d 2594#ifdef CONFIG_COMPAT
543d9cfe
ACM
2595int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2596 char __user *optval, int __user *optlen)
3fdadf7d
DM
2597{
2598 struct sock *sk = sock->sk;
2599
1e51f951 2600 if (sk->sk_prot->compat_getsockopt != NULL)
543d9cfe
ACM
2601 return sk->sk_prot->compat_getsockopt(sk, level, optname,
2602 optval, optlen);
3fdadf7d
DM
2603 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2604}
2605EXPORT_SYMBOL(compat_sock_common_getsockopt);
2606#endif
2607
1da177e4
LT
2608int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2609 struct msghdr *msg, size_t size, int flags)
2610{
2611 struct sock *sk = sock->sk;
2612 int addr_len = 0;
2613 int err;
2614
2615 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2616 flags & ~MSG_DONTWAIT, &addr_len);
2617 if (err >= 0)
2618 msg->msg_namelen = addr_len;
2619 return err;
2620}
1da177e4
LT
2621EXPORT_SYMBOL(sock_common_recvmsg);
2622
2623/*
2624 * Set socket options on an inet socket.
2625 */
2626int sock_common_setsockopt(struct socket *sock, int level, int optname,
b7058842 2627 char __user *optval, unsigned int optlen)
1da177e4
LT
2628{
2629 struct sock *sk = sock->sk;
2630
2631 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2632}
1da177e4
LT
2633EXPORT_SYMBOL(sock_common_setsockopt);
2634
3fdadf7d 2635#ifdef CONFIG_COMPAT
543d9cfe 2636int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
b7058842 2637 char __user *optval, unsigned int optlen)
3fdadf7d
DM
2638{
2639 struct sock *sk = sock->sk;
2640
543d9cfe
ACM
2641 if (sk->sk_prot->compat_setsockopt != NULL)
2642 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2643 optval, optlen);
3fdadf7d
DM
2644 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2645}
2646EXPORT_SYMBOL(compat_sock_common_setsockopt);
2647#endif
2648
1da177e4
LT
2649void sk_common_release(struct sock *sk)
2650{
2651 if (sk->sk_prot->destroy)
2652 sk->sk_prot->destroy(sk);
2653
2654 /*
2655 * Observation: when sk_common_release is called, processes have no
2656 * access to the socket, but the network stack still does.
2657 * Step one, detach it from networking:
2658 *
2659 * A. Remove from hash tables.
2660 */
2661
2662 sk->sk_prot->unhash(sk);
2663
2664 /*
2665 * At this point the socket cannot receive new packets, but it is
2666 * possible that some packets are in flight, because some CPU runs the
2667 * receiver and did a hash table lookup before we unhashed the socket.
2668 * They will reach the receive queue and be purged by the socket destructor.
2669 *
2670 * Also we still have packets pending on the receive queue and, probably,
2671 * our own packets waiting in device queues. sock_destroy will drain the
2672 * receive queue, but transmitted packets will delay socket destruction
2673 * until the last reference is released.
2674 */
2675
2676 sock_orphan(sk);
2677
2678 xfrm_sk_free_policy(sk);
2679
e6848976 2680 sk_refcnt_debug_release(sk);
5640f768
ED
2681
2682 if (sk->sk_frag.page) {
2683 put_page(sk->sk_frag.page);
2684 sk->sk_frag.page = NULL;
2685 }
2686
1da177e4
LT
2687 sock_put(sk);
2688}
1da177e4
LT
2689EXPORT_SYMBOL(sk_common_release);
2690
13ff3d6f
PE
2691#ifdef CONFIG_PROC_FS
2692#define PROTO_INUSE_NR 64 /* should be enough for the first time */
1338d466
PE
2693struct prot_inuse {
2694 int val[PROTO_INUSE_NR];
2695};
13ff3d6f
PE
2696
2697static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
70ee1159
PE
2698
2699#ifdef CONFIG_NET_NS
2700void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2701{
d6d9ca0f 2702 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
70ee1159
PE
2703}
2704EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2705
2706int sock_prot_inuse_get(struct net *net, struct proto *prot)
2707{
2708 int cpu, idx = prot->inuse_idx;
2709 int res = 0;
2710
2711 for_each_possible_cpu(cpu)
2712 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2713
2714 return res >= 0 ? res : 0;
2715}
2716EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2717
2c8c1e72 2718static int __net_init sock_inuse_init_net(struct net *net)
70ee1159
PE
2719{
2720 net->core.inuse = alloc_percpu(struct prot_inuse);
2721 return net->core.inuse ? 0 : -ENOMEM;
2722}
2723
2c8c1e72 2724static void __net_exit sock_inuse_exit_net(struct net *net)
70ee1159
PE
2725{
2726 free_percpu(net->core.inuse);
2727}
2728
2729static struct pernet_operations net_inuse_ops = {
2730 .init = sock_inuse_init_net,
2731 .exit = sock_inuse_exit_net,
2732};
2733
2734static __init int net_inuse_init(void)
2735{
2736 if (register_pernet_subsys(&net_inuse_ops))
2737 panic("Cannot initialize net inuse counters");
2738
2739 return 0;
2740}
2741
2742core_initcall(net_inuse_init);
2743#else
1338d466
PE
2744static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2745
c29a0bc4 2746void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
1338d466 2747{
d6d9ca0f 2748 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
1338d466
PE
2749}
2750EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2751
c29a0bc4 2752int sock_prot_inuse_get(struct net *net, struct proto *prot)
1338d466
PE
2753{
2754 int cpu, idx = prot->inuse_idx;
2755 int res = 0;
2756
2757 for_each_possible_cpu(cpu)
2758 res += per_cpu(prot_inuse, cpu).val[idx];
2759
2760 return res >= 0 ? res : 0;
2761}
2762EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
70ee1159 2763#endif
13ff3d6f
PE
2764
2765static void assign_proto_idx(struct proto *prot)
2766{
2767 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2768
2769 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
e005d193 2770 pr_err("PROTO_INUSE_NR exhausted\n");
13ff3d6f
PE
2771 return;
2772 }
2773
2774 set_bit(prot->inuse_idx, proto_inuse_idx);
2775}
2776
2777static void release_proto_idx(struct proto *prot)
2778{
2779 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2780 clear_bit(prot->inuse_idx, proto_inuse_idx);
2781}
2782#else
2783static inline void assign_proto_idx(struct proto *prot)
2784{
2785}
2786
2787static inline void release_proto_idx(struct proto *prot)
2788{
2789}
2790#endif
2791
b733c007
PE
2792int proto_register(struct proto *prot, int alloc_slab)
2793{
1da177e4
LT
2794 if (alloc_slab) {
2795 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
271b72c7
ED
2796 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2797 NULL);
1da177e4
LT
2798
2799 if (prot->slab == NULL) {
e005d193
JP
2800 pr_crit("%s: Can't create sock SLAB cache!\n",
2801 prot->name);
60e7663d 2802 goto out;
1da177e4 2803 }
2e6599cb
ACM
2804
2805 if (prot->rsk_prot != NULL) {
faf23422 2806 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
7e56b5d6 2807 if (prot->rsk_prot->slab_name == NULL)
2e6599cb
ACM
2808 goto out_free_sock_slab;
2809
7e56b5d6 2810 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2e6599cb 2811 prot->rsk_prot->obj_size, 0,
20c2df83 2812 SLAB_HWCACHE_ALIGN, NULL);
2e6599cb
ACM
2813
2814 if (prot->rsk_prot->slab == NULL) {
e005d193
JP
2815 pr_crit("%s: Can't create request sock SLAB cache!\n",
2816 prot->name);
2e6599cb
ACM
2817 goto out_free_request_sock_slab_name;
2818 }
2819 }
8feaf0c0 2820
6d6ee43e 2821 if (prot->twsk_prot != NULL) {
faf23422 2822 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
8feaf0c0 2823
7e56b5d6 2824 if (prot->twsk_prot->twsk_slab_name == NULL)
8feaf0c0
ACM
2825 goto out_free_request_sock_slab;
2826
6d6ee43e 2827 prot->twsk_prot->twsk_slab =
7e56b5d6 2828 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
6d6ee43e 2829 prot->twsk_prot->twsk_obj_size,
3ab5aee7
ED
2830 0,
2831 SLAB_HWCACHE_ALIGN |
2832 prot->slab_flags,
20c2df83 2833 NULL);
6d6ee43e 2834 if (prot->twsk_prot->twsk_slab == NULL)
8feaf0c0
ACM
2835 goto out_free_timewait_sock_slab_name;
2836 }
1da177e4
LT
2837 }
2838
36b77a52 2839 mutex_lock(&proto_list_mutex);
1da177e4 2840 list_add(&prot->node, &proto_list);
13ff3d6f 2841 assign_proto_idx(prot);
36b77a52 2842 mutex_unlock(&proto_list_mutex);
b733c007
PE
2843 return 0;
2844
8feaf0c0 2845out_free_timewait_sock_slab_name:
7e56b5d6 2846 kfree(prot->twsk_prot->twsk_slab_name);
8feaf0c0
ACM
2847out_free_request_sock_slab:
2848 if (prot->rsk_prot && prot->rsk_prot->slab) {
2849 kmem_cache_destroy(prot->rsk_prot->slab);
2850 prot->rsk_prot->slab = NULL;
2851 }
2e6599cb 2852out_free_request_sock_slab_name:
72150e9b
DC
2853 if (prot->rsk_prot)
2854 kfree(prot->rsk_prot->slab_name);
2e6599cb
ACM
2855out_free_sock_slab:
2856 kmem_cache_destroy(prot->slab);
2857 prot->slab = NULL;
b733c007
PE
2858out:
2859 return -ENOBUFS;
1da177e4 2860}
1da177e4
LT
2861EXPORT_SYMBOL(proto_register);
2862
2863void proto_unregister(struct proto *prot)
2864{
36b77a52 2865 mutex_lock(&proto_list_mutex);
13ff3d6f 2866 release_proto_idx(prot);
0a3f4358 2867 list_del(&prot->node);
36b77a52 2868 mutex_unlock(&proto_list_mutex);
1da177e4
LT
2869
2870 if (prot->slab != NULL) {
2871 kmem_cache_destroy(prot->slab);
2872 prot->slab = NULL;
2873 }
2874
2e6599cb 2875 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
2e6599cb 2876 kmem_cache_destroy(prot->rsk_prot->slab);
7e56b5d6 2877 kfree(prot->rsk_prot->slab_name);
2e6599cb
ACM
2878 prot->rsk_prot->slab = NULL;
2879 }
2880
6d6ee43e 2881 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
6d6ee43e 2882 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
7e56b5d6 2883 kfree(prot->twsk_prot->twsk_slab_name);
6d6ee43e 2884 prot->twsk_prot->twsk_slab = NULL;
8feaf0c0 2885 }
1da177e4 2886}
1da177e4
LT
2887EXPORT_SYMBOL(proto_unregister);
2888
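/*
 * Editorial sketch (not part of the original file): the usual pairing of
 * proto_register()/proto_unregister() in a protocol module. alloc_slab=1
 * creates the per-protocol kmem cache that sk_alloc() draws from. The
 * module hooks and the proto definition are hypothetical.
 */
static struct proto example_mod_proto = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct sock),
};

static int __init example_mod_init(void)
{
	return proto_register(&example_mod_proto, 1);
}

static void __exit example_mod_exit(void)
{
	proto_unregister(&example_mod_proto);
}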
2889#ifdef CONFIG_PROC_FS
1da177e4 2890static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
36b77a52 2891 __acquires(proto_list_mutex)
1da177e4 2892{
36b77a52 2893 mutex_lock(&proto_list_mutex);
60f0438a 2894 return seq_list_start_head(&proto_list, *pos);
1da177e4
LT
2895}
2896
2897static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2898{
60f0438a 2899 return seq_list_next(v, &proto_list, pos);
1da177e4
LT
2900}
2901
2902static void proto_seq_stop(struct seq_file *seq, void *v)
36b77a52 2903 __releases(proto_list_mutex)
1da177e4 2904{
36b77a52 2905 mutex_unlock(&proto_list_mutex);
1da177e4
LT
2906}
2907
2908static char proto_method_implemented(const void *method)
2909{
2910 return method == NULL ? 'n' : 'y';
2911}
180d8cd9
GC
2912static long sock_prot_memory_allocated(struct proto *proto)
2913{
cb75a36c 2914 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
180d8cd9
GC
2915}
2916
2917static char *sock_prot_memory_pressure(struct proto *proto)
2918{
2919 return proto->memory_pressure != NULL ?
2920 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2921}
1da177e4
LT
2922
2923static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2924{
180d8cd9 2925
8d987e5c 2926 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
1da177e4
LT
2927 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2928 proto->name,
2929 proto->obj_size,
14e943db 2930 sock_prot_inuse_get(seq_file_net(seq), proto),
180d8cd9
GC
2931 sock_prot_memory_allocated(proto),
2932 sock_prot_memory_pressure(proto),
1da177e4
LT
2933 proto->max_header,
2934 proto->slab == NULL ? "no" : "yes",
2935 module_name(proto->owner),
2936 proto_method_implemented(proto->close),
2937 proto_method_implemented(proto->connect),
2938 proto_method_implemented(proto->disconnect),
2939 proto_method_implemented(proto->accept),
2940 proto_method_implemented(proto->ioctl),
2941 proto_method_implemented(proto->init),
2942 proto_method_implemented(proto->destroy),
2943 proto_method_implemented(proto->shutdown),
2944 proto_method_implemented(proto->setsockopt),
2945 proto_method_implemented(proto->getsockopt),
2946 proto_method_implemented(proto->sendmsg),
2947 proto_method_implemented(proto->recvmsg),
2948 proto_method_implemented(proto->sendpage),
2949 proto_method_implemented(proto->bind),
2950 proto_method_implemented(proto->backlog_rcv),
2951 proto_method_implemented(proto->hash),
2952 proto_method_implemented(proto->unhash),
2953 proto_method_implemented(proto->get_port),
2954 proto_method_implemented(proto->enter_memory_pressure));
2955}
2956
2957static int proto_seq_show(struct seq_file *seq, void *v)
2958{
60f0438a 2959 if (v == &proto_list)
1da177e4
LT
2960 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2961 "protocol",
2962 "size",
2963 "sockets",
2964 "memory",
2965 "press",
2966 "maxhdr",
2967 "slab",
2968 "module",
2969 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2970 else
60f0438a 2971 proto_seq_printf(seq, list_entry(v, struct proto, node));
1da177e4
LT
2972 return 0;
2973}
2974
f690808e 2975static const struct seq_operations proto_seq_ops = {
1da177e4
LT
2976 .start = proto_seq_start,
2977 .next = proto_seq_next,
2978 .stop = proto_seq_stop,
2979 .show = proto_seq_show,
2980};
2981
2982static int proto_seq_open(struct inode *inode, struct file *file)
2983{
14e943db
ED
2984 return seq_open_net(inode, file, &proto_seq_ops,
2985 sizeof(struct seq_net_private));
1da177e4
LT
2986}
2987
9a32144e 2988static const struct file_operations proto_seq_fops = {
1da177e4
LT
2989 .owner = THIS_MODULE,
2990 .open = proto_seq_open,
2991 .read = seq_read,
2992 .llseek = seq_lseek,
14e943db
ED
2993 .release = seq_release_net,
2994};
2995
2996static __net_init int proto_init_net(struct net *net)
2997{
d4beaa66 2998 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
14e943db
ED
2999 return -ENOMEM;
3000
3001 return 0;
3002}
3003
3004static __net_exit void proto_exit_net(struct net *net)
3005{
ece31ffd 3006 remove_proc_entry("protocols", net->proc_net);
14e943db
ED
3007}
3008
3009
3010static __net_initdata struct pernet_operations proto_net_ops = {
3011 .init = proto_init_net,
3012 .exit = proto_exit_net,
1da177e4
LT
3013};
3014
3015static int __init proto_init(void)
3016{
14e943db 3017 return register_pernet_subsys(&proto_net_ops);
1da177e4
LT
3018}
3019
3020subsys_initcall(proto_init);
3021
3022#endif /* PROC_FS */