/* net/core/sock.c (mt8127/android_kernel_alcatel_ttab, tag v3.10.108) */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink :	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo	:	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <asm/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#include <net/af_unix.h>


#ifdef CONFIG_INET
#include <net/tcp.h>
#endif
#include <linux/xlog.h>

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

/**
 * sk_ns_capable - General socket capability test
 * @sk: Socket to use a capability on or through
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap in the
 * user namespace @user_ns when the socket was created, and that the
 * current process has it as well.
 */
bool sk_ns_capable(const struct sock *sk,
		   struct user_namespace *user_ns, int cap)
{
	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(sk_ns_capable);

/**
 * sk_capable - Socket global capability test
 * @sk: Socket to use a capability on or through
 * @cap: The global capability to use
 *
 * Test to see if the opener of the socket had the capability @cap in all
 * user namespaces when the socket was created, and that the current
 * process has it as well.
 */
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);

/**
 * sk_net_capable - Network namespace socket capability test
 * @sk: Socket to use a capability on or through
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap over
 * the network namespace the socket is a member of when the socket was
 * created, and that the current process has it as well.
 */
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);

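/*
 * Illustrative sketch (editor's example, not part of the original file):
 * a protocol handler would typically gate a privileged operation on one
 * of the helpers above, e.g.
 *
 *	if (!sk_net_capable(sk, CAP_NET_ADMIN))
 *		return -EPERM;
 *
 * sk_ns_capable() checks both the socket opener's capability, recorded
 * via the socket's file, and the current task's capability.
 */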

#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	struct proto *proto;
	int ret = 0;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry(proto, &proto_list, node) {
		if (proto->init_cgroup) {
			ret = proto->init_cgroup(memcg, ss);
			if (ret)
				goto out;
		}
	}

	mutex_unlock(&proto_list_mutex);
	return ret;
out:
	list_for_each_entry_continue_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
	return ret;
}

void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
{
	struct proto *proto;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
}
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#if defined(CONFIG_MEMCG_KMEM)
struct static_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);
#endif

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
  "slock-AF_NFC"   , "slock-AF_VSOCK"    , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_MAX"
};

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = (SK_RMEM_MAX*8);
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);

/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements.
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_key_slow_inc(&memalloc_socks);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);

void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_key_slow_dec(&memalloc_socks);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
	 * it has rmem allocations there is a risk that the user of the
	 * socket cannot make forward progress due to exceeding the rmem
	 * limits. By rights, sk_clear_memalloc() should only be called
	 * on sockets being torn down but warn and reset the accounting if
	 * that assumption breaks.
	 */
	if (WARN_ON(sk->sk_forward_alloc))
		sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);

int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned long pflags = current->flags;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	ret = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);

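/*
 * Editor's note, as a minimal sketch (not in the original source): the
 * PF_MEMALLOC dance above is the generic pattern for letting a task dip
 * into emergency reserves for the duration of one call:
 *
 *	unsigned long pflags = current->flags;
 *	current->flags |= PF_MEMALLOC;
 *	... allocation-prone work ...
 *	tsk_restore_flags(current, pflags, PF_MEMALLOC);
 *
 * tsk_restore_flags() restores only the PF_MEMALLOC bit to its saved
 * state, so a caller that already had PF_MEMALLOC set keeps it.
 */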
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

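/*
 * Editor's worked example (not in the original source), assuming HZ=100
 * so that 1000000/HZ = 10000: a timeout of { .tv_sec = 2, .tv_usec = 500000 }
 * converts to 2*100 + (500000 + 9999)/10000 = 200 + 50 = 250 jiffies,
 * while { .tv_sec = 2, .tv_usec = 500001 } converts to
 * 200 + (500001 + 9999)/10000 = 200 + 51 = 251 jiffies. The (1000000/HZ - 1)
 * term makes the usec part round up, so a requested timeout is never
 * shortened by the conversion.
 */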
static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
			warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue. Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
	return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

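/*
 * Editor's sketch of a typical caller (illustrative only; the function
 * name is hypothetical): a datagram protocol's receive path hands a
 * freshly built skb to the owning socket, and must free it itself on
 * failure, since sock_queue_rcv_skb() does not consume the skb on error:
 *
 *	static int myproto_rcv(struct sock *sk, struct sk_buff *skb)
 *	{
 *		int err = sock_queue_rcv_skb(sk, skb);
 *		if (err < 0)
 *			kfree_skb(skb);
 *		return err;
 *	}
 */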
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

void sk_reset_txq(struct sock *sk)
{
	sk_tx_queue_clear(sk);
}
EXPORT_SYMBOL(sk_reset_txq);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_setbindtodevice(struct sock *sk, char __user *optval,
				int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

static int sock_getbindtodevice(struct sock *sk, char __user *optval,
				int __user *optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (sk->sk_bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_user(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (put_user(len, optlen))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_setbindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_REUSEPORT:
		sk->sk_reuseport = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, sysctl_rmem_max);
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead. Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
		break;

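	/*
	 * Editor's illustrative note (not in the original source): the
	 * doubling above is visible from userspace, e.g.
	 *
	 *	int val = 65536;
	 *	socklen_t len = sizeof(val);
	 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
	 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, &len);
	 *	// val now reads back 131072 (assuming 65536 <= rmem_max)
	 *
	 * so getsockopt() reports the value actually used for accounting,
	 * not the value the application passed in.
	 */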
	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP &&
		    sk->sk_type == SOCK_STREAM)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) ||
		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool) {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
				  val & SOF_TIMESTAMPING_TX_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
				  val & SOF_TIMESTAMPING_TX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
				  val & SOF_TIMESTAMPING_RX_HARDWARE);
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
				  val & SOF_TIMESTAMPING_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
				  val & SOF_TIMESTAMPING_SYS_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
				  val & SOF_TIMESTAMPING_RAW_HARDWARE);
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_LOCK_FILTER:
		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
			ret = -EPERM;
		else
			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_PEEK_OFF:
		if (sock->ops->set_peek_off)
			ret = sock->ops->set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	case SO_SELECT_ERR_QUEUE:
		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
		break;

	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);


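/*
 * Editor's usage sketch (not in the original source): from userspace the
 * SO_LINGER branch above is driven with a struct linger, e.g.
 *
 *	struct linger lg = { .l_onoff = 1, .l_linger = 5 };
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
 *
 * which makes close() block for up to 5 seconds while unsent data drains
 * (sk_lingertime is stored in jiffies, hence the "* HZ" conversion above).
 */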
void cred_to_ucred(struct pid *pid, const struct cred *cred,
		   struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = from_kuid_munged(current_ns, cred->euid);
		ucred->gid = from_kgid_munged(current_ns, cred->egid);
	}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_REUSEPORT:
		v.val = sk->sk_reuseport;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv = sizeof(v.ling);
		v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger = sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
			!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = 0;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
			v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	case SO_PEEK_OFF:
		if (!sock->ops->set_peek_off)
			return -EOPNOTSUPP;

		v.val = sk->sk_peek_off;
		break;
	case SO_NOFCS:
		v.val = sock_flag(sk, SOCK_NOFCS);
		break;

	case SO_BINDTODEVICE:
		return sock_getbindtodevice(sk, optval, optlen, len);

	case SO_GET_FILTER:
		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
		if (len < 0)
			return len;

		goto lenout;

	case SO_LOCK_FILTER:
		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
		break;

	case SO_SELECT_ERR_QUEUE:
		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end.
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
	unsigned long nulls1, nulls2;

	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
	if (nulls1 > nulls2)
		swap(nulls1, nulls2);

	if (nulls1 != 0)
		memset((char *)sk, 0, nulls1);
	memset((char *)sk + nulls1 + sizeof(void *), 0,
	       nulls2 - nulls1 - sizeof(void *));
	memset((char *)sk + nulls2 + sizeof(void *), 0,
	       size - nulls2 - sizeof(void *));
}
EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);

static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
				  int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			if (prot->clear_sk)
				prot->clear_sk(sk, prot->obj_size);
			else
				sk_prot_clear_nulls(sk, prot->obj_size);
		}
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
void sock_update_classid(struct sock *sk)
{
	u32 classid;

	classid = task_cls_classid(current);
	if (classid != sk->sk_classid)
		sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
#endif

#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
void sock_update_netprioidx(struct sock *sk)
{
	if (in_interrupt())
		return;

	sk->sk_cgrp_prioidx = task_netprioidx(current);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif

/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sock_lock_init(sk);
		sock_net_set(sk, get_net(net));
		atomic_set(&sk->sk_wmem_alloc, 1);

		sock_update_classid(sk);
		sock_update_netprioidx(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);

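/*
 * Editor's usage sketch (illustrative, not from the original source): a
 * protocol family's create hook typically pairs sk_alloc() with
 * sock_init_data(); names other than those two calls are hypothetical:
 *
 *	static int myproto_create(struct net *net, struct socket *sock,
 *				  int protocol, int kern)
 *	{
 *		struct sock *sk = sk_alloc(net, PF_INET, GFP_KERNEL,
 *					   &myproto_prot);
 *		if (!sk)
 *			return -ENOBUFS;
 *		sock_init_data(sock, sk);
 *		return 0;
 *	}
 */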
static void __sk_free(struct sock *sk)
{
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference_check(sk->sk_filter,
				       atomic_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}

void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc; if the result is nonzero,
	 * some packets are still in a tx queue and sock_wfree() will
	 * call __sk_free(sk) later, once they have been consumed.
	 */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);

/*
 * The last sock_put should drop the reference to sk->sk_net. It has
 * already been dropped in sk_change_net. Taking a reference to a
 * stopping namespace is not an option.
 * Take a reference to the socket to remove it from the hash _alive_,
 * and after that destroy it in the context of init_net.
 */
void sk_release_kernel(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_hold(sk);
	sock_release(sk->sk_socket);
	release_net(sock_net(sk));
	sock_net_set(sk, get_net(&init_net));
	sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);

static void sk_update_clone(const struct sock *sk, struct sock *newsk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		sock_update_memcg(newsk);
}

/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		newsk->sk_prot_creator = sk->sk_prot;

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		spin_lock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still raw copy of parent, so invalidate
			 * destructor and make plain sk_free() */
			newsk->sk_destruct = NULL;
			bh_unlock_sock(newsk);
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_err_soft = 0;
		newsk->sk_priority = 0;
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		sk_update_clone(sk, newsk);

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);

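/*
 * Editor's note (illustrative, not from the original source): per the
 * doc comment above, the caller owns the bh lock on the clone when the
 * call succeeds, so usage follows this shape:
 *
 *	struct sock *newsk = sk_clone_lock(sk, GFP_ATOMIC);
 *	if (newsk) {
 *		... finish protocol-specific initialization ...
 *		bh_unlock_sock(newsk);
 *	}
 */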
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
		}
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);

void sock_edemux(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

#ifdef CONFIG_INET
	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_put(inet_twsk(sk));
	else
#endif
		sock_put(sk);
}
EXPORT_SYMBOL(sock_edemux);

kuid_t sock_i_uid(struct sock *sk)
{
	kuid_t uid;

	/* mtk_net: fix kernel bug */
	if (!sk) {
		pr_info("sk == NULL for sock_i_uid\n");
		return GLOBAL_ROOT_UID;
	}

	read_lock_bh(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
	read_unlock_bh(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock_bh(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned int)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
EXPORT_SYMBOL(sock_kfree_s);

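/*
 * Editor's usage sketch (illustrative, not from the original source):
 * sock_kmalloc()/sock_kfree_s() must be paired with the same size so the
 * per-socket option-memory accounting in sk_omem_alloc balances out:
 *
 *	struct my_opt *opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
 *	if (!opt)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, sizeof(*opt));
 *
 * (struct my_opt is a hypothetical placeholder.)
 */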
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}


/* Debug function: dump AF_UNIX socket and peer info. */
static int sock_dump_info(struct sock *sk)
{
	/* The 128-byte receive-queue dump is kept below, commented out. */
	/* struct sk_buff *skb; */
	/* char skbmsg[128]; */

	if (sk->sk_family == AF_UNIX) {
		struct unix_sock *u = unix_sk(sk);
		struct sock *other = NULL;

		if ((u->path.dentry != NULL) && (u->path.dentry->d_iname != NULL)) {
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: socket-Name:%s \n",
			       u->path.dentry->d_iname);
#endif
		} else {
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: socket Name (NULL)\n");
#endif
		}

		if (sk->sk_socket && SOCK_INODE(sk->sk_socket)) {
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: socket Inode[%lu]\n",
			       SOCK_INODE(sk->sk_socket)->i_ino);
#endif
		}

		other = unix_sk(sk)->peer;
		if (!other) {
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: peer is (NULL) \n");
#endif
		} else {
			if ((((struct unix_sock *)other)->path.dentry != NULL) &&
			    (((struct unix_sock *)other)->path.dentry->d_iname != NULL)) {
#ifdef CONFIG_MTK_NET_LOGGING
				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name:%s \n",
				       ((struct unix_sock *)other)->path.dentry->d_iname);
#endif
			} else {
#ifdef CONFIG_MTK_NET_LOGGING
				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name (NULL) \n");
#endif
			}

			if (other->sk_socket && SOCK_INODE(other->sk_socket)) {
#ifdef CONFIG_MTK_NET_LOGGING
				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Inode [%lu] \n",
				       SOCK_INODE(other->sk_socket)->i_ino);
#endif
			}
#ifdef CONFIG_MTK_NET_LOGGING
			printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Receive Queue len:%d \n",
			       other->sk_receive_queue.qlen);
#endif
			/* Dump of the first 128 bytes of the peer receive queue,
			 * kept from the original source but disabled:
			 *
			 * if ((skb = skb_peek_tail(&other->sk_receive_queue)) == NULL) {
			 *	printk(KERN_INFO "sockdbg: Peer Receive Queue is null (warning) \n");
			 * } else {
			 *	int i = 0, len = 0;
			 *	if ((skb->len != 0) && (skb->data != NULL)) {
			 *		if (skb->len >= 127)
			 *			len = 127;
			 *		else
			 *			len = skb->len;
			 *		for (i = 0; i < len; i++)
			 *			sprintf(skbmsg + i, "%x", skb->data[i]);
			 *		skbmsg[len] = '\0';
			 *		printk(KERN_INFO "sockdbg: Peer Receive Queue dump(%d bytes):%s\n",
			 *		       len, skbmsg);
			 *	} else {
			 *		printk(KERN_INFO "sockdbg: Peer Receive skb error \n");
			 *	}
			 * }
			 */
		}
	}

	return 0;
}

/*
 *	Generic send/receive buffer handlers
 */

struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;
	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;

	err = -EMSGSIZE;
	if (npages > MAX_SKB_FRAGS)
		goto failure;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			if (skb) {
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					__skb_fill_page_desc(skb, i,
							page, 0,
							(data_len >= PAGE_SIZE ?
							 PAGE_SIZE :
							 data_len));
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;

		sock_dump_info(sk);
#ifdef CONFIG_MTK_NET_LOGGING
		printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem, timeo=%ld, wmem=%d, sndbuf=%d\n",
		       timeo, atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf);
#endif
		timeo = sock_wait_for_wmem(sk, timeo);
#ifdef CONFIG_MTK_NET_LOGGING
		printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem done, header_len=0x%lx, data_len=0x%lx, timeo=%ld\n",
		       header_len, data_len, timeo);
#endif
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
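
/*
 * A usage sketch, assuming a datagram-style sendmsg() path: the skb is
 * allocated via sock_alloc_send_skb() so it is charged against sk_sndbuf
 * and the caller blocks (subject to the socket's send timeout) while the
 * send buffer is full. example_build_dgram() and the MAX_HEADER headroom
 * choice are illustrative only.
 */
static struct sk_buff * __maybe_unused example_build_dgram(struct sock *sk,
							   size_t payload,
							   int noblock)
{
	struct sk_buff *skb;
	int err;

	skb = sock_alloc_send_skb(sk, payload + MAX_HEADER, noblock, &err);
	if (!skb)
		return ERR_PTR(err);	/* -EAGAIN, -EPIPE, -EINTR, ... */

	skb_reserve(skb, MAX_HEADER);			/* room for headers */
	memset(skb_put(skb, payload), 0, payload);	/* payload copy goes here */
	return skb;
}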

/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)

bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
{
	int order;

	if (pfrag->page) {
		if (atomic_read(&pfrag->page->_count) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset < pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	/* We restrict high order allocations to users that can afford to wait */
	order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;

	do {
		gfp_t gfp = sk->sk_allocation;

		if (order)
			gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
		pfrag->page = alloc_pages(gfp, order);
		if (likely(pfrag->page)) {
			pfrag->offset = 0;
			pfrag->size = PAGE_SIZE << order;
			return true;
		}
	} while (--order >= 0);

	sk_enter_memory_pressure(sk);
	sk_stream_moderate_sndbuf(sk);
	return false;
}
EXPORT_SYMBOL(sk_page_frag_refill);
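
/*
 * A usage sketch of sk_page_frag_refill(): a sendmsg() fast path can
 * append user data to the per-socket page frag, in the spirit of the
 * TCP transmit path. example_append_to_frag() is hypothetical.
 */
static int __maybe_unused example_append_to_frag(struct sock *sk,
						 const void *data, int len)
{
	struct page_frag *pfrag = sk_page_frag(sk);
	char *vaddr;
	int copy;

	if (!sk_page_frag_refill(sk, pfrag))
		return -EAGAIN;		/* memory pressure: wait and retry */

	copy = min_t(int, len, pfrag->size - pfrag->offset);
	vaddr = kmap_atomic(pfrag->page);
	memcpy(vaddr + pfrag->offset, data, copy);
	kunmap_atomic(vaddr);
	pfrag->offset += copy;		/* consume space; keep the page ref */
	return copy;
}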

static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);

	/*
	 * Doing the zeroing here guarantees we cannot loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);

/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: allocation type
 *
 *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 *	rmem allocation. This function assumes that protocols which have
 *	memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	struct proto *prot = sk->sk_prot;
	int amt = sk_mem_pages(size);
	long allocated;
	int parent_status = UNDER_LIMIT;

	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;

	allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	/* Under limit. */
	if (parent_status == UNDER_LIMIT &&
			allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. (we or our parents) */
	if ((parent_status > SOFT_LIMIT) ||
			allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit (we or our parents) */
	if ((parent_status == OVER_LIMIT) ||
			(allocated > sk_prot_mem_limits(sk, 2)))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;

	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (atomic_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
				return 1;
	}

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	trace_sock_exceed_buf_limit(sk, prot, allocated);

	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;

	sk_memory_allocated_sub(sk, amt);

	return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);

/**
 *	__sk_mem_reclaim - reclaim memory_allocated
 *	@sk: socket
 *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
 */
void __sk_mem_reclaim(struct sock *sk, int amount)
{
	amount >>= SK_MEM_QUANTUM_SHIFT;
	sk_memory_allocated_sub(sk, amount);
	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;

	if (sk_under_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
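
/*
 * A usage sketch: protocols normally reach __sk_mem_schedule() and
 * __sk_mem_reclaim() through the sk_rmem_schedule()/sk_mem_charge()/
 * sk_mem_reclaim() wrappers in include/net/sock.h rather than calling
 * them directly. A receive-side charge, with hypothetical naming:
 */
static int __maybe_unused example_charge_rmem(struct sock *sk,
					      struct sk_buff *skb)
{
	/* may invoke __sk_mem_schedule(sk, ..., SK_MEM_RECV) internally */
	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return -ENOBUFS;

	skb_set_owner_r(skb, sk);	/* charges sk_rmem_alloc/forward_alloc */
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	return 0;
}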

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
EXPORT_SYMBOL(sock_no_sendpage);
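
/*
 * A sketch of how the stubs above are meant to be consumed: a protocol
 * that implements almost nothing can point every unsupported method at
 * the matching sock_no_*() default. The family value and the release
 * handler below are placeholders, not a real protocol.
 */
static int example_release(struct socket *sock)
{
	return 0;	/* a real protocol must detach and free sock->sk here */
}

static const struct proto_ops example_minimal_ops __maybe_unused = {
	.family		= PF_MAX - 1,	/* placeholder family */
	.owner		= THIS_MODULE,
	.release	= example_release,
	.bind		= sock_no_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll		= sock_no_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_no_setsockopt,
	.getsockopt	= sock_no_getsockopt,
	.sendmsg	= sock_no_sendmsg,
	.recvmsg	= sock_no_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};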

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}

static void sock_def_readable(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
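
/*
 * A usage sketch for the timer helpers above: sk_reset_timer() takes a
 * socket reference only when the timer was not already pending, and
 * sk_stop_timer() drops it only when a pending timer is really deleted,
 * so the sock cannot be freed under the handler. example_* names are
 * hypothetical.
 */
static void __maybe_unused example_arm_timer(struct sock *sk,
					     struct timer_list *t)
{
	sk_reset_timer(sk, t, jiffies + HZ);	/* fire in ~1 second */
}

static void __maybe_unused example_disarm_timer(struct sock *sk,
						struct timer_list *t)
{
	sk_stop_timer(sk, t);	/* releases the ref iff it was pending */
}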

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
	} else
		sk->sk_wq = NULL;

	spin_lock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_frag.page = NULL;
	sk->sk_frag.offset = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = ktime_set(-1L, 0);

	sk->sk_pacing_rate = ~0U;
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);

void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);

/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small sections, where the process
 * won't block.
 *
 * Returns false if the fast path was taken:
 * sk_lock.slock locked, owned = 0, BH disabled
 *
 * Returns true if the slow path was taken:
 * sk_lock.slock unlocked, owned = 1, BH enabled
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Note : We must disable BH
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);
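
/*
 * A usage sketch: the caller must feed lock_sock_fast()'s return value
 * back into unlock_sock_fast() so the matching unlock path is taken.
 * example_queue_len() is hypothetical.
 */
static int __maybe_unused example_queue_len(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);
	int len = skb_queue_len(&sk->sk_receive_queue);

	unlock_sock_fast(sk, slow);
	return len;
}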

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (!(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what is the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket. But the net still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight, because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and will be purged by
	 * the socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);

#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

static int __net_init sock_inuse_init_net(struct net *net)
{
	net->core.inuse = alloc_percpu(struct prot_inuse);
	return net->core.inuse ? 0 : -ENOMEM;
}

static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				pr_crit("%s: Can't create request sock SLAB cache!\n",
					prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	if (prot->rsk_prot)
		kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);
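
/*
 * A usage sketch for proto_register(): a minimal struct proto backed by
 * its own slab cache (alloc_slab = 1), unregistered again on teardown.
 * All example_* names are hypothetical; obj_size must cover the
 * protocol's private sock structure.
 */
struct example_sock {
	struct sock sk;		/* must come first */
};

static struct proto example_proto __maybe_unused = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct example_sock),
};

static int __maybe_unused example_proto_init(void)
{
	return proto_register(&example_proto, 1);	/* 1 => create slab */
}

static void __maybe_unused example_proto_exit(void)
{
	proto_unregister(&example_proto);
}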

void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(prot->rsk_prot->slab_name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}

static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}


static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* PROC_FS */