1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Alan Cox, <A.Cox@swansea.ac.uk>
14 *
15 * Fixes:
16 * Alan Cox : Numerous verify_area() problems
17 * Alan Cox : Connecting on a connecting socket
18 * now returns an error for tcp.
19 * Alan Cox : sock->protocol is set correctly.
20 * and is not sometimes left as 0.
21 * Alan Cox : connect handles icmp errors on a
22 * connect properly. Unfortunately there
23 * is a restart syscall nasty there. I
24 * can't match BSD without hacking the C
25 * library. Ideas urgently sought!
26 * Alan Cox : Disallow bind() to addresses that are
27 * not ours - especially broadcast ones!!
28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
30 * instead they leave that for the DESTROY timer.
31 * Alan Cox : Clean up error flag in accept
32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
33 * was buggy. Put a remove_sock() in the handler
34 * for memory when we hit 0. Also altered the timer
35 * code. The ACK stuff can wait and needs major
36 * TCP layer surgery.
37 * Alan Cox : Fixed TCP ack bug, removed remove sock
38 * and fixed timer/inet_bh race.
39 * Alan Cox : Added zapped flag for TCP
40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45 * Rick Sladkey : Relaxed UDP rules for matching packets.
46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
47 * Pauline Middelink : identd support
48 * Alan Cox : Fixed connect() taking signals I think.
49 * Alan Cox : SO_LINGER supported
50 * Alan Cox : Error reporting fixes
51 * Anonymous : inet_create tidied up (sk->reuse setting)
52 * Alan Cox : inet sockets don't set sk->type!
53 * Alan Cox : Split socket option code
54 * Alan Cox : Callbacks
55 * Alan Cox : Nagle flag for Charles & Johannes stuff
56 * Alex : Removed restriction on inet fioctl
57 * Alan Cox : Splitting INET from NET core
58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
60 * Alan Cox : Split IP from generic code
61 * Alan Cox : New kfree_skbmem()
62 * Alan Cox : Make SO_DEBUG superuser only.
63 * Alan Cox : Allow anyone to clear SO_DEBUG
64 * (compatibility fix)
65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
66 * Alan Cox : Allocator for a socket is settable.
67 * Alan Cox : SO_ERROR includes soft errors.
68 * Alan Cox : Allow NULL arguments on some SO_ opts
69 * Alan Cox : Generic socket allocation to make hooks
70 * easier (suggested by Craig Metz).
71 * Michael Pall : SO_ERROR returns positive errno again
72 * Steve Whitehouse: Added default destructor to free
73 * protocol private data.
74 * Steve Whitehouse: Added various other default routines
75 * common to several socket families.
76 * Chris Evans : Call suser() check last on F_SETOWN
77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
79 * Andi Kleen : Fix write_space callback
80 * Chris Evans : Security fixes - signedness again
81 * Arnaldo C. Melo : cleanups, use skb_queue_purge
82 *
83 * To Fix:
84 *
85 *
86 * This program is free software; you can redistribute it and/or
87 * modify it under the terms of the GNU General Public License
88 * as published by the Free Software Foundation; either version
89 * 2 of the License, or (at your option) any later version.
90 */
91
92 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/types.h>
97 #include <linux/socket.h>
98 #include <linux/in.h>
99 #include <linux/kernel.h>
100 #include <linux/module.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/sched.h>
104 #include <linux/timer.h>
105 #include <linux/string.h>
106 #include <linux/sockios.h>
107 #include <linux/net.h>
108 #include <linux/mm.h>
109 #include <linux/slab.h>
110 #include <linux/interrupt.h>
111 #include <linux/poll.h>
112 #include <linux/tcp.h>
113 #include <linux/init.h>
114 #include <linux/highmem.h>
115 #include <linux/user_namespace.h>
116 #include <linux/static_key.h>
117 #include <linux/memcontrol.h>
118 #include <linux/prefetch.h>
119
120 #include <asm/uaccess.h>
121
122 #include <linux/netdevice.h>
123 #include <net/protocol.h>
124 #include <linux/skbuff.h>
125 #include <net/net_namespace.h>
126 #include <net/request_sock.h>
127 #include <net/sock.h>
128 #include <linux/net_tstamp.h>
129 #include <net/xfrm.h>
130 #include <linux/ipsec.h>
131 #include <net/cls_cgroup.h>
132 #include <net/netprio_cgroup.h>
133
134 #include <linux/filter.h>
135
136 #include <trace/events/sock.h>
137
138 #include <net/af_unix.h>
139
140
141 #ifdef CONFIG_INET
142 #include <net/tcp.h>
143 #endif
144 #include <linux/xlog.h>
145
146 static DEFINE_MUTEX(proto_list_mutex);
147 static LIST_HEAD(proto_list);
148
149 /**
150 * sk_ns_capable - General socket capability test
151 * @sk: Socket to use a capability on or through
152 * @user_ns: The user namespace of the capability to use
153 * @cap: The capability to use
154 *
155  * Test to see if the opener of the socket had the capability @cap when
156  * the socket was created and if the current process has the capability
157  * @cap in the user namespace @user_ns.
158 */
159 bool sk_ns_capable(const struct sock *sk,
160 struct user_namespace *user_ns, int cap)
161 {
162 return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
163 ns_capable(user_ns, cap);
164 }
165 EXPORT_SYMBOL(sk_ns_capable);
166
167 /**
168 * sk_capable - Socket global capability test
169 * @sk: Socket to use a capability on or through
170  * @cap: The global capability to use
171  *
172  * Test to see if the opener of the socket had the capability @cap when
173  * the socket was created and if the current process has the capability
174  * @cap in all user namespaces.
175 */
176 bool sk_capable(const struct sock *sk, int cap)
177 {
178 return sk_ns_capable(sk, &init_user_ns, cap);
179 }
180 EXPORT_SYMBOL(sk_capable);
181
182 /**
183 * sk_net_capable - Network namespace socket capability test
184 * @sk: Socket to use a capability on or through
185 * @cap: The capability to use
186 *
187  * Test to see if the opener of the socket had the capability @cap when the
188  * socket was created and if the current process has the capability @cap over
189  * the network namespace the socket is a member of.
190 */
191 bool sk_net_capable(const struct sock *sk, int cap)
192 {
193 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
194 }
195 EXPORT_SYMBOL(sk_net_capable);
196
197
198 #ifdef CONFIG_MEMCG_KMEM
199 int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
200 {
201 struct proto *proto;
202 int ret = 0;
203
204 mutex_lock(&proto_list_mutex);
205 list_for_each_entry(proto, &proto_list, node) {
206 if (proto->init_cgroup) {
207 ret = proto->init_cgroup(memcg, ss);
208 if (ret)
209 goto out;
210 }
211 }
212
213 mutex_unlock(&proto_list_mutex);
214 return ret;
215 out:
216 list_for_each_entry_continue_reverse(proto, &proto_list, node)
217 if (proto->destroy_cgroup)
218 proto->destroy_cgroup(memcg);
219 mutex_unlock(&proto_list_mutex);
220 return ret;
221 }
222
223 void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
224 {
225 struct proto *proto;
226
227 mutex_lock(&proto_list_mutex);
228 list_for_each_entry_reverse(proto, &proto_list, node)
229 if (proto->destroy_cgroup)
230 proto->destroy_cgroup(memcg);
231 mutex_unlock(&proto_list_mutex);
232 }
233 #endif
234
235 /*
236 * Each address family might have different locking rules, so we have
237 * one slock key per address family:
238 */
239 static struct lock_class_key af_family_keys[AF_MAX];
240 static struct lock_class_key af_family_slock_keys[AF_MAX];
241
242 #if defined(CONFIG_MEMCG_KMEM)
243 struct static_key memcg_socket_limit_enabled;
244 EXPORT_SYMBOL(memcg_socket_limit_enabled);
245 #endif
246
247 /*
248 * Make lock validator output more readable. (we pre-construct these
249 * strings build-time, so that runtime initialization of socket
250 * locks is fast):
251 */
252 static const char *const af_family_key_strings[AF_MAX+1] = {
253 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
254 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
255 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
256 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
257 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
258 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
259 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
260 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
261 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
262 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
263 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
264 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
265 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
266 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
267 };
268 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
269 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
270 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
271 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
272 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
273 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
274 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
275 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
276 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
277 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
278 "slock-27" , "slock-28" , "slock-AF_CAN" ,
279 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
280 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
281 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
282 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
283 };
284 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
285 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
286 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
287 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
288 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
289 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
290 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
291 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
292 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
293 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
294 "clock-27" , "clock-28" , "clock-AF_CAN" ,
295 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
296 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
297 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
298 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
299 };
300
301 /*
302 * sk_callback_lock locking rules are per-address-family,
303 * so split the lock classes by using a per-AF key:
304 */
305 static struct lock_class_key af_callback_keys[AF_MAX];
306
307 /* Take into consideration the size of the struct sk_buff overhead in the
308 * determination of these values, since that is non-constant across
309 * platforms. This makes socket queueing behavior and performance
310 * not depend upon such differences.
311 */
312 #define _SK_MEM_PACKETS 256
313 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
314 #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
315 #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
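/*
 * Worked numbers (illustrative, not from this file): SKB_TRUESIZE(256) is
 * 256 bytes of payload plus the aligned sizes of struct sk_buff and
 * struct skb_shared_info, i.e. very roughly 0.5-1 KB per packet depending
 * on the architecture and config.  Multiplied by _SK_MEM_PACKETS (256),
 * the resulting SK_WMEM_MAX / SK_RMEM_MAX defaults land in the low
 * hundreds of kilobytes per socket.
 */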
316
317 /* Run time adjustable parameters. */
318 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
319 EXPORT_SYMBOL(sysctl_wmem_max);
320 __u32 sysctl_rmem_max __read_mostly = (SK_RMEM_MAX*8);
321 EXPORT_SYMBOL(sysctl_rmem_max);
322 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
323 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
324
325 /* Maximal space eaten by iovec or ancillary data plus some space */
326 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
327 EXPORT_SYMBOL(sysctl_optmem_max);
328
329 struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
330 EXPORT_SYMBOL_GPL(memalloc_socks);
331
332 /**
333 * sk_set_memalloc - sets %SOCK_MEMALLOC
334 * @sk: socket to set it on
335 *
336 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
337 * It's the responsibility of the admin to adjust min_free_kbytes
338 * to meet the requirements
339 */
340 void sk_set_memalloc(struct sock *sk)
341 {
342 sock_set_flag(sk, SOCK_MEMALLOC);
343 sk->sk_allocation |= __GFP_MEMALLOC;
344 static_key_slow_inc(&memalloc_socks);
345 }
346 EXPORT_SYMBOL_GPL(sk_set_memalloc);
347
348 void sk_clear_memalloc(struct sock *sk)
349 {
350 sock_reset_flag(sk, SOCK_MEMALLOC);
351 sk->sk_allocation &= ~__GFP_MEMALLOC;
352 static_key_slow_dec(&memalloc_socks);
353
354 /*
355 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
356 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
357 * it has rmem allocations there is a risk that the user of the
358 * socket cannot make forward progress due to exceeding the rmem
359 * limits. By rights, sk_clear_memalloc() should only be called
360 * on sockets being torn down but warn and reset the accounting if
361 * that assumption breaks.
362 */
363 if (WARN_ON(sk->sk_forward_alloc))
364 sk_mem_reclaim(sk);
365 }
366 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
367
368 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
369 {
370 int ret;
371 unsigned long pflags = current->flags;
372
373 /* these should have been dropped before queueing */
374 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
375
376 current->flags |= PF_MEMALLOC;
377 ret = sk->sk_backlog_rcv(sk, skb);
378 tsk_restore_flags(current, pflags, PF_MEMALLOC);
379
380 return ret;
381 }
382 EXPORT_SYMBOL(__sk_backlog_rcv);
383
384 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
385 {
386 struct timeval tv;
387
388 if (optlen < sizeof(tv))
389 return -EINVAL;
390 if (copy_from_user(&tv, optval, sizeof(tv)))
391 return -EFAULT;
392 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
393 return -EDOM;
394
395 if (tv.tv_sec < 0) {
396 static int warned __read_mostly;
397
398 *timeo_p = 0;
399 if (warned < 10 && net_ratelimit()) {
400 warned++;
401 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
402 __func__, current->comm, task_pid_nr(current));
403 }
404 return 0;
405 }
406 *timeo_p = MAX_SCHEDULE_TIMEOUT;
407 if (tv.tv_sec == 0 && tv.tv_usec == 0)
408 return 0;
409 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
410 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
411 return 0;
412 }
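/*
 * Conversion example (assumes HZ == 100, i.e. 10 ms jiffies): a user
 * timeval of { .tv_sec = 1, .tv_usec = 5000 } set via SO_RCVTIMEO becomes
 * 1 * 100 + (5000 + 9999) / 10000 = 101 jiffies in sock_set_timeout()
 * above, the usec part being rounded up to a whole jiffy.  A zero timeval
 * keeps MAX_SCHEDULE_TIMEOUT, i.e. block indefinitely.
 */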
413
414 static void sock_warn_obsolete_bsdism(const char *name)
415 {
416 static int warned;
417 static char warncomm[TASK_COMM_LEN];
418 if (strcmp(warncomm, current->comm) && warned < 5) {
419 strcpy(warncomm, current->comm);
420 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
421 warncomm, name);
422 warned++;
423 }
424 }
425
426 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
427 {
428 if (sk->sk_flags & flags) {
429 sk->sk_flags &= ~flags;
430 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
431 net_disable_timestamp();
432 }
433 }
434
435
436 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
437 {
438 int err;
439 int skb_len;
440 unsigned long flags;
441 struct sk_buff_head *list = &sk->sk_receive_queue;
442
443 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
444 atomic_inc(&sk->sk_drops);
445 trace_sock_rcvqueue_full(sk, skb);
446 return -ENOMEM;
447 }
448
449 err = sk_filter(sk, skb);
450 if (err)
451 return err;
452
453 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
454 atomic_inc(&sk->sk_drops);
455 return -ENOBUFS;
456 }
457
458 skb->dev = NULL;
459 skb_set_owner_r(skb, sk);
460
461 /* Cache the SKB length before we tack it onto the receive
462 * queue. Once it is added it no longer belongs to us and
463 * may be freed by other threads of control pulling packets
464 * from the queue.
465 */
466 skb_len = skb->len;
467
468 /* we escape from rcu protected region, make sure we dont leak
469 * a norefcounted dst
470 */
471 skb_dst_force(skb);
472
473 spin_lock_irqsave(&list->lock, flags);
474 skb->dropcount = atomic_read(&sk->sk_drops);
475 __skb_queue_tail(list, skb);
476 spin_unlock_irqrestore(&list->lock, flags);
477
478 if (!sock_flag(sk, SOCK_DEAD))
479 sk->sk_data_ready(sk, skb_len);
480 return 0;
481 }
482 EXPORT_SYMBOL(sock_queue_rcv_skb);
483
484 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
485 {
486 int rc = NET_RX_SUCCESS;
487
488 if (sk_filter(sk, skb))
489 goto discard_and_relse;
490
491 skb->dev = NULL;
492
493 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
494 atomic_inc(&sk->sk_drops);
495 goto discard_and_relse;
496 }
497 if (nested)
498 bh_lock_sock_nested(sk);
499 else
500 bh_lock_sock(sk);
501 if (!sock_owned_by_user(sk)) {
502 /*
503 * trylock + unlock semantics:
504 */
505 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
506
507 rc = sk_backlog_rcv(sk, skb);
508
509 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
510 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
511 bh_unlock_sock(sk);
512 atomic_inc(&sk->sk_drops);
513 goto discard_and_relse;
514 }
515
516 bh_unlock_sock(sk);
517 out:
518 sock_put(sk);
519 return rc;
520 discard_and_relse:
521 kfree_skb(skb);
522 goto out;
523 }
524 EXPORT_SYMBOL(sk_receive_skb);
525
526 void sk_reset_txq(struct sock *sk)
527 {
528 sk_tx_queue_clear(sk);
529 }
530 EXPORT_SYMBOL(sk_reset_txq);
531
532 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
533 {
534 struct dst_entry *dst = __sk_dst_get(sk);
535
536 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
537 sk_tx_queue_clear(sk);
538 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
539 dst_release(dst);
540 return NULL;
541 }
542
543 return dst;
544 }
545 EXPORT_SYMBOL(__sk_dst_check);
546
547 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
548 {
549 struct dst_entry *dst = sk_dst_get(sk);
550
551 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
552 sk_dst_reset(sk);
553 dst_release(dst);
554 return NULL;
555 }
556
557 return dst;
558 }
559 EXPORT_SYMBOL(sk_dst_check);
560
561 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
562 int optlen)
563 {
564 int ret = -ENOPROTOOPT;
565 #ifdef CONFIG_NETDEVICES
566 struct net *net = sock_net(sk);
567 char devname[IFNAMSIZ];
568 int index;
569
570 /* Sorry... */
571 ret = -EPERM;
572 if (!ns_capable(net->user_ns, CAP_NET_RAW))
573 goto out;
574
575 ret = -EINVAL;
576 if (optlen < 0)
577 goto out;
578
579 /* Bind this socket to a particular device like "eth0",
580 * as specified in the passed interface name. If the
581 * name is "" or the option length is zero the socket
582 * is not bound.
583 */
584 if (optlen > IFNAMSIZ - 1)
585 optlen = IFNAMSIZ - 1;
586 memset(devname, 0, sizeof(devname));
587
588 ret = -EFAULT;
589 if (copy_from_user(devname, optval, optlen))
590 goto out;
591
592 index = 0;
593 if (devname[0] != '\0') {
594 struct net_device *dev;
595
596 rcu_read_lock();
597 dev = dev_get_by_name_rcu(net, devname);
598 if (dev)
599 index = dev->ifindex;
600 rcu_read_unlock();
601 ret = -ENODEV;
602 if (!dev)
603 goto out;
604 }
605
606 lock_sock(sk);
607 sk->sk_bound_dev_if = index;
608 sk_dst_reset(sk);
609 release_sock(sk);
610
611 ret = 0;
612
613 out:
614 #endif
615
616 return ret;
617 }
618
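/*
 * Userspace sketch (illustrative): a process with CAP_NET_RAW can pin a
 * socket to one interface with
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0",
 *		   strlen("eth0") + 1);
 *
 * and unbind it again by passing an empty name or a zero option length,
 * which clears sk_bound_dev_if above.
 */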
619 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
620 int __user *optlen, int len)
621 {
622 int ret = -ENOPROTOOPT;
623 #ifdef CONFIG_NETDEVICES
624 struct net *net = sock_net(sk);
625 char devname[IFNAMSIZ];
626
627 if (sk->sk_bound_dev_if == 0) {
628 len = 0;
629 goto zero;
630 }
631
632 ret = -EINVAL;
633 if (len < IFNAMSIZ)
634 goto out;
635
636 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
637 if (ret)
638 goto out;
639
640 len = strlen(devname) + 1;
641
642 ret = -EFAULT;
643 if (copy_to_user(optval, devname, len))
644 goto out;
645
646 zero:
647 ret = -EFAULT;
648 if (put_user(len, optlen))
649 goto out;
650
651 ret = 0;
652
653 out:
654 #endif
655
656 return ret;
657 }
658
659 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
660 {
661 if (valbool)
662 sock_set_flag(sk, bit);
663 else
664 sock_reset_flag(sk, bit);
665 }
666
667 /*
668 * This is meant for all protocols to use and covers goings on
669 * at the socket level. Everything here is generic.
670 */
671
672 int sock_setsockopt(struct socket *sock, int level, int optname,
673 char __user *optval, unsigned int optlen)
674 {
675 struct sock *sk = sock->sk;
676 int val;
677 int valbool;
678 struct linger ling;
679 int ret = 0;
680
681 /*
682 * Options without arguments
683 */
684
685 if (optname == SO_BINDTODEVICE)
686 return sock_setbindtodevice(sk, optval, optlen);
687
688 if (optlen < sizeof(int))
689 return -EINVAL;
690
691 if (get_user(val, (int __user *)optval))
692 return -EFAULT;
693
694 valbool = val ? 1 : 0;
695
696 lock_sock(sk);
697
698 switch (optname) {
699 case SO_DEBUG:
700 if (val && !capable(CAP_NET_ADMIN))
701 ret = -EACCES;
702 else
703 sock_valbool_flag(sk, SOCK_DBG, valbool);
704 break;
705 case SO_REUSEADDR:
706 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
707 break;
708 case SO_REUSEPORT:
709 sk->sk_reuseport = valbool;
710 break;
711 case SO_TYPE:
712 case SO_PROTOCOL:
713 case SO_DOMAIN:
714 case SO_ERROR:
715 ret = -ENOPROTOOPT;
716 break;
717 case SO_DONTROUTE:
718 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
719 break;
720 case SO_BROADCAST:
721 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
722 break;
723 case SO_SNDBUF:
724 		/* Don't error on this; BSD doesn't, and if you think
725 		 * about it, this is right. Otherwise apps have to
726 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
727 		 * are treated in BSD as hints.
728 		 */
729 val = min_t(u32, val, sysctl_wmem_max);
730 set_sndbuf:
731 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
732 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
733 /* Wake up sending tasks if we upped the value. */
734 sk->sk_write_space(sk);
735 break;
736
737 case SO_SNDBUFFORCE:
738 if (!capable(CAP_NET_ADMIN)) {
739 ret = -EPERM;
740 break;
741 }
742 goto set_sndbuf;
743
744 case SO_RCVBUF:
745 		/* Don't error on this; BSD doesn't, and if you think
746 		 * about it, this is right. Otherwise apps have to
747 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
748 		 * are treated in BSD as hints.
749 		 */
750 val = min_t(u32, val, sysctl_rmem_max);
751 set_rcvbuf:
752 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
753 /*
754 * We double it on the way in to account for
755 * "struct sk_buff" etc. overhead. Applications
756 * assume that the SO_RCVBUF setting they make will
757 * allow that much actual data to be received on that
758 * socket.
759 *
760 * Applications are unaware that "struct sk_buff" and
761 * other overheads allocate from the receive buffer
762 * during socket buffer allocation.
763 *
764 * And after considering the possible alternatives,
765 * returning the value we actually used in getsockopt
766 * is the most desirable behavior.
767 */
768 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
769 break;
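		/*
		 * Example of the doubling above (illustrative): after
		 * setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &(int){ 65536 },
		 * sizeof(int)), sk_rcvbuf holds 131072 and a subsequent
		 * getsockopt(SO_RCVBUF) reports 131072, not 65536.
		 */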
770
771 case SO_RCVBUFFORCE:
772 if (!capable(CAP_NET_ADMIN)) {
773 ret = -EPERM;
774 break;
775 }
776 goto set_rcvbuf;
777
778 case SO_KEEPALIVE:
779 #ifdef CONFIG_INET
780 if (sk->sk_protocol == IPPROTO_TCP &&
781 sk->sk_type == SOCK_STREAM)
782 tcp_set_keepalive(sk, valbool);
783 #endif
784 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
785 break;
786
787 case SO_OOBINLINE:
788 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
789 break;
790
791 case SO_NO_CHECK:
792 sk->sk_no_check = valbool;
793 break;
794
795 case SO_PRIORITY:
796 if ((val >= 0 && val <= 6) ||
797 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
798 sk->sk_priority = val;
799 else
800 ret = -EPERM;
801 break;
802
803 case SO_LINGER:
804 if (optlen < sizeof(ling)) {
805 ret = -EINVAL; /* 1003.1g */
806 break;
807 }
808 if (copy_from_user(&ling, optval, sizeof(ling))) {
809 ret = -EFAULT;
810 break;
811 }
812 if (!ling.l_onoff)
813 sock_reset_flag(sk, SOCK_LINGER);
814 else {
815 #if (BITS_PER_LONG == 32)
816 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
817 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
818 else
819 #endif
820 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
821 sock_set_flag(sk, SOCK_LINGER);
822 }
823 break;
824
825 case SO_BSDCOMPAT:
826 sock_warn_obsolete_bsdism("setsockopt");
827 break;
828
829 case SO_PASSCRED:
830 if (valbool)
831 set_bit(SOCK_PASSCRED, &sock->flags);
832 else
833 clear_bit(SOCK_PASSCRED, &sock->flags);
834 break;
835
836 case SO_TIMESTAMP:
837 case SO_TIMESTAMPNS:
838 if (valbool) {
839 if (optname == SO_TIMESTAMP)
840 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
841 else
842 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
843 sock_set_flag(sk, SOCK_RCVTSTAMP);
844 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
845 } else {
846 sock_reset_flag(sk, SOCK_RCVTSTAMP);
847 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
848 }
849 break;
850
851 case SO_TIMESTAMPING:
852 if (val & ~SOF_TIMESTAMPING_MASK) {
853 ret = -EINVAL;
854 break;
855 }
856 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
857 val & SOF_TIMESTAMPING_TX_HARDWARE);
858 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
859 val & SOF_TIMESTAMPING_TX_SOFTWARE);
860 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
861 val & SOF_TIMESTAMPING_RX_HARDWARE);
862 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
863 sock_enable_timestamp(sk,
864 SOCK_TIMESTAMPING_RX_SOFTWARE);
865 else
866 sock_disable_timestamp(sk,
867 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
868 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
869 val & SOF_TIMESTAMPING_SOFTWARE);
870 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
871 val & SOF_TIMESTAMPING_SYS_HARDWARE);
872 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
873 val & SOF_TIMESTAMPING_RAW_HARDWARE);
874 break;
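		/*
		 * Illustrative userspace use of the flags handled above:
		 *
		 *	int f = SOF_TIMESTAMPING_RX_SOFTWARE |
		 *		SOF_TIMESTAMPING_SOFTWARE;
		 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &f, sizeof(f));
		 *
		 * enables software receive timestamps, which recvmsg() then
		 * delivers as SCM_TIMESTAMPING ancillary data.
		 */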
875
876 case SO_RCVLOWAT:
877 if (val < 0)
878 val = INT_MAX;
879 sk->sk_rcvlowat = val ? : 1;
880 break;
881
882 case SO_RCVTIMEO:
883 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
884 break;
885
886 case SO_SNDTIMEO:
887 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
888 break;
889
890 case SO_ATTACH_FILTER:
891 ret = -EINVAL;
892 if (optlen == sizeof(struct sock_fprog)) {
893 struct sock_fprog fprog;
894
895 ret = -EFAULT;
896 if (copy_from_user(&fprog, optval, sizeof(fprog)))
897 break;
898
899 ret = sk_attach_filter(&fprog, sk);
900 }
901 break;
902
903 case SO_DETACH_FILTER:
904 ret = sk_detach_filter(sk);
905 break;
906
907 case SO_LOCK_FILTER:
908 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
909 ret = -EPERM;
910 else
911 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
912 break;
913
914 case SO_PASSSEC:
915 if (valbool)
916 set_bit(SOCK_PASSSEC, &sock->flags);
917 else
918 clear_bit(SOCK_PASSSEC, &sock->flags);
919 break;
920 case SO_MARK:
921 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
922 ret = -EPERM;
923 else
924 sk->sk_mark = val;
925 break;
926
927 /* We implement the SO_SNDLOWAT etc to
928 not be settable (1003.1g 5.3) */
929 case SO_RXQ_OVFL:
930 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
931 break;
932
933 case SO_WIFI_STATUS:
934 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
935 break;
936
937 case SO_PEEK_OFF:
938 if (sock->ops->set_peek_off)
939 ret = sock->ops->set_peek_off(sk, val);
940 else
941 ret = -EOPNOTSUPP;
942 break;
943
944 case SO_NOFCS:
945 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
946 break;
947
948 case SO_SELECT_ERR_QUEUE:
949 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
950 break;
951
952 default:
953 ret = -ENOPROTOOPT;
954 break;
955 }
956 release_sock(sk);
957 return ret;
958 }
959 EXPORT_SYMBOL(sock_setsockopt);
960
961
962 void cred_to_ucred(struct pid *pid, const struct cred *cred,
963 struct ucred *ucred)
964 {
965 ucred->pid = pid_vnr(pid);
966 ucred->uid = ucred->gid = -1;
967 if (cred) {
968 struct user_namespace *current_ns = current_user_ns();
969
970 ucred->uid = from_kuid_munged(current_ns, cred->euid);
971 ucred->gid = from_kgid_munged(current_ns, cred->egid);
972 }
973 }
974 EXPORT_SYMBOL_GPL(cred_to_ucred);
975
976 int sock_getsockopt(struct socket *sock, int level, int optname,
977 char __user *optval, int __user *optlen)
978 {
979 struct sock *sk = sock->sk;
980
981 union {
982 int val;
983 struct linger ling;
984 struct timeval tm;
985 } v;
986
987 int lv = sizeof(int);
988 int len;
989
990 if (get_user(len, optlen))
991 return -EFAULT;
992 if (len < 0)
993 return -EINVAL;
994
995 memset(&v, 0, sizeof(v));
996
997 switch (optname) {
998 case SO_DEBUG:
999 v.val = sock_flag(sk, SOCK_DBG);
1000 break;
1001
1002 case SO_DONTROUTE:
1003 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1004 break;
1005
1006 case SO_BROADCAST:
1007 v.val = sock_flag(sk, SOCK_BROADCAST);
1008 break;
1009
1010 case SO_SNDBUF:
1011 v.val = sk->sk_sndbuf;
1012 break;
1013
1014 case SO_RCVBUF:
1015 v.val = sk->sk_rcvbuf;
1016 break;
1017
1018 case SO_REUSEADDR:
1019 v.val = sk->sk_reuse;
1020 break;
1021
1022 case SO_REUSEPORT:
1023 v.val = sk->sk_reuseport;
1024 break;
1025
1026 case SO_KEEPALIVE:
1027 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1028 break;
1029
1030 case SO_TYPE:
1031 v.val = sk->sk_type;
1032 break;
1033
1034 case SO_PROTOCOL:
1035 v.val = sk->sk_protocol;
1036 break;
1037
1038 case SO_DOMAIN:
1039 v.val = sk->sk_family;
1040 break;
1041
1042 case SO_ERROR:
1043 v.val = -sock_error(sk);
1044 if (v.val == 0)
1045 v.val = xchg(&sk->sk_err_soft, 0);
1046 break;
1047
1048 case SO_OOBINLINE:
1049 v.val = sock_flag(sk, SOCK_URGINLINE);
1050 break;
1051
1052 case SO_NO_CHECK:
1053 v.val = sk->sk_no_check;
1054 break;
1055
1056 case SO_PRIORITY:
1057 v.val = sk->sk_priority;
1058 break;
1059
1060 case SO_LINGER:
1061 lv = sizeof(v.ling);
1062 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1063 v.ling.l_linger = sk->sk_lingertime / HZ;
1064 break;
1065
1066 case SO_BSDCOMPAT:
1067 sock_warn_obsolete_bsdism("getsockopt");
1068 break;
1069
1070 case SO_TIMESTAMP:
1071 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1072 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1073 break;
1074
1075 case SO_TIMESTAMPNS:
1076 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1077 break;
1078
1079 case SO_TIMESTAMPING:
1080 v.val = 0;
1081 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
1082 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
1083 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
1084 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
1085 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
1086 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
1087 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1088 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
1089 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
1090 v.val |= SOF_TIMESTAMPING_SOFTWARE;
1091 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
1092 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
1093 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
1094 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
1095 break;
1096
1097 case SO_RCVTIMEO:
1098 lv = sizeof(struct timeval);
1099 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1100 v.tm.tv_sec = 0;
1101 v.tm.tv_usec = 0;
1102 } else {
1103 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1104 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1105 }
1106 break;
1107
1108 case SO_SNDTIMEO:
1109 lv = sizeof(struct timeval);
1110 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1111 v.tm.tv_sec = 0;
1112 v.tm.tv_usec = 0;
1113 } else {
1114 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1115 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1116 }
1117 break;
1118
1119 case SO_RCVLOWAT:
1120 v.val = sk->sk_rcvlowat;
1121 break;
1122
1123 case SO_SNDLOWAT:
1124 v.val = 1;
1125 break;
1126
1127 case SO_PASSCRED:
1128 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1129 break;
1130
1131 case SO_PEERCRED:
1132 {
1133 struct ucred peercred;
1134 if (len > sizeof(peercred))
1135 len = sizeof(peercred);
1136 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1137 if (copy_to_user(optval, &peercred, len))
1138 return -EFAULT;
1139 goto lenout;
1140 }
1141
1142 case SO_PEERNAME:
1143 {
1144 char address[128];
1145
1146 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1147 return -ENOTCONN;
1148 if (lv < len)
1149 return -EINVAL;
1150 if (copy_to_user(optval, address, len))
1151 return -EFAULT;
1152 goto lenout;
1153 }
1154
1155 /* Dubious BSD thing... Probably nobody even uses it, but
1156 * the UNIX standard wants it for whatever reason... -DaveM
1157 */
1158 case SO_ACCEPTCONN:
1159 v.val = sk->sk_state == TCP_LISTEN;
1160 break;
1161
1162 case SO_PASSSEC:
1163 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1164 break;
1165
1166 case SO_PEERSEC:
1167 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1168
1169 case SO_MARK:
1170 v.val = sk->sk_mark;
1171 break;
1172
1173 case SO_RXQ_OVFL:
1174 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1175 break;
1176
1177 case SO_WIFI_STATUS:
1178 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1179 break;
1180
1181 case SO_PEEK_OFF:
1182 if (!sock->ops->set_peek_off)
1183 return -EOPNOTSUPP;
1184
1185 v.val = sk->sk_peek_off;
1186 break;
1187 case SO_NOFCS:
1188 v.val = sock_flag(sk, SOCK_NOFCS);
1189 break;
1190
1191 case SO_BINDTODEVICE:
1192 return sock_getbindtodevice(sk, optval, optlen, len);
1193
1194 case SO_GET_FILTER:
1195 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1196 if (len < 0)
1197 return len;
1198
1199 goto lenout;
1200
1201 case SO_LOCK_FILTER:
1202 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1203 break;
1204
1205 case SO_SELECT_ERR_QUEUE:
1206 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1207 break;
1208
1209 default:
1210 return -ENOPROTOOPT;
1211 }
1212
1213 if (len > lv)
1214 len = lv;
1215 if (copy_to_user(optval, &v, len))
1216 return -EFAULT;
1217 lenout:
1218 if (put_user(len, optlen))
1219 return -EFAULT;
1220 return 0;
1221 }
1222
1223 /*
1224 * Initialize an sk_lock.
1225 *
1226 * (We also register the sk_lock with the lock validator.)
1227 */
1228 static inline void sock_lock_init(struct sock *sk)
1229 {
1230 sock_lock_init_class_and_name(sk,
1231 af_family_slock_key_strings[sk->sk_family],
1232 af_family_slock_keys + sk->sk_family,
1233 af_family_key_strings[sk->sk_family],
1234 af_family_keys + sk->sk_family);
1235 }
1236
1237 /*
1238 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1239  * even temporarily, because of RCU lookups. sk_node should also be left as is.
1240 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
1241 */
1242 static void sock_copy(struct sock *nsk, const struct sock *osk)
1243 {
1244 #ifdef CONFIG_SECURITY_NETWORK
1245 void *sptr = nsk->sk_security;
1246 #endif
1247 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1248
1249 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1250 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1251
1252 #ifdef CONFIG_SECURITY_NETWORK
1253 nsk->sk_security = sptr;
1254 security_sk_clone(osk, nsk);
1255 #endif
1256 }
1257
1258 void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1259 {
1260 unsigned long nulls1, nulls2;
1261
1262 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1263 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1264 if (nulls1 > nulls2)
1265 swap(nulls1, nulls2);
1266
1267 if (nulls1 != 0)
1268 memset((char *)sk, 0, nulls1);
1269 memset((char *)sk + nulls1 + sizeof(void *), 0,
1270 nulls2 - nulls1 - sizeof(void *));
1271 memset((char *)sk + nulls2 + sizeof(void *), 0,
1272 size - nulls2 - sizeof(void *));
1273 }
1274 EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
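/*
 * Note (explanatory, see Documentation/RCU/rculist_nulls.txt): the two
 * pointer-sized holes skipped above are skc_node.next and
 * skc_portaddr_node.next.  For protocols doing lockless
 * SLAB_DESTROY_BY_RCU lookups a concurrent reader may still follow those
 * links while the object is being recycled, so a fresh allocation must
 * not wipe them; everything around them is zeroed instead.
 */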
1275
1276 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1277 int family)
1278 {
1279 struct sock *sk;
1280 struct kmem_cache *slab;
1281
1282 slab = prot->slab;
1283 if (slab != NULL) {
1284 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1285 if (!sk)
1286 return sk;
1287 if (priority & __GFP_ZERO) {
1288 if (prot->clear_sk)
1289 prot->clear_sk(sk, prot->obj_size);
1290 else
1291 sk_prot_clear_nulls(sk, prot->obj_size);
1292 }
1293 } else
1294 sk = kmalloc(prot->obj_size, priority);
1295
1296 if (sk != NULL) {
1297 kmemcheck_annotate_bitfield(sk, flags);
1298
1299 if (security_sk_alloc(sk, family, priority))
1300 goto out_free;
1301
1302 if (!try_module_get(prot->owner))
1303 goto out_free_sec;
1304 sk_tx_queue_clear(sk);
1305 }
1306
1307 return sk;
1308
1309 out_free_sec:
1310 security_sk_free(sk);
1311 out_free:
1312 if (slab != NULL)
1313 kmem_cache_free(slab, sk);
1314 else
1315 kfree(sk);
1316 return NULL;
1317 }
1318
1319 static void sk_prot_free(struct proto *prot, struct sock *sk)
1320 {
1321 struct kmem_cache *slab;
1322 struct module *owner;
1323
1324 owner = prot->owner;
1325 slab = prot->slab;
1326
1327 security_sk_free(sk);
1328 if (slab != NULL)
1329 kmem_cache_free(slab, sk);
1330 else
1331 kfree(sk);
1332 module_put(owner);
1333 }
1334
1335 #if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1336 void sock_update_classid(struct sock *sk)
1337 {
1338 u32 classid;
1339
1340 classid = task_cls_classid(current);
1341 if (classid != sk->sk_classid)
1342 sk->sk_classid = classid;
1343 }
1344 EXPORT_SYMBOL(sock_update_classid);
1345 #endif
1346
1347 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1348 void sock_update_netprioidx(struct sock *sk)
1349 {
1350 if (in_interrupt())
1351 return;
1352
1353 sk->sk_cgrp_prioidx = task_netprioidx(current);
1354 }
1355 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1356 #endif
1357
1358 /**
1359 * sk_alloc - All socket objects are allocated here
1360 * @net: the applicable net namespace
1361 * @family: protocol family
1362 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1363 * @prot: struct proto associated with this new sock instance
1364 */
1365 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1366 struct proto *prot)
1367 {
1368 struct sock *sk;
1369
1370 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1371 if (sk) {
1372 sk->sk_family = family;
1373 /*
1374 * See comment in struct sock definition to understand
1375 * why we need sk_prot_creator -acme
1376 */
1377 sk->sk_prot = sk->sk_prot_creator = prot;
1378 sock_lock_init(sk);
1379 sock_net_set(sk, get_net(net));
1380 atomic_set(&sk->sk_wmem_alloc, 1);
1381
1382 sock_update_classid(sk);
1383 sock_update_netprioidx(sk);
1384 }
1385
1386 return sk;
1387 }
1388 EXPORT_SYMBOL(sk_alloc);
1389
1390 static void __sk_free(struct sock *sk)
1391 {
1392 struct sk_filter *filter;
1393
1394 if (sk->sk_destruct)
1395 sk->sk_destruct(sk);
1396
1397 filter = rcu_dereference_check(sk->sk_filter,
1398 atomic_read(&sk->sk_wmem_alloc) == 0);
1399 if (filter) {
1400 sk_filter_uncharge(sk, filter);
1401 RCU_INIT_POINTER(sk->sk_filter, NULL);
1402 }
1403
1404 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1405
1406 if (atomic_read(&sk->sk_omem_alloc))
1407 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1408 __func__, atomic_read(&sk->sk_omem_alloc));
1409
1410 if (sk->sk_peer_cred)
1411 put_cred(sk->sk_peer_cred);
1412 put_pid(sk->sk_peer_pid);
1413 put_net(sock_net(sk));
1414 sk_prot_free(sk->sk_prot_creator, sk);
1415 }
1416
1417 void sk_free(struct sock *sk)
1418 {
1419 /*
1420 * We subtract one from sk_wmem_alloc and can know if
1421 * some packets are still in some tx queue.
1422 * If not null, sock_wfree() will call __sk_free(sk) later
1423 */
1424 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1425 __sk_free(sk);
1426 }
1427 EXPORT_SYMBOL(sk_free);
1428
1429 /*
1430 * Last sock_put should drop reference to sk->sk_net. It has already
1431 * been dropped in sk_change_net. Taking reference to stopping namespace
1432 * is not an option.
1433 * Take reference to a socket to remove it from hash _alive_ and after that
1434 * destroy it in the context of init_net.
1435 */
1436 void sk_release_kernel(struct sock *sk)
1437 {
1438 if (sk == NULL || sk->sk_socket == NULL)
1439 return;
1440
1441 sock_hold(sk);
1442 sock_release(sk->sk_socket);
1443 release_net(sock_net(sk));
1444 sock_net_set(sk, get_net(&init_net));
1445 sock_put(sk);
1446 }
1447 EXPORT_SYMBOL(sk_release_kernel);
1448
1449 static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1450 {
1451 if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1452 sock_update_memcg(newsk);
1453 }
1454
1455 /**
1456 * sk_clone_lock - clone a socket, and lock its clone
1457 * @sk: the socket to clone
1458 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1459 *
1460 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1461 */
1462 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1463 {
1464 struct sock *newsk;
1465
1466 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1467 if (newsk != NULL) {
1468 struct sk_filter *filter;
1469
1470 sock_copy(newsk, sk);
1471
1472 /* SANITY */
1473 get_net(sock_net(newsk));
1474 sk_node_init(&newsk->sk_node);
1475 sock_lock_init(newsk);
1476 bh_lock_sock(newsk);
1477 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1478 newsk->sk_backlog.len = 0;
1479
1480 atomic_set(&newsk->sk_rmem_alloc, 0);
1481 /*
1482 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1483 */
1484 atomic_set(&newsk->sk_wmem_alloc, 1);
1485 atomic_set(&newsk->sk_omem_alloc, 0);
1486 skb_queue_head_init(&newsk->sk_receive_queue);
1487 skb_queue_head_init(&newsk->sk_write_queue);
1488 #ifdef CONFIG_NET_DMA
1489 skb_queue_head_init(&newsk->sk_async_wait_queue);
1490 #endif
1491
1492 spin_lock_init(&newsk->sk_dst_lock);
1493 rwlock_init(&newsk->sk_callback_lock);
1494 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1495 af_callback_keys + newsk->sk_family,
1496 af_family_clock_key_strings[newsk->sk_family]);
1497
1498 newsk->sk_dst_cache = NULL;
1499 newsk->sk_wmem_queued = 0;
1500 newsk->sk_forward_alloc = 0;
1501 newsk->sk_send_head = NULL;
1502 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1503
1504 sock_reset_flag(newsk, SOCK_DONE);
1505 skb_queue_head_init(&newsk->sk_error_queue);
1506
1507 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1508 if (filter != NULL)
1509 sk_filter_charge(newsk, filter);
1510
1511 if (unlikely(xfrm_sk_clone_policy(newsk))) {
1512 /* It is still raw copy of parent, so invalidate
1513 * destructor and make plain sk_free() */
1514 newsk->sk_destruct = NULL;
1515 bh_unlock_sock(newsk);
1516 sk_free(newsk);
1517 newsk = NULL;
1518 goto out;
1519 }
1520
1521 newsk->sk_err = 0;
1522 newsk->sk_priority = 0;
1523 /*
1524 * Before updating sk_refcnt, we must commit prior changes to memory
1525 * (Documentation/RCU/rculist_nulls.txt for details)
1526 */
1527 smp_wmb();
1528 atomic_set(&newsk->sk_refcnt, 2);
1529
1530 /*
1531 * Increment the counter in the same struct proto as the master
1532 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1533 * is the same as sk->sk_prot->socks, as this field was copied
1534 * with memcpy).
1535 *
1536 * This _changes_ the previous behaviour, where
1537 * tcp_create_openreq_child always was incrementing the
1538 	 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
1539 * to be taken into account in all callers. -acme
1540 */
1541 sk_refcnt_debug_inc(newsk);
1542 sk_set_socket(newsk, NULL);
1543 newsk->sk_wq = NULL;
1544
1545 sk_update_clone(sk, newsk);
1546
1547 if (newsk->sk_prot->sockets_allocated)
1548 sk_sockets_allocated_inc(newsk);
1549
1550 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1551 net_enable_timestamp();
1552 }
1553 out:
1554 return newsk;
1555 }
1556 EXPORT_SYMBOL_GPL(sk_clone_lock);
1557
1558 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1559 {
1560 __sk_dst_set(sk, dst);
1561 sk->sk_route_caps = dst->dev->features;
1562 if (sk->sk_route_caps & NETIF_F_GSO)
1563 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1564 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1565 if (sk_can_gso(sk)) {
1566 if (dst->header_len) {
1567 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1568 } else {
1569 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1570 sk->sk_gso_max_size = dst->dev->gso_max_size;
1571 sk->sk_gso_max_segs = dst->dev->gso_max_segs;
1572 }
1573 }
1574 }
1575 EXPORT_SYMBOL_GPL(sk_setup_caps);
1576
1577 /*
1578 * Simple resource managers for sockets.
1579 */
1580
1581
1582 /*
1583 * Write buffer destructor automatically called from kfree_skb.
1584 */
1585 void sock_wfree(struct sk_buff *skb)
1586 {
1587 struct sock *sk = skb->sk;
1588 unsigned int len = skb->truesize;
1589
1590 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1591 /*
1592 * Keep a reference on sk_wmem_alloc, this will be released
1593 * after sk_write_space() call
1594 */
1595 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1596 sk->sk_write_space(sk);
1597 len = 1;
1598 }
1599 /*
1600 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1601 * could not do because of in-flight packets
1602 */
1603 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1604 __sk_free(sk);
1605 }
1606 EXPORT_SYMBOL(sock_wfree);
1607
1608 /*
1609 * Read buffer destructor automatically called from kfree_skb.
1610 */
1611 void sock_rfree(struct sk_buff *skb)
1612 {
1613 struct sock *sk = skb->sk;
1614 unsigned int len = skb->truesize;
1615
1616 atomic_sub(len, &sk->sk_rmem_alloc);
1617 sk_mem_uncharge(sk, len);
1618 }
1619 EXPORT_SYMBOL(sock_rfree);
1620
1621 void sock_edemux(struct sk_buff *skb)
1622 {
1623 struct sock *sk = skb->sk;
1624
1625 #ifdef CONFIG_INET
1626 if (sk->sk_state == TCP_TIME_WAIT)
1627 inet_twsk_put(inet_twsk(sk));
1628 else
1629 #endif
1630 sock_put(sk);
1631 }
1632 EXPORT_SYMBOL(sock_edemux);
1633
1634 kuid_t sock_i_uid(struct sock *sk)
1635 {
1636 kuid_t uid;
1637
1638 /*mtk_net: fix kernel bug*/
1639 if (!sk) {
1640 pr_info("sk == NULL for sock_i_uid\n");
1641 return GLOBAL_ROOT_UID;
1642 }
1643
1644 read_lock_bh(&sk->sk_callback_lock);
1645 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1646 read_unlock_bh(&sk->sk_callback_lock);
1647 return uid;
1648 }
1649 EXPORT_SYMBOL(sock_i_uid);
1650
1651 unsigned long sock_i_ino(struct sock *sk)
1652 {
1653 unsigned long ino;
1654
1655 read_lock_bh(&sk->sk_callback_lock);
1656 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1657 read_unlock_bh(&sk->sk_callback_lock);
1658 return ino;
1659 }
1660 EXPORT_SYMBOL(sock_i_ino);
1661
1662 /*
1663 * Allocate a skb from the socket's send buffer.
1664 */
1665 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1666 gfp_t priority)
1667 {
1668 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1669 struct sk_buff *skb = alloc_skb(size, priority);
1670 if (skb) {
1671 skb_set_owner_w(skb, sk);
1672 return skb;
1673 }
1674 }
1675 return NULL;
1676 }
1677 EXPORT_SYMBOL(sock_wmalloc);
1678
1679 /*
1680 * Allocate a skb from the socket's receive buffer.
1681 */
1682 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1683 gfp_t priority)
1684 {
1685 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1686 struct sk_buff *skb = alloc_skb(size, priority);
1687 if (skb) {
1688 skb_set_owner_r(skb, sk);
1689 return skb;
1690 }
1691 }
1692 return NULL;
1693 }
1694
1695 /*
1696 * Allocate a memory block from the socket's option memory buffer.
1697 */
1698 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1699 {
1700 if ((unsigned int)size <= sysctl_optmem_max &&
1701 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1702 void *mem;
1703 /* First do the add, to avoid the race if kmalloc
1704 * might sleep.
1705 */
1706 atomic_add(size, &sk->sk_omem_alloc);
1707 mem = kmalloc(size, priority);
1708 if (mem)
1709 return mem;
1710 atomic_sub(size, &sk->sk_omem_alloc);
1711 }
1712 return NULL;
1713 }
1714 EXPORT_SYMBOL(sock_kmalloc);
1715
1716 /*
1717 * Free an option memory block.
1718 */
1719 void sock_kfree_s(struct sock *sk, void *mem, int size)
1720 {
1721 kfree(mem);
1722 atomic_sub(size, &sk->sk_omem_alloc);
1723 }
1724 EXPORT_SYMBOL(sock_kfree_s);
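/*
 * Pairing sketch (illustrative): option memory taken with sock_kmalloc()
 * is charged to sk_omem_alloc and must be returned with sock_kfree_s()
 * using the same size, e.g.
 *
 *	opt = sock_kmalloc(sk, size, GFP_KERNEL);
 *	if (!opt)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, size);
 *
 * keeping the per-socket total below sysctl_optmem_max.
 */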
1725
1726 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1727 I think, these locks should be removed for datagram sockets.
1728 */
1729 static long sock_wait_for_wmem(struct sock *sk, long timeo)
1730 {
1731 DEFINE_WAIT(wait);
1732
1733 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1734 for (;;) {
1735 if (!timeo)
1736 break;
1737 if (signal_pending(current))
1738 break;
1739 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1740 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1741 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1742 break;
1743 if (sk->sk_shutdown & SEND_SHUTDOWN)
1744 break;
1745 if (sk->sk_err)
1746 break;
1747 timeo = schedule_timeout(timeo);
1748 }
1749 finish_wait(sk_sleep(sk), &wait);
1750 return timeo;
1751 }
1752
1753
1754 /* debug function */
1755
1756 static int sock_dump_info(struct sock *sk)
1757 {
1758 //dump receiver queue 128 bytes
1759 //struct sk_buff *skb;
1760 //char skbmsg[128];
1761 //dump receiver queue 128 bytes end
1762
1763 	if (sk->sk_family == AF_UNIX)
1764 	{
1765 		struct unix_sock *u = unix_sk(sk);
1766 		struct sock *other = NULL;
1767 		if ((u->path.dentry != NULL) && (u->path.dentry->d_iname != NULL))
1768 		//if ((u->dentry != NULL) && (u->dentry->d_iname != NULL))
1769 		{
1770 #ifdef CONFIG_MTK_NET_LOGGING
1771 			printk(KERN_INFO "[mtk_net][sock]sockdbg: socket-Name:%s \n", u->path.dentry->d_iname);
1772 #endif
1773 		}
1774 		else
1775 		{
1776 #ifdef CONFIG_MTK_NET_LOGGING
1777 			printk(KERN_INFO "[mtk_net][sock]sockdbg:socket Name (NULL)\n");
1778 #endif
1779 		}
1780 
1781 		if (sk->sk_socket && SOCK_INODE(sk->sk_socket))
1782 		{
1783 #ifdef CONFIG_MTK_NET_LOGGING
1784 			printk(KERN_INFO "[mtk_net][sock]sockdbg:socket Inode[%lu]\n", SOCK_INODE(sk->sk_socket)->i_ino);
1785 #endif
1786 		}
1787 
1788 		other = unix_sk(sk)->peer;
1789 		if (!other)
1790 		{
1791 #ifdef CONFIG_MTK_NET_LOGGING
1792 			printk(KERN_INFO "[mtk_net][sock]sockdbg:peer is (NULL) \n");
1793 #endif
1794 		} else {
1795 
1796 			if ((((struct unix_sock *)other)->path.dentry != NULL) && (((struct unix_sock *)other)->path.dentry->d_iname != NULL))
1797 			//if ((((struct unix_sock *)other)->dentry != NULL) && (((struct unix_sock *)other)->dentry->d_iname != NULL))
1798 			{
1799 #ifdef CONFIG_MTK_NET_LOGGING
1800 				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name:%s \n", ((struct unix_sock *)other)->path.dentry->d_iname);
1801 #endif
1802 			}
1803 			else
1804 			{
1805 #ifdef CONFIG_MTK_NET_LOGGING
1806 				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name (NULL) \n");
1807 #endif
1808 			}
1809 
1810 			if (other->sk_socket && SOCK_INODE(other->sk_socket))
1811 			{
1812 #ifdef CONFIG_MTK_NET_LOGGING
1813 				printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Inode [%lu] \n", SOCK_INODE(other->sk_socket)->i_ino);
1814 #endif
1815 			}
1816 #ifdef CONFIG_MTK_NET_LOGGING
1817 			printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Receive Queue len:%d \n", other->sk_receive_queue.qlen);
1818 #endif
1819 //dump receiver queue 128 bytes
1820 /* if ((skb = skb_peek_tail(&other->sk_receive_queue)) == NULL) {
1821
1822 printk(KERN_INFO "sockdbg: Peer Recieve Queue is null (warning) \n");
1823 }else{
1824 int i =0 ,len=0;
1825 if((skb->len !=0) && (skb->data != NULL)){
1826
1827 if(skb->len >= 127){
1828 len = 127 ;
1829 }else
1830 {
1831 len = skb->len ;
1832 }
1833 for (i=0;i<len;i++)
1834 sprintf(skbmsg+i, "%x", skb->data[i]);
1835
1836 skbmsg[len]= '\0' ;
1837
1838 printk(KERN_INFO "sockdbg: Peer Recieve Queue dump(%d bytes):%s\n", len, skbmsg);
1839
1840
1841 }else{
1842 printk(KERN_INFO "sockdbg: Peer Recieve skb error \n");
1843 }*/
1844 //dump receiver queue 128 bytes end
1845
1846 //}
1847 //dump receiver queue 128 bytes end
1848
1849 }
1850 }
1851
1852 	return 0;
1853
1854
1855 }
1856
1857
1858
1859 /*
1860 * Generic send/receive buffer handlers
1861 */
1862
1863 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1864 unsigned long data_len, int noblock,
1865 int *errcode)
1866 {
1867 struct sk_buff *skb;
1868 gfp_t gfp_mask;
1869 long timeo;
1870 int err;
1871 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1872
1873 err = -EMSGSIZE;
1874 if (npages > MAX_SKB_FRAGS)
1875 goto failure;
1876
1877 gfp_mask = sk->sk_allocation;
1878 if (gfp_mask & __GFP_WAIT)
1879 gfp_mask |= __GFP_REPEAT;
1880
1881 timeo = sock_sndtimeo(sk, noblock);
1882 while (1) {
1883 err = sock_error(sk);
1884 if (err != 0)
1885 goto failure;
1886
1887 err = -EPIPE;
1888 if (sk->sk_shutdown & SEND_SHUTDOWN)
1889 goto failure;
1890
1891 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1892 skb = alloc_skb(header_len, gfp_mask);
1893 if (skb) {
1894 int i;
1895
1896 /* No pages, we're done... */
1897 if (!data_len)
1898 break;
1899
1900 skb->truesize += data_len;
1901 skb_shinfo(skb)->nr_frags = npages;
1902 for (i = 0; i < npages; i++) {
1903 struct page *page;
1904
1905 page = alloc_pages(sk->sk_allocation, 0);
1906 if (!page) {
1907 err = -ENOBUFS;
1908 skb_shinfo(skb)->nr_frags = i;
1909 kfree_skb(skb);
1910 goto failure;
1911 }
1912
1913 __skb_fill_page_desc(skb, i,
1914 page, 0,
1915 (data_len >= PAGE_SIZE ?
1916 PAGE_SIZE :
1917 data_len));
1918 data_len -= PAGE_SIZE;
1919 }
1920
1921 /* Full success... */
1922 break;
1923 }
1924 err = -ENOBUFS;
1925 goto failure;
1926 }
1927 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1928 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1929 err = -EAGAIN;
1930 if (!timeo)
1931 goto failure;
1932 if (signal_pending(current))
1933 goto interrupted;
1934
1935 sock_dump_info(sk);
1936 #ifdef CONFIG_MTK_NET_LOGGING
1937 printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem, timeo =%ld, wmem =%d, snd buf =%d \n",
1938 timeo, atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf);
1939 #endif
1940 timeo = sock_wait_for_wmem(sk, timeo);
1941 #ifdef CONFIG_MTK_NET_LOGGING
1942 printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem done, header_len=0x%lx, data_len=0x%lx,timeo =%ld \n",
1943 header_len, data_len ,timeo);
1944 #endif
1945 }
1946
1947 skb_set_owner_w(skb, sk);
1948 return skb;
1949
1950 interrupted:
1951 err = sock_intr_errno(timeo);
1952 failure:
1953 *errcode = err;
1954 return NULL;
1955 }
1956 EXPORT_SYMBOL(sock_alloc_send_pskb);
1957
1958 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1959 int noblock, int *errcode)
1960 {
1961 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1962 }
1963 EXPORT_SYMBOL(sock_alloc_send_skb);
1964
1965 /* On 32bit arches, an skb frag is limited to 2^15 */
1966 #define SKB_FRAG_PAGE_ORDER get_order(32768)
1967
1968 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1969 {
1970 int order;
1971
1972 if (pfrag->page) {
1973 if (atomic_read(&pfrag->page->_count) == 1) {
1974 pfrag->offset = 0;
1975 return true;
1976 }
1977 if (pfrag->offset < pfrag->size)
1978 return true;
1979 put_page(pfrag->page);
1980 }
1981
1982 /* We restrict high order allocations to users that can afford to wait */
1983 order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1984
1985 do {
1986 gfp_t gfp = sk->sk_allocation;
1987
1988 if (order)
1989 gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
1990 pfrag->page = alloc_pages(gfp, order);
1991 if (likely(pfrag->page)) {
1992 pfrag->offset = 0;
1993 pfrag->size = PAGE_SIZE << order;
1994 return true;
1995 }
1996 } while (--order >= 0);
1997
1998 sk_enter_memory_pressure(sk);
1999 sk_stream_moderate_sndbuf(sk);
2000 return false;
2001 }
2002 EXPORT_SYMBOL(sk_page_frag_refill);
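/*
 * Typical sender-side use (sketch): grab the per-socket fragment cache
 * and top it up before copying payload, e.g.
 *
 *	struct page_frag *pfrag = sk_page_frag(sk);
 *
 *	if (!sk_page_frag_refill(sk, pfrag))
 *		goto wait_for_memory;
 *	copy = min_t(int, copy, pfrag->size - pfrag->offset);
 *	... copy into page_address(pfrag->page) + pfrag->offset ...
 *	pfrag->offset += copy;
 *
 * On failure the function above has already flagged memory pressure and
 * moderated the send buffer.
 */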
2003
2004 static void __lock_sock(struct sock *sk)
2005 __releases(&sk->sk_lock.slock)
2006 __acquires(&sk->sk_lock.slock)
2007 {
2008 DEFINE_WAIT(wait);
2009
2010 for (;;) {
2011 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2012 TASK_UNINTERRUPTIBLE);
2013 spin_unlock_bh(&sk->sk_lock.slock);
2014 schedule();
2015 spin_lock_bh(&sk->sk_lock.slock);
2016 if (!sock_owned_by_user(sk))
2017 break;
2018 }
2019 finish_wait(&sk->sk_lock.wq, &wait);
2020 }
2021
2022 static void __release_sock(struct sock *sk)
2023 __releases(&sk->sk_lock.slock)
2024 __acquires(&sk->sk_lock.slock)
2025 {
2026 struct sk_buff *skb = sk->sk_backlog.head;
2027
2028 do {
2029 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2030 bh_unlock_sock(sk);
2031
2032 do {
2033 struct sk_buff *next = skb->next;
2034
2035 prefetch(next);
2036 WARN_ON_ONCE(skb_dst_is_noref(skb));
2037 skb->next = NULL;
2038 sk_backlog_rcv(sk, skb);
2039
2040 /*
2041 * We are in process context here with softirqs
2042 * disabled, use cond_resched_softirq() to preempt.
2043 * This is safe to do because we've taken the backlog
2044 * queue private:
2045 */
2046 cond_resched_softirq();
2047
2048 skb = next;
2049 } while (skb != NULL);
2050
2051 bh_lock_sock(sk);
2052 } while ((skb = sk->sk_backlog.head) != NULL);
2053
2054 /*
2055 * Doing the zeroing here guarantees we cannot loop forever
2056 * while a wild producer attempts to flood us.
2057 */
2058 sk->sk_backlog.len = 0;
2059 }
2060
2061 /**
2062 * sk_wait_data - wait for data to arrive at sk_receive_queue
2063 * @sk: sock to wait on
2064 * @timeo: for how long
2065 *
2066 * Socket state, including sk->sk_err, is changed only under the socket lock,
2067 * hence we may omit checks after joining the wait queue.
2068 * We check the receive queue before schedule() only as an optimization;
2069 * it is very likely that release_sock() added new data.
2070 */
2071 int sk_wait_data(struct sock *sk, long *timeo)
2072 {
2073 int rc;
2074 DEFINE_WAIT(wait);
2075
2076 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2077 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2078 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
2079 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2080 finish_wait(sk_sleep(sk), &wait);
2081 return rc;
2082 }
2083 EXPORT_SYMBOL(sk_wait_data);
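
/*
 * Illustrative sketch: a blocking recvmsg() implementation, called with the
 * socket lock held, usually loops on the receive queue and falls back to
 * sk_wait_data() while the timeout and signal state allow it.
 *
 *	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	while (!(skb = skb_peek(&sk->sk_receive_queue))) {
 *		if (sk->sk_err || !timeo || signal_pending(current))
 *			break;
 *		sk_wait_data(sk, &timeo);
 *	}
 */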
2084
2085 /**
2086 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2087 * @sk: socket
2088 * @size: memory size to allocate
2089 * @kind: allocation type
2090 *
2091 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2092 * rmem allocation. This function assumes that protocols which have
2093 * memory_pressure use sk_wmem_queued for write buffer accounting.
2094 */
2095 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2096 {
2097 struct proto *prot = sk->sk_prot;
2098 int amt = sk_mem_pages(size);
2099 long allocated;
2100 int parent_status = UNDER_LIMIT;
2101
2102 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2103
2104 allocated = sk_memory_allocated_add(sk, amt, &parent_status);
2105
2106 /* Under limit. */
2107 if (parent_status == UNDER_LIMIT &&
2108 allocated <= sk_prot_mem_limits(sk, 0)) {
2109 sk_leave_memory_pressure(sk);
2110 return 1;
2111 }
2112
2113 /* Under pressure. (we or our parents) */
2114 if ((parent_status > SOFT_LIMIT) ||
2115 allocated > sk_prot_mem_limits(sk, 1))
2116 sk_enter_memory_pressure(sk);
2117
2118 /* Over hard limit (we or our parents) */
2119 if ((parent_status == OVER_LIMIT) ||
2120 (allocated > sk_prot_mem_limits(sk, 2)))
2121 goto suppress_allocation;
2122
2123 /* guarantee minimum buffer size under pressure */
2124 if (kind == SK_MEM_RECV) {
2125 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2126 return 1;
2127
2128 } else { /* SK_MEM_SEND */
2129 if (sk->sk_type == SOCK_STREAM) {
2130 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2131 return 1;
2132 } else if (atomic_read(&sk->sk_wmem_alloc) <
2133 prot->sysctl_wmem[0])
2134 return 1;
2135 }
2136
2137 if (sk_has_memory_pressure(sk)) {
2138 int alloc;
2139
2140 if (!sk_under_memory_pressure(sk))
2141 return 1;
2142 alloc = sk_sockets_allocated_read_positive(sk);
2143 if (sk_prot_mem_limits(sk, 2) > alloc *
2144 sk_mem_pages(sk->sk_wmem_queued +
2145 atomic_read(&sk->sk_rmem_alloc) +
2146 sk->sk_forward_alloc))
2147 return 1;
2148 }
2149
2150 suppress_allocation:
2151
2152 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2153 sk_stream_moderate_sndbuf(sk);
2154
2155 /* Fail only if socket is _under_ its sndbuf.
2156 * In this case we cannot block, so we have to fail.
2157 */
2158 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2159 return 1;
2160 }
2161
2162 trace_sock_exceed_buf_limit(sk, prot, allocated);
2163
2164 /* Alas. Undo changes. */
2165 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
2166
2167 sk_memory_allocated_sub(sk, amt);
2168
2169 return 0;
2170 }
2171 EXPORT_SYMBOL(__sk_mem_schedule);
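
/*
 * For reference, the fast-path wrappers in include/net/sock.h charge memory
 * in SK_MEM_QUANTUM units and only fall back to __sk_mem_schedule() once
 * sk_forward_alloc is exhausted; sk_wmem_schedule() looks roughly like:
 *
 *	static inline bool sk_wmem_schedule(struct sock *sk, int size)
 *	{
 *		if (!sk_has_account(sk))
 *			return true;
 *		return size <= sk->sk_forward_alloc ||
 *			__sk_mem_schedule(sk, size, SK_MEM_SEND);
 *	}
 */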
2172
2173 /**
2174 * __sk_mem_reclaim - reclaim memory_allocated
2175 * @sk: socket
2176 */
2177 void __sk_mem_reclaim(struct sock *sk)
2178 {
2179 sk_memory_allocated_sub(sk,
2180 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
2181 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
2182
2183 if (sk_under_memory_pressure(sk) &&
2184 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2185 sk_leave_memory_pressure(sk);
2186 }
2187 EXPORT_SYMBOL(__sk_mem_reclaim);
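
/*
 * For reference, the inline sk_mem_reclaim() wrapper in include/net/sock.h
 * only calls the function above once at least one full SK_MEM_QUANTUM can
 * be returned, roughly:
 *
 *	static inline void sk_mem_reclaim(struct sock *sk)
 *	{
 *		if (!sk_has_account(sk))
 *			return;
 *		if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
 *			__sk_mem_reclaim(sk);
 *	}
 */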
2188
2189
2190 /*
2191 * Set of default routines for initialising struct proto_ops when
2192 * the protocol does not support a particular function. In certain
2193 * cases where it makes no sense for a protocol to have a "do nothing"
2194 * function, some default processing is provided.
2195 */
2196
2197 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2198 {
2199 return -EOPNOTSUPP;
2200 }
2201 EXPORT_SYMBOL(sock_no_bind);
2202
2203 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2204 int len, int flags)
2205 {
2206 return -EOPNOTSUPP;
2207 }
2208 EXPORT_SYMBOL(sock_no_connect);
2209
2210 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2211 {
2212 return -EOPNOTSUPP;
2213 }
2214 EXPORT_SYMBOL(sock_no_socketpair);
2215
2216 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2217 {
2218 return -EOPNOTSUPP;
2219 }
2220 EXPORT_SYMBOL(sock_no_accept);
2221
2222 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2223 int *len, int peer)
2224 {
2225 return -EOPNOTSUPP;
2226 }
2227 EXPORT_SYMBOL(sock_no_getname);
2228
2229 unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2230 {
2231 return 0;
2232 }
2233 EXPORT_SYMBOL(sock_no_poll);
2234
2235 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2236 {
2237 return -EOPNOTSUPP;
2238 }
2239 EXPORT_SYMBOL(sock_no_ioctl);
2240
2241 int sock_no_listen(struct socket *sock, int backlog)
2242 {
2243 return -EOPNOTSUPP;
2244 }
2245 EXPORT_SYMBOL(sock_no_listen);
2246
2247 int sock_no_shutdown(struct socket *sock, int how)
2248 {
2249 return -EOPNOTSUPP;
2250 }
2251 EXPORT_SYMBOL(sock_no_shutdown);
2252
2253 int sock_no_setsockopt(struct socket *sock, int level, int optname,
2254 char __user *optval, unsigned int optlen)
2255 {
2256 return -EOPNOTSUPP;
2257 }
2258 EXPORT_SYMBOL(sock_no_setsockopt);
2259
2260 int sock_no_getsockopt(struct socket *sock, int level, int optname,
2261 char __user *optval, int __user *optlen)
2262 {
2263 return -EOPNOTSUPP;
2264 }
2265 EXPORT_SYMBOL(sock_no_getsockopt);
2266
2267 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2268 size_t len)
2269 {
2270 return -EOPNOTSUPP;
2271 }
2272 EXPORT_SYMBOL(sock_no_sendmsg);
2273
2274 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2275 size_t len, int flags)
2276 {
2277 return -EOPNOTSUPP;
2278 }
2279 EXPORT_SYMBOL(sock_no_recvmsg);
2280
2281 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2282 {
2283 /* Mirror missing mmap method error code */
2284 return -ENODEV;
2285 }
2286 EXPORT_SYMBOL(sock_no_mmap);
2287
2288 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2289 {
2290 ssize_t res;
2291 struct msghdr msg = {.msg_flags = flags};
2292 struct kvec iov;
2293 char *kaddr = kmap(page);
2294 iov.iov_base = kaddr + offset;
2295 iov.iov_len = size;
2296 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2297 kunmap(page);
2298 return res;
2299 }
2300 EXPORT_SYMBOL(sock_no_sendpage);
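
/*
 * Illustrative sketch: a protocol family that does not implement the full
 * proto_ops surface wires the stubs above into its ops table.  The names
 * PF_EXAMPLE, example_release and example_dgram_ops are hypothetical.
 *
 *	static const struct proto_ops example_dgram_ops = {
 *		.family		= PF_EXAMPLE,
 *		.owner		= THIS_MODULE,
 *		.release	= example_release,
 *		.bind		= sock_no_bind,
 *		.connect	= sock_no_connect,
 *		.socketpair	= sock_no_socketpair,
 *		.accept		= sock_no_accept,
 *		.listen		= sock_no_listen,
 *		.mmap		= sock_no_mmap,
 *		.sendpage	= sock_no_sendpage,
 *		(remaining handlers filled in by the protocol)
 *	};
 */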
2301
2302 /*
2303 * Default Socket Callbacks
2304 */
2305
2306 static void sock_def_wakeup(struct sock *sk)
2307 {
2308 struct socket_wq *wq;
2309
2310 rcu_read_lock();
2311 wq = rcu_dereference(sk->sk_wq);
2312 if (wq_has_sleeper(wq))
2313 wake_up_interruptible_all(&wq->wait);
2314 rcu_read_unlock();
2315 }
2316
2317 static void sock_def_error_report(struct sock *sk)
2318 {
2319 struct socket_wq *wq;
2320
2321 rcu_read_lock();
2322 wq = rcu_dereference(sk->sk_wq);
2323 if (wq_has_sleeper(wq))
2324 wake_up_interruptible_poll(&wq->wait, POLLERR);
2325 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2326 rcu_read_unlock();
2327 }
2328
2329 static void sock_def_readable(struct sock *sk, int len)
2330 {
2331 struct socket_wq *wq;
2332
2333 rcu_read_lock();
2334 wq = rcu_dereference(sk->sk_wq);
2335 if (wq_has_sleeper(wq))
2336 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2337 POLLRDNORM | POLLRDBAND);
2338 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2339 rcu_read_unlock();
2340 }
2341
2342 static void sock_def_write_space(struct sock *sk)
2343 {
2344 struct socket_wq *wq;
2345
2346 rcu_read_lock();
2347
2348 /* Do not wake up a writer until he can make "significant"
2349 * progress. --DaveM
2350 */
2351 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2352 wq = rcu_dereference(sk->sk_wq);
2353 if (wq_has_sleeper(wq))
2354 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2355 POLLWRNORM | POLLWRBAND);
2356
2357 /* Should agree with poll, otherwise some programs break */
2358 if (sock_writeable(sk))
2359 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2360 }
2361
2362 rcu_read_unlock();
2363 }
2364
2365 static void sock_def_destruct(struct sock *sk)
2366 {
2367 kfree(sk->sk_protinfo);
2368 }
2369
2370 void sk_send_sigurg(struct sock *sk)
2371 {
2372 if (sk->sk_socket && sk->sk_socket->file)
2373 if (send_sigurg(&sk->sk_socket->file->f_owner))
2374 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2375 }
2376 EXPORT_SYMBOL(sk_send_sigurg);
2377
2378 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2379 unsigned long expires)
2380 {
2381 if (!mod_timer(timer, expires))
2382 sock_hold(sk);
2383 }
2384 EXPORT_SYMBOL(sk_reset_timer);
2385
2386 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2387 {
2388 if (del_timer(timer))
2389 __sock_put(sk);
2390 }
2391 EXPORT_SYMBOL(sk_stop_timer);
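
/*
 * Illustrative sketch: sk_reset_timer() takes a socket reference only when
 * it actually (re)arms the timer, so a handler that may let the timer die
 * drops that reference itself; example_timer_handler is hypothetical.
 *
 *	static void example_timer_handler(unsigned long data)
 *	{
 *		struct sock *sk = (struct sock *)data;
 *
 *		(do protocol work, possibly sk_reset_timer(sk, ...) again)
 *		sock_put(sk);	(pairs with the sock_hold() taken when armed)
 *	}
 */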
2392
2393 void sock_init_data(struct socket *sock, struct sock *sk)
2394 {
2395 skb_queue_head_init(&sk->sk_receive_queue);
2396 skb_queue_head_init(&sk->sk_write_queue);
2397 skb_queue_head_init(&sk->sk_error_queue);
2398 #ifdef CONFIG_NET_DMA
2399 skb_queue_head_init(&sk->sk_async_wait_queue);
2400 #endif
2401
2402 sk->sk_send_head = NULL;
2403
2404 init_timer(&sk->sk_timer);
2405
2406 sk->sk_allocation = GFP_KERNEL;
2407 sk->sk_rcvbuf = sysctl_rmem_default;
2408 sk->sk_sndbuf = sysctl_wmem_default;
2409 sk->sk_state = TCP_CLOSE;
2410 sk_set_socket(sk, sock);
2411
2412 sock_set_flag(sk, SOCK_ZAPPED);
2413
2414 if (sock) {
2415 sk->sk_type = sock->type;
2416 sk->sk_wq = sock->wq;
2417 sock->sk = sk;
2418 } else
2419 sk->sk_wq = NULL;
2420
2421 spin_lock_init(&sk->sk_dst_lock);
2422 rwlock_init(&sk->sk_callback_lock);
2423 lockdep_set_class_and_name(&sk->sk_callback_lock,
2424 af_callback_keys + sk->sk_family,
2425 af_family_clock_key_strings[sk->sk_family]);
2426
2427 sk->sk_state_change = sock_def_wakeup;
2428 sk->sk_data_ready = sock_def_readable;
2429 sk->sk_write_space = sock_def_write_space;
2430 sk->sk_error_report = sock_def_error_report;
2431 sk->sk_destruct = sock_def_destruct;
2432
2433 sk->sk_frag.page = NULL;
2434 sk->sk_frag.offset = 0;
2435 sk->sk_peek_off = -1;
2436
2437 sk->sk_peer_pid = NULL;
2438 sk->sk_peer_cred = NULL;
2439 sk->sk_write_pending = 0;
2440 sk->sk_rcvlowat = 1;
2441 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2442 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2443
2444 sk->sk_stamp = ktime_set(-1L, 0);
2445
2446 sk->sk_pacing_rate = ~0U;
2447 /*
2448 * Before updating sk_refcnt, we must commit prior changes to memory
2449 * (see Documentation/RCU/rculist_nulls.txt for details)
2450 */
2451 smp_wmb();
2452 atomic_set(&sk->sk_refcnt, 1);
2453 atomic_set(&sk->sk_drops, 0);
2454 }
2455 EXPORT_SYMBOL(sock_init_data);
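
/*
 * Illustrative sketch: a protocol's create()/init path usually calls
 * sock_init_data() first and then overrides whichever defaults it needs;
 * PF_EXAMPLE, example_proto and example_destruct are hypothetical.
 *
 *	sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto);
 *	if (!sk)
 *		return -ENOMEM;
 *	sock_init_data(sock, sk);
 *	sk->sk_destruct = example_destruct;
 */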
2456
2457 void lock_sock_nested(struct sock *sk, int subclass)
2458 {
2459 might_sleep();
2460 spin_lock_bh(&sk->sk_lock.slock);
2461 if (sk->sk_lock.owned)
2462 __lock_sock(sk);
2463 sk->sk_lock.owned = 1;
2464 spin_unlock(&sk->sk_lock.slock);
2465 /*
2466 * The sk_lock has mutex_lock() semantics here:
2467 */
2468 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2469 local_bh_enable();
2470 }
2471 EXPORT_SYMBOL(lock_sock_nested);
2472
2473 void release_sock(struct sock *sk)
2474 {
2475 /*
2476 * The sk_lock has mutex_unlock() semantics:
2477 */
2478 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2479
2480 spin_lock_bh(&sk->sk_lock.slock);
2481 if (sk->sk_backlog.tail)
2482 __release_sock(sk);
2483
2484 /* Warning: release_cb() might need to release sk ownership,
2485 * i.e. call sock_release_ownership(sk) before us.
2486 */
2487 if (sk->sk_prot->release_cb)
2488 sk->sk_prot->release_cb(sk);
2489
2490 sock_release_ownership(sk);
2491 if (waitqueue_active(&sk->sk_lock.wq))
2492 wake_up(&sk->sk_lock.wq);
2493 spin_unlock_bh(&sk->sk_lock.slock);
2494 }
2495 EXPORT_SYMBOL(release_sock);
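
/*
 * Illustrative sketch: process-context paths bracket socket state changes
 * with lock_sock()/release_sock(); releasing the lock also flushes any
 * backlog queued by softirq receivers while the lock was owned.
 *
 *	lock_sock(sk);
 *	(modify socket state, walk queues, call sk->sk_prot helpers)
 *	release_sock(sk);
 */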
2496
2497 /**
2498 * lock_sock_fast - fast version of lock_sock
2499 * @sk: socket
2500 *
2501 * This version should be used for very small sections, where the process won't block.
2502 * Returns false if the fast path is taken:
2503 *   sk_lock.slock locked, owned = 0, BH disabled
2504 * Returns true if the slow path is taken:
2505 *   sk_lock.slock unlocked, owned = 1, BH enabled
2506 */
2507 bool lock_sock_fast(struct sock *sk)
2508 {
2509 might_sleep();
2510 spin_lock_bh(&sk->sk_lock.slock);
2511
2512 if (!sk->sk_lock.owned)
2513 /*
2514 * Note: the fast path returns with BH still disabled
2515 */
2516 return false;
2517
2518 __lock_sock(sk);
2519 sk->sk_lock.owned = 1;
2520 spin_unlock(&sk->sk_lock.slock);
2521 /*
2522 * The sk_lock has mutex_lock() semantics here:
2523 */
2524 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2525 local_bh_enable();
2526 return true;
2527 }
2528 EXPORT_SYMBOL(lock_sock_fast);
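
/*
 * Illustrative sketch: callers hand the return value of lock_sock_fast()
 * back to unlock_sock_fast() (see include/net/sock.h) so that the matching
 * unlock path is taken.
 *
 *	bool slow = lock_sock_fast(sk);
 *	(short critical section)
 *	unlock_sock_fast(sk, slow);
 */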
2529
2530 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2531 {
2532 struct timeval tv;
2533 if (!sock_flag(sk, SOCK_TIMESTAMP))
2534 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2535 tv = ktime_to_timeval(sk->sk_stamp);
2536 if (tv.tv_sec == -1)
2537 return -ENOENT;
2538 if (tv.tv_sec == 0) {
2539 sk->sk_stamp = ktime_get_real();
2540 tv = ktime_to_timeval(sk->sk_stamp);
2541 }
2542 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2543 }
2544 EXPORT_SYMBOL(sock_get_timestamp);
2545
2546 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2547 {
2548 struct timespec ts;
2549 if (!sock_flag(sk, SOCK_TIMESTAMP))
2550 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2551 ts = ktime_to_timespec(sk->sk_stamp);
2552 if (ts.tv_sec == -1)
2553 return -ENOENT;
2554 if (ts.tv_sec == 0) {
2555 sk->sk_stamp = ktime_get_real();
2556 ts = ktime_to_timespec(sk->sk_stamp);
2557 }
2558 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2559 }
2560 EXPORT_SYMBOL(sock_get_timestampns);
2561
2562 void sock_enable_timestamp(struct sock *sk, int flag)
2563 {
2564 if (!sock_flag(sk, flag)) {
2565 unsigned long previous_flags = sk->sk_flags;
2566
2567 sock_set_flag(sk, flag);
2568 /*
2569 * we just set one of the two flags which require net
2570 * time stamping, but time stamping might have been on
2571 * already because of the other one
2572 */
2573 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
2574 net_enable_timestamp();
2575 }
2576 }
2577
2578 /*
2579 * Get a socket option on a socket.
2580 *
2581 * FIX: POSIX 1003.1g is very ambiguous here. It states that
2582 * asynchronous errors should be reported by getsockopt. We assume
2583 * this means if you specify SO_ERROR (otherwise, what's the point of it?).
2584 */
2585 int sock_common_getsockopt(struct socket *sock, int level, int optname,
2586 char __user *optval, int __user *optlen)
2587 {
2588 struct sock *sk = sock->sk;
2589
2590 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2591 }
2592 EXPORT_SYMBOL(sock_common_getsockopt);
2593
2594 #ifdef CONFIG_COMPAT
2595 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2596 char __user *optval, int __user *optlen)
2597 {
2598 struct sock *sk = sock->sk;
2599
2600 if (sk->sk_prot->compat_getsockopt != NULL)
2601 return sk->sk_prot->compat_getsockopt(sk, level, optname,
2602 optval, optlen);
2603 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2604 }
2605 EXPORT_SYMBOL(compat_sock_common_getsockopt);
2606 #endif
2607
2608 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2609 struct msghdr *msg, size_t size, int flags)
2610 {
2611 struct sock *sk = sock->sk;
2612 int addr_len = 0;
2613 int err;
2614
2615 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2616 flags & ~MSG_DONTWAIT, &addr_len);
2617 if (err >= 0)
2618 msg->msg_namelen = addr_len;
2619 return err;
2620 }
2621 EXPORT_SYMBOL(sock_common_recvmsg);
2622
2623 /*
2624 * Set socket options on an inet socket.
2625 */
2626 int sock_common_setsockopt(struct socket *sock, int level, int optname,
2627 char __user *optval, unsigned int optlen)
2628 {
2629 struct sock *sk = sock->sk;
2630
2631 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2632 }
2633 EXPORT_SYMBOL(sock_common_setsockopt);
2634
2635 #ifdef CONFIG_COMPAT
2636 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2637 char __user *optval, unsigned int optlen)
2638 {
2639 struct sock *sk = sock->sk;
2640
2641 if (sk->sk_prot->compat_setsockopt != NULL)
2642 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2643 optval, optlen);
2644 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2645 }
2646 EXPORT_SYMBOL(compat_sock_common_setsockopt);
2647 #endif
2648
2649 void sk_common_release(struct sock *sk)
2650 {
2651 if (sk->sk_prot->destroy)
2652 sk->sk_prot->destroy(sk);
2653
2654 /*
2655 * Observation: when sk_common_release() is called, processes have
2656 * no access to the socket, but the network stack still does.
2657 * Step one: detach it from networking:
2658 *
2659 * A. Remove from hash tables.
2660 */
2661
2662 sk->sk_prot->unhash(sk);
2663
2664 /*
2665 * At this point the socket cannot receive new packets, but it is possible
2666 * that some packets are still in flight because some CPU ran the receiver
2667 * and did the hash table lookup before we unhashed the socket. They will
2668 * reach the receive queue and be purged by the socket destructor.
2669 *
2670 * Also, we still have packets pending on the receive queue and probably
2671 * our own packets waiting in device queues. sock_destroy will drain the
2672 * receive queue, but transmitted packets will delay socket destruction
2673 * until the last reference is released.
2674 */
2675
2676 sock_orphan(sk);
2677
2678 xfrm_sk_free_policy(sk);
2679
2680 sk_refcnt_debug_release(sk);
2681
2682 if (sk->sk_frag.page) {
2683 put_page(sk->sk_frag.page);
2684 sk->sk_frag.page = NULL;
2685 }
2686
2687 sock_put(sk);
2688 }
2689 EXPORT_SYMBOL(sk_common_release);
2690
2691 #ifdef CONFIG_PROC_FS
2692 #define PROTO_INUSE_NR 64 /* should be enough for the first time */
2693 struct prot_inuse {
2694 int val[PROTO_INUSE_NR];
2695 };
2696
2697 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2698
2699 #ifdef CONFIG_NET_NS
2700 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2701 {
2702 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2703 }
2704 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2705
2706 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2707 {
2708 int cpu, idx = prot->inuse_idx;
2709 int res = 0;
2710
2711 for_each_possible_cpu(cpu)
2712 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2713
2714 return res >= 0 ? res : 0;
2715 }
2716 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2717
2718 static int __net_init sock_inuse_init_net(struct net *net)
2719 {
2720 net->core.inuse = alloc_percpu(struct prot_inuse);
2721 return net->core.inuse ? 0 : -ENOMEM;
2722 }
2723
2724 static void __net_exit sock_inuse_exit_net(struct net *net)
2725 {
2726 free_percpu(net->core.inuse);
2727 }
2728
2729 static struct pernet_operations net_inuse_ops = {
2730 .init = sock_inuse_init_net,
2731 .exit = sock_inuse_exit_net,
2732 };
2733
2734 static __init int net_inuse_init(void)
2735 {
2736 if (register_pernet_subsys(&net_inuse_ops))
2737 panic("Cannot initialize net inuse counters");
2738
2739 return 0;
2740 }
2741
2742 core_initcall(net_inuse_init);
2743 #else
2744 static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2745
2746 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2747 {
2748 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2749 }
2750 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2751
2752 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2753 {
2754 int cpu, idx = prot->inuse_idx;
2755 int res = 0;
2756
2757 for_each_possible_cpu(cpu)
2758 res += per_cpu(prot_inuse, cpu).val[idx];
2759
2760 return res >= 0 ? res : 0;
2761 }
2762 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2763 #endif
2764
2765 static void assign_proto_idx(struct proto *prot)
2766 {
2767 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2768
2769 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2770 pr_err("PROTO_INUSE_NR exhausted\n");
2771 return;
2772 }
2773
2774 set_bit(prot->inuse_idx, proto_inuse_idx);
2775 }
2776
2777 static void release_proto_idx(struct proto *prot)
2778 {
2779 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2780 clear_bit(prot->inuse_idx, proto_inuse_idx);
2781 }
2782 #else
2783 static inline void assign_proto_idx(struct proto *prot)
2784 {
2785 }
2786
2787 static inline void release_proto_idx(struct proto *prot)
2788 {
2789 }
2790 #endif
2791
2792 int proto_register(struct proto *prot, int alloc_slab)
2793 {
2794 if (alloc_slab) {
2795 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
2796 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2797 NULL);
2798
2799 if (prot->slab == NULL) {
2800 pr_crit("%s: Can't create sock SLAB cache!\n",
2801 prot->name);
2802 goto out;
2803 }
2804
2805 if (prot->rsk_prot != NULL) {
2806 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
2807 if (prot->rsk_prot->slab_name == NULL)
2808 goto out_free_sock_slab;
2809
2810 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2811 prot->rsk_prot->obj_size, 0,
2812 SLAB_HWCACHE_ALIGN, NULL);
2813
2814 if (prot->rsk_prot->slab == NULL) {
2815 pr_crit("%s: Can't create request sock SLAB cache!\n",
2816 prot->name);
2817 goto out_free_request_sock_slab_name;
2818 }
2819 }
2820
2821 if (prot->twsk_prot != NULL) {
2822 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2823
2824 if (prot->twsk_prot->twsk_slab_name == NULL)
2825 goto out_free_request_sock_slab;
2826
2827 prot->twsk_prot->twsk_slab =
2828 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2829 prot->twsk_prot->twsk_obj_size,
2830 0,
2831 SLAB_HWCACHE_ALIGN |
2832 prot->slab_flags,
2833 NULL);
2834 if (prot->twsk_prot->twsk_slab == NULL)
2835 goto out_free_timewait_sock_slab_name;
2836 }
2837 }
2838
2839 mutex_lock(&proto_list_mutex);
2840 list_add(&prot->node, &proto_list);
2841 assign_proto_idx(prot);
2842 mutex_unlock(&proto_list_mutex);
2843 return 0;
2844
2845 out_free_timewait_sock_slab_name:
2846 kfree(prot->twsk_prot->twsk_slab_name);
2847 out_free_request_sock_slab:
2848 if (prot->rsk_prot && prot->rsk_prot->slab) {
2849 kmem_cache_destroy(prot->rsk_prot->slab);
2850 prot->rsk_prot->slab = NULL;
2851 }
2852 out_free_request_sock_slab_name:
2853 if (prot->rsk_prot)
2854 kfree(prot->rsk_prot->slab_name);
2855 out_free_sock_slab:
2856 kmem_cache_destroy(prot->slab);
2857 prot->slab = NULL;
2858 out:
2859 return -ENOBUFS;
2860 }
2861 EXPORT_SYMBOL(proto_register);
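
/*
 * Illustrative sketch: protocol modules normally register their struct
 * proto at init time and pair it with proto_unregister() on exit;
 * example_proto is hypothetical.
 *
 *	err = proto_register(&example_proto, 1);
 *	if (err)
 *		return err;
 *	(register the address family, proto_ops, etc.)
 *	...
 *	proto_unregister(&example_proto);
 */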
2862
2863 void proto_unregister(struct proto *prot)
2864 {
2865 mutex_lock(&proto_list_mutex);
2866 release_proto_idx(prot);
2867 list_del(&prot->node);
2868 mutex_unlock(&proto_list_mutex);
2869
2870 if (prot->slab != NULL) {
2871 kmem_cache_destroy(prot->slab);
2872 prot->slab = NULL;
2873 }
2874
2875 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
2876 kmem_cache_destroy(prot->rsk_prot->slab);
2877 kfree(prot->rsk_prot->slab_name);
2878 prot->rsk_prot->slab = NULL;
2879 }
2880
2881 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
2882 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
2883 kfree(prot->twsk_prot->twsk_slab_name);
2884 prot->twsk_prot->twsk_slab = NULL;
2885 }
2886 }
2887 EXPORT_SYMBOL(proto_unregister);
2888
2889 #ifdef CONFIG_PROC_FS
2890 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2891 __acquires(proto_list_mutex)
2892 {
2893 mutex_lock(&proto_list_mutex);
2894 return seq_list_start_head(&proto_list, *pos);
2895 }
2896
2897 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2898 {
2899 return seq_list_next(v, &proto_list, pos);
2900 }
2901
2902 static void proto_seq_stop(struct seq_file *seq, void *v)
2903 __releases(proto_list_mutex)
2904 {
2905 mutex_unlock(&proto_list_mutex);
2906 }
2907
2908 static char proto_method_implemented(const void *method)
2909 {
2910 return method == NULL ? 'n' : 'y';
2911 }
2912 static long sock_prot_memory_allocated(struct proto *proto)
2913 {
2914 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
2915 }
2916
2917 static char *sock_prot_memory_pressure(struct proto *proto)
2918 {
2919 return proto->memory_pressure != NULL ?
2920 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2921 }
2922
2923 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2924 {
2925
2926 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
2927 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2928 proto->name,
2929 proto->obj_size,
2930 sock_prot_inuse_get(seq_file_net(seq), proto),
2931 sock_prot_memory_allocated(proto),
2932 sock_prot_memory_pressure(proto),
2933 proto->max_header,
2934 proto->slab == NULL ? "no" : "yes",
2935 module_name(proto->owner),
2936 proto_method_implemented(proto->close),
2937 proto_method_implemented(proto->connect),
2938 proto_method_implemented(proto->disconnect),
2939 proto_method_implemented(proto->accept),
2940 proto_method_implemented(proto->ioctl),
2941 proto_method_implemented(proto->init),
2942 proto_method_implemented(proto->destroy),
2943 proto_method_implemented(proto->shutdown),
2944 proto_method_implemented(proto->setsockopt),
2945 proto_method_implemented(proto->getsockopt),
2946 proto_method_implemented(proto->sendmsg),
2947 proto_method_implemented(proto->recvmsg),
2948 proto_method_implemented(proto->sendpage),
2949 proto_method_implemented(proto->bind),
2950 proto_method_implemented(proto->backlog_rcv),
2951 proto_method_implemented(proto->hash),
2952 proto_method_implemented(proto->unhash),
2953 proto_method_implemented(proto->get_port),
2954 proto_method_implemented(proto->enter_memory_pressure));
2955 }
2956
2957 static int proto_seq_show(struct seq_file *seq, void *v)
2958 {
2959 if (v == &proto_list)
2960 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2961 "protocol",
2962 "size",
2963 "sockets",
2964 "memory",
2965 "press",
2966 "maxhdr",
2967 "slab",
2968 "module",
2969 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2970 else
2971 proto_seq_printf(seq, list_entry(v, struct proto, node));
2972 return 0;
2973 }
2974
2975 static const struct seq_operations proto_seq_ops = {
2976 .start = proto_seq_start,
2977 .next = proto_seq_next,
2978 .stop = proto_seq_stop,
2979 .show = proto_seq_show,
2980 };
2981
2982 static int proto_seq_open(struct inode *inode, struct file *file)
2983 {
2984 return seq_open_net(inode, file, &proto_seq_ops,
2985 sizeof(struct seq_net_private));
2986 }
2987
2988 static const struct file_operations proto_seq_fops = {
2989 .owner = THIS_MODULE,
2990 .open = proto_seq_open,
2991 .read = seq_read,
2992 .llseek = seq_lseek,
2993 .release = seq_release_net,
2994 };
2995
2996 static __net_init int proto_init_net(struct net *net)
2997 {
2998 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
2999 return -ENOMEM;
3000
3001 return 0;
3002 }
3003
3004 static __net_exit void proto_exit_net(struct net *net)
3005 {
3006 remove_proc_entry("protocols", net->proc_net);
3007 }
3008
3009
3010 static __net_initdata struct pernet_operations proto_net_ops = {
3011 .init = proto_init_net,
3012 .exit = proto_exit_net,
3013 };
3014
3015 static int __init proto_init(void)
3016 {
3017 return register_pernet_subsys(&proto_net_ops);
3018 }
3019
3020 subsys_initcall(proto_init);
3021
3022 #endif /* PROC_FS */