1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Alan Cox, <A.Cox@swansea.ac.uk>
14 *
15 * Fixes:
16 * Alan Cox : Numerous verify_area() problems
17 * Alan Cox : Connecting on a connecting socket
18 * now returns an error for tcp.
19 * Alan Cox : sock->protocol is set correctly.
20 * and is not sometimes left as 0.
21 * Alan Cox : connect handles icmp errors on a
22 * connect properly. Unfortunately there
23 * is a restart syscall nasty there. I
24 * can't match BSD without hacking the C
25 * library. Ideas urgently sought!
26 * Alan Cox : Disallow bind() to addresses that are
27 * not ours - especially broadcast ones!!
28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
30 * instead they leave that for the DESTROY timer.
31 * Alan Cox : Clean up error flag in accept
32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
33 * was buggy. Put a remove_sock() in the handler
34 * for memory when we hit 0. Also altered the timer
35 * code. The ACK stuff can wait and needs major
36 * TCP layer surgery.
37 * Alan Cox : Fixed TCP ack bug, removed remove sock
38 * and fixed timer/inet_bh race.
39 * Alan Cox : Added zapped flag for TCP
40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45 * Rick Sladkey : Relaxed UDP rules for matching packets.
46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
47 * Pauline Middelink : identd support
48 * Alan Cox : Fixed connect() taking signals I think.
49 * Alan Cox : SO_LINGER supported
50 * Alan Cox : Error reporting fixes
51 * Anonymous : inet_create tidied up (sk->reuse setting)
52 * Alan Cox : inet sockets don't set sk->type!
53 * Alan Cox : Split socket option code
54 * Alan Cox : Callbacks
55 * Alan Cox : Nagle flag for Charles & Johannes stuff
56 * Alex : Removed restriction on inet fioctl
57 * Alan Cox : Splitting INET from NET core
58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
60 * Alan Cox : Split IP from generic code
61 * Alan Cox : New kfree_skbmem()
62 * Alan Cox : Make SO_DEBUG superuser only.
63 * Alan Cox : Allow anyone to clear SO_DEBUG
64 * (compatibility fix)
65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
66 * Alan Cox : Allocator for a socket is settable.
67 * Alan Cox : SO_ERROR includes soft errors.
68 * Alan Cox : Allow NULL arguments on some SO_ opts
69 * Alan Cox : Generic socket allocation to make hooks
70 * easier (suggested by Craig Metz).
71 * Michael Pall : SO_ERROR returns positive errno again
72 * Steve Whitehouse: Added default destructor to free
73 * protocol private data.
74 * Steve Whitehouse: Added various other default routines
75 * common to several socket families.
76 * Chris Evans : Call suser() check last on F_SETOWN
77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
79 * Andi Kleen : Fix write_space callback
80 * Chris Evans : Security fixes - signedness again
81 * Arnaldo C. Melo : cleanups, use skb_queue_purge
82 *
83 * To Fix:
84 *
85 *
86 * This program is free software; you can redistribute it and/or
87 * modify it under the terms of the GNU General Public License
88 * as published by the Free Software Foundation; either version
89 * 2 of the License, or (at your option) any later version.
90 */
91
92 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/types.h>
97 #include <linux/socket.h>
98 #include <linux/in.h>
99 #include <linux/kernel.h>
100 #include <linux/module.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/sched.h>
104 #include <linux/timer.h>
105 #include <linux/string.h>
106 #include <linux/sockios.h>
107 #include <linux/net.h>
108 #include <linux/mm.h>
109 #include <linux/slab.h>
110 #include <linux/interrupt.h>
111 #include <linux/poll.h>
112 #include <linux/tcp.h>
113 #include <linux/init.h>
114 #include <linux/highmem.h>
115 #include <linux/user_namespace.h>
116 #include <linux/static_key.h>
117 #include <linux/memcontrol.h>
118 #include <linux/prefetch.h>
119
120 #include <asm/uaccess.h>
121
122 #include <linux/netdevice.h>
123 #include <net/protocol.h>
124 #include <linux/skbuff.h>
125 #include <net/net_namespace.h>
126 #include <net/request_sock.h>
127 #include <net/sock.h>
128 #include <linux/net_tstamp.h>
129 #include <net/xfrm.h>
130 #include <linux/ipsec.h>
131 #include <net/cls_cgroup.h>
132 #include <net/netprio_cgroup.h>
133
134 #include <linux/filter.h>
135
136 #include <trace/events/sock.h>
137
138 #include <net/af_unix.h>
139
140
141 #ifdef CONFIG_INET
142 #include <net/tcp.h>
143 #endif
144 #include <linux/xlog.h>
145
146 static DEFINE_MUTEX(proto_list_mutex);
147 static LIST_HEAD(proto_list);
148
149 /**
150 * sk_ns_capable - General socket capability test
151 * @sk: Socket to use a capability on or through
152 * @user_ns: The user namespace of the capability to use
153 * @cap: The capability to use
154 *
155 * Test to see if the opener of the socket had the capability @cap when
156 * the socket was created and if the current process has the capability
157 * @cap in the user namespace @user_ns.
158 */
159 bool sk_ns_capable(const struct sock *sk,
160 struct user_namespace *user_ns, int cap)
161 {
162 return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
163 ns_capable(user_ns, cap);
164 }
165 EXPORT_SYMBOL(sk_ns_capable);
166
167 /**
168 * sk_capable - Socket global capability test
169 * @sk: Socket to use a capability on or through
170 * @cap: The global capability to use
171 *
172 * Test to see if the opener of the socket had the capability @cap when
173 * the socket was created and if the current process has the capability
174 * @cap in all user namespaces.
175 */
176 bool sk_capable(const struct sock *sk, int cap)
177 {
178 return sk_ns_capable(sk, &init_user_ns, cap);
179 }
180 EXPORT_SYMBOL(sk_capable);
181
182 /**
183 * sk_net_capable - Network namespace socket capability test
184 * @sk: Socket to use a capability on or through
185 * @cap: The capability to use
186 *
187 * Test to see if the opener of the socket had the capability @cap when the
188 * socket was created and if the current process has the capability @cap
189 * over the network namespace the socket is a member of.
190 */
191 bool sk_net_capable(const struct sock *sk, int cap)
192 {
193 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
194 }
195 EXPORT_SYMBOL(sk_net_capable);
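/*
 * Illustrative sketch (editorial, not part of the original file): protocol
 * code typically gates privileged operations on one of the helpers above,
 * for example:
 *
 *	if (!sk_net_capable(sk, CAP_NET_ADMIN))
 *		return -EPERM;
 *
 * The capability and error code naturally depend on the caller.
 */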
196
197
198 #ifdef CONFIG_MEMCG_KMEM
199 int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
200 {
201 struct proto *proto;
202 int ret = 0;
203
204 mutex_lock(&proto_list_mutex);
205 list_for_each_entry(proto, &proto_list, node) {
206 if (proto->init_cgroup) {
207 ret = proto->init_cgroup(memcg, ss);
208 if (ret)
209 goto out;
210 }
211 }
212
213 mutex_unlock(&proto_list_mutex);
214 return ret;
215 out:
216 list_for_each_entry_continue_reverse(proto, &proto_list, node)
217 if (proto->destroy_cgroup)
218 proto->destroy_cgroup(memcg);
219 mutex_unlock(&proto_list_mutex);
220 return ret;
221 }
222
223 void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
224 {
225 struct proto *proto;
226
227 mutex_lock(&proto_list_mutex);
228 list_for_each_entry_reverse(proto, &proto_list, node)
229 if (proto->destroy_cgroup)
230 proto->destroy_cgroup(memcg);
231 mutex_unlock(&proto_list_mutex);
232 }
233 #endif
234
235 /*
236 * Each address family might have different locking rules, so we have
237 * one slock key per address family:
238 */
239 static struct lock_class_key af_family_keys[AF_MAX];
240 static struct lock_class_key af_family_slock_keys[AF_MAX];
241
242 #if defined(CONFIG_MEMCG_KMEM)
243 struct static_key memcg_socket_limit_enabled;
244 EXPORT_SYMBOL(memcg_socket_limit_enabled);
245 #endif
246
247 /*
248 * Make lock validator output more readable. (we pre-construct these
249 * strings build-time, so that runtime initialization of socket
250 * locks is fast):
251 */
252 static const char *const af_family_key_strings[AF_MAX+1] = {
253 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
254 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
255 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
256 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
257 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
258 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
259 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
260 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
261 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
262 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
263 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
264 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
265 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
266 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
267 };
268 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
269 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
270 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
271 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
272 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
273 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
274 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
275 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
276 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
277 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
278 "slock-27" , "slock-28" , "slock-AF_CAN" ,
279 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
280 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
281 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
282 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
283 };
284 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
285 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
286 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
287 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
288 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
289 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
290 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
291 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
292 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
293 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
294 "clock-27" , "clock-28" , "clock-AF_CAN" ,
295 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
296 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
297 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
298 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
299 };
300
301 /*
302 * sk_callback_lock locking rules are per-address-family,
303 * so split the lock classes by using a per-AF key:
304 */
305 static struct lock_class_key af_callback_keys[AF_MAX];
306
307 /* Take into consideration the size of the struct sk_buff overhead in the
308 * determination of these values, since that is non-constant across
309 * platforms. This makes socket queueing behavior and performance
310 * not depend upon such differences.
311 */
312 #define _SK_MEM_PACKETS 256
313 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
314 #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
315 #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
316
317 /* Run time adjustable parameters. */
318 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
319 EXPORT_SYMBOL(sysctl_wmem_max);
320 __u32 sysctl_rmem_max __read_mostly = (SK_RMEM_MAX*8);
321 EXPORT_SYMBOL(sysctl_rmem_max);
322 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
323 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
324
325 /* Maximal space eaten by iovec or ancillary data plus some space */
326 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
327 EXPORT_SYMBOL(sysctl_optmem_max);
328
329 struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
330 EXPORT_SYMBOL_GPL(memalloc_socks);
331
332 /**
333 * sk_set_memalloc - sets %SOCK_MEMALLOC
334 * @sk: socket to set it on
335 *
336 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
337 * It's the responsibility of the admin to adjust min_free_kbytes
338 * to meet the requirements
339 */
340 void sk_set_memalloc(struct sock *sk)
341 {
342 sock_set_flag(sk, SOCK_MEMALLOC);
343 sk->sk_allocation |= __GFP_MEMALLOC;
344 static_key_slow_inc(&memalloc_socks);
345 }
346 EXPORT_SYMBOL_GPL(sk_set_memalloc);
347
348 void sk_clear_memalloc(struct sock *sk)
349 {
350 sock_reset_flag(sk, SOCK_MEMALLOC);
351 sk->sk_allocation &= ~__GFP_MEMALLOC;
352 static_key_slow_dec(&memalloc_socks);
353
354 /*
355 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
356 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
357 * it has rmem allocations there is a risk that the user of the
358 * socket cannot make forward progress due to exceeding the rmem
359 * limits. By rights, sk_clear_memalloc() should only be called
360 * on sockets being torn down but warn and reset the accounting if
361 * that assumption breaks.
362 */
363 if (WARN_ON(sk->sk_forward_alloc))
364 sk_mem_reclaim(sk);
365 }
366 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
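/*
 * Illustrative sketch (editorial, not part of the original file): a
 * subsystem that performs I/O over a kernel socket under memory pressure
 * (e.g. swap over a network block device) would pair these helpers around
 * the socket's lifetime, roughly:
 *
 *	sk_set_memalloc(sock->sk);
 *	... transfer pages, possibly dipping into the memalloc reserves ...
 *	sk_clear_memalloc(sock->sk);
 *
 * As the comment above notes, sk_clear_memalloc() is expected to run only
 * on sockets that are being torn down.
 */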
367
368 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
369 {
370 int ret;
371 unsigned long pflags = current->flags;
372
373 /* these should have been dropped before queueing */
374 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
375
376 current->flags |= PF_MEMALLOC;
377 ret = sk->sk_backlog_rcv(sk, skb);
378 tsk_restore_flags(current, pflags, PF_MEMALLOC);
379
380 return ret;
381 }
382 EXPORT_SYMBOL(__sk_backlog_rcv);
383
384 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
385 {
386 struct timeval tv;
387
388 if (optlen < sizeof(tv))
389 return -EINVAL;
390 if (copy_from_user(&tv, optval, sizeof(tv)))
391 return -EFAULT;
392 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
393 return -EDOM;
394
395 if (tv.tv_sec < 0) {
396 static int warned __read_mostly;
397
398 *timeo_p = 0;
399 if (warned < 10 && net_ratelimit()) {
400 warned++;
401 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
402 __func__, current->comm, task_pid_nr(current));
403 }
404 return 0;
405 }
406 *timeo_p = MAX_SCHEDULE_TIMEOUT;
407 if (tv.tv_sec == 0 && tv.tv_usec == 0)
408 return 0;
409 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
410 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
411 return 0;
412 }
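/*
 * For reference (userspace view, editorial addition): the timeout parsed
 * above is a plain struct timeval, e.g.
 *
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 *
 * A zero timeval means "block indefinitely"; a negative tv_sec is clamped
 * to zero with the rate-limited warning above.
 */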
413
414 static void sock_warn_obsolete_bsdism(const char *name)
415 {
416 static int warned;
417 static char warncomm[TASK_COMM_LEN];
418 if (strcmp(warncomm, current->comm) && warned < 5) {
419 strcpy(warncomm, current->comm);
420 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
421 warncomm, name);
422 warned++;
423 }
424 }
425
426 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
427 {
428 if (sk->sk_flags & flags) {
429 sk->sk_flags &= ~flags;
430 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
431 net_disable_timestamp();
432 }
433 }
434
435
436 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
437 {
438 int err;
439 int skb_len;
440 unsigned long flags;
441 struct sk_buff_head *list = &sk->sk_receive_queue;
442
443 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
444 atomic_inc(&sk->sk_drops);
445 trace_sock_rcvqueue_full(sk, skb);
446 return -ENOMEM;
447 }
448
449 err = sk_filter(sk, skb);
450 if (err)
451 return err;
452
453 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
454 atomic_inc(&sk->sk_drops);
455 return -ENOBUFS;
456 }
457
458 skb->dev = NULL;
459 skb_set_owner_r(skb, sk);
460
461 /* Cache the SKB length before we tack it onto the receive
462 * queue. Once it is added it no longer belongs to us and
463 * may be freed by other threads of control pulling packets
464 * from the queue.
465 */
466 skb_len = skb->len;
467
468 /* We escape from the RCU-protected region, so make sure we don't
469 * leak a non-refcounted dst.
470 */
471 skb_dst_force(skb);
472
473 spin_lock_irqsave(&list->lock, flags);
474 skb->dropcount = atomic_read(&sk->sk_drops);
475 __skb_queue_tail(list, skb);
476 spin_unlock_irqrestore(&list->lock, flags);
477
478 if (!sock_flag(sk, SOCK_DEAD))
479 sk->sk_data_ready(sk, skb_len);
480 return 0;
481 }
482 EXPORT_SYMBOL(sock_queue_rcv_skb);
483
484 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
485 {
486 int rc = NET_RX_SUCCESS;
487
488 if (sk_filter(sk, skb))
489 goto discard_and_relse;
490
491 skb->dev = NULL;
492
493 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
494 atomic_inc(&sk->sk_drops);
495 goto discard_and_relse;
496 }
497 if (nested)
498 bh_lock_sock_nested(sk);
499 else
500 bh_lock_sock(sk);
501 if (!sock_owned_by_user(sk)) {
502 /*
503 * trylock + unlock semantics:
504 */
505 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
506
507 rc = sk_backlog_rcv(sk, skb);
508
509 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
510 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
511 bh_unlock_sock(sk);
512 atomic_inc(&sk->sk_drops);
513 goto discard_and_relse;
514 }
515
516 bh_unlock_sock(sk);
517 out:
518 sock_put(sk);
519 return rc;
520 discard_and_relse:
521 kfree_skb(skb);
522 goto out;
523 }
524 EXPORT_SYMBOL(sk_receive_skb);
525
526 void sk_reset_txq(struct sock *sk)
527 {
528 sk_tx_queue_clear(sk);
529 }
530 EXPORT_SYMBOL(sk_reset_txq);
531
532 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
533 {
534 struct dst_entry *dst = __sk_dst_get(sk);
535
536 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
537 sk_tx_queue_clear(sk);
538 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
539 dst_release(dst);
540 return NULL;
541 }
542
543 return dst;
544 }
545 EXPORT_SYMBOL(__sk_dst_check);
546
547 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
548 {
549 struct dst_entry *dst = sk_dst_get(sk);
550
551 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
552 sk_dst_reset(sk);
553 dst_release(dst);
554 return NULL;
555 }
556
557 return dst;
558 }
559 EXPORT_SYMBOL(sk_dst_check);
560
561 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
562 int optlen)
563 {
564 int ret = -ENOPROTOOPT;
565 #ifdef CONFIG_NETDEVICES
566 struct net *net = sock_net(sk);
567 char devname[IFNAMSIZ];
568 int index;
569
570 /* Sorry... */
571 ret = -EPERM;
572 if (!ns_capable(net->user_ns, CAP_NET_RAW))
573 goto out;
574
575 ret = -EINVAL;
576 if (optlen < 0)
577 goto out;
578
579 /* Bind this socket to a particular device like "eth0",
580 * as specified in the passed interface name. If the
581 * name is "" or the option length is zero the socket
582 * is not bound.
583 */
584 if (optlen > IFNAMSIZ - 1)
585 optlen = IFNAMSIZ - 1;
586 memset(devname, 0, sizeof(devname));
587
588 ret = -EFAULT;
589 if (copy_from_user(devname, optval, optlen))
590 goto out;
591
592 index = 0;
593 if (devname[0] != '\0') {
594 struct net_device *dev;
595
596 rcu_read_lock();
597 dev = dev_get_by_name_rcu(net, devname);
598 if (dev)
599 index = dev->ifindex;
600 rcu_read_unlock();
601 ret = -ENODEV;
602 if (!dev)
603 goto out;
604 }
605
606 lock_sock(sk);
607 sk->sk_bound_dev_if = index;
608 sk_dst_reset(sk);
609 release_sock(sk);
610
611 ret = 0;
612
613 out:
614 #endif
615
616 return ret;
617 }
618
619 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
620 int __user *optlen, int len)
621 {
622 int ret = -ENOPROTOOPT;
623 #ifdef CONFIG_NETDEVICES
624 struct net *net = sock_net(sk);
625 char devname[IFNAMSIZ];
626
627 if (sk->sk_bound_dev_if == 0) {
628 len = 0;
629 goto zero;
630 }
631
632 ret = -EINVAL;
633 if (len < IFNAMSIZ)
634 goto out;
635
636 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
637 if (ret)
638 goto out;
639
640 len = strlen(devname) + 1;
641
642 ret = -EFAULT;
643 if (copy_to_user(optval, devname, len))
644 goto out;
645
646 zero:
647 ret = -EFAULT;
648 if (put_user(len, optlen))
649 goto out;
650
651 ret = 0;
652
653 out:
654 #endif
655
656 return ret;
657 }
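/*
 * For reference (userspace view, editorial addition): the two helpers above
 * back the SO_BINDTODEVICE option, e.g.
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", strlen("eth0") + 1);
 *
 * The caller needs CAP_NET_RAW in the socket's network namespace, and an
 * empty name (or zero length) unbinds the socket from any device.
 */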
658
659 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
660 {
661 if (valbool)
662 sock_set_flag(sk, bit);
663 else
664 sock_reset_flag(sk, bit);
665 }
666
667 /*
668 * This is meant for all protocols to use and covers goings on
669 * at the socket level. Everything here is generic.
670 */
671
672 int sock_setsockopt(struct socket *sock, int level, int optname,
673 char __user *optval, unsigned int optlen)
674 {
675 struct sock *sk = sock->sk;
676 int val;
677 int valbool;
678 struct linger ling;
679 int ret = 0;
680
681 /*
682 * Options without arguments
683 */
684
685 if (optname == SO_BINDTODEVICE)
686 return sock_setbindtodevice(sk, optval, optlen);
687
688 if (optlen < sizeof(int))
689 return -EINVAL;
690
691 if (get_user(val, (int __user *)optval))
692 return -EFAULT;
693
694 valbool = val ? 1 : 0;
695
696 lock_sock(sk);
697
698 switch (optname) {
699 case SO_DEBUG:
700 if (val && !capable(CAP_NET_ADMIN))
701 ret = -EACCES;
702 else
703 sock_valbool_flag(sk, SOCK_DBG, valbool);
704 break;
705 case SO_REUSEADDR:
706 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
707 break;
708 case SO_REUSEPORT:
709 sk->sk_reuseport = valbool;
710 break;
711 case SO_TYPE:
712 case SO_PROTOCOL:
713 case SO_DOMAIN:
714 case SO_ERROR:
715 ret = -ENOPROTOOPT;
716 break;
717 case SO_DONTROUTE:
718 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
719 break;
720 case SO_BROADCAST:
721 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
722 break;
723 case SO_SNDBUF:
724 /* Don't return an error on this; BSD doesn't, and if you
725 * think about it, this is right. Otherwise apps have to
726 * play 'guess the biggest size' games. RCVBUF/SNDBUF
727 * are treated in BSD as hints.
728 */
729 val = min_t(u32, val, sysctl_wmem_max);
730 set_sndbuf:
731 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
732 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
733 /* Wake up sending tasks if we upped the value. */
734 sk->sk_write_space(sk);
735 break;
736
737 case SO_SNDBUFFORCE:
738 if (!capable(CAP_NET_ADMIN)) {
739 ret = -EPERM;
740 break;
741 }
742 goto set_sndbuf;
743
744 case SO_RCVBUF:
745 /* Don't return an error on this; BSD doesn't, and if you
746 * think about it, this is right. Otherwise apps have to
747 * play 'guess the biggest size' games. RCVBUF/SNDBUF
748 * are treated in BSD as hints.
749 */
750 val = min_t(u32, val, sysctl_rmem_max);
751 set_rcvbuf:
752 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
753 /*
754 * We double it on the way in to account for
755 * "struct sk_buff" etc. overhead. Applications
756 * assume that the SO_RCVBUF setting they make will
757 * allow that much actual data to be received on that
758 * socket.
759 *
760 * Applications are unaware that "struct sk_buff" and
761 * other overheads allocate from the receive buffer
762 * during socket buffer allocation.
763 *
764 * And after considering the possible alternatives,
765 * returning the value we actually used in getsockopt
766 * is the most desirable behavior.
767 */
768 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
769 break;
770
771 case SO_RCVBUFFORCE:
772 if (!capable(CAP_NET_ADMIN)) {
773 ret = -EPERM;
774 break;
775 }
776 goto set_rcvbuf;
777
778 case SO_KEEPALIVE:
779 #ifdef CONFIG_INET
780 if (sk->sk_protocol == IPPROTO_TCP &&
781 sk->sk_type == SOCK_STREAM)
782 tcp_set_keepalive(sk, valbool);
783 #endif
784 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
785 break;
786
787 case SO_OOBINLINE:
788 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
789 break;
790
791 case SO_NO_CHECK:
792 sk->sk_no_check = valbool;
793 break;
794
795 case SO_PRIORITY:
796 if ((val >= 0 && val <= 6) ||
797 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
798 sk->sk_priority = val;
799 else
800 ret = -EPERM;
801 break;
802
803 case SO_LINGER:
804 if (optlen < sizeof(ling)) {
805 ret = -EINVAL; /* 1003.1g */
806 break;
807 }
808 if (copy_from_user(&ling, optval, sizeof(ling))) {
809 ret = -EFAULT;
810 break;
811 }
812 if (!ling.l_onoff)
813 sock_reset_flag(sk, SOCK_LINGER);
814 else {
815 #if (BITS_PER_LONG == 32)
816 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
817 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
818 else
819 #endif
820 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
821 sock_set_flag(sk, SOCK_LINGER);
822 }
823 break;
824
825 case SO_BSDCOMPAT:
826 sock_warn_obsolete_bsdism("setsockopt");
827 break;
828
829 case SO_PASSCRED:
830 if (valbool)
831 set_bit(SOCK_PASSCRED, &sock->flags);
832 else
833 clear_bit(SOCK_PASSCRED, &sock->flags);
834 break;
835
836 case SO_TIMESTAMP:
837 case SO_TIMESTAMPNS:
838 if (valbool) {
839 if (optname == SO_TIMESTAMP)
840 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
841 else
842 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
843 sock_set_flag(sk, SOCK_RCVTSTAMP);
844 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
845 } else {
846 sock_reset_flag(sk, SOCK_RCVTSTAMP);
847 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
848 }
849 break;
850
851 case SO_TIMESTAMPING:
852 if (val & ~SOF_TIMESTAMPING_MASK) {
853 ret = -EINVAL;
854 break;
855 }
856 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
857 val & SOF_TIMESTAMPING_TX_HARDWARE);
858 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
859 val & SOF_TIMESTAMPING_TX_SOFTWARE);
860 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
861 val & SOF_TIMESTAMPING_RX_HARDWARE);
862 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
863 sock_enable_timestamp(sk,
864 SOCK_TIMESTAMPING_RX_SOFTWARE);
865 else
866 sock_disable_timestamp(sk,
867 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
868 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
869 val & SOF_TIMESTAMPING_SOFTWARE);
870 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
871 val & SOF_TIMESTAMPING_SYS_HARDWARE);
872 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
873 val & SOF_TIMESTAMPING_RAW_HARDWARE);
874 break;
875
876 case SO_RCVLOWAT:
877 if (val < 0)
878 val = INT_MAX;
879 sk->sk_rcvlowat = val ? : 1;
880 break;
881
882 case SO_RCVTIMEO:
883 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
884 break;
885
886 case SO_SNDTIMEO:
887 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
888 break;
889
890 case SO_ATTACH_FILTER:
891 ret = -EINVAL;
892 if (optlen == sizeof(struct sock_fprog)) {
893 struct sock_fprog fprog;
894
895 ret = -EFAULT;
896 if (copy_from_user(&fprog, optval, sizeof(fprog)))
897 break;
898
899 ret = sk_attach_filter(&fprog, sk);
900 }
901 break;
902
903 case SO_DETACH_FILTER:
904 ret = sk_detach_filter(sk);
905 break;
906
907 case SO_LOCK_FILTER:
908 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
909 ret = -EPERM;
910 else
911 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
912 break;
913
914 case SO_PASSSEC:
915 if (valbool)
916 set_bit(SOCK_PASSSEC, &sock->flags);
917 else
918 clear_bit(SOCK_PASSSEC, &sock->flags);
919 break;
920 case SO_MARK:
921 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
922 ret = -EPERM;
923 else
924 sk->sk_mark = val;
925 break;
926
927 /* We implement the SO_SNDLOWAT etc to
928 not be settable (1003.1g 5.3) */
929 case SO_RXQ_OVFL:
930 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
931 break;
932
933 case SO_WIFI_STATUS:
934 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
935 break;
936
937 case SO_PEEK_OFF:
938 if (sock->ops->set_peek_off)
939 ret = sock->ops->set_peek_off(sk, val);
940 else
941 ret = -EOPNOTSUPP;
942 break;
943
944 case SO_NOFCS:
945 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
946 break;
947
948 case SO_SELECT_ERR_QUEUE:
949 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
950 break;
951
952 default:
953 ret = -ENOPROTOOPT;
954 break;
955 }
956 release_sock(sk);
957 return ret;
958 }
959 EXPORT_SYMBOL(sock_setsockopt);
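/*
 * For reference (userspace view, editorial addition): SO_SNDBUF/SO_RCVBUF
 * values are doubled on the way in to cover struct sk_buff overhead, so a
 * read-back returns roughly twice the requested value (after capping at
 * sysctl_wmem_max/sysctl_rmem_max):
 *
 *	int val = 65536;
 *	socklen_t len = sizeof(val);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, &len);
 *
 * after which val reads back as about 131072.
 */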
960
961
962 void cred_to_ucred(struct pid *pid, const struct cred *cred,
963 struct ucred *ucred)
964 {
965 ucred->pid = pid_vnr(pid);
966 ucred->uid = ucred->gid = -1;
967 if (cred) {
968 struct user_namespace *current_ns = current_user_ns();
969
970 ucred->uid = from_kuid_munged(current_ns, cred->euid);
971 ucred->gid = from_kgid_munged(current_ns, cred->egid);
972 }
973 }
974 EXPORT_SYMBOL_GPL(cred_to_ucred);
975
976 int sock_getsockopt(struct socket *sock, int level, int optname,
977 char __user *optval, int __user *optlen)
978 {
979 struct sock *sk = sock->sk;
980
981 union {
982 int val;
983 struct linger ling;
984 struct timeval tm;
985 } v;
986
987 int lv = sizeof(int);
988 int len;
989
990 if (get_user(len, optlen))
991 return -EFAULT;
992 if (len < 0)
993 return -EINVAL;
994
995 memset(&v, 0, sizeof(v));
996
997 switch (optname) {
998 case SO_DEBUG:
999 v.val = sock_flag(sk, SOCK_DBG);
1000 break;
1001
1002 case SO_DONTROUTE:
1003 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1004 break;
1005
1006 case SO_BROADCAST:
1007 v.val = sock_flag(sk, SOCK_BROADCAST);
1008 break;
1009
1010 case SO_SNDBUF:
1011 v.val = sk->sk_sndbuf;
1012 break;
1013
1014 case SO_RCVBUF:
1015 v.val = sk->sk_rcvbuf;
1016 break;
1017
1018 case SO_REUSEADDR:
1019 v.val = sk->sk_reuse;
1020 break;
1021
1022 case SO_REUSEPORT:
1023 v.val = sk->sk_reuseport;
1024 break;
1025
1026 case SO_KEEPALIVE:
1027 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1028 break;
1029
1030 case SO_TYPE:
1031 v.val = sk->sk_type;
1032 break;
1033
1034 case SO_PROTOCOL:
1035 v.val = sk->sk_protocol;
1036 break;
1037
1038 case SO_DOMAIN:
1039 v.val = sk->sk_family;
1040 break;
1041
1042 case SO_ERROR:
1043 v.val = -sock_error(sk);
1044 if (v.val == 0)
1045 v.val = xchg(&sk->sk_err_soft, 0);
1046 break;
1047
1048 case SO_OOBINLINE:
1049 v.val = sock_flag(sk, SOCK_URGINLINE);
1050 break;
1051
1052 case SO_NO_CHECK:
1053 v.val = sk->sk_no_check;
1054 break;
1055
1056 case SO_PRIORITY:
1057 v.val = sk->sk_priority;
1058 break;
1059
1060 case SO_LINGER:
1061 lv = sizeof(v.ling);
1062 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1063 v.ling.l_linger = sk->sk_lingertime / HZ;
1064 break;
1065
1066 case SO_BSDCOMPAT:
1067 sock_warn_obsolete_bsdism("getsockopt");
1068 break;
1069
1070 case SO_TIMESTAMP:
1071 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1072 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1073 break;
1074
1075 case SO_TIMESTAMPNS:
1076 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1077 break;
1078
1079 case SO_TIMESTAMPING:
1080 v.val = 0;
1081 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
1082 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
1083 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
1084 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
1085 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
1086 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
1087 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1088 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
1089 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
1090 v.val |= SOF_TIMESTAMPING_SOFTWARE;
1091 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
1092 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
1093 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
1094 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
1095 break;
1096
1097 case SO_RCVTIMEO:
1098 lv = sizeof(struct timeval);
1099 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1100 v.tm.tv_sec = 0;
1101 v.tm.tv_usec = 0;
1102 } else {
1103 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1104 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1105 }
1106 break;
1107
1108 case SO_SNDTIMEO:
1109 lv = sizeof(struct timeval);
1110 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1111 v.tm.tv_sec = 0;
1112 v.tm.tv_usec = 0;
1113 } else {
1114 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1115 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1116 }
1117 break;
1118
1119 case SO_RCVLOWAT:
1120 v.val = sk->sk_rcvlowat;
1121 break;
1122
1123 case SO_SNDLOWAT:
1124 v.val = 1;
1125 break;
1126
1127 case SO_PASSCRED:
1128 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1129 break;
1130
1131 case SO_PEERCRED:
1132 {
1133 struct ucred peercred;
1134 if (len > sizeof(peercred))
1135 len = sizeof(peercred);
1136 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1137 if (copy_to_user(optval, &peercred, len))
1138 return -EFAULT;
1139 goto lenout;
1140 }
1141
1142 case SO_PEERNAME:
1143 {
1144 char address[128];
1145
1146 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1147 return -ENOTCONN;
1148 if (lv < len)
1149 return -EINVAL;
1150 if (copy_to_user(optval, address, len))
1151 return -EFAULT;
1152 goto lenout;
1153 }
1154
1155 /* Dubious BSD thing... Probably nobody even uses it, but
1156 * the UNIX standard wants it for whatever reason... -DaveM
1157 */
1158 case SO_ACCEPTCONN:
1159 v.val = sk->sk_state == TCP_LISTEN;
1160 break;
1161
1162 case SO_PASSSEC:
1163 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1164 break;
1165
1166 case SO_PEERSEC:
1167 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1168
1169 case SO_MARK:
1170 v.val = sk->sk_mark;
1171 break;
1172
1173 case SO_RXQ_OVFL:
1174 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1175 break;
1176
1177 case SO_WIFI_STATUS:
1178 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1179 break;
1180
1181 case SO_PEEK_OFF:
1182 if (!sock->ops->set_peek_off)
1183 return -EOPNOTSUPP;
1184
1185 v.val = sk->sk_peek_off;
1186 break;
1187 case SO_NOFCS:
1188 v.val = sock_flag(sk, SOCK_NOFCS);
1189 break;
1190
1191 case SO_BINDTODEVICE:
1192 return sock_getbindtodevice(sk, optval, optlen, len);
1193
1194 case SO_GET_FILTER:
1195 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1196 if (len < 0)
1197 return len;
1198
1199 goto lenout;
1200
1201 case SO_LOCK_FILTER:
1202 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1203 break;
1204
1205 case SO_SELECT_ERR_QUEUE:
1206 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1207 break;
1208
1209 default:
1210 return -ENOPROTOOPT;
1211 }
1212
1213 if (len > lv)
1214 len = lv;
1215 if (copy_to_user(optval, &v, len))
1216 return -EFAULT;
1217 lenout:
1218 if (put_user(len, optlen))
1219 return -EFAULT;
1220 return 0;
1221 }
1222
1223 /*
1224 * Initialize an sk_lock.
1225 *
1226 * (We also register the sk_lock with the lock validator.)
1227 */
1228 static inline void sock_lock_init(struct sock *sk)
1229 {
1230 sock_lock_init_class_and_name(sk,
1231 af_family_slock_key_strings[sk->sk_family],
1232 af_family_slock_keys + sk->sk_family,
1233 af_family_key_strings[sk->sk_family],
1234 af_family_keys + sk->sk_family);
1235 }
1236
1237 /*
1238 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1239 * even temporarily, because of RCU lookups. sk_node should also be left as is.
1240 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
1241 */
1242 static void sock_copy(struct sock *nsk, const struct sock *osk)
1243 {
1244 #ifdef CONFIG_SECURITY_NETWORK
1245 void *sptr = nsk->sk_security;
1246 #endif
1247 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1248
1249 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1250 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1251
1252 #ifdef CONFIG_SECURITY_NETWORK
1253 nsk->sk_security = sptr;
1254 security_sk_clone(osk, nsk);
1255 #endif
1256 }
1257
1258 void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1259 {
1260 unsigned long nulls1, nulls2;
1261
1262 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1263 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1264 if (nulls1 > nulls2)
1265 swap(nulls1, nulls2);
1266
1267 if (nulls1 != 0)
1268 memset((char *)sk, 0, nulls1);
1269 memset((char *)sk + nulls1 + sizeof(void *), 0,
1270 nulls2 - nulls1 - sizeof(void *));
1271 memset((char *)sk + nulls2 + sizeof(void *), 0,
1272 size - nulls2 - sizeof(void *));
1273 }
1274 EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1275
1276 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1277 int family)
1278 {
1279 struct sock *sk;
1280 struct kmem_cache *slab;
1281
1282 slab = prot->slab;
1283 if (slab != NULL) {
1284 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1285 if (!sk)
1286 return sk;
1287 if (priority & __GFP_ZERO) {
1288 if (prot->clear_sk)
1289 prot->clear_sk(sk, prot->obj_size);
1290 else
1291 sk_prot_clear_nulls(sk, prot->obj_size);
1292 }
1293 } else
1294 sk = kmalloc(prot->obj_size, priority);
1295
1296 if (sk != NULL) {
1297 kmemcheck_annotate_bitfield(sk, flags);
1298
1299 if (security_sk_alloc(sk, family, priority))
1300 goto out_free;
1301
1302 if (!try_module_get(prot->owner))
1303 goto out_free_sec;
1304 sk_tx_queue_clear(sk);
1305 }
1306
1307 return sk;
1308
1309 out_free_sec:
1310 security_sk_free(sk);
1311 out_free:
1312 if (slab != NULL)
1313 kmem_cache_free(slab, sk);
1314 else
1315 kfree(sk);
1316 return NULL;
1317 }
1318
1319 static void sk_prot_free(struct proto *prot, struct sock *sk)
1320 {
1321 struct kmem_cache *slab;
1322 struct module *owner;
1323
1324 owner = prot->owner;
1325 slab = prot->slab;
1326
1327 security_sk_free(sk);
1328 if (slab != NULL)
1329 kmem_cache_free(slab, sk);
1330 else
1331 kfree(sk);
1332 module_put(owner);
1333 }
1334
1335 #if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1336 void sock_update_classid(struct sock *sk)
1337 {
1338 u32 classid;
1339
1340 classid = task_cls_classid(current);
1341 if (classid != sk->sk_classid)
1342 sk->sk_classid = classid;
1343 }
1344 EXPORT_SYMBOL(sock_update_classid);
1345 #endif
1346
1347 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1348 void sock_update_netprioidx(struct sock *sk)
1349 {
1350 if (in_interrupt())
1351 return;
1352
1353 sk->sk_cgrp_prioidx = task_netprioidx(current);
1354 }
1355 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1356 #endif
1357
1358 /**
1359 * sk_alloc - All socket objects are allocated here
1360 * @net: the applicable net namespace
1361 * @family: protocol family
1362 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1363 * @prot: struct proto associated with this new sock instance
1364 */
1365 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1366 struct proto *prot)
1367 {
1368 struct sock *sk;
1369
1370 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1371 if (sk) {
1372 sk->sk_family = family;
1373 /*
1374 * See comment in struct sock definition to understand
1375 * why we need sk_prot_creator -acme
1376 */
1377 sk->sk_prot = sk->sk_prot_creator = prot;
1378 sock_lock_init(sk);
1379 sock_net_set(sk, get_net(net));
1380 atomic_set(&sk->sk_wmem_alloc, 1);
1381
1382 sock_update_classid(sk);
1383 sock_update_netprioidx(sk);
1384 }
1385
1386 return sk;
1387 }
1388 EXPORT_SYMBOL(sk_alloc);
1389
1390 static void __sk_free(struct sock *sk)
1391 {
1392 struct sk_filter *filter;
1393
1394 if (sk->sk_destruct)
1395 sk->sk_destruct(sk);
1396
1397 filter = rcu_dereference_check(sk->sk_filter,
1398 atomic_read(&sk->sk_wmem_alloc) == 0);
1399 if (filter) {
1400 sk_filter_uncharge(sk, filter);
1401 RCU_INIT_POINTER(sk->sk_filter, NULL);
1402 }
1403
1404 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1405
1406 if (atomic_read(&sk->sk_omem_alloc))
1407 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1408 __func__, atomic_read(&sk->sk_omem_alloc));
1409
1410 if (sk->sk_frag.page) {
1411 put_page(sk->sk_frag.page);
1412 sk->sk_frag.page = NULL;
1413 }
1414
1415 if (sk->sk_peer_cred)
1416 put_cred(sk->sk_peer_cred);
1417 put_pid(sk->sk_peer_pid);
1418 put_net(sock_net(sk));
1419 sk_prot_free(sk->sk_prot_creator, sk);
1420 }
1421
1422 void sk_free(struct sock *sk)
1423 {
1424 /*
1425 * We subtract one from sk_wmem_alloc so we can tell whether
1426 * some packets are still in some tx queue.
1427 * If it is not zero, sock_wfree() will call __sk_free(sk) later.
1428 */
1429 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1430 __sk_free(sk);
1431 }
1432 EXPORT_SYMBOL(sk_free);
1433
1434 /*
1435 * Last sock_put should drop reference to sk->sk_net. It has already
1436 * been dropped in sk_change_net. Taking reference to stopping namespace
1437 * is not an option.
1438 * Take reference to a socket to remove it from hash _alive_ and after that
1439 * destroy it in the context of init_net.
1440 */
1441 void sk_release_kernel(struct sock *sk)
1442 {
1443 if (sk == NULL || sk->sk_socket == NULL)
1444 return;
1445
1446 sock_hold(sk);
1447 sock_release(sk->sk_socket);
1448 release_net(sock_net(sk));
1449 sock_net_set(sk, get_net(&init_net));
1450 sock_put(sk);
1451 }
1452 EXPORT_SYMBOL(sk_release_kernel);
1453
1454 static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1455 {
1456 if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1457 sock_update_memcg(newsk);
1458 }
1459
1460 /**
1461 * sk_clone_lock - clone a socket, and lock its clone
1462 * @sk: the socket to clone
1463 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1464 *
1465 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1466 */
1467 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1468 {
1469 struct sock *newsk;
1470
1471 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1472 if (newsk != NULL) {
1473 struct sk_filter *filter;
1474
1475 sock_copy(newsk, sk);
1476
1477 /* SANITY */
1478 get_net(sock_net(newsk));
1479 sk_node_init(&newsk->sk_node);
1480 sock_lock_init(newsk);
1481 bh_lock_sock(newsk);
1482 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1483 newsk->sk_backlog.len = 0;
1484
1485 atomic_set(&newsk->sk_rmem_alloc, 0);
1486 /*
1487 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1488 */
1489 atomic_set(&newsk->sk_wmem_alloc, 1);
1490 atomic_set(&newsk->sk_omem_alloc, 0);
1491 skb_queue_head_init(&newsk->sk_receive_queue);
1492 skb_queue_head_init(&newsk->sk_write_queue);
1493 #ifdef CONFIG_NET_DMA
1494 skb_queue_head_init(&newsk->sk_async_wait_queue);
1495 #endif
1496
1497 spin_lock_init(&newsk->sk_dst_lock);
1498 rwlock_init(&newsk->sk_callback_lock);
1499 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1500 af_callback_keys + newsk->sk_family,
1501 af_family_clock_key_strings[newsk->sk_family]);
1502
1503 newsk->sk_dst_cache = NULL;
1504 newsk->sk_wmem_queued = 0;
1505 newsk->sk_forward_alloc = 0;
1506 newsk->sk_send_head = NULL;
1507 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1508
1509 sock_reset_flag(newsk, SOCK_DONE);
1510 skb_queue_head_init(&newsk->sk_error_queue);
1511
1512 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1513 if (filter != NULL)
1514 sk_filter_charge(newsk, filter);
1515
1516 if (unlikely(xfrm_sk_clone_policy(newsk))) {
1517 /* It is still raw copy of parent, so invalidate
1518 * destructor and make plain sk_free() */
1519 newsk->sk_destruct = NULL;
1520 bh_unlock_sock(newsk);
1521 sk_free(newsk);
1522 newsk = NULL;
1523 goto out;
1524 }
1525
1526 newsk->sk_err = 0;
1527 newsk->sk_err_soft = 0;
1528 newsk->sk_priority = 0;
1529 /*
1530 * Before updating sk_refcnt, we must commit prior changes to memory
1531 * (Documentation/RCU/rculist_nulls.txt for details)
1532 */
1533 smp_wmb();
1534 atomic_set(&newsk->sk_refcnt, 2);
1535
1536 /*
1537 * Increment the counter in the same struct proto as the master
1538 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1539 * is the same as sk->sk_prot->socks, as this field was copied
1540 * with memcpy).
1541 *
1542 * This _changes_ the previous behaviour, where
1543 * tcp_create_openreq_child always was incrementing the
1544 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
1545 * to be taken into account in all callers. -acme
1546 */
1547 sk_refcnt_debug_inc(newsk);
1548 sk_set_socket(newsk, NULL);
1549 newsk->sk_wq = NULL;
1550
1551 sk_update_clone(sk, newsk);
1552
1553 if (newsk->sk_prot->sockets_allocated)
1554 sk_sockets_allocated_inc(newsk);
1555
1556 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1557 net_enable_timestamp();
1558 }
1559 out:
1560 return newsk;
1561 }
1562 EXPORT_SYMBOL_GPL(sk_clone_lock);
1563
1564 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1565 {
1566 __sk_dst_set(sk, dst);
1567 sk->sk_route_caps = dst->dev->features;
1568 if (sk->sk_route_caps & NETIF_F_GSO)
1569 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1570 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1571 if (sk_can_gso(sk)) {
1572 if (dst->header_len) {
1573 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1574 } else {
1575 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1576 sk->sk_gso_max_size = dst->dev->gso_max_size;
1577 sk->sk_gso_max_segs = dst->dev->gso_max_segs;
1578 }
1579 }
1580 }
1581 EXPORT_SYMBOL_GPL(sk_setup_caps);
1582
1583 /*
1584 * Simple resource managers for sockets.
1585 */
1586
1587
1588 /*
1589 * Write buffer destructor automatically called from kfree_skb.
1590 */
1591 void sock_wfree(struct sk_buff *skb)
1592 {
1593 struct sock *sk = skb->sk;
1594 unsigned int len = skb->truesize;
1595
1596 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1597 /*
1598 * Keep a reference on sk_wmem_alloc, this will be released
1599 * after sk_write_space() call
1600 */
1601 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1602 sk->sk_write_space(sk);
1603 len = 1;
1604 }
1605 /*
1606 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1607 * could not do because of in-flight packets
1608 */
1609 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1610 __sk_free(sk);
1611 }
1612 EXPORT_SYMBOL(sock_wfree);
1613
1614 /*
1615 * Read buffer destructor automatically called from kfree_skb.
1616 */
1617 void sock_rfree(struct sk_buff *skb)
1618 {
1619 struct sock *sk = skb->sk;
1620 unsigned int len = skb->truesize;
1621
1622 atomic_sub(len, &sk->sk_rmem_alloc);
1623 sk_mem_uncharge(sk, len);
1624 }
1625 EXPORT_SYMBOL(sock_rfree);
1626
1627 void sock_edemux(struct sk_buff *skb)
1628 {
1629 struct sock *sk = skb->sk;
1630
1631 #ifdef CONFIG_INET
1632 if (sk->sk_state == TCP_TIME_WAIT)
1633 inet_twsk_put(inet_twsk(sk));
1634 else
1635 #endif
1636 sock_put(sk);
1637 }
1638 EXPORT_SYMBOL(sock_edemux);
1639
1640 kuid_t sock_i_uid(struct sock *sk)
1641 {
1642 kuid_t uid;
1643
1644 /*mtk_net: fix kernel bug*/
1645 if (!sk) {
1646 pr_info("sk == NULL for sock_i_uid\n");
1647 return GLOBAL_ROOT_UID;
1648 }
1649
1650 read_lock_bh(&sk->sk_callback_lock);
1651 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1652 read_unlock_bh(&sk->sk_callback_lock);
1653 return uid;
1654 }
1655 EXPORT_SYMBOL(sock_i_uid);
1656
1657 unsigned long sock_i_ino(struct sock *sk)
1658 {
1659 unsigned long ino;
1660
1661 read_lock_bh(&sk->sk_callback_lock);
1662 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1663 read_unlock_bh(&sk->sk_callback_lock);
1664 return ino;
1665 }
1666 EXPORT_SYMBOL(sock_i_ino);
1667
1668 /*
1669 * Allocate a skb from the socket's send buffer.
1670 */
1671 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1672 gfp_t priority)
1673 {
1674 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1675 struct sk_buff *skb = alloc_skb(size, priority);
1676 if (skb) {
1677 skb_set_owner_w(skb, sk);
1678 return skb;
1679 }
1680 }
1681 return NULL;
1682 }
1683 EXPORT_SYMBOL(sock_wmalloc);
1684
1685 /*
1686 * Allocate a skb from the socket's receive buffer.
1687 */
1688 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1689 gfp_t priority)
1690 {
1691 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1692 struct sk_buff *skb = alloc_skb(size, priority);
1693 if (skb) {
1694 skb_set_owner_r(skb, sk);
1695 return skb;
1696 }
1697 }
1698 return NULL;
1699 }
1700
1701 /*
1702 * Allocate a memory block from the socket's option memory buffer.
1703 */
1704 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1705 {
1706 if ((unsigned int)size <= sysctl_optmem_max &&
1707 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1708 void *mem;
1709 /* First do the add, to avoid the race if kmalloc
1710 * might sleep.
1711 */
1712 atomic_add(size, &sk->sk_omem_alloc);
1713 mem = kmalloc(size, priority);
1714 if (mem)
1715 return mem;
1716 atomic_sub(size, &sk->sk_omem_alloc);
1717 }
1718 return NULL;
1719 }
1720 EXPORT_SYMBOL(sock_kmalloc);
1721
1722 /*
1723 * Free an option memory block.
1724 */
1725 void sock_kfree_s(struct sock *sk, void *mem, int size)
1726 {
1727 kfree(mem);
1728 atomic_sub(size, &sk->sk_omem_alloc);
1729 }
1730 EXPORT_SYMBOL(sock_kfree_s);
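/*
 * Illustrative sketch (editorial, not part of the original file): callers
 * pair the two helpers above and pass the same size to both, since the
 * sk_omem_alloc accounting depends on it:
 *
 *	void *buf = sock_kmalloc(sk, size, GFP_KERNEL);
 *
 *	if (!buf)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, buf, size);
 */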
1731
1732 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1733 I think, these locks should be removed for datagram sockets.
1734 */
1735 static long sock_wait_for_wmem(struct sock *sk, long timeo)
1736 {
1737 DEFINE_WAIT(wait);
1738
1739 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1740 for (;;) {
1741 if (!timeo)
1742 break;
1743 if (signal_pending(current))
1744 break;
1745 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1746 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1747 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1748 break;
1749 if (sk->sk_shutdown & SEND_SHUTDOWN)
1750 break;
1751 if (sk->sk_err)
1752 break;
1753 timeo = schedule_timeout(timeo);
1754 }
1755 finish_wait(sk_sleep(sk), &wait);
1756 return timeo;
1757 }
1758
1759
1760 /* debug function */
1761
1762 static int sock_dump_info(struct sock *sk)
1763 {
1764 //dump receiver queue 128 bytes
1765 //struct sk_buff *skb;
1766 //char skbmsg[128];
1767 //dump receiver queue 128 bytes end
1768
1769 if(sk->sk_family == AF_UNIX)
1770 {
1771 struct unix_sock *u = unix_sk(sk);
1772 struct sock *other = NULL;
1773 if( (u->path.dentry !=NULL)&&(u->path.dentry->d_iname!=NULL))
1774 //if( (u->dentry !=NULL)&&(u->dentry->d_iname!=NULL))
1775 {
1776 #ifdef CONFIG_MTK_NET_LOGGING
1777 printk(KERN_INFO "[mtk_net][sock]sockdbg: socket-Name:%s \n",u->path.dentry->d_iname);
1778 #endif
1779 }
1780 else
1781 {
1782 #ifdef CONFIG_MTK_NET_LOGGING
1783 printk(KERN_INFO "[mtk_net][sock]sockdbg:socket Name (NULL)\n" );
1784 #endif
1785 }
1786
1787 if(sk->sk_socket && SOCK_INODE(sk->sk_socket))
1788 {
1789 #ifdef CONFIG_MTK_NET_LOGGING
1790 printk(KERN_INFO "[mtk_net][sock]sockdbg:socket Inode[%lu]\n" ,SOCK_INODE(sk->sk_socket)->i_ino);
1791 #endif
1792 }
1793
1794 other = unix_sk(sk)->peer ;
1795 if (!other)
1796 {
1797 #ifdef CONFIG_MTK_NET_LOGGING
1798 printk(KERN_INFO "[mtk_net][sock]sockdbg:peer is (NULL) \n");
1799 #endif
1800 } else{
1801
1802 if ((((struct unix_sock *)other)->path.dentry != NULL)&&(((struct unix_sock *)other)->path.dentry->d_iname != NULL))
1803 //if ((((struct unix_sock *)other)->dentry != NULL)&&(((struct unix_sock *)other)->dentry->d_iname != NULL))
1804 {
1805 #ifdef CONFIG_MTK_NET_LOGGING
1806 printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name:%s \n",((struct unix_sock *)other)->path.dentry->d_iname);
1807 #endif
1808 }
1809 else
1810 {
1811 #ifdef CONFIG_MTK_NET_LOGGING
1812 printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name (NULL) \n");
1813 #endif
1814 }
1815
1816 if(other->sk_socket && SOCK_INODE(other->sk_socket))
1817 {
1818 #ifdef CONFIG_MTK_NET_LOGGING
1819 printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Inode [%lu] \n", SOCK_INODE(other->sk_socket)->i_ino);
1820 #endif
1821 }
1822 #ifdef CONFIG_MTK_NET_LOGGING
1823 printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Receive Queue len:%d \n", other->sk_receive_queue.qlen);
1824 #endif
1825 //dump receiver queue 128 bytes
1826 /* if ((skb = skb_peek_tail(&other->sk_receive_queue)) == NULL) {
1827
1828 printk(KERN_INFO "sockdbg: Peer Recieve Queue is null (warning) \n");
1829 }else{
1830 int i =0 ,len=0;
1831 if((skb->len !=0) && (skb->data != NULL)){
1832
1833 if(skb->len >= 127){
1834 len = 127 ;
1835 }else
1836 {
1837 len = skb->len ;
1838 }
1839 for (i=0;i<len;i++)
1840 sprintf(skbmsg+i, "%x", skb->data[i]);
1841
1842 skbmsg[len]= '\0' ;
1843
1844 printk(KERN_INFO "sockdbg: Peer Recieve Queue dump(%d bytes):%s\n", len, skbmsg);
1845
1846
1847 }else{
1848 printk(KERN_INFO "sockdbg: Peer Recieve skb error \n");
1849 }*/
1850 //dump receiver queue 128 bytes end
1851
1852 //}
1853 //dump receiver queue 128 bytes end
1854
1855 }
1856 }
1857
1858 return 0 ;
1859
1860
1861 }
1862
1863
1864
1865 /*
1866 * Generic send/receive buffer handlers
1867 */
1868
1869 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1870 unsigned long data_len, int noblock,
1871 int *errcode)
1872 {
1873 struct sk_buff *skb;
1874 gfp_t gfp_mask;
1875 long timeo;
1876 int err;
1877 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1878
1879 err = -EMSGSIZE;
1880 if (npages > MAX_SKB_FRAGS)
1881 goto failure;
1882
1883 gfp_mask = sk->sk_allocation;
1884 if (gfp_mask & __GFP_WAIT)
1885 gfp_mask |= __GFP_REPEAT;
1886
1887 timeo = sock_sndtimeo(sk, noblock);
1888 while (1) {
1889 err = sock_error(sk);
1890 if (err != 0)
1891 goto failure;
1892
1893 err = -EPIPE;
1894 if (sk->sk_shutdown & SEND_SHUTDOWN)
1895 goto failure;
1896
1897 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1898 skb = alloc_skb(header_len, gfp_mask);
1899 if (skb) {
1900 int i;
1901
1902 /* No pages, we're done... */
1903 if (!data_len)
1904 break;
1905
1906 skb->truesize += data_len;
1907 skb_shinfo(skb)->nr_frags = npages;
1908 for (i = 0; i < npages; i++) {
1909 struct page *page;
1910
1911 page = alloc_pages(sk->sk_allocation, 0);
1912 if (!page) {
1913 err = -ENOBUFS;
1914 skb_shinfo(skb)->nr_frags = i;
1915 kfree_skb(skb);
1916 goto failure;
1917 }
1918
1919 __skb_fill_page_desc(skb, i,
1920 page, 0,
1921 (data_len >= PAGE_SIZE ?
1922 PAGE_SIZE :
1923 data_len));
1924 data_len -= PAGE_SIZE;
1925 }
1926
1927 /* Full success... */
1928 break;
1929 }
1930 err = -ENOBUFS;
1931 goto failure;
1932 }
1933 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1934 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1935 err = -EAGAIN;
1936 if (!timeo)
1937 goto failure;
1938 if (signal_pending(current))
1939 goto interrupted;
1940
1941 sock_dump_info(sk);
1942 #ifdef CONFIG_MTK_NET_LOGGING
1943 printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem, timeo =%ld, wmem =%d, snd buf =%d \n",
1944 timeo, atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf);
1945 #endif
1946 timeo = sock_wait_for_wmem(sk, timeo);
1947 #ifdef CONFIG_MTK_NET_LOGGING
1948 printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem done, header_len=0x%lx, data_len=0x%lx,timeo =%ld \n",
1949 header_len, data_len ,timeo);
1950 #endif
1951 }
1952
1953 skb_set_owner_w(skb, sk);
1954 return skb;
1955
1956 interrupted:
1957 err = sock_intr_errno(timeo);
1958 failure:
1959 *errcode = err;
1960 return NULL;
1961 }
1962 EXPORT_SYMBOL(sock_alloc_send_pskb);
1963
1964 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1965 int noblock, int *errcode)
1966 {
1967 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1968 }
1969 EXPORT_SYMBOL(sock_alloc_send_skb);
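/*
 * Illustrative sketch (editorial, not part of the original file; "msg",
 * "len" and "err" are the caller's own locals): a datagram sendmsg()
 * implementation typically allocates its buffer through the helper above,
 * which honours the socket's send buffer limit and send timeout:
 *
 *	skb = sock_alloc_send_skb(sk, len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		goto out_err;
 */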
1970
1971 /* On 32bit arches, an skb frag is limited to 2^15 */
1972 #define SKB_FRAG_PAGE_ORDER get_order(32768)
1973
1974 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1975 {
1976 int order;
1977
1978 if (pfrag->page) {
1979 if (atomic_read(&pfrag->page->_count) == 1) {
1980 pfrag->offset = 0;
1981 return true;
1982 }
1983 if (pfrag->offset < pfrag->size)
1984 return true;
1985 put_page(pfrag->page);
1986 }
1987
1988 /* We restrict high order allocations to users that can afford to wait */
1989 order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1990
1991 do {
1992 gfp_t gfp = sk->sk_allocation;
1993
1994 if (order)
1995 gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
1996 pfrag->page = alloc_pages(gfp, order);
1997 if (likely(pfrag->page)) {
1998 pfrag->offset = 0;
1999 pfrag->size = PAGE_SIZE << order;
2000 return true;
2001 }
2002 } while (--order >= 0);
2003
2004 sk_enter_memory_pressure(sk);
2005 sk_stream_moderate_sndbuf(sk);
2006 return false;
2007 }
2008 EXPORT_SYMBOL(sk_page_frag_refill);
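/*
 * Illustrative sketch (editorial, not part of the original file): stream
 * protocols use this with the per-socket (or per-task) page_frag, roughly:
 *
 *	struct page_frag *pfrag = sk_page_frag(sk);
 *
 *	if (!sk_page_frag_refill(sk, pfrag))
 *		goto wait_for_memory;
 *	... copy into pfrag->page at pfrag->offset, then advance pfrag->offset ...
 */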
2009
2010 static void __lock_sock(struct sock *sk)
2011 __releases(&sk->sk_lock.slock)
2012 __acquires(&sk->sk_lock.slock)
2013 {
2014 DEFINE_WAIT(wait);
2015
2016 for (;;) {
2017 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2018 TASK_UNINTERRUPTIBLE);
2019 spin_unlock_bh(&sk->sk_lock.slock);
2020 schedule();
2021 spin_lock_bh(&sk->sk_lock.slock);
2022 if (!sock_owned_by_user(sk))
2023 break;
2024 }
2025 finish_wait(&sk->sk_lock.wq, &wait);
2026 }
2027
2028 static void __release_sock(struct sock *sk)
2029 __releases(&sk->sk_lock.slock)
2030 __acquires(&sk->sk_lock.slock)
2031 {
2032 struct sk_buff *skb = sk->sk_backlog.head;
2033
2034 do {
2035 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2036 bh_unlock_sock(sk);
2037
2038 do {
2039 struct sk_buff *next = skb->next;
2040
2041 prefetch(next);
2042 WARN_ON_ONCE(skb_dst_is_noref(skb));
2043 skb->next = NULL;
2044 sk_backlog_rcv(sk, skb);
2045
2046 /*
2047 * We are in process context here with softirqs
2048 * disabled, use cond_resched_softirq() to preempt.
2049 * This is safe to do because we've taken the backlog
2050 * queue private:
2051 */
2052 cond_resched_softirq();
2053
2054 skb = next;
2055 } while (skb != NULL);
2056
2057 bh_lock_sock(sk);
2058 } while ((skb = sk->sk_backlog.head) != NULL);
2059
2060 /*
2061 * Zeroing the backlog length here guarantees we cannot loop forever
2062 * while a wild producer attempts to flood us.
2063 */
2064 sk->sk_backlog.len = 0;
2065 }
2066
2067 /**
2068 * sk_wait_data - wait for data to arrive at sk_receive_queue
2069 * @sk: sock to wait on
2070 * @timeo: for how long
2071 *
2072 * Socket state, including sk->sk_err, is changed only under the socket
2073 * lock, hence we may omit checks after joining the wait queue.
2074 * We check the receive queue before schedule() only as an optimization;
2075 * it is very likely that release_sock() added new data.
2076 */
2077 int sk_wait_data(struct sock *sk, long *timeo)
2078 {
2079 int rc;
2080 DEFINE_WAIT(wait);
2081
2082 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2083 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2084 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
2085 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2086 finish_wait(sk_sleep(sk), &wait);
2087 return rc;
2088 }
2089 EXPORT_SYMBOL(sk_wait_data);
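
/*
 * Illustrative sketch: how a protocol's recvmsg path typically drives
 * sk_wait_data() with the socket lock held.  The function name is
 * hypothetical.
 */
static int __maybe_unused example_wait_for_data(struct sock *sk, int noblock)
{
	long timeo = sock_rcvtimeo(sk, noblock);
	int err = 0;

	lock_sock(sk);
	while (skb_queue_empty(&sk->sk_receive_queue)) {
		err = sock_error(sk);
		if (err)
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = 0;
		sk_wait_data(sk, &timeo);
	}
	release_sock(sk);
	return err;
}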
2090
2091 /**
2092 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2093 * @sk: socket
2094 * @size: memory size to allocate
2095 * @kind: allocation type
2096 *
2097 * If kind is SK_MEM_SEND, it means wmem allocation; otherwise it means
2098 * rmem allocation. This function assumes that protocols which have
2099 * memory_pressure use sk_wmem_queued for write buffer accounting.
2100 */
2101 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2102 {
2103 struct proto *prot = sk->sk_prot;
2104 int amt = sk_mem_pages(size);
2105 long allocated;
2106 int parent_status = UNDER_LIMIT;
2107
2108 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2109
2110 allocated = sk_memory_allocated_add(sk, amt, &parent_status);
2111
2112 /* Under limit. */
2113 if (parent_status == UNDER_LIMIT &&
2114 allocated <= sk_prot_mem_limits(sk, 0)) {
2115 sk_leave_memory_pressure(sk);
2116 return 1;
2117 }
2118
2119 /* Under pressure. (we or our parents) */
2120 if ((parent_status > SOFT_LIMIT) ||
2121 allocated > sk_prot_mem_limits(sk, 1))
2122 sk_enter_memory_pressure(sk);
2123
2124 /* Over hard limit (we or our parents) */
2125 if ((parent_status == OVER_LIMIT) ||
2126 (allocated > sk_prot_mem_limits(sk, 2)))
2127 goto suppress_allocation;
2128
2129 /* guarantee minimum buffer size under pressure */
2130 if (kind == SK_MEM_RECV) {
2131 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2132 return 1;
2133
2134 } else { /* SK_MEM_SEND */
2135 if (sk->sk_type == SOCK_STREAM) {
2136 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2137 return 1;
2138 } else if (atomic_read(&sk->sk_wmem_alloc) <
2139 prot->sysctl_wmem[0])
2140 return 1;
2141 }
2142
2143 if (sk_has_memory_pressure(sk)) {
2144 int alloc;
2145
2146 if (!sk_under_memory_pressure(sk))
2147 return 1;
2148 alloc = sk_sockets_allocated_read_positive(sk);
2149 if (sk_prot_mem_limits(sk, 2) > alloc *
2150 sk_mem_pages(sk->sk_wmem_queued +
2151 atomic_read(&sk->sk_rmem_alloc) +
2152 sk->sk_forward_alloc))
2153 return 1;
2154 }
2155
2156 suppress_allocation:
2157
2158 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2159 sk_stream_moderate_sndbuf(sk);
2160
2161 /* Fail only if the socket is _under_ its sndbuf.
2162 * In this case we cannot block, so we have to fail.
2163 */
2164 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2165 return 1;
2166 }
2167
2168 trace_sock_exceed_buf_limit(sk, prot, allocated);
2169
2170 /* Alas. Undo changes. */
2171 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
2172
2173 sk_memory_allocated_sub(sk, amt);
2174
2175 return 0;
2176 }
2177 EXPORT_SYMBOL(__sk_mem_schedule);
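
/*
 * Illustrative sketch: protocols normally reach __sk_mem_schedule() through
 * the sk_wmem_schedule()/sk_rmem_schedule() wrappers, which only call it
 * once sk_forward_alloc is exhausted.  The function name below is
 * hypothetical and the accounting mirrors the stream (SOCK_STREAM) case.
 */
static bool __maybe_unused example_charge_send(struct sock *sk,
					       struct sk_buff *skb)
{
	if (!sk_wmem_schedule(sk, skb->truesize))
		return false;			/* over the protocol limits */

	sk_mem_charge(sk, skb->truesize);	/* consume sk_forward_alloc */
	sk->sk_wmem_queued += skb->truesize;	/* stream write-queue account */
	return true;
}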
2178
2179 /**
2180 * __sk_mem_reclaim - reclaim memory_allocated
2181 * @sk: socket
2182 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2183 */
2184 void __sk_mem_reclaim(struct sock *sk, int amount)
2185 {
2186 amount >>= SK_MEM_QUANTUM_SHIFT;
2187 sk_memory_allocated_sub(sk, amount);
2188 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2189
2190 if (sk_under_memory_pressure(sk) &&
2191 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2192 sk_leave_memory_pressure(sk);
2193 }
2194 EXPORT_SYMBOL(__sk_mem_reclaim);
2195
2196
2197 /*
2198 * Set of default routines for initialising struct proto_ops when
2199 * the protocol does not support a particular function. In certain
2200 * cases where it makes no sense for a protocol to have a "do nothing"
2201 * function, some default processing is provided.
2202 */
2203
2204 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2205 {
2206 return -EOPNOTSUPP;
2207 }
2208 EXPORT_SYMBOL(sock_no_bind);
2209
2210 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2211 int len, int flags)
2212 {
2213 return -EOPNOTSUPP;
2214 }
2215 EXPORT_SYMBOL(sock_no_connect);
2216
2217 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2218 {
2219 return -EOPNOTSUPP;
2220 }
2221 EXPORT_SYMBOL(sock_no_socketpair);
2222
2223 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2224 {
2225 return -EOPNOTSUPP;
2226 }
2227 EXPORT_SYMBOL(sock_no_accept);
2228
2229 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2230 int *len, int peer)
2231 {
2232 return -EOPNOTSUPP;
2233 }
2234 EXPORT_SYMBOL(sock_no_getname);
2235
2236 unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2237 {
2238 return 0;
2239 }
2240 EXPORT_SYMBOL(sock_no_poll);
2241
2242 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2243 {
2244 return -EOPNOTSUPP;
2245 }
2246 EXPORT_SYMBOL(sock_no_ioctl);
2247
2248 int sock_no_listen(struct socket *sock, int backlog)
2249 {
2250 return -EOPNOTSUPP;
2251 }
2252 EXPORT_SYMBOL(sock_no_listen);
2253
2254 int sock_no_shutdown(struct socket *sock, int how)
2255 {
2256 return -EOPNOTSUPP;
2257 }
2258 EXPORT_SYMBOL(sock_no_shutdown);
2259
2260 int sock_no_setsockopt(struct socket *sock, int level, int optname,
2261 char __user *optval, unsigned int optlen)
2262 {
2263 return -EOPNOTSUPP;
2264 }
2265 EXPORT_SYMBOL(sock_no_setsockopt);
2266
2267 int sock_no_getsockopt(struct socket *sock, int level, int optname,
2268 char __user *optval, int __user *optlen)
2269 {
2270 return -EOPNOTSUPP;
2271 }
2272 EXPORT_SYMBOL(sock_no_getsockopt);
2273
2274 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2275 size_t len)
2276 {
2277 return -EOPNOTSUPP;
2278 }
2279 EXPORT_SYMBOL(sock_no_sendmsg);
2280
2281 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2282 size_t len, int flags)
2283 {
2284 return -EOPNOTSUPP;
2285 }
2286 EXPORT_SYMBOL(sock_no_recvmsg);
2287
2288 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2289 {
2290 /* Mirror missing mmap method error code */
2291 return -ENODEV;
2292 }
2293 EXPORT_SYMBOL(sock_no_mmap);
2294
2295 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2296 {
2297 ssize_t res;
2298 struct msghdr msg = {.msg_flags = flags};
2299 struct kvec iov;
2300 char *kaddr = kmap(page);
2301 iov.iov_base = kaddr + offset;
2302 iov.iov_len = size;
2303 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2304 kunmap(page);
2305 return res;
2306 }
2307 EXPORT_SYMBOL(sock_no_sendpage);
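
/*
 * Illustrative sketch: an address family wiring the sock_no_*() stubs above
 * into its struct proto_ops for operations it does not implement.  The
 * family value is a placeholder and the remaining handlers (release,
 * sendmsg, recvmsg, ...) would point at real implementations.
 */
static const struct proto_ops example_proto_ops __maybe_unused = {
	.family		= PF_UNSPEC,		/* hypothetical family */
	.owner		= THIS_MODULE,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_no_setsockopt,
	.getsockopt	= sock_no_getsockopt,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};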
2308
2309 /*
2310 * Default Socket Callbacks
2311 */
2312
2313 static void sock_def_wakeup(struct sock *sk)
2314 {
2315 struct socket_wq *wq;
2316
2317 rcu_read_lock();
2318 wq = rcu_dereference(sk->sk_wq);
2319 if (wq_has_sleeper(wq))
2320 wake_up_interruptible_all(&wq->wait);
2321 rcu_read_unlock();
2322 }
2323
2324 static void sock_def_error_report(struct sock *sk)
2325 {
2326 struct socket_wq *wq;
2327
2328 rcu_read_lock();
2329 wq = rcu_dereference(sk->sk_wq);
2330 if (wq_has_sleeper(wq))
2331 wake_up_interruptible_poll(&wq->wait, POLLERR);
2332 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2333 rcu_read_unlock();
2334 }
2335
2336 static void sock_def_readable(struct sock *sk, int len)
2337 {
2338 struct socket_wq *wq;
2339
2340 rcu_read_lock();
2341 wq = rcu_dereference(sk->sk_wq);
2342 if (wq_has_sleeper(wq))
2343 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2344 POLLRDNORM | POLLRDBAND);
2345 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2346 rcu_read_unlock();
2347 }
2348
2349 static void sock_def_write_space(struct sock *sk)
2350 {
2351 struct socket_wq *wq;
2352
2353 rcu_read_lock();
2354
2355 /* Do not wake up a writer until he can make "significant"
2356 * progress. --DaveM
2357 */
2358 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2359 wq = rcu_dereference(sk->sk_wq);
2360 if (wq_has_sleeper(wq))
2361 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2362 POLLWRNORM | POLLWRBAND);
2363
2364 /* Should agree with poll, otherwise some programs break */
2365 if (sock_writeable(sk))
2366 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2367 }
2368
2369 rcu_read_unlock();
2370 }
2371
2372 static void sock_def_destruct(struct sock *sk)
2373 {
2374 kfree(sk->sk_protinfo);
2375 }
2376
2377 void sk_send_sigurg(struct sock *sk)
2378 {
2379 if (sk->sk_socket && sk->sk_socket->file)
2380 if (send_sigurg(&sk->sk_socket->file->f_owner))
2381 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2382 }
2383 EXPORT_SYMBOL(sk_send_sigurg);
2384
2385 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2386 unsigned long expires)
2387 {
2388 if (!mod_timer(timer, expires))
2389 sock_hold(sk);
2390 }
2391 EXPORT_SYMBOL(sk_reset_timer);
2392
2393 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2394 {
2395 if (del_timer(timer))
2396 __sock_put(sk);
2397 }
2398 EXPORT_SYMBOL(sk_stop_timer);
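
/*
 * Illustrative sketch: arming and cancelling the generic sk->sk_timer with
 * the reference-counted helpers above.  The 10 second timeout is arbitrary
 * and the function names are hypothetical.
 */
static void __maybe_unused example_arm_timer(struct sock *sk)
{
	/* takes a reference on sk unless the timer was already pending */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + 10 * HZ);
}

static void __maybe_unused example_cancel_timer(struct sock *sk)
{
	/* drops the reference only if the timer was still pending */
	sk_stop_timer(sk, &sk->sk_timer);
}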
2399
2400 void sock_init_data(struct socket *sock, struct sock *sk)
2401 {
2402 skb_queue_head_init(&sk->sk_receive_queue);
2403 skb_queue_head_init(&sk->sk_write_queue);
2404 skb_queue_head_init(&sk->sk_error_queue);
2405 #ifdef CONFIG_NET_DMA
2406 skb_queue_head_init(&sk->sk_async_wait_queue);
2407 #endif
2408
2409 sk->sk_send_head = NULL;
2410
2411 init_timer(&sk->sk_timer);
2412
2413 sk->sk_allocation = GFP_KERNEL;
2414 sk->sk_rcvbuf = sysctl_rmem_default;
2415 sk->sk_sndbuf = sysctl_wmem_default;
2416 sk->sk_state = TCP_CLOSE;
2417 sk_set_socket(sk, sock);
2418
2419 sock_set_flag(sk, SOCK_ZAPPED);
2420
2421 if (sock) {
2422 sk->sk_type = sock->type;
2423 sk->sk_wq = sock->wq;
2424 sock->sk = sk;
2425 } else
2426 sk->sk_wq = NULL;
2427
2428 spin_lock_init(&sk->sk_dst_lock);
2429 rwlock_init(&sk->sk_callback_lock);
2430 lockdep_set_class_and_name(&sk->sk_callback_lock,
2431 af_callback_keys + sk->sk_family,
2432 af_family_clock_key_strings[sk->sk_family]);
2433
2434 sk->sk_state_change = sock_def_wakeup;
2435 sk->sk_data_ready = sock_def_readable;
2436 sk->sk_write_space = sock_def_write_space;
2437 sk->sk_error_report = sock_def_error_report;
2438 sk->sk_destruct = sock_def_destruct;
2439
2440 sk->sk_frag.page = NULL;
2441 sk->sk_frag.offset = 0;
2442 sk->sk_peek_off = -1;
2443
2444 sk->sk_peer_pid = NULL;
2445 sk->sk_peer_cred = NULL;
2446 sk->sk_write_pending = 0;
2447 sk->sk_rcvlowat = 1;
2448 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2449 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2450
2451 sk->sk_stamp = ktime_set(-1L, 0);
2452
2453 sk->sk_pacing_rate = ~0U;
2454 /*
2455 * Before updating sk_refcnt, we must commit prior changes to memory
2456 * (Documentation/RCU/rculist_nulls.txt for details)
2457 */
2458 smp_wmb();
2459 atomic_set(&sk->sk_refcnt, 1);
2460 atomic_set(&sk->sk_drops, 0);
2461 }
2462 EXPORT_SYMBOL(sock_init_data);
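
/*
 * Illustrative sketch: the usual shape of an address family's create() hook,
 * allocating a sock and handing it to sock_init_data().  The proto argument,
 * PF_UNSPEC family and protocol number 0 are placeholders.
 */
static int __maybe_unused example_create(struct net *net, struct socket *sock,
					 struct proto *example_prot)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_UNSPEC, GFP_KERNEL, example_prot);
	if (!sk)
		return -ENOBUFS;

	sock_init_data(sock, sk);	/* queues, timers, default callbacks */
	sk->sk_protocol = 0;		/* family specific protocol number */
	return 0;
}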
2463
2464 void lock_sock_nested(struct sock *sk, int subclass)
2465 {
2466 might_sleep();
2467 spin_lock_bh(&sk->sk_lock.slock);
2468 if (sk->sk_lock.owned)
2469 __lock_sock(sk);
2470 sk->sk_lock.owned = 1;
2471 spin_unlock(&sk->sk_lock.slock);
2472 /*
2473 * The sk_lock has mutex_lock() semantics here:
2474 */
2475 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2476 local_bh_enable();
2477 }
2478 EXPORT_SYMBOL(lock_sock_nested);
2479
2480 void release_sock(struct sock *sk)
2481 {
2482 /*
2483 * The sk_lock has mutex_unlock() semantics:
2484 */
2485 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2486
2487 spin_lock_bh(&sk->sk_lock.slock);
2488 if (sk->sk_backlog.tail)
2489 __release_sock(sk);
2490
2491 /* Warning: release_cb() might need to release sk ownership,
2492 * i.e. call sock_release_ownership(sk) before us.
2493 */
2494 if (sk->sk_prot->release_cb)
2495 sk->sk_prot->release_cb(sk);
2496
2497 sock_release_ownership(sk);
2498 if (waitqueue_active(&sk->sk_lock.wq))
2499 wake_up(&sk->sk_lock.wq);
2500 spin_unlock_bh(&sk->sk_lock.slock);
2501 }
2502 EXPORT_SYMBOL(release_sock);
2503
2504 /**
2505 * lock_sock_fast - fast version of lock_sock
2506 * @sk: socket
2507 *
2508 * This version should be used for very small sections, where the process
2509 * won't block. Returns false if the fast path is taken:
2510 * sk_lock.slock locked, owned = 0, BH disabled.
2511 * Returns true if the slow path is taken:
2512 * sk_lock.slock unlocked, owned = 1, BH enabled.
2513 */
2514 bool lock_sock_fast(struct sock *sk)
2515 {
2516 might_sleep();
2517 spin_lock_bh(&sk->sk_lock.slock);
2518
2519 if (!sk->sk_lock.owned)
2520 /*
2521 * Note : We must disable BH
2522 */
2523 return false;
2524
2525 __lock_sock(sk);
2526 sk->sk_lock.owned = 1;
2527 spin_unlock(&sk->sk_lock.slock);
2528 /*
2529 * The sk_lock has mutex_lock() semantics here:
2530 */
2531 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2532 local_bh_enable();
2533 return true;
2534 }
2535 EXPORT_SYMBOL(lock_sock_fast);
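
/*
 * Illustrative sketch: the intended pairing of lock_sock_fast() with
 * unlock_sock_fast() around a short, non-blocking critical section.  The
 * function name is hypothetical.
 */
static void __maybe_unused example_fast_lock(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);

	/* short work under the socket lock, e.g. purging the receive queue */
	skb_queue_purge(&sk->sk_receive_queue);

	unlock_sock_fast(sk, slow);
}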
2536
2537 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2538 {
2539 struct timeval tv;
2540 if (!sock_flag(sk, SOCK_TIMESTAMP))
2541 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2542 tv = ktime_to_timeval(sk->sk_stamp);
2543 if (tv.tv_sec == -1)
2544 return -ENOENT;
2545 if (tv.tv_sec == 0) {
2546 sk->sk_stamp = ktime_get_real();
2547 tv = ktime_to_timeval(sk->sk_stamp);
2548 }
2549 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2550 }
2551 EXPORT_SYMBOL(sock_get_timestamp);
2552
2553 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2554 {
2555 struct timespec ts;
2556 if (!sock_flag(sk, SOCK_TIMESTAMP))
2557 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2558 ts = ktime_to_timespec(sk->sk_stamp);
2559 if (ts.tv_sec == -1)
2560 return -ENOENT;
2561 if (ts.tv_sec == 0) {
2562 sk->sk_stamp = ktime_get_real();
2563 ts = ktime_to_timespec(sk->sk_stamp);
2564 }
2565 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2566 }
2567 EXPORT_SYMBOL(sock_get_timestampns);
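
/*
 * Illustrative sketch: how a protocol's ioctl handler typically dispatches
 * SIOCGSTAMP/SIOCGSTAMPNS to the helpers above.  The function name is
 * hypothetical.
 */
static int __maybe_unused example_stamp_ioctl(struct sock *sk,
					      unsigned int cmd,
					      unsigned long arg)
{
	switch (cmd) {
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}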
2568
2569 void sock_enable_timestamp(struct sock *sk, int flag)
2570 {
2571 if (!sock_flag(sk, flag)) {
2572 unsigned long previous_flags = sk->sk_flags;
2573
2574 sock_set_flag(sk, flag);
2575 /*
2576 * We just set one of the two flags which require net
2577 * time stamping, but time stamping might have been on
2578 * already because of the other one.
2579 */
2580 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
2581 net_enable_timestamp();
2582 }
2583 }
2584
2585 /*
2586 * Get a socket option on a socket.
2587 *
2588 * FIX: POSIX 1003.1g is very ambiguous here. It states that
2589 * asynchronous errors should be reported by getsockopt. We assume
2590 * this means if you specify SO_ERROR (otherwise what's the point of it).
2591 */
2592 int sock_common_getsockopt(struct socket *sock, int level, int optname,
2593 char __user *optval, int __user *optlen)
2594 {
2595 struct sock *sk = sock->sk;
2596
2597 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2598 }
2599 EXPORT_SYMBOL(sock_common_getsockopt);
2600
2601 #ifdef CONFIG_COMPAT
2602 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2603 char __user *optval, int __user *optlen)
2604 {
2605 struct sock *sk = sock->sk;
2606
2607 if (sk->sk_prot->compat_getsockopt != NULL)
2608 return sk->sk_prot->compat_getsockopt(sk, level, optname,
2609 optval, optlen);
2610 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2611 }
2612 EXPORT_SYMBOL(compat_sock_common_getsockopt);
2613 #endif
2614
2615 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2616 struct msghdr *msg, size_t size, int flags)
2617 {
2618 struct sock *sk = sock->sk;
2619 int addr_len = 0;
2620 int err;
2621
2622 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2623 flags & ~MSG_DONTWAIT, &addr_len);
2624 if (err >= 0)
2625 msg->msg_namelen = addr_len;
2626 return err;
2627 }
2628 EXPORT_SYMBOL(sock_common_recvmsg);
2629
2630 /*
2631 * Set socket options on an inet socket.
2632 */
2633 int sock_common_setsockopt(struct socket *sock, int level, int optname,
2634 char __user *optval, unsigned int optlen)
2635 {
2636 struct sock *sk = sock->sk;
2637
2638 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2639 }
2640 EXPORT_SYMBOL(sock_common_setsockopt);
2641
2642 #ifdef CONFIG_COMPAT
2643 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2644 char __user *optval, unsigned int optlen)
2645 {
2646 struct sock *sk = sock->sk;
2647
2648 if (sk->sk_prot->compat_setsockopt != NULL)
2649 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2650 optval, optlen);
2651 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2652 }
2653 EXPORT_SYMBOL(compat_sock_common_setsockopt);
2654 #endif
2655
2656 void sk_common_release(struct sock *sk)
2657 {
2658 if (sk->sk_prot->destroy)
2659 sk->sk_prot->destroy(sk);
2660
2661 /*
2662 * Observation: when sk_common_release() is called, processes have
2663 * no access to the socket, but the network stack still does.
2664 * Step one, detach it from networking:
2665 *
2666 * A. Remove from hash tables.
2667 */
2668
2669 sk->sk_prot->unhash(sk);
2670
2671 /*
2672 * At this point the socket cannot receive new packets, but it is possible
2673 * that some packets are in flight because some CPU ran the receiver and
2674 * did the hash table lookup before we unhashed the socket. They will reach
2675 * the receive queue and will be purged by the socket destructor.
2676 *
2677 * Also, we still have packets pending on the receive queue and, probably,
2678 * our own packets waiting in device queues. sock_destroy will drain the
2679 * receive queue, but transmitted packets will delay socket destruction
2680 * until the last reference is released.
2681 */
2682
2683 sock_orphan(sk);
2684
2685 xfrm_sk_free_policy(sk);
2686
2687 sk_refcnt_debug_release(sk);
2688
2689 sock_put(sk);
2690 }
2691 EXPORT_SYMBOL(sk_common_release);
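
/*
 * Illustrative sketch: a minimal protocol close() implementation; datagram
 * style protocols commonly just defer to sk_common_release().  The function
 * name is hypothetical.
 */
static void __maybe_unused example_close(struct sock *sk, long timeout)
{
	/* protocol specific teardown (leave groups, free options, ...) */
	sk_common_release(sk);
}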
2692
2693 #ifdef CONFIG_PROC_FS
2694 #define PROTO_INUSE_NR 64 /* should be enough for the first time */
2695 struct prot_inuse {
2696 int val[PROTO_INUSE_NR];
2697 };
2698
2699 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2700
2701 #ifdef CONFIG_NET_NS
2702 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2703 {
2704 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2705 }
2706 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2707
2708 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2709 {
2710 int cpu, idx = prot->inuse_idx;
2711 int res = 0;
2712
2713 for_each_possible_cpu(cpu)
2714 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2715
2716 return res >= 0 ? res : 0;
2717 }
2718 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2719
2720 static int __net_init sock_inuse_init_net(struct net *net)
2721 {
2722 net->core.inuse = alloc_percpu(struct prot_inuse);
2723 return net->core.inuse ? 0 : -ENOMEM;
2724 }
2725
2726 static void __net_exit sock_inuse_exit_net(struct net *net)
2727 {
2728 free_percpu(net->core.inuse);
2729 }
2730
2731 static struct pernet_operations net_inuse_ops = {
2732 .init = sock_inuse_init_net,
2733 .exit = sock_inuse_exit_net,
2734 };
2735
2736 static __init int net_inuse_init(void)
2737 {
2738 if (register_pernet_subsys(&net_inuse_ops))
2739 panic("Cannot initialize net inuse counters");
2740
2741 return 0;
2742 }
2743
2744 core_initcall(net_inuse_init);
2745 #else
2746 static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2747
2748 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2749 {
2750 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2751 }
2752 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2753
2754 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2755 {
2756 int cpu, idx = prot->inuse_idx;
2757 int res = 0;
2758
2759 for_each_possible_cpu(cpu)
2760 res += per_cpu(prot_inuse, cpu).val[idx];
2761
2762 return res >= 0 ? res : 0;
2763 }
2764 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2765 #endif
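
/*
 * Illustrative sketch: protocols account themselves in the per-protocol
 * "inuse" counters from their hash/unhash callbacks.  The function names
 * are hypothetical.
 */
static void __maybe_unused example_hash(struct sock *sk)
{
	/* ... insert sk into the protocol's lookup table ... */
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}

static void __maybe_unused example_unhash(struct sock *sk)
{
	/* ... remove sk from the lookup table ... */
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}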
2766
2767 static void assign_proto_idx(struct proto *prot)
2768 {
2769 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2770
2771 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2772 pr_err("PROTO_INUSE_NR exhausted\n");
2773 return;
2774 }
2775
2776 set_bit(prot->inuse_idx, proto_inuse_idx);
2777 }
2778
2779 static void release_proto_idx(struct proto *prot)
2780 {
2781 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2782 clear_bit(prot->inuse_idx, proto_inuse_idx);
2783 }
2784 #else
2785 static inline void assign_proto_idx(struct proto *prot)
2786 {
2787 }
2788
2789 static inline void release_proto_idx(struct proto *prot)
2790 {
2791 }
2792 #endif
2793
2794 int proto_register(struct proto *prot, int alloc_slab)
2795 {
2796 if (alloc_slab) {
2797 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
2798 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2799 NULL);
2800
2801 if (prot->slab == NULL) {
2802 pr_crit("%s: Can't create sock SLAB cache!\n",
2803 prot->name);
2804 goto out;
2805 }
2806
2807 if (prot->rsk_prot != NULL) {
2808 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
2809 if (prot->rsk_prot->slab_name == NULL)
2810 goto out_free_sock_slab;
2811
2812 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2813 prot->rsk_prot->obj_size, 0,
2814 SLAB_HWCACHE_ALIGN, NULL);
2815
2816 if (prot->rsk_prot->slab == NULL) {
2817 pr_crit("%s: Can't create request sock SLAB cache!\n",
2818 prot->name);
2819 goto out_free_request_sock_slab_name;
2820 }
2821 }
2822
2823 if (prot->twsk_prot != NULL) {
2824 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2825
2826 if (prot->twsk_prot->twsk_slab_name == NULL)
2827 goto out_free_request_sock_slab;
2828
2829 prot->twsk_prot->twsk_slab =
2830 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2831 prot->twsk_prot->twsk_obj_size,
2832 0,
2833 SLAB_HWCACHE_ALIGN |
2834 prot->slab_flags,
2835 NULL);
2836 if (prot->twsk_prot->twsk_slab == NULL)
2837 goto out_free_timewait_sock_slab_name;
2838 }
2839 }
2840
2841 mutex_lock(&proto_list_mutex);
2842 list_add(&prot->node, &proto_list);
2843 assign_proto_idx(prot);
2844 mutex_unlock(&proto_list_mutex);
2845 return 0;
2846
2847 out_free_timewait_sock_slab_name:
2848 kfree(prot->twsk_prot->twsk_slab_name);
2849 out_free_request_sock_slab:
2850 if (prot->rsk_prot && prot->rsk_prot->slab) {
2851 kmem_cache_destroy(prot->rsk_prot->slab);
2852 prot->rsk_prot->slab = NULL;
2853 }
2854 out_free_request_sock_slab_name:
2855 if (prot->rsk_prot)
2856 kfree(prot->rsk_prot->slab_name);
2857 out_free_sock_slab:
2858 kmem_cache_destroy(prot->slab);
2859 prot->slab = NULL;
2860 out:
2861 return -ENOBUFS;
2862 }
2863 EXPORT_SYMBOL(proto_register);
2864
2865 void proto_unregister(struct proto *prot)
2866 {
2867 mutex_lock(&proto_list_mutex);
2868 release_proto_idx(prot);
2869 list_del(&prot->node);
2870 mutex_unlock(&proto_list_mutex);
2871
2872 if (prot->slab != NULL) {
2873 kmem_cache_destroy(prot->slab);
2874 prot->slab = NULL;
2875 }
2876
2877 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
2878 kmem_cache_destroy(prot->rsk_prot->slab);
2879 kfree(prot->rsk_prot->slab_name);
2880 prot->rsk_prot->slab = NULL;
2881 }
2882
2883 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
2884 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
2885 kfree(prot->twsk_prot->twsk_slab_name);
2886 prot->twsk_prot->twsk_slab = NULL;
2887 }
2888 }
2889 EXPORT_SYMBOL(proto_unregister);
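
/*
 * Illustrative sketch: registering and unregistering a protocol, typically
 * from a module's init/exit paths.  The struct proto below is a hypothetical
 * minimal definition.
 */
static struct proto example_proto __maybe_unused = {
	.name	  = "EXAMPLE",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct sock),
};

static int __maybe_unused example_proto_init(void)
{
	/* '1' asks proto_register() to create the per-protocol slab cache */
	return proto_register(&example_proto, 1);
}

static void __maybe_unused example_proto_exit(void)
{
	proto_unregister(&example_proto);
}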
2890
2891 #ifdef CONFIG_PROC_FS
2892 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2893 __acquires(proto_list_mutex)
2894 {
2895 mutex_lock(&proto_list_mutex);
2896 return seq_list_start_head(&proto_list, *pos);
2897 }
2898
2899 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2900 {
2901 return seq_list_next(v, &proto_list, pos);
2902 }
2903
2904 static void proto_seq_stop(struct seq_file *seq, void *v)
2905 __releases(proto_list_mutex)
2906 {
2907 mutex_unlock(&proto_list_mutex);
2908 }
2909
2910 static char proto_method_implemented(const void *method)
2911 {
2912 return method == NULL ? 'n' : 'y';
2913 }
2914 static long sock_prot_memory_allocated(struct proto *proto)
2915 {
2916 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
2917 }
2918
2919 static char *sock_prot_memory_pressure(struct proto *proto)
2920 {
2921 return proto->memory_pressure != NULL ?
2922 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2923 }
2924
2925 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2926 {
2927
2928 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
2929 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2930 proto->name,
2931 proto->obj_size,
2932 sock_prot_inuse_get(seq_file_net(seq), proto),
2933 sock_prot_memory_allocated(proto),
2934 sock_prot_memory_pressure(proto),
2935 proto->max_header,
2936 proto->slab == NULL ? "no" : "yes",
2937 module_name(proto->owner),
2938 proto_method_implemented(proto->close),
2939 proto_method_implemented(proto->connect),
2940 proto_method_implemented(proto->disconnect),
2941 proto_method_implemented(proto->accept),
2942 proto_method_implemented(proto->ioctl),
2943 proto_method_implemented(proto->init),
2944 proto_method_implemented(proto->destroy),
2945 proto_method_implemented(proto->shutdown),
2946 proto_method_implemented(proto->setsockopt),
2947 proto_method_implemented(proto->getsockopt),
2948 proto_method_implemented(proto->sendmsg),
2949 proto_method_implemented(proto->recvmsg),
2950 proto_method_implemented(proto->sendpage),
2951 proto_method_implemented(proto->bind),
2952 proto_method_implemented(proto->backlog_rcv),
2953 proto_method_implemented(proto->hash),
2954 proto_method_implemented(proto->unhash),
2955 proto_method_implemented(proto->get_port),
2956 proto_method_implemented(proto->enter_memory_pressure));
2957 }
2958
2959 static int proto_seq_show(struct seq_file *seq, void *v)
2960 {
2961 if (v == &proto_list)
2962 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2963 "protocol",
2964 "size",
2965 "sockets",
2966 "memory",
2967 "press",
2968 "maxhdr",
2969 "slab",
2970 "module",
2971 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2972 else
2973 proto_seq_printf(seq, list_entry(v, struct proto, node));
2974 return 0;
2975 }
2976
2977 static const struct seq_operations proto_seq_ops = {
2978 .start = proto_seq_start,
2979 .next = proto_seq_next,
2980 .stop = proto_seq_stop,
2981 .show = proto_seq_show,
2982 };
2983
2984 static int proto_seq_open(struct inode *inode, struct file *file)
2985 {
2986 return seq_open_net(inode, file, &proto_seq_ops,
2987 sizeof(struct seq_net_private));
2988 }
2989
2990 static const struct file_operations proto_seq_fops = {
2991 .owner = THIS_MODULE,
2992 .open = proto_seq_open,
2993 .read = seq_read,
2994 .llseek = seq_lseek,
2995 .release = seq_release_net,
2996 };
2997
2998 static __net_init int proto_init_net(struct net *net)
2999 {
3000 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
3001 return -ENOMEM;
3002
3003 return 0;
3004 }
3005
3006 static __net_exit void proto_exit_net(struct net *net)
3007 {
3008 remove_proc_entry("protocols", net->proc_net);
3009 }
3010
3011
3012 static __net_initdata struct pernet_operations proto_net_ops = {
3013 .init = proto_init_net,
3014 .exit = proto_exit_net,
3015 };
3016
3017 static int __init proto_init(void)
3018 {
3019 return register_pernet_subsys(&proto_net_ops);
3020 }
3021
3022 subsys_initcall(proto_init);
3023
3024 #endif /* PROC_FS */