net/core/sock.c
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Alan Cox, <A.Cox@swansea.ac.uk>
14 *
15 * Fixes:
16 * Alan Cox : Numerous verify_area() problems
17 * Alan Cox : Connecting on a connecting socket
18 * now returns an error for tcp.
19 * Alan Cox : sock->protocol is set correctly.
20 * and is not sometimes left as 0.
21 * Alan Cox : connect handles icmp errors on a
22 * connect properly. Unfortunately there
23 * is a restart syscall nasty there. I
24 * can't match BSD without hacking the C
25 * library. Ideas urgently sought!
26 * Alan Cox : Disallow bind() to addresses that are
27 * not ours - especially broadcast ones!!
28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
30 * instead they leave that for the DESTROY timer.
31 * Alan Cox : Clean up error flag in accept
32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
33 * was buggy. Put a remove_sock() in the handler
34 * for memory when we hit 0. Also altered the timer
35 * code. The ACK stuff can wait and needs major
36 * TCP layer surgery.
37 * Alan Cox : Fixed TCP ack bug, removed remove sock
38 * and fixed timer/inet_bh race.
39 * Alan Cox : Added zapped flag for TCP
40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45 * Rick Sladkey : Relaxed UDP rules for matching packets.
46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
47 * Pauline Middelink : identd support
48 * Alan Cox : Fixed connect() taking signals I think.
49 * Alan Cox : SO_LINGER supported
50 * Alan Cox : Error reporting fixes
51 * Anonymous : inet_create tidied up (sk->reuse setting)
52 * Alan Cox : inet sockets don't set sk->type!
53 * Alan Cox : Split socket option code
54 * Alan Cox : Callbacks
55 * Alan Cox : Nagle flag for Charles & Johannes stuff
56 * Alex : Removed restriction on inet fioctl
57 * Alan Cox : Splitting INET from NET core
58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
60 * Alan Cox : Split IP from generic code
61 * Alan Cox : New kfree_skbmem()
62 * Alan Cox : Make SO_DEBUG superuser only.
63 * Alan Cox : Allow anyone to clear SO_DEBUG
64 * (compatibility fix)
65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
66 * Alan Cox : Allocator for a socket is settable.
67 * Alan Cox : SO_ERROR includes soft errors.
68 * Alan Cox : Allow NULL arguments on some SO_ opts
69 * Alan Cox : Generic socket allocation to make hooks
70 * easier (suggested by Craig Metz).
71 * Michael Pall : SO_ERROR returns positive errno again
72 * Steve Whitehouse: Added default destructor to free
73 * protocol private data.
74 * Steve Whitehouse: Added various other default routines
75 * common to several socket families.
76 * Chris Evans : Call suser() check last on F_SETOWN
77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
79 * Andi Kleen : Fix write_space callback
80 * Chris Evans : Security fixes - signedness again
81 * Arnaldo C. Melo : cleanups, use skb_queue_purge
82 *
83 * To Fix:
84 *
85 *
86 * This program is free software; you can redistribute it and/or
87 * modify it under the terms of the GNU General Public License
88 * as published by the Free Software Foundation; either version
89 * 2 of the License, or (at your option) any later version.
90 */
91
92 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/types.h>
97 #include <linux/socket.h>
98 #include <linux/in.h>
99 #include <linux/kernel.h>
100 #include <linux/module.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/sched.h>
104 #include <linux/timer.h>
105 #include <linux/string.h>
106 #include <linux/sockios.h>
107 #include <linux/net.h>
108 #include <linux/mm.h>
109 #include <linux/slab.h>
110 #include <linux/interrupt.h>
111 #include <linux/poll.h>
112 #include <linux/tcp.h>
113 #include <linux/init.h>
114 #include <linux/highmem.h>
115 #include <linux/user_namespace.h>
116 #include <linux/static_key.h>
117 #include <linux/memcontrol.h>
118 #include <linux/prefetch.h>
119
120 #include <asm/uaccess.h>
121
122 #include <linux/netdevice.h>
123 #include <net/protocol.h>
124 #include <linux/skbuff.h>
125 #include <net/net_namespace.h>
126 #include <net/request_sock.h>
127 #include <net/sock.h>
128 #include <linux/net_tstamp.h>
129 #include <net/xfrm.h>
130 #include <linux/ipsec.h>
131 #include <net/cls_cgroup.h>
132 #include <net/netprio_cgroup.h>
133
134 #include <linux/filter.h>
135
136 #include <trace/events/sock.h>
137
138 #include <net/af_unix.h>
139
140
141 #ifdef CONFIG_INET
142 #include <net/tcp.h>
143 #endif
144 #include <linux/xlog.h>
145
146 static DEFINE_MUTEX(proto_list_mutex);
147 static LIST_HEAD(proto_list);
148
149 /**
150 * sk_ns_capable - General socket capability test
151 * @sk: Socket to use a capability on or through
152 * @user_ns: The user namespace of the capability to use
153 * @cap: The capability to use
154 *
155 * Test to see if the opener of the socket had the capability @cap when
156 * the socket was created and that the current process has it in the user
157 * namespace @user_ns.
158 */
159 bool sk_ns_capable(const struct sock *sk,
160 struct user_namespace *user_ns, int cap)
161 {
162 return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
163 ns_capable(user_ns, cap);
164 }
165 EXPORT_SYMBOL(sk_ns_capable);
166
167 /**
168 * sk_capable - Socket global capability test
169 * @sk: Socket to use a capability on or through
170 * @cap: The global capability to use
171 *
172 * Test to see if the opener of the socket had the capability @cap when
173 * the socket was created and that the current process has it in all user
174 * namespaces.
175 */
176 bool sk_capable(const struct sock *sk, int cap)
177 {
178 return sk_ns_capable(sk, &init_user_ns, cap);
179 }
180 EXPORT_SYMBOL(sk_capable);
181
182 /**
183 * sk_net_capable - Network namespace socket capability test
184 * @sk: Socket to use a capability on or through
185 * @cap: The capability to use
186 *
187 * Test to see if the opener of the socket had the capability @cap when the
188 * socket was created and that the current process has it over the network
189 * namespace the socket is a member of.
190 */
191 bool sk_net_capable(const struct sock *sk, int cap)
192 {
193 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
194 }
195 EXPORT_SYMBOL(sk_net_capable);
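
/*
 * Illustrative sketch only (not part of the original file): how protocol code
 * might use the capability helpers above to gate a privileged operation. The
 * function name is hypothetical; sk_net_capable() and CAP_NET_ADMIN are real.
 */
#if 0
static int example_set_privileged_opt(struct sock *sk, int val)
{
	/* require CAP_NET_ADMIN relative to the socket's network namespace */
	if (!sk_net_capable(sk, CAP_NET_ADMIN))
		return -EPERM;

	sk->sk_priority = val;	/* the privileged action itself */
	return 0;
}
#endif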
196
197
198 #ifdef CONFIG_MEMCG_KMEM
199 int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
200 {
201 struct proto *proto;
202 int ret = 0;
203
204 mutex_lock(&proto_list_mutex);
205 list_for_each_entry(proto, &proto_list, node) {
206 if (proto->init_cgroup) {
207 ret = proto->init_cgroup(memcg, ss);
208 if (ret)
209 goto out;
210 }
211 }
212
213 mutex_unlock(&proto_list_mutex);
214 return ret;
215 out:
216 list_for_each_entry_continue_reverse(proto, &proto_list, node)
217 if (proto->destroy_cgroup)
218 proto->destroy_cgroup(memcg);
219 mutex_unlock(&proto_list_mutex);
220 return ret;
221 }
222
223 void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
224 {
225 struct proto *proto;
226
227 mutex_lock(&proto_list_mutex);
228 list_for_each_entry_reverse(proto, &proto_list, node)
229 if (proto->destroy_cgroup)
230 proto->destroy_cgroup(memcg);
231 mutex_unlock(&proto_list_mutex);
232 }
233 #endif
234
235 /*
236 * Each address family might have different locking rules, so we have
237 * one slock key per address family:
238 */
239 static struct lock_class_key af_family_keys[AF_MAX];
240 static struct lock_class_key af_family_slock_keys[AF_MAX];
241
242 #if defined(CONFIG_MEMCG_KMEM)
243 struct static_key memcg_socket_limit_enabled;
244 EXPORT_SYMBOL(memcg_socket_limit_enabled);
245 #endif
246
247 /*
248 * Make lock validator output more readable. (we pre-construct these
249 * strings build-time, so that runtime initialization of socket
250 * locks is fast):
251 */
252 static const char *const af_family_key_strings[AF_MAX+1] = {
253 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
254 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
255 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
256 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
257 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
258 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
259 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
260 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
261 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
262 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
263 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
264 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
265 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
266 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
267 };
268 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
269 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
270 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
271 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
272 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
273 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
274 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
275 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
276 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
277 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
278 "slock-27" , "slock-28" , "slock-AF_CAN" ,
279 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
280 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
281 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
282 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
283 };
284 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
285 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
286 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
287 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
288 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
289 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
290 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
291 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
292 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
293 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
294 "clock-27" , "clock-28" , "clock-AF_CAN" ,
295 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
296 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
297 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
298 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
299 };
300
301 /*
302 * sk_callback_lock locking rules are per-address-family,
303 * so split the lock classes by using a per-AF key:
304 */
305 static struct lock_class_key af_callback_keys[AF_MAX];
306
307 /* Take into consideration the size of the struct sk_buff overhead in the
308 * determination of these values, since that is non-constant across
309 * platforms. This makes socket queueing behavior and performance
310 * not depend upon such differences.
311 */
312 #define _SK_MEM_PACKETS 256
313 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
314 #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
315 #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
316
317 /* Run time adjustable parameters. */
318 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
319 EXPORT_SYMBOL(sysctl_wmem_max);
320 __u32 sysctl_rmem_max __read_mostly = (SK_RMEM_MAX*8);
321 EXPORT_SYMBOL(sysctl_rmem_max);
322 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
323 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
324
325 /* Maximal space eaten by iovec or ancillary data plus some space */
326 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
327 EXPORT_SYMBOL(sysctl_optmem_max);
328
329 struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
330 EXPORT_SYMBOL_GPL(memalloc_socks);
331
332 /**
333 * sk_set_memalloc - sets %SOCK_MEMALLOC
334 * @sk: socket to set it on
335 *
336 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
337 * It's the responsibility of the admin to adjust min_free_kbytes
338 * to meet the requirements
339 */
340 void sk_set_memalloc(struct sock *sk)
341 {
342 sock_set_flag(sk, SOCK_MEMALLOC);
343 sk->sk_allocation |= __GFP_MEMALLOC;
344 static_key_slow_inc(&memalloc_socks);
345 }
346 EXPORT_SYMBOL_GPL(sk_set_memalloc);
347
348 void sk_clear_memalloc(struct sock *sk)
349 {
350 sock_reset_flag(sk, SOCK_MEMALLOC);
351 sk->sk_allocation &= ~__GFP_MEMALLOC;
352 static_key_slow_dec(&memalloc_socks);
353
354 /*
355 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
356 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
357 * it has rmem allocations there is a risk that the user of the
358 * socket cannot make forward progress due to exceeding the rmem
359 * limits. By rights, sk_clear_memalloc() should only be called
360 * on sockets being torn down but warn and reset the accounting if
361 * that assumption breaks.
362 */
363 if (WARN_ON(sk->sk_forward_alloc))
364 sk_mem_reclaim(sk);
365 }
366 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
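
/*
 * Illustrative sketch only (not part of the original file): a hypothetical
 * swap-over-network backend would mark its transport socket with
 * sk_set_memalloc() so its packet processing may dip into the emergency
 * reserves, and clear the flag again when the socket is torn down.
 */
#if 0
static void example_mark_reclaim_socket(struct sock *example_sk)
{
	sk_set_memalloc(example_sk);	/* example_sk: hypothetical transport socket */

	/* ... socket now carries memory-reclaim (e.g. swap) traffic ... */

	sk_clear_memalloc(example_sk);	/* normally only on teardown */
}
#endif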
367
368 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
369 {
370 int ret;
371 unsigned long pflags = current->flags;
372
373 /* these should have been dropped before queueing */
374 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
375
376 current->flags |= PF_MEMALLOC;
377 ret = sk->sk_backlog_rcv(sk, skb);
378 tsk_restore_flags(current, pflags, PF_MEMALLOC);
379
380 return ret;
381 }
382 EXPORT_SYMBOL(__sk_backlog_rcv);
383
384 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
385 {
386 struct timeval tv;
387
388 if (optlen < sizeof(tv))
389 return -EINVAL;
390 if (copy_from_user(&tv, optval, sizeof(tv)))
391 return -EFAULT;
392 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
393 return -EDOM;
394
395 if (tv.tv_sec < 0) {
396 static int warned __read_mostly;
397
398 *timeo_p = 0;
399 if (warned < 10 && net_ratelimit()) {
400 warned++;
401 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
402 __func__, current->comm, task_pid_nr(current));
403 }
404 return 0;
405 }
406 *timeo_p = MAX_SCHEDULE_TIMEOUT;
407 if (tv.tv_sec == 0 && tv.tv_usec == 0)
408 return 0;
409 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
410 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
411 return 0;
412 }
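
/*
 * Illustrative user-space sketch only (not part of the original file), showing
 * what feeds sock_set_timeout() via SO_RCVTIMEO/SO_SNDTIMEO. A zero timeval
 * means "wait forever"; tv_usec outside [0, 1000000) yields -EDOM. Assumes
 * <sys/socket.h> and <sys/time.h>.
 */
#if 0
static int example_set_rcv_timeout(int fd)
{
	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };	/* 5 second timeout */

	return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}
#endif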
413
414 static void sock_warn_obsolete_bsdism(const char *name)
415 {
416 static int warned;
417 static char warncomm[TASK_COMM_LEN];
418 if (strcmp(warncomm, current->comm) && warned < 5) {
419 strcpy(warncomm, current->comm);
420 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
421 warncomm, name);
422 warned++;
423 }
424 }
425
426 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
427 {
428 if (sk->sk_flags & flags) {
429 sk->sk_flags &= ~flags;
430 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
431 net_disable_timestamp();
432 }
433 }
434
435
436 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
437 {
438 int err;
439 int skb_len;
440 unsigned long flags;
441 struct sk_buff_head *list = &sk->sk_receive_queue;
442
443 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
444 atomic_inc(&sk->sk_drops);
445 trace_sock_rcvqueue_full(sk, skb);
446 return -ENOMEM;
447 }
448
449 err = sk_filter(sk, skb);
450 if (err)
451 return err;
452
453 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
454 atomic_inc(&sk->sk_drops);
455 return -ENOBUFS;
456 }
457
458 skb->dev = NULL;
459 skb_set_owner_r(skb, sk);
460
461 /* Cache the SKB length before we tack it onto the receive
462 * queue. Once it is added it no longer belongs to us and
463 * may be freed by other threads of control pulling packets
464 * from the queue.
465 */
466 skb_len = skb->len;
467
468 /* we escape from the RCU-protected region, make sure we don't leak
469 * a non-refcounted dst
470 */
471 skb_dst_force(skb);
472
473 spin_lock_irqsave(&list->lock, flags);
474 skb->dropcount = atomic_read(&sk->sk_drops);
475 __skb_queue_tail(list, skb);
476 spin_unlock_irqrestore(&list->lock, flags);
477
478 if (!sock_flag(sk, SOCK_DEAD))
479 sk->sk_data_ready(sk, skb_len);
480 return 0;
481 }
482 EXPORT_SYMBOL(sock_queue_rcv_skb);
483
484 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
485 {
486 int rc = NET_RX_SUCCESS;
487
488 if (sk_filter(sk, skb))
489 goto discard_and_relse;
490
491 skb->dev = NULL;
492
493 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
494 atomic_inc(&sk->sk_drops);
495 goto discard_and_relse;
496 }
497 if (nested)
498 bh_lock_sock_nested(sk);
499 else
500 bh_lock_sock(sk);
501 if (!sock_owned_by_user(sk)) {
502 /*
503 * trylock + unlock semantics:
504 */
505 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
506
507 rc = sk_backlog_rcv(sk, skb);
508
509 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
510 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
511 bh_unlock_sock(sk);
512 atomic_inc(&sk->sk_drops);
513 goto discard_and_relse;
514 }
515
516 bh_unlock_sock(sk);
517 out:
518 sock_put(sk);
519 return rc;
520 discard_and_relse:
521 kfree_skb(skb);
522 goto out;
523 }
524 EXPORT_SYMBOL(sk_receive_skb);
525
526 void sk_reset_txq(struct sock *sk)
527 {
528 sk_tx_queue_clear(sk);
529 }
530 EXPORT_SYMBOL(sk_reset_txq);
531
532 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
533 {
534 struct dst_entry *dst = __sk_dst_get(sk);
535
536 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
537 sk_tx_queue_clear(sk);
538 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
539 dst_release(dst);
540 return NULL;
541 }
542
543 return dst;
544 }
545 EXPORT_SYMBOL(__sk_dst_check);
546
547 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
548 {
549 struct dst_entry *dst = sk_dst_get(sk);
550
551 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
552 sk_dst_reset(sk);
553 dst_release(dst);
554 return NULL;
555 }
556
557 return dst;
558 }
559 EXPORT_SYMBOL(sk_dst_check);
560
561 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
562 int optlen)
563 {
564 int ret = -ENOPROTOOPT;
565 #ifdef CONFIG_NETDEVICES
566 struct net *net = sock_net(sk);
567 char devname[IFNAMSIZ];
568 int index;
569
570 /* Sorry... */
571 ret = -EPERM;
572 if (!ns_capable(net->user_ns, CAP_NET_RAW))
573 goto out;
574
575 ret = -EINVAL;
576 if (optlen < 0)
577 goto out;
578
579 /* Bind this socket to a particular device like "eth0",
580 * as specified in the passed interface name. If the
581 * name is "" or the option length is zero the socket
582 * is not bound.
583 */
584 if (optlen > IFNAMSIZ - 1)
585 optlen = IFNAMSIZ - 1;
586 memset(devname, 0, sizeof(devname));
587
588 ret = -EFAULT;
589 if (copy_from_user(devname, optval, optlen))
590 goto out;
591
592 index = 0;
593 if (devname[0] != '\0') {
594 struct net_device *dev;
595
596 rcu_read_lock();
597 dev = dev_get_by_name_rcu(net, devname);
598 if (dev)
599 index = dev->ifindex;
600 rcu_read_unlock();
601 ret = -ENODEV;
602 if (!dev)
603 goto out;
604 }
605
606 lock_sock(sk);
607 sk->sk_bound_dev_if = index;
608 sk_dst_reset(sk);
609 release_sock(sk);
610
611 ret = 0;
612
613 out:
614 #endif
615
616 return ret;
617 }
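
/*
 * Illustrative user-space sketch only (not part of the original file):
 * SO_BINDTODEVICE as handled above. The caller needs CAP_NET_RAW, and an
 * empty name (or zero option length) removes the binding again. "eth0" is
 * just a placeholder interface name.
 */
#if 0
static int example_bind_to_device(int fd)
{
	static const char ifname[] = "eth0";

	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, sizeof(ifname)) < 0)
		return -1;

	/* ... later, clear the binding ... */
	return setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "", 0);
}
#endif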
618
619 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
620 int __user *optlen, int len)
621 {
622 int ret = -ENOPROTOOPT;
623 #ifdef CONFIG_NETDEVICES
624 struct net *net = sock_net(sk);
625 char devname[IFNAMSIZ];
626
627 if (sk->sk_bound_dev_if == 0) {
628 len = 0;
629 goto zero;
630 }
631
632 ret = -EINVAL;
633 if (len < IFNAMSIZ)
634 goto out;
635
636 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
637 if (ret)
638 goto out;
639
640 len = strlen(devname) + 1;
641
642 ret = -EFAULT;
643 if (copy_to_user(optval, devname, len))
644 goto out;
645
646 zero:
647 ret = -EFAULT;
648 if (put_user(len, optlen))
649 goto out;
650
651 ret = 0;
652
653 out:
654 #endif
655
656 return ret;
657 }
658
659 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
660 {
661 if (valbool)
662 sock_set_flag(sk, bit);
663 else
664 sock_reset_flag(sk, bit);
665 }
666
667 /*
668 * This is meant for all protocols to use and covers goings on
669 * at the socket level. Everything here is generic.
670 */
671
672 int sock_setsockopt(struct socket *sock, int level, int optname,
673 char __user *optval, unsigned int optlen)
674 {
675 struct sock *sk = sock->sk;
676 int val;
677 int valbool;
678 struct linger ling;
679 int ret = 0;
680
681 /*
682 * Options without arguments
683 */
684
685 if (optname == SO_BINDTODEVICE)
686 return sock_setbindtodevice(sk, optval, optlen);
687
688 if (optlen < sizeof(int))
689 return -EINVAL;
690
691 if (get_user(val, (int __user *)optval))
692 return -EFAULT;
693
694 valbool = val ? 1 : 0;
695
696 lock_sock(sk);
697
698 switch (optname) {
699 case SO_DEBUG:
700 if (val && !capable(CAP_NET_ADMIN))
701 ret = -EACCES;
702 else
703 sock_valbool_flag(sk, SOCK_DBG, valbool);
704 break;
705 case SO_REUSEADDR:
706 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
707 break;
708 case SO_REUSEPORT:
709 sk->sk_reuseport = valbool;
710 break;
711 case SO_TYPE:
712 case SO_PROTOCOL:
713 case SO_DOMAIN:
714 case SO_ERROR:
715 ret = -ENOPROTOOPT;
716 break;
717 case SO_DONTROUTE:
718 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
719 break;
720 case SO_BROADCAST:
721 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
722 break;
723 case SO_SNDBUF:
724 /* Don't error on this; BSD doesn't, and if you think
725 * about it this is right. Otherwise apps have to
726 * play 'guess the biggest size' games. RCVBUF/SNDBUF
727 * are treated in BSD as hints
728 */
729 val = min_t(u32, val, sysctl_wmem_max);
730 set_sndbuf:
731 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
732 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
733 /* Wake up sending tasks if we upped the value. */
734 sk->sk_write_space(sk);
735 break;
736
737 case SO_SNDBUFFORCE:
738 if (!capable(CAP_NET_ADMIN)) {
739 ret = -EPERM;
740 break;
741 }
742 goto set_sndbuf;
743
744 case SO_RCVBUF:
745 /* Don't error on this; BSD doesn't, and if you think
746 * about it this is right. Otherwise apps have to
747 * play 'guess the biggest size' games. RCVBUF/SNDBUF
748 * are treated in BSD as hints
749 */
750 val = min_t(u32, val, sysctl_rmem_max);
751 set_rcvbuf:
752 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
753 /*
754 * We double it on the way in to account for
755 * "struct sk_buff" etc. overhead. Applications
756 * assume that the SO_RCVBUF setting they make will
757 * allow that much actual data to be received on that
758 * socket.
759 *
760 * Applications are unaware that "struct sk_buff" and
761 * other overheads allocate from the receive buffer
762 * during socket buffer allocation.
763 *
764 * And after considering the possible alternatives,
765 * returning the value we actually used in getsockopt
766 * is the most desirable behavior.
767 */
768 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
769 break;
770
771 case SO_RCVBUFFORCE:
772 if (!capable(CAP_NET_ADMIN)) {
773 ret = -EPERM;
774 break;
775 }
776 goto set_rcvbuf;
777
778 case SO_KEEPALIVE:
779 #ifdef CONFIG_INET
780 if (sk->sk_protocol == IPPROTO_TCP &&
781 sk->sk_type == SOCK_STREAM)
782 tcp_set_keepalive(sk, valbool);
783 #endif
784 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
785 break;
786
787 case SO_OOBINLINE:
788 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
789 break;
790
791 case SO_NO_CHECK:
792 sk->sk_no_check = valbool;
793 break;
794
795 case SO_PRIORITY:
796 if ((val >= 0 && val <= 6) ||
797 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
798 sk->sk_priority = val;
799 else
800 ret = -EPERM;
801 break;
802
803 case SO_LINGER:
804 if (optlen < sizeof(ling)) {
805 ret = -EINVAL; /* 1003.1g */
806 break;
807 }
808 if (copy_from_user(&ling, optval, sizeof(ling))) {
809 ret = -EFAULT;
810 break;
811 }
812 if (!ling.l_onoff)
813 sock_reset_flag(sk, SOCK_LINGER);
814 else {
815 #if (BITS_PER_LONG == 32)
816 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
817 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
818 else
819 #endif
820 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
821 sock_set_flag(sk, SOCK_LINGER);
822 }
823 break;
824
825 case SO_BSDCOMPAT:
826 sock_warn_obsolete_bsdism("setsockopt");
827 break;
828
829 case SO_PASSCRED:
830 if (valbool)
831 set_bit(SOCK_PASSCRED, &sock->flags);
832 else
833 clear_bit(SOCK_PASSCRED, &sock->flags);
834 break;
835
836 case SO_TIMESTAMP:
837 case SO_TIMESTAMPNS:
838 if (valbool) {
839 if (optname == SO_TIMESTAMP)
840 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
841 else
842 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
843 sock_set_flag(sk, SOCK_RCVTSTAMP);
844 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
845 } else {
846 sock_reset_flag(sk, SOCK_RCVTSTAMP);
847 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
848 }
849 break;
850
851 case SO_TIMESTAMPING:
852 if (val & ~SOF_TIMESTAMPING_MASK) {
853 ret = -EINVAL;
854 break;
855 }
856 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
857 val & SOF_TIMESTAMPING_TX_HARDWARE);
858 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
859 val & SOF_TIMESTAMPING_TX_SOFTWARE);
860 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
861 val & SOF_TIMESTAMPING_RX_HARDWARE);
862 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
863 sock_enable_timestamp(sk,
864 SOCK_TIMESTAMPING_RX_SOFTWARE);
865 else
866 sock_disable_timestamp(sk,
867 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
868 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
869 val & SOF_TIMESTAMPING_SOFTWARE);
870 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
871 val & SOF_TIMESTAMPING_SYS_HARDWARE);
872 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
873 val & SOF_TIMESTAMPING_RAW_HARDWARE);
874 break;
875
876 case SO_RCVLOWAT:
877 if (val < 0)
878 val = INT_MAX;
879 sk->sk_rcvlowat = val ? : 1;
880 break;
881
882 case SO_RCVTIMEO:
883 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
884 break;
885
886 case SO_SNDTIMEO:
887 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
888 break;
889
890 case SO_ATTACH_FILTER:
891 ret = -EINVAL;
892 if (optlen == sizeof(struct sock_fprog)) {
893 struct sock_fprog fprog;
894
895 ret = -EFAULT;
896 if (copy_from_user(&fprog, optval, sizeof(fprog)))
897 break;
898
899 ret = sk_attach_filter(&fprog, sk);
900 }
901 break;
902
903 case SO_DETACH_FILTER:
904 ret = sk_detach_filter(sk);
905 break;
906
907 case SO_LOCK_FILTER:
908 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
909 ret = -EPERM;
910 else
911 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
912 break;
913
914 case SO_PASSSEC:
915 if (valbool)
916 set_bit(SOCK_PASSSEC, &sock->flags);
917 else
918 clear_bit(SOCK_PASSSEC, &sock->flags);
919 break;
920 case SO_MARK:
921 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
922 ret = -EPERM;
923 else
924 sk->sk_mark = val;
925 break;
926
927 /* We implement the SO_SNDLOWAT etc to
928 not be settable (1003.1g 5.3) */
929 case SO_RXQ_OVFL:
930 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
931 break;
932
933 case SO_WIFI_STATUS:
934 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
935 break;
936
937 case SO_PEEK_OFF:
938 if (sock->ops->set_peek_off)
939 ret = sock->ops->set_peek_off(sk, val);
940 else
941 ret = -EOPNOTSUPP;
942 break;
943
944 case SO_NOFCS:
945 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
946 break;
947
948 case SO_SELECT_ERR_QUEUE:
949 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
950 break;
951
952 default:
953 ret = -ENOPROTOOPT;
954 break;
955 }
956 release_sock(sk);
957 return ret;
958 }
959 EXPORT_SYMBOL(sock_setsockopt);
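
/*
 * Illustrative user-space sketch only (not part of the original file): as the
 * SO_RCVBUF comment above explains, the kernel doubles the requested size to
 * cover struct sk_buff overhead and getsockopt() reports the value actually
 * used.
 */
#if 0
static int example_rcvbuf_doubling(int fd)
{
	int req = 64 * 1024, eff = 0;
	socklen_t len = sizeof(eff);

	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &req, sizeof(req));
	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &eff, &len);
	return eff;	/* typically 2 * req, after req is clamped to net.core.rmem_max */
}
#endif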
960
961
962 void cred_to_ucred(struct pid *pid, const struct cred *cred,
963 struct ucred *ucred)
964 {
965 ucred->pid = pid_vnr(pid);
966 ucred->uid = ucred->gid = -1;
967 if (cred) {
968 struct user_namespace *current_ns = current_user_ns();
969
970 ucred->uid = from_kuid_munged(current_ns, cred->euid);
971 ucred->gid = from_kgid_munged(current_ns, cred->egid);
972 }
973 }
974 EXPORT_SYMBOL_GPL(cred_to_ucred);
975
976 int sock_getsockopt(struct socket *sock, int level, int optname,
977 char __user *optval, int __user *optlen)
978 {
979 struct sock *sk = sock->sk;
980
981 union {
982 int val;
983 struct linger ling;
984 struct timeval tm;
985 } v;
986
987 int lv = sizeof(int);
988 int len;
989
990 if (get_user(len, optlen))
991 return -EFAULT;
992 if (len < 0)
993 return -EINVAL;
994
995 memset(&v, 0, sizeof(v));
996
997 switch (optname) {
998 case SO_DEBUG:
999 v.val = sock_flag(sk, SOCK_DBG);
1000 break;
1001
1002 case SO_DONTROUTE:
1003 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1004 break;
1005
1006 case SO_BROADCAST:
1007 v.val = sock_flag(sk, SOCK_BROADCAST);
1008 break;
1009
1010 case SO_SNDBUF:
1011 v.val = sk->sk_sndbuf;
1012 break;
1013
1014 case SO_RCVBUF:
1015 v.val = sk->sk_rcvbuf;
1016 break;
1017
1018 case SO_REUSEADDR:
1019 v.val = sk->sk_reuse;
1020 break;
1021
1022 case SO_REUSEPORT:
1023 v.val = sk->sk_reuseport;
1024 break;
1025
1026 case SO_KEEPALIVE:
1027 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1028 break;
1029
1030 case SO_TYPE:
1031 v.val = sk->sk_type;
1032 break;
1033
1034 case SO_PROTOCOL:
1035 v.val = sk->sk_protocol;
1036 break;
1037
1038 case SO_DOMAIN:
1039 v.val = sk->sk_family;
1040 break;
1041
1042 case SO_ERROR:
1043 v.val = -sock_error(sk);
1044 if (v.val == 0)
1045 v.val = xchg(&sk->sk_err_soft, 0);
1046 break;
1047
1048 case SO_OOBINLINE:
1049 v.val = sock_flag(sk, SOCK_URGINLINE);
1050 break;
1051
1052 case SO_NO_CHECK:
1053 v.val = sk->sk_no_check;
1054 break;
1055
1056 case SO_PRIORITY:
1057 v.val = sk->sk_priority;
1058 break;
1059
1060 case SO_LINGER:
1061 lv = sizeof(v.ling);
1062 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1063 v.ling.l_linger = sk->sk_lingertime / HZ;
1064 break;
1065
1066 case SO_BSDCOMPAT:
1067 sock_warn_obsolete_bsdism("getsockopt");
1068 break;
1069
1070 case SO_TIMESTAMP:
1071 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1072 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1073 break;
1074
1075 case SO_TIMESTAMPNS:
1076 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1077 break;
1078
1079 case SO_TIMESTAMPING:
1080 v.val = 0;
1081 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
1082 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
1083 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
1084 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
1085 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
1086 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
1087 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1088 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
1089 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
1090 v.val |= SOF_TIMESTAMPING_SOFTWARE;
1091 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
1092 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
1093 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
1094 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
1095 break;
1096
1097 case SO_RCVTIMEO:
1098 lv = sizeof(struct timeval);
1099 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1100 v.tm.tv_sec = 0;
1101 v.tm.tv_usec = 0;
1102 } else {
1103 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1104 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1105 }
1106 break;
1107
1108 case SO_SNDTIMEO:
1109 lv = sizeof(struct timeval);
1110 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1111 v.tm.tv_sec = 0;
1112 v.tm.tv_usec = 0;
1113 } else {
1114 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1115 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1116 }
1117 break;
1118
1119 case SO_RCVLOWAT:
1120 v.val = sk->sk_rcvlowat;
1121 break;
1122
1123 case SO_SNDLOWAT:
1124 v.val = 1;
1125 break;
1126
1127 case SO_PASSCRED:
1128 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1129 break;
1130
1131 case SO_PEERCRED:
1132 {
1133 struct ucred peercred;
1134 if (len > sizeof(peercred))
1135 len = sizeof(peercred);
1136 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1137 if (copy_to_user(optval, &peercred, len))
1138 return -EFAULT;
1139 goto lenout;
1140 }
1141
1142 case SO_PEERNAME:
1143 {
1144 char address[128];
1145
1146 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1147 return -ENOTCONN;
1148 if (lv < len)
1149 return -EINVAL;
1150 if (copy_to_user(optval, address, len))
1151 return -EFAULT;
1152 goto lenout;
1153 }
1154
1155 /* Dubious BSD thing... Probably nobody even uses it, but
1156 * the UNIX standard wants it for whatever reason... -DaveM
1157 */
1158 case SO_ACCEPTCONN:
1159 v.val = sk->sk_state == TCP_LISTEN;
1160 break;
1161
1162 case SO_PASSSEC:
1163 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1164 break;
1165
1166 case SO_PEERSEC:
1167 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1168
1169 case SO_MARK:
1170 v.val = sk->sk_mark;
1171 break;
1172
1173 case SO_RXQ_OVFL:
1174 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1175 break;
1176
1177 case SO_WIFI_STATUS:
1178 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1179 break;
1180
1181 case SO_PEEK_OFF:
1182 if (!sock->ops->set_peek_off)
1183 return -EOPNOTSUPP;
1184
1185 v.val = sk->sk_peek_off;
1186 break;
1187 case SO_NOFCS:
1188 v.val = sock_flag(sk, SOCK_NOFCS);
1189 break;
1190
1191 case SO_BINDTODEVICE:
1192 return sock_getbindtodevice(sk, optval, optlen, len);
1193
1194 case SO_GET_FILTER:
1195 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1196 if (len < 0)
1197 return len;
1198
1199 goto lenout;
1200
1201 case SO_LOCK_FILTER:
1202 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1203 break;
1204
1205 case SO_SELECT_ERR_QUEUE:
1206 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1207 break;
1208
1209 default:
1210 return -ENOPROTOOPT;
1211 }
1212
1213 if (len > lv)
1214 len = lv;
1215 if (copy_to_user(optval, &v, len))
1216 return -EFAULT;
1217 lenout:
1218 if (put_user(len, optlen))
1219 return -EFAULT;
1220 return 0;
1221 }
1222
1223 /*
1224 * Initialize an sk_lock.
1225 *
1226 * (We also register the sk_lock with the lock validator.)
1227 */
1228 static inline void sock_lock_init(struct sock *sk)
1229 {
1230 sock_lock_init_class_and_name(sk,
1231 af_family_slock_key_strings[sk->sk_family],
1232 af_family_slock_keys + sk->sk_family,
1233 af_family_key_strings[sk->sk_family],
1234 af_family_keys + sk->sk_family);
1235 }
1236
1237 /*
1238 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1239 * even temporarily, because of RCU lookups. sk_node should also be left as is.
1240 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
1241 */
1242 static void sock_copy(struct sock *nsk, const struct sock *osk)
1243 {
1244 #ifdef CONFIG_SECURITY_NETWORK
1245 void *sptr = nsk->sk_security;
1246 #endif
1247 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1248
1249 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1250 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1251
1252 #ifdef CONFIG_SECURITY_NETWORK
1253 nsk->sk_security = sptr;
1254 security_sk_clone(osk, nsk);
1255 #endif
1256 }
1257
1258 void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1259 {
1260 unsigned long nulls1, nulls2;
1261
1262 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1263 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1264 if (nulls1 > nulls2)
1265 swap(nulls1, nulls2);
1266
1267 if (nulls1 != 0)
1268 memset((char *)sk, 0, nulls1);
1269 memset((char *)sk + nulls1 + sizeof(void *), 0,
1270 nulls2 - nulls1 - sizeof(void *));
1271 memset((char *)sk + nulls2 + sizeof(void *), 0,
1272 size - nulls2 - sizeof(void *));
1273 }
1274 EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1275
1276 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1277 int family)
1278 {
1279 struct sock *sk;
1280 struct kmem_cache *slab;
1281
1282 slab = prot->slab;
1283 if (slab != NULL) {
1284 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1285 if (!sk)
1286 return sk;
1287 if (priority & __GFP_ZERO) {
1288 if (prot->clear_sk)
1289 prot->clear_sk(sk, prot->obj_size);
1290 else
1291 sk_prot_clear_nulls(sk, prot->obj_size);
1292 }
1293 } else
1294 sk = kmalloc(prot->obj_size, priority);
1295
1296 if (sk != NULL) {
1297 kmemcheck_annotate_bitfield(sk, flags);
1298
1299 if (security_sk_alloc(sk, family, priority))
1300 goto out_free;
1301
1302 if (!try_module_get(prot->owner))
1303 goto out_free_sec;
1304 sk_tx_queue_clear(sk);
1305 }
1306
1307 return sk;
1308
1309 out_free_sec:
1310 security_sk_free(sk);
1311 out_free:
1312 if (slab != NULL)
1313 kmem_cache_free(slab, sk);
1314 else
1315 kfree(sk);
1316 return NULL;
1317 }
1318
1319 static void sk_prot_free(struct proto *prot, struct sock *sk)
1320 {
1321 struct kmem_cache *slab;
1322 struct module *owner;
1323
1324 owner = prot->owner;
1325 slab = prot->slab;
1326
1327 security_sk_free(sk);
1328 if (slab != NULL)
1329 kmem_cache_free(slab, sk);
1330 else
1331 kfree(sk);
1332 module_put(owner);
1333 }
1334
1335 #if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1336 void sock_update_classid(struct sock *sk)
1337 {
1338 u32 classid;
1339
1340 classid = task_cls_classid(current);
1341 if (classid != sk->sk_classid)
1342 sk->sk_classid = classid;
1343 }
1344 EXPORT_SYMBOL(sock_update_classid);
1345 #endif
1346
1347 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1348 void sock_update_netprioidx(struct sock *sk)
1349 {
1350 if (in_interrupt())
1351 return;
1352
1353 sk->sk_cgrp_prioidx = task_netprioidx(current);
1354 }
1355 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1356 #endif
1357
1358 /**
1359 * sk_alloc - All socket objects are allocated here
1360 * @net: the applicable net namespace
1361 * @family: protocol family
1362 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1363 * @prot: struct proto associated with this new sock instance
1364 */
1365 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1366 struct proto *prot)
1367 {
1368 struct sock *sk;
1369
1370 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1371 if (sk) {
1372 sk->sk_family = family;
1373 /*
1374 * See comment in struct sock definition to understand
1375 * why we need sk_prot_creator -acme
1376 */
1377 sk->sk_prot = sk->sk_prot_creator = prot;
1378 sock_lock_init(sk);
1379 sock_net_set(sk, get_net(net));
1380 atomic_set(&sk->sk_wmem_alloc, 1);
1381
1382 sock_update_classid(sk);
1383 sock_update_netprioidx(sk);
1384 }
1385
1386 return sk;
1387 }
1388 EXPORT_SYMBOL(sk_alloc);
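
/*
 * Illustrative sketch only (not part of the original file): the usual pairing
 * of sk_alloc() with sock_init_data() in a protocol family's create hook.
 * PF_EXAMPLE and example_proto are hypothetical placeholders.
 */
#if 0
static int example_create(struct net *net, struct socket *sock, int protocol)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto);
	if (!sk)
		return -ENOBUFS;

	sock_init_data(sock, sk);	/* attach sk to @sock and set generic defaults */
	sk->sk_protocol = protocol;
	return 0;
}
#endif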
1389
1390 static void __sk_free(struct sock *sk)
1391 {
1392 struct sk_filter *filter;
1393
1394 if (sk->sk_destruct)
1395 sk->sk_destruct(sk);
1396
1397 filter = rcu_dereference_check(sk->sk_filter,
1398 atomic_read(&sk->sk_wmem_alloc) == 0);
1399 if (filter) {
1400 sk_filter_uncharge(sk, filter);
1401 RCU_INIT_POINTER(sk->sk_filter, NULL);
1402 }
1403
1404 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1405
1406 if (atomic_read(&sk->sk_omem_alloc))
1407 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1408 __func__, atomic_read(&sk->sk_omem_alloc));
1409
1410 if (sk->sk_frag.page) {
1411 put_page(sk->sk_frag.page);
1412 sk->sk_frag.page = NULL;
1413 }
1414
1415 if (sk->sk_peer_cred)
1416 put_cred(sk->sk_peer_cred);
1417 put_pid(sk->sk_peer_pid);
1418 put_net(sock_net(sk));
1419 sk_prot_free(sk->sk_prot_creator, sk);
1420 }
1421
1422 void sk_free(struct sock *sk)
1423 {
1424 /*
1425 * We subtract one from sk_wmem_alloc and can know if
1426 * some packets are still in some tx queue.
1427 * If not null, sock_wfree() will call __sk_free(sk) later
1428 */
1429 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1430 __sk_free(sk);
1431 }
1432 EXPORT_SYMBOL(sk_free);
1433
1434 /*
1435 * The last sock_put should drop the reference to sk->sk_net. It has already
1436 * been dropped in sk_change_net. Taking a reference to the stopping namespace
1437 * is not an option.
1438 * Take a reference to the socket to remove it from the hash while still _alive_,
1439 * and after that destroy it in the context of init_net.
1440 */
1441 void sk_release_kernel(struct sock *sk)
1442 {
1443 if (sk == NULL || sk->sk_socket == NULL)
1444 return;
1445
1446 sock_hold(sk);
1447 sock_release(sk->sk_socket);
1448 release_net(sock_net(sk));
1449 sock_net_set(sk, get_net(&init_net));
1450 sock_put(sk);
1451 }
1452 EXPORT_SYMBOL(sk_release_kernel);
1453
1454 static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1455 {
1456 if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1457 sock_update_memcg(newsk);
1458 }
1459
1460 /**
1461 * sk_clone_lock - clone a socket, and lock its clone
1462 * @sk: the socket to clone
1463 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1464 *
1465 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1466 */
1467 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1468 {
1469 struct sock *newsk;
1470
1471 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1472 if (newsk != NULL) {
1473 struct sk_filter *filter;
1474
1475 sock_copy(newsk, sk);
1476
1477 newsk->sk_prot_creator = sk->sk_prot;
1478
1479 /* SANITY */
1480 get_net(sock_net(newsk));
1481 sk_node_init(&newsk->sk_node);
1482 sock_lock_init(newsk);
1483 bh_lock_sock(newsk);
1484 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1485 newsk->sk_backlog.len = 0;
1486
1487 atomic_set(&newsk->sk_rmem_alloc, 0);
1488 /*
1489 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1490 */
1491 atomic_set(&newsk->sk_wmem_alloc, 1);
1492 atomic_set(&newsk->sk_omem_alloc, 0);
1493 skb_queue_head_init(&newsk->sk_receive_queue);
1494 skb_queue_head_init(&newsk->sk_write_queue);
1495 #ifdef CONFIG_NET_DMA
1496 skb_queue_head_init(&newsk->sk_async_wait_queue);
1497 #endif
1498
1499 spin_lock_init(&newsk->sk_dst_lock);
1500 rwlock_init(&newsk->sk_callback_lock);
1501 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1502 af_callback_keys + newsk->sk_family,
1503 af_family_clock_key_strings[newsk->sk_family]);
1504
1505 newsk->sk_dst_cache = NULL;
1506 newsk->sk_wmem_queued = 0;
1507 newsk->sk_forward_alloc = 0;
1508 newsk->sk_send_head = NULL;
1509 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1510
1511 sock_reset_flag(newsk, SOCK_DONE);
1512 skb_queue_head_init(&newsk->sk_error_queue);
1513
1514 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1515 if (filter != NULL)
1516 sk_filter_charge(newsk, filter);
1517
1518 if (unlikely(xfrm_sk_clone_policy(newsk))) {
1519 /* It is still a raw copy of the parent, so invalidate
1520 * the destructor and do a plain sk_free() */
1521 newsk->sk_destruct = NULL;
1522 bh_unlock_sock(newsk);
1523 sk_free(newsk);
1524 newsk = NULL;
1525 goto out;
1526 }
1527
1528 newsk->sk_err = 0;
1529 newsk->sk_err_soft = 0;
1530 newsk->sk_priority = 0;
1531 /*
1532 * Before updating sk_refcnt, we must commit prior changes to memory
1533 * (Documentation/RCU/rculist_nulls.txt for details)
1534 */
1535 smp_wmb();
1536 atomic_set(&newsk->sk_refcnt, 2);
1537
1538 /*
1539 * Increment the counter in the same struct proto as the master
1540 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1541 * is the same as sk->sk_prot->socks, as this field was copied
1542 * with memcpy).
1543 *
1544 * This _changes_ the previous behaviour, where
1545 * tcp_create_openreq_child always was incrementing the
1546 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
1547 * to be taken into account in all callers. -acme
1548 */
1549 sk_refcnt_debug_inc(newsk);
1550 sk_set_socket(newsk, NULL);
1551 newsk->sk_wq = NULL;
1552
1553 sk_update_clone(sk, newsk);
1554
1555 if (newsk->sk_prot->sockets_allocated)
1556 sk_sockets_allocated_inc(newsk);
1557
1558 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1559 net_enable_timestamp();
1560 }
1561 out:
1562 return newsk;
1563 }
1564 EXPORT_SYMBOL_GPL(sk_clone_lock);
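
/*
 * Illustrative sketch only (not part of the original file): sk_clone_lock()
 * returns the clone with bh_lock_sock() held, so the caller must drop that
 * lock on every path, including its own error paths, as the kernel-doc above
 * notes. example_init_child() is a hypothetical per-protocol step.
 */
#if 0
static struct sock *example_clone(struct sock *sk)
{
	struct sock *newsk = sk_clone_lock(sk, GFP_ATOMIC);

	if (newsk) {
		example_init_child(newsk);
		bh_unlock_sock(newsk);	/* always release the clone's lock */
	}
	return newsk;
}
#endif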
1565
1566 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1567 {
1568 __sk_dst_set(sk, dst);
1569 sk->sk_route_caps = dst->dev->features;
1570 if (sk->sk_route_caps & NETIF_F_GSO)
1571 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1572 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1573 if (sk_can_gso(sk)) {
1574 if (dst->header_len) {
1575 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1576 } else {
1577 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1578 sk->sk_gso_max_size = dst->dev->gso_max_size;
1579 sk->sk_gso_max_segs = dst->dev->gso_max_segs;
1580 }
1581 }
1582 }
1583 EXPORT_SYMBOL_GPL(sk_setup_caps);
1584
1585 /*
1586 * Simple resource managers for sockets.
1587 */
1588
1589
1590 /*
1591 * Write buffer destructor automatically called from kfree_skb.
1592 */
1593 void sock_wfree(struct sk_buff *skb)
1594 {
1595 struct sock *sk = skb->sk;
1596 unsigned int len = skb->truesize;
1597
1598 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1599 /*
1600 * Keep a reference on sk_wmem_alloc, this will be released
1601 * after sk_write_space() call
1602 */
1603 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1604 sk->sk_write_space(sk);
1605 len = 1;
1606 }
1607 /*
1608 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1609 * could not do because of in-flight packets
1610 */
1611 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1612 __sk_free(sk);
1613 }
1614 EXPORT_SYMBOL(sock_wfree);
1615
1616 /*
1617 * Read buffer destructor automatically called from kfree_skb.
1618 */
1619 void sock_rfree(struct sk_buff *skb)
1620 {
1621 struct sock *sk = skb->sk;
1622 unsigned int len = skb->truesize;
1623
1624 atomic_sub(len, &sk->sk_rmem_alloc);
1625 sk_mem_uncharge(sk, len);
1626 }
1627 EXPORT_SYMBOL(sock_rfree);
1628
1629 void sock_edemux(struct sk_buff *skb)
1630 {
1631 struct sock *sk = skb->sk;
1632
1633 #ifdef CONFIG_INET
1634 if (sk->sk_state == TCP_TIME_WAIT)
1635 inet_twsk_put(inet_twsk(sk));
1636 else
1637 #endif
1638 sock_put(sk);
1639 }
1640 EXPORT_SYMBOL(sock_edemux);
1641
1642 kuid_t sock_i_uid(struct sock *sk)
1643 {
1644 kuid_t uid;
1645
1646 /* mtk_net: fix kernel bug */
1647 if (!sk) {
1648 pr_info("sk == NULL for sock_i_uid\n");
1649 return GLOBAL_ROOT_UID;
1650 }
1651
1652 read_lock_bh(&sk->sk_callback_lock);
1653 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1654 read_unlock_bh(&sk->sk_callback_lock);
1655 return uid;
1656 }
1657 EXPORT_SYMBOL(sock_i_uid);
1658
1659 unsigned long sock_i_ino(struct sock *sk)
1660 {
1661 unsigned long ino;
1662
1663 read_lock_bh(&sk->sk_callback_lock);
1664 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1665 read_unlock_bh(&sk->sk_callback_lock);
1666 return ino;
1667 }
1668 EXPORT_SYMBOL(sock_i_ino);
1669
1670 /*
1671 * Allocate a skb from the socket's send buffer.
1672 */
1673 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1674 gfp_t priority)
1675 {
1676 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1677 struct sk_buff *skb = alloc_skb(size, priority);
1678 if (skb) {
1679 skb_set_owner_w(skb, sk);
1680 return skb;
1681 }
1682 }
1683 return NULL;
1684 }
1685 EXPORT_SYMBOL(sock_wmalloc);
1686
1687 /*
1688 * Allocate a skb from the socket's receive buffer.
1689 */
1690 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1691 gfp_t priority)
1692 {
1693 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1694 struct sk_buff *skb = alloc_skb(size, priority);
1695 if (skb) {
1696 skb_set_owner_r(skb, sk);
1697 return skb;
1698 }
1699 }
1700 return NULL;
1701 }
1702
1703 /*
1704 * Allocate a memory block from the socket's option memory buffer.
1705 */
1706 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1707 {
1708 if ((unsigned int)size <= sysctl_optmem_max &&
1709 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1710 void *mem;
1711 /* First do the add, to avoid the race if kmalloc
1712 * might sleep.
1713 */
1714 atomic_add(size, &sk->sk_omem_alloc);
1715 mem = kmalloc(size, priority);
1716 if (mem)
1717 return mem;
1718 atomic_sub(size, &sk->sk_omem_alloc);
1719 }
1720 return NULL;
1721 }
1722 EXPORT_SYMBOL(sock_kmalloc);
1723
1724 /*
1725 * Free an option memory block.
1726 */
1727 void sock_kfree_s(struct sock *sk, void *mem, int size)
1728 {
1729 kfree(mem);
1730 atomic_sub(size, &sk->sk_omem_alloc);
1731 }
1732 EXPORT_SYMBOL(sock_kfree_s);
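
/*
 * Illustrative sketch only (not part of the original file): sock_kmalloc() and
 * sock_kfree_s() are paired so per-socket option data is charged against
 * sk_omem_alloc (bounded by sysctl_optmem_max). struct example_opt is a
 * hypothetical placeholder.
 */
#if 0
static int example_attach_opt(struct sock *sk)
{
	struct example_opt *opt;

	opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
	if (!opt)
		return -ENOBUFS;

	/* ... initialise and use opt ... */

	sock_kfree_s(sk, opt, sizeof(*opt));	/* uncharges sk_omem_alloc again */
	return 0;
}
#endif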
1733
1734 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1735 I think these locks should be removed for datagram sockets.
1736 */
1737 static long sock_wait_for_wmem(struct sock *sk, long timeo)
1738 {
1739 DEFINE_WAIT(wait);
1740
1741 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1742 for (;;) {
1743 if (!timeo)
1744 break;
1745 if (signal_pending(current))
1746 break;
1747 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1748 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1749 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1750 break;
1751 if (sk->sk_shutdown & SEND_SHUTDOWN)
1752 break;
1753 if (sk->sk_err)
1754 break;
1755 timeo = schedule_timeout(timeo);
1756 }
1757 finish_wait(sk_sleep(sk), &wait);
1758 return timeo;
1759 }
1760
1761
1762 /* debug function */
1763
1764 static int sock_dump_info(struct sock *sk)
1765 {
1766 //dump receiver queue 128 bytes
1767 //struct sk_buff *skb;
1768 //char skbmsg[128];
1769 //dump receiver queue 128 bytes end
1770
1771 if(sk->sk_family == AF_UNIX)
1772 {
1773 struct unix_sock *u = unix_sk(sk);
1774 struct sock *other = NULL;
1775 if( (u->path.dentry !=NULL)&&(u->path.dentry->d_iname!=NULL))
1776 //if( (u->dentry !=NULL)&&(u->dentry->d_iname!=NULL))
1777 {
1778 #ifdef CONFIG_MTK_NET_LOGGING
1779 printk(KERN_INFO "[mtk_net][sock]sockdbg: socket-Name:%s \n",u->path.dentry->d_iname);
1780 #endif
1781 }
1782 else
1783 {
1784 #ifdef CONFIG_MTK_NET_LOGGING
1785 printk(KERN_INFO "[mtk_net][sock]sockdbg:socket Name (NULL)\n" );
1786 #endif
1787 }
1788
1789 if(sk->sk_socket && SOCK_INODE(sk->sk_socket))
1790 {
1791 #ifdef CONFIG_MTK_NET_LOGGING
1792 printk(KERN_INFO "[mtk_net][sock]sockdbg:socket Inode[%lu]\n" ,SOCK_INODE(sk->sk_socket)->i_ino);
1793 #endif
1794 }
1795
1796 other = unix_sk(sk)->peer ;
1797 if (!other)
1798 {
1799 #ifdef CONFIG_MTK_NET_LOGGING
1800 printk(KERN_INFO "[mtk_net][sock]sockdbg:peer is (NULL) \n");
1801 #endif
1802 } else{
1803
1804 if ((((struct unix_sock *)other)->path.dentry != NULL)&&(((struct unix_sock *)other)->path.dentry->d_iname != NULL))
1805 //if ((((struct unix_sock *)other)->dentry != NULL)&&(((struct unix_sock *)other)->dentry->d_iname != NULL))
1806 {
1807 #ifdef CONFIG_MTK_NET_LOGGING
1808 printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name:%s \n",((struct unix_sock *)other)->path.dentry->d_iname);
1809 #endif
1810 }
1811 else
1812 {
1813 #ifdef CONFIG_MTK_NET_LOGGING
1814 printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Name (NULL) \n");
1815 #endif
1816 }
1817
1818 if(other->sk_socket && SOCK_INODE(other->sk_socket))
1819 {
1820 #ifdef CONFIG_MTK_NET_LOGGING
1821 printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Inode [%lu] \n", SOCK_INODE(other->sk_socket)->i_ino);
1822 #endif
1823 }
1824 #ifdef CONFIG_MTK_NET_LOGGING
1825 printk(KERN_INFO "[mtk_net][sock]sockdbg: Peer Receive Queue len:%d \n", other->sk_receive_queue.qlen);
1826 #endif
1827 //dump receiver queue 128 bytes
1828 /* if ((skb = skb_peek_tail(&other->sk_receive_queue)) == NULL) {
1829
1830 printk(KERN_INFO "sockdbg: Peer Recieve Queue is null (warning) \n");
1831 }else{
1832 int i =0 ,len=0;
1833 if((skb->len !=0) && (skb->data != NULL)){
1834
1835 if(skb->len >= 127){
1836 len = 127 ;
1837 }else
1838 {
1839 len = skb->len ;
1840 }
1841 for (i=0;i<len;i++)
1842 sprintf(skbmsg+i, "%x", skb->data[i]);
1843
1844 skbmsg[len]= '\0' ;
1845
1846 printk(KERN_INFO "sockdbg: Peer Recieve Queue dump(%d bytes):%s\n", len, skbmsg);
1847
1848
1849 }else{
1850 printk(KERN_INFO "sockdbg: Peer Recieve skb error \n");
1851 }*/
1852 //dump receiver queue 128 bytes end
1853
1854 //}
1855 //dump receiver queue 128 bytes end
1856
1857 }
1858 }
1859
1860 return 0 ;
1861
1862
1863 }
1864
1865
1866
1867 /*
1868 * Generic send/receive buffer handlers
1869 */
1870
1871 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1872 unsigned long data_len, int noblock,
1873 int *errcode)
1874 {
1875 struct sk_buff *skb;
1876 gfp_t gfp_mask;
1877 long timeo;
1878 int err;
1879 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1880
1881 err = -EMSGSIZE;
1882 if (npages > MAX_SKB_FRAGS)
1883 goto failure;
1884
1885 gfp_mask = sk->sk_allocation;
1886 if (gfp_mask & __GFP_WAIT)
1887 gfp_mask |= __GFP_REPEAT;
1888
1889 timeo = sock_sndtimeo(sk, noblock);
1890 while (1) {
1891 err = sock_error(sk);
1892 if (err != 0)
1893 goto failure;
1894
1895 err = -EPIPE;
1896 if (sk->sk_shutdown & SEND_SHUTDOWN)
1897 goto failure;
1898
1899 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1900 skb = alloc_skb(header_len, gfp_mask);
1901 if (skb) {
1902 int i;
1903
1904 /* No pages, we're done... */
1905 if (!data_len)
1906 break;
1907
1908 skb->truesize += data_len;
1909 skb_shinfo(skb)->nr_frags = npages;
1910 for (i = 0; i < npages; i++) {
1911 struct page *page;
1912
1913 page = alloc_pages(sk->sk_allocation, 0);
1914 if (!page) {
1915 err = -ENOBUFS;
1916 skb_shinfo(skb)->nr_frags = i;
1917 kfree_skb(skb);
1918 goto failure;
1919 }
1920
1921 __skb_fill_page_desc(skb, i,
1922 page, 0,
1923 (data_len >= PAGE_SIZE ?
1924 PAGE_SIZE :
1925 data_len));
1926 data_len -= PAGE_SIZE;
1927 }
1928
1929 /* Full success... */
1930 break;
1931 }
1932 err = -ENOBUFS;
1933 goto failure;
1934 }
1935 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1936 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1937 err = -EAGAIN;
1938 if (!timeo)
1939 goto failure;
1940 if (signal_pending(current))
1941 goto interrupted;
1942
1943 sock_dump_info(sk);
1944 #ifdef CONFIG_MTK_NET_LOGGING
1945 printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem, timeo =%ld, wmem =%d, snd buf =%d \n",
1946 timeo, atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf);
1947 #endif
1948 timeo = sock_wait_for_wmem(sk, timeo);
1949 #ifdef CONFIG_MTK_NET_LOGGING
1950 printk(KERN_INFO "[mtk_net][sock]sockdbg: wait_for_wmem done, header_len=0x%lx, data_len=0x%lx,timeo =%ld \n",
1951 header_len, data_len ,timeo);
1952 #endif
1953 }
1954
1955 skb_set_owner_w(skb, sk);
1956 return skb;
1957
1958 interrupted:
1959 err = sock_intr_errno(timeo);
1960 failure:
1961 *errcode = err;
1962 return NULL;
1963 }
1964 EXPORT_SYMBOL(sock_alloc_send_pskb);
1965
1966 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1967 int noblock, int *errcode)
1968 {
1969 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1970 }
1971 EXPORT_SYMBOL(sock_alloc_send_skb);
1972
1973 /* On 32bit arches, an skb frag is limited to 2^15 */
1974 #define SKB_FRAG_PAGE_ORDER get_order(32768)
1975
1976 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1977 {
1978 int order;
1979
1980 if (pfrag->page) {
1981 if (atomic_read(&pfrag->page->_count) == 1) {
1982 pfrag->offset = 0;
1983 return true;
1984 }
1985 if (pfrag->offset < pfrag->size)
1986 return true;
1987 put_page(pfrag->page);
1988 }
1989
1990 /* We restrict high order allocations to users that can afford to wait */
1991 order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1992
1993 do {
1994 gfp_t gfp = sk->sk_allocation;
1995
1996 if (order)
1997 gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
1998 pfrag->page = alloc_pages(gfp, order);
1999 if (likely(pfrag->page)) {
2000 pfrag->offset = 0;
2001 pfrag->size = PAGE_SIZE << order;
2002 return true;
2003 }
2004 } while (--order >= 0);
2005
2006 sk_enter_memory_pressure(sk);
2007 sk_stream_moderate_sndbuf(sk);
2008 return false;
2009 }
2010 EXPORT_SYMBOL(sk_page_frag_refill);
2011
2012 static void __lock_sock(struct sock *sk)
2013 __releases(&sk->sk_lock.slock)
2014 __acquires(&sk->sk_lock.slock)
2015 {
2016 DEFINE_WAIT(wait);
2017
2018 for (;;) {
2019 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2020 TASK_UNINTERRUPTIBLE);
2021 spin_unlock_bh(&sk->sk_lock.slock);
2022 schedule();
2023 spin_lock_bh(&sk->sk_lock.slock);
2024 if (!sock_owned_by_user(sk))
2025 break;
2026 }
2027 finish_wait(&sk->sk_lock.wq, &wait);
2028 }
2029
2030 static void __release_sock(struct sock *sk)
2031 __releases(&sk->sk_lock.slock)
2032 __acquires(&sk->sk_lock.slock)
2033 {
2034 struct sk_buff *skb = sk->sk_backlog.head;
2035
2036 do {
2037 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2038 bh_unlock_sock(sk);
2039
2040 do {
2041 struct sk_buff *next = skb->next;
2042
2043 prefetch(next);
2044 WARN_ON_ONCE(skb_dst_is_noref(skb));
2045 skb->next = NULL;
2046 sk_backlog_rcv(sk, skb);
2047
2048 /*
2049 * We are in process context here with softirqs
2050 * disabled, use cond_resched_softirq() to preempt.
2051 * This is safe to do because we've taken the backlog
2052 * queue private:
2053 */
2054 cond_resched_softirq();
2055
2056 skb = next;
2057 } while (skb != NULL);
2058
2059 bh_lock_sock(sk);
2060 } while ((skb = sk->sk_backlog.head) != NULL);
2061
2062 /*
2063 	 * Doing the zeroing here guarantees we cannot loop forever
2064 * while a wild producer attempts to flood us.
2065 */
2066 sk->sk_backlog.len = 0;
2067 }
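/*
 * Illustrative sketch (for exposition only): the usual softirq receive
 * pattern that feeds the backlog drained by __release_sock() above.
 * example_rcv()/example_do_rcv() are hypothetical stand-ins for a
 * protocol's receive and .backlog_rcv handlers.
 */
#if 0	/* sketch only */
static int example_rcv(struct sock *sk, struct sk_buff *skb)
{
	int rc = 0;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		rc = example_do_rcv(sk, skb);	/* process immediately */
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		/* Backlog full: drop. Otherwise release_sock() will feed
		 * the skb to example_do_rcv() via __release_sock().
		 */
		kfree_skb(skb);
		rc = -ENOBUFS;
	}
	bh_unlock_sock(sk);
	return rc;
}
#endif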
2068
2069 /**
2070 * sk_wait_data - wait for data to arrive at sk_receive_queue
2071 * @sk: sock to wait on
2072 * @timeo: for how long
2073 *
2074  * Now the socket state, including sk->sk_err, is changed only under the lock,
2075  * hence we may omit checks after joining the wait queue.
2076  * We check the receive queue before schedule() only as an optimization;
2077  * it is very likely that release_sock() added new data.
2078 */
2079 int sk_wait_data(struct sock *sk, long *timeo)
2080 {
2081 int rc;
2082 DEFINE_WAIT(wait);
2083
2084 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2085 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2086 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
2087 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2088 finish_wait(sk_sleep(sk), &wait);
2089 return rc;
2090 }
2091 EXPORT_SYMBOL(sk_wait_data);
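/*
 * Illustrative sketch (for exposition only): a minimal blocking receive loop
 * around sk_wait_data(), similar in spirit to protocol recvmsg paths.
 * example_recv_wait() is a hypothetical helper.
 */
static int example_recv_wait(struct sock *sk, int noblock)
{
	long timeo = sock_rcvtimeo(sk, noblock);
	int err = 0;

	lock_sock(sk);
	while (skb_queue_empty(&sk->sk_receive_queue)) {
		err = sock_error(sk);
		if (err)
			break;
		if (!timeo) {
			err = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			err = sock_intr_errno(timeo);
			break;
		}
		/* Drops the socket lock while sleeping, re-takes it after. */
		sk_wait_data(sk, &timeo);
	}
	release_sock(sk);
	return err;
}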
2092
2093 /**
2094 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2095 * @sk: socket
2096 * @size: memory size to allocate
2097 * @kind: allocation type
2098 *
2099 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2100 * rmem allocation. This function assumes that protocols which have
2101 * memory_pressure use sk_wmem_queued as write buffer accounting.
2102 */
2103 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2104 {
2105 struct proto *prot = sk->sk_prot;
2106 int amt = sk_mem_pages(size);
2107 long allocated;
2108 int parent_status = UNDER_LIMIT;
2109
2110 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2111
2112 allocated = sk_memory_allocated_add(sk, amt, &parent_status);
2113
2114 /* Under limit. */
2115 if (parent_status == UNDER_LIMIT &&
2116 allocated <= sk_prot_mem_limits(sk, 0)) {
2117 sk_leave_memory_pressure(sk);
2118 return 1;
2119 }
2120
2121 /* Under pressure. (we or our parents) */
2122 if ((parent_status > SOFT_LIMIT) ||
2123 allocated > sk_prot_mem_limits(sk, 1))
2124 sk_enter_memory_pressure(sk);
2125
2126 /* Over hard limit (we or our parents) */
2127 if ((parent_status == OVER_LIMIT) ||
2128 (allocated > sk_prot_mem_limits(sk, 2)))
2129 goto suppress_allocation;
2130
2131 /* guarantee minimum buffer size under pressure */
2132 if (kind == SK_MEM_RECV) {
2133 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2134 return 1;
2135
2136 } else { /* SK_MEM_SEND */
2137 if (sk->sk_type == SOCK_STREAM) {
2138 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2139 return 1;
2140 } else if (atomic_read(&sk->sk_wmem_alloc) <
2141 prot->sysctl_wmem[0])
2142 return 1;
2143 }
2144
2145 if (sk_has_memory_pressure(sk)) {
2146 int alloc;
2147
2148 if (!sk_under_memory_pressure(sk))
2149 return 1;
2150 alloc = sk_sockets_allocated_read_positive(sk);
2151 if (sk_prot_mem_limits(sk, 2) > alloc *
2152 sk_mem_pages(sk->sk_wmem_queued +
2153 atomic_read(&sk->sk_rmem_alloc) +
2154 sk->sk_forward_alloc))
2155 return 1;
2156 }
2157
2158 suppress_allocation:
2159
2160 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2161 sk_stream_moderate_sndbuf(sk);
2162
2163 /* Fail only if socket is _under_ its sndbuf.
2164 		 * In this case we cannot block, so we have to fail.
2165 */
2166 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2167 return 1;
2168 }
2169
2170 trace_sock_exceed_buf_limit(sk, prot, allocated);
2171
2172 /* Alas. Undo changes. */
2173 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
2174
2175 sk_memory_allocated_sub(sk, amt);
2176
2177 return 0;
2178 }
2179 EXPORT_SYMBOL(__sk_mem_schedule);
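/*
 * Illustrative sketch (for exposition only): charging receive memory before
 * queueing an skb, which is how __sk_mem_schedule() is normally reached
 * (through the sk_rmem_schedule() inline when sk_forward_alloc cannot cover
 * skb->truesize). example_queue_rcv() is a hypothetical helper.
 */
static int example_queue_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >
	    (unsigned int)sk->sk_rcvbuf)
		return -ENOMEM;

	/* May call __sk_mem_schedule(sk, size, SK_MEM_RECV) internally. */
	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return -ENOBUFS;

	skb_set_owner_r(skb, sk);	/* charges sk_rmem_alloc/forward_alloc */
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	return 0;
}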
2180
2181 /**
2182  *	__sk_mem_reclaim - reclaim memory_allocated
2183 * @sk: socket
2184 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2185 */
2186 void __sk_mem_reclaim(struct sock *sk, int amount)
2187 {
2188 amount >>= SK_MEM_QUANTUM_SHIFT;
2189 sk_memory_allocated_sub(sk, amount);
2190 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2191
2192 if (sk_under_memory_pressure(sk) &&
2193 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2194 sk_leave_memory_pressure(sk);
2195 }
2196 EXPORT_SYMBOL(__sk_mem_reclaim);
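/*
 * Illustrative sketch (for exposition only): __sk_mem_reclaim() is usually
 * reached through an inline wrapper that gives back whole SK_MEM_QUANTUM
 * units and keeps any partial quantum cached in sk_forward_alloc, roughly
 * like this. example_mem_reclaim() is a hypothetical name for that wrapper.
 */
static inline void example_mem_reclaim(struct sock *sk)
{
	if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
		__sk_mem_reclaim(sk, sk->sk_forward_alloc);
	/* e.g. forward_alloc == 3 * SK_MEM_QUANTUM + 500 returns three quanta
	 * to memory_allocated and leaves the 500-byte remainder cached.
	 */
}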
2197
2198
2199 /*
2200 * Set of default routines for initialising struct proto_ops when
2201 * the protocol does not support a particular function. In certain
2202 * cases where it makes no sense for a protocol to have a "do nothing"
2203 * function, some default processing is provided.
2204 */
2205
2206 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2207 {
2208 return -EOPNOTSUPP;
2209 }
2210 EXPORT_SYMBOL(sock_no_bind);
2211
2212 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2213 int len, int flags)
2214 {
2215 return -EOPNOTSUPP;
2216 }
2217 EXPORT_SYMBOL(sock_no_connect);
2218
2219 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2220 {
2221 return -EOPNOTSUPP;
2222 }
2223 EXPORT_SYMBOL(sock_no_socketpair);
2224
2225 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2226 {
2227 return -EOPNOTSUPP;
2228 }
2229 EXPORT_SYMBOL(sock_no_accept);
2230
2231 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2232 int *len, int peer)
2233 {
2234 return -EOPNOTSUPP;
2235 }
2236 EXPORT_SYMBOL(sock_no_getname);
2237
2238 unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2239 {
2240 return 0;
2241 }
2242 EXPORT_SYMBOL(sock_no_poll);
2243
2244 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2245 {
2246 return -EOPNOTSUPP;
2247 }
2248 EXPORT_SYMBOL(sock_no_ioctl);
2249
2250 int sock_no_listen(struct socket *sock, int backlog)
2251 {
2252 return -EOPNOTSUPP;
2253 }
2254 EXPORT_SYMBOL(sock_no_listen);
2255
2256 int sock_no_shutdown(struct socket *sock, int how)
2257 {
2258 return -EOPNOTSUPP;
2259 }
2260 EXPORT_SYMBOL(sock_no_shutdown);
2261
2262 int sock_no_setsockopt(struct socket *sock, int level, int optname,
2263 char __user *optval, unsigned int optlen)
2264 {
2265 return -EOPNOTSUPP;
2266 }
2267 EXPORT_SYMBOL(sock_no_setsockopt);
2268
2269 int sock_no_getsockopt(struct socket *sock, int level, int optname,
2270 char __user *optval, int __user *optlen)
2271 {
2272 return -EOPNOTSUPP;
2273 }
2274 EXPORT_SYMBOL(sock_no_getsockopt);
2275
2276 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2277 size_t len)
2278 {
2279 return -EOPNOTSUPP;
2280 }
2281 EXPORT_SYMBOL(sock_no_sendmsg);
2282
2283 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2284 size_t len, int flags)
2285 {
2286 return -EOPNOTSUPP;
2287 }
2288 EXPORT_SYMBOL(sock_no_recvmsg);
2289
2290 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2291 {
2292 /* Mirror missing mmap method error code */
2293 return -ENODEV;
2294 }
2295 EXPORT_SYMBOL(sock_no_mmap);
2296
2297 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2298 {
2299 ssize_t res;
2300 struct msghdr msg = {.msg_flags = flags};
2301 struct kvec iov;
2302 char *kaddr = kmap(page);
2303 iov.iov_base = kaddr + offset;
2304 iov.iov_len = size;
2305 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2306 kunmap(page);
2307 return res;
2308 }
2309 EXPORT_SYMBOL(sock_no_sendpage);
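/*
 * Illustrative sketch (for exposition only): a protocol that does not
 * implement connection-oriented operations can plug the sock_no_*() stubs
 * straight into its proto_ops. All example_* identifiers and PF_EXAMPLE are
 * hypothetical.
 */
#if 0	/* sketch only */
static const struct proto_ops example_dgram_ops = {
	.family		= PF_EXAMPLE,
	.owner		= THIS_MODULE,
	.release	= example_release,
	.bind		= example_bind,
	.connect	= sock_no_connect,	/* connectionless */
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= example_getname,
	.poll		= datagram_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= example_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
#endif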
2310
2311 /*
2312 * Default Socket Callbacks
2313 */
2314
2315 static void sock_def_wakeup(struct sock *sk)
2316 {
2317 struct socket_wq *wq;
2318
2319 rcu_read_lock();
2320 wq = rcu_dereference(sk->sk_wq);
2321 if (wq_has_sleeper(wq))
2322 wake_up_interruptible_all(&wq->wait);
2323 rcu_read_unlock();
2324 }
2325
2326 static void sock_def_error_report(struct sock *sk)
2327 {
2328 struct socket_wq *wq;
2329
2330 rcu_read_lock();
2331 wq = rcu_dereference(sk->sk_wq);
2332 if (wq_has_sleeper(wq))
2333 wake_up_interruptible_poll(&wq->wait, POLLERR);
2334 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2335 rcu_read_unlock();
2336 }
2337
2338 static void sock_def_readable(struct sock *sk, int len)
2339 {
2340 struct socket_wq *wq;
2341
2342 rcu_read_lock();
2343 wq = rcu_dereference(sk->sk_wq);
2344 if (wq_has_sleeper(wq))
2345 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2346 POLLRDNORM | POLLRDBAND);
2347 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2348 rcu_read_unlock();
2349 }
2350
2351 static void sock_def_write_space(struct sock *sk)
2352 {
2353 struct socket_wq *wq;
2354
2355 rcu_read_lock();
2356
2357 /* Do not wake up a writer until he can make "significant"
2358 * progress. --DaveM
2359 */
2360 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2361 wq = rcu_dereference(sk->sk_wq);
2362 if (wq_has_sleeper(wq))
2363 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2364 POLLWRNORM | POLLWRBAND);
2365
2366 /* Should agree with poll, otherwise some programs break */
2367 if (sock_writeable(sk))
2368 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2369 }
2370
2371 rcu_read_unlock();
2372 }
2373
2374 static void sock_def_destruct(struct sock *sk)
2375 {
2376 kfree(sk->sk_protinfo);
2377 }
2378
2379 void sk_send_sigurg(struct sock *sk)
2380 {
2381 if (sk->sk_socket && sk->sk_socket->file)
2382 if (send_sigurg(&sk->sk_socket->file->f_owner))
2383 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2384 }
2385 EXPORT_SYMBOL(sk_send_sigurg);
2386
2387 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2388 unsigned long expires)
2389 {
2390 if (!mod_timer(timer, expires))
2391 sock_hold(sk);
2392 }
2393 EXPORT_SYMBOL(sk_reset_timer);
2394
2395 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2396 {
2397 if (del_timer(timer))
2398 __sock_put(sk);
2399 }
2400 EXPORT_SYMBOL(sk_stop_timer);
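/*
 * Illustrative sketch (for exposition only): sk_reset_timer()/sk_stop_timer()
 * keep a socket reference while a timer is pending. The handler and arming
 * helper below (example_*) are hypothetical.
 */
static void example_timer_handler(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	/* ... protocol work ... */
	sock_put(sk);		/* drop the reference taken when armed */
}

static void example_arm_timer(struct sock *sk)
{
	setup_timer(&sk->sk_timer, example_timer_handler, (unsigned long)sk);
	/* Takes a reference on sk unless the timer was already pending. */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ);
}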
2401
2402 void sock_init_data(struct socket *sock, struct sock *sk)
2403 {
2404 skb_queue_head_init(&sk->sk_receive_queue);
2405 skb_queue_head_init(&sk->sk_write_queue);
2406 skb_queue_head_init(&sk->sk_error_queue);
2407 #ifdef CONFIG_NET_DMA
2408 skb_queue_head_init(&sk->sk_async_wait_queue);
2409 #endif
2410
2411 sk->sk_send_head = NULL;
2412
2413 init_timer(&sk->sk_timer);
2414
2415 sk->sk_allocation = GFP_KERNEL;
2416 sk->sk_rcvbuf = sysctl_rmem_default;
2417 sk->sk_sndbuf = sysctl_wmem_default;
2418 sk->sk_state = TCP_CLOSE;
2419 sk_set_socket(sk, sock);
2420
2421 sock_set_flag(sk, SOCK_ZAPPED);
2422
2423 if (sock) {
2424 sk->sk_type = sock->type;
2425 sk->sk_wq = sock->wq;
2426 sock->sk = sk;
2427 } else
2428 sk->sk_wq = NULL;
2429
2430 spin_lock_init(&sk->sk_dst_lock);
2431 rwlock_init(&sk->sk_callback_lock);
2432 lockdep_set_class_and_name(&sk->sk_callback_lock,
2433 af_callback_keys + sk->sk_family,
2434 af_family_clock_key_strings[sk->sk_family]);
2435
2436 sk->sk_state_change = sock_def_wakeup;
2437 sk->sk_data_ready = sock_def_readable;
2438 sk->sk_write_space = sock_def_write_space;
2439 sk->sk_error_report = sock_def_error_report;
2440 sk->sk_destruct = sock_def_destruct;
2441
2442 sk->sk_frag.page = NULL;
2443 sk->sk_frag.offset = 0;
2444 sk->sk_peek_off = -1;
2445
2446 sk->sk_peer_pid = NULL;
2447 sk->sk_peer_cred = NULL;
2448 sk->sk_write_pending = 0;
2449 sk->sk_rcvlowat = 1;
2450 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2451 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2452
2453 sk->sk_stamp = ktime_set(-1L, 0);
2454
2455 sk->sk_pacing_rate = ~0U;
2456 /*
2457 * Before updating sk_refcnt, we must commit prior changes to memory
2458 * (Documentation/RCU/rculist_nulls.txt for details)
2459 */
2460 smp_wmb();
2461 atomic_set(&sk->sk_refcnt, 1);
2462 atomic_set(&sk->sk_drops, 0);
2463 }
2464 EXPORT_SYMBOL(sock_init_data);
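/*
 * Illustrative sketch (for exposition only): a protocol's create hook
 * typically allocates the sock, runs sock_init_data() to install the
 * defaults above, and then overrides the callbacks it cares about.
 * PF_EXAMPLE, example_proto and example_destruct are hypothetical.
 */
#if 0	/* sketch only */
static int example_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto);
	if (!sk)
		return -ENOBUFS;

	sock_init_data(sock, sk);		/* queues, timers, defaults */
	sk->sk_destruct = example_destruct;	/* override sock_def_destruct */
	sk->sk_protocol = protocol;
	return 0;
}
#endif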
2465
2466 void lock_sock_nested(struct sock *sk, int subclass)
2467 {
2468 might_sleep();
2469 spin_lock_bh(&sk->sk_lock.slock);
2470 if (sk->sk_lock.owned)
2471 __lock_sock(sk);
2472 sk->sk_lock.owned = 1;
2473 spin_unlock(&sk->sk_lock.slock);
2474 /*
2475 * The sk_lock has mutex_lock() semantics here:
2476 */
2477 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2478 local_bh_enable();
2479 }
2480 EXPORT_SYMBOL(lock_sock_nested);
2481
2482 void release_sock(struct sock *sk)
2483 {
2484 /*
2485 * The sk_lock has mutex_unlock() semantics:
2486 */
2487 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2488
2489 spin_lock_bh(&sk->sk_lock.slock);
2490 if (sk->sk_backlog.tail)
2491 __release_sock(sk);
2492
2493 	/* Warning: release_cb() might need to release sk ownership,
2494 	 * i.e. call sock_release_ownership(sk) before us.
2495 */
2496 if (sk->sk_prot->release_cb)
2497 sk->sk_prot->release_cb(sk);
2498
2499 sock_release_ownership(sk);
2500 if (waitqueue_active(&sk->sk_lock.wq))
2501 wake_up(&sk->sk_lock.wq);
2502 spin_unlock_bh(&sk->sk_lock.slock);
2503 }
2504 EXPORT_SYMBOL(release_sock);
2505
2506 /**
2507 * lock_sock_fast - fast version of lock_sock
2508 * @sk: socket
2509 *
2510  * This version should be used for very small sections, where the process won't block.
2511  * Returns false if the fast path is taken:
2512  *   sk_lock.slock locked, owned = 0, BH disabled
2513  * Returns true if the slow path is taken:
2514  *   sk_lock.slock unlocked, owned = 1, BH enabled
2515 */
2516 bool lock_sock_fast(struct sock *sk)
2517 {
2518 might_sleep();
2519 spin_lock_bh(&sk->sk_lock.slock);
2520
2521 if (!sk->sk_lock.owned)
2522 /*
2523 		 * Note: the fast path returns with sk_lock.slock held and BH disabled.
2524 */
2525 return false;
2526
2527 __lock_sock(sk);
2528 sk->sk_lock.owned = 1;
2529 spin_unlock(&sk->sk_lock.slock);
2530 /*
2531 * The sk_lock has mutex_lock() semantics here:
2532 */
2533 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2534 local_bh_enable();
2535 return true;
2536 }
2537 EXPORT_SYMBOL(lock_sock_fast);
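/*
 * Illustrative sketch (for exposition only): lock_sock_fast() is paired with
 * unlock_sock_fast(), which releases whichever form of the lock was actually
 * taken. example_update_counter() is a hypothetical short critical section.
 */
static void example_update_counter(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);

	/* Short, non-blocking work only: BH may be disabled here. */
	atomic_inc(&sk->sk_drops);

	unlock_sock_fast(sk, slow);
}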
2538
2539 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2540 {
2541 struct timeval tv;
2542 if (!sock_flag(sk, SOCK_TIMESTAMP))
2543 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2544 tv = ktime_to_timeval(sk->sk_stamp);
2545 if (tv.tv_sec == -1)
2546 return -ENOENT;
2547 if (tv.tv_sec == 0) {
2548 sk->sk_stamp = ktime_get_real();
2549 tv = ktime_to_timeval(sk->sk_stamp);
2550 }
2551 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2552 }
2553 EXPORT_SYMBOL(sock_get_timestamp);
2554
2555 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2556 {
2557 struct timespec ts;
2558 if (!sock_flag(sk, SOCK_TIMESTAMP))
2559 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2560 ts = ktime_to_timespec(sk->sk_stamp);
2561 if (ts.tv_sec == -1)
2562 return -ENOENT;
2563 if (ts.tv_sec == 0) {
2564 sk->sk_stamp = ktime_get_real();
2565 ts = ktime_to_timespec(sk->sk_stamp);
2566 }
2567 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2568 }
2569 EXPORT_SYMBOL(sock_get_timestampns);
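/*
 * Illustrative sketch (for exposition only): protocols usually expose these
 * two helpers through their ioctl handler for SIOCGSTAMP/SIOCGSTAMPNS
 * (assumed visible via <linux/sockios.h>). example_ioctl() is hypothetical.
 */
static int example_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}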
2570
2571 void sock_enable_timestamp(struct sock *sk, int flag)
2572 {
2573 if (!sock_flag(sk, flag)) {
2574 unsigned long previous_flags = sk->sk_flags;
2575
2576 sock_set_flag(sk, flag);
2577 /*
2578 		 * We just set one of the two flags which require net
2579 		 * time stamping, but time stamping might already have been
2580 		 * enabled because of the other one.
2581 */
2582 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
2583 net_enable_timestamp();
2584 }
2585 }
2586
2587 /*
2588  *	Get a socket option on a socket.
2589 *
2590 * FIX: POSIX 1003.1g is very ambiguous here. It states that
2591 * asynchronous errors should be reported by getsockopt. We assume
2592  *	this means if you specify SO_ERROR (otherwise what's the point of it).
2593 */
2594 int sock_common_getsockopt(struct socket *sock, int level, int optname,
2595 char __user *optval, int __user *optlen)
2596 {
2597 struct sock *sk = sock->sk;
2598
2599 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2600 }
2601 EXPORT_SYMBOL(sock_common_getsockopt);
2602
2603 #ifdef CONFIG_COMPAT
2604 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2605 char __user *optval, int __user *optlen)
2606 {
2607 struct sock *sk = sock->sk;
2608
2609 if (sk->sk_prot->compat_getsockopt != NULL)
2610 return sk->sk_prot->compat_getsockopt(sk, level, optname,
2611 optval, optlen);
2612 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2613 }
2614 EXPORT_SYMBOL(compat_sock_common_getsockopt);
2615 #endif
2616
2617 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2618 struct msghdr *msg, size_t size, int flags)
2619 {
2620 struct sock *sk = sock->sk;
2621 int addr_len = 0;
2622 int err;
2623
2624 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2625 flags & ~MSG_DONTWAIT, &addr_len);
2626 if (err >= 0)
2627 msg->msg_namelen = addr_len;
2628 return err;
2629 }
2630 EXPORT_SYMBOL(sock_common_recvmsg);
2631
2632 /*
2633 * Set socket options on an inet socket.
2634 */
2635 int sock_common_setsockopt(struct socket *sock, int level, int optname,
2636 char __user *optval, unsigned int optlen)
2637 {
2638 struct sock *sk = sock->sk;
2639
2640 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2641 }
2642 EXPORT_SYMBOL(sock_common_setsockopt);
2643
2644 #ifdef CONFIG_COMPAT
2645 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2646 char __user *optval, unsigned int optlen)
2647 {
2648 struct sock *sk = sock->sk;
2649
2650 if (sk->sk_prot->compat_setsockopt != NULL)
2651 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2652 optval, optlen);
2653 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2654 }
2655 EXPORT_SYMBOL(compat_sock_common_setsockopt);
2656 #endif
2657
2658 void sk_common_release(struct sock *sk)
2659 {
2660 if (sk->sk_prot->destroy)
2661 sk->sk_prot->destroy(sk);
2662
2663 /*
2664 	 * Observation: when sk_common_release() is called, processes no longer
2665 	 * have access to the socket, but the network stack still does.
2666 * Step one, detach it from networking:
2667 *
2668 * A. Remove from hash tables.
2669 */
2670
2671 sk->sk_prot->unhash(sk);
2672
2673 /*
2674 	 * At this point the socket cannot receive new packets, but it is possible
2675 	 * that some packets are still in flight because some CPU is running the
2676 	 * receiver and did the hash table lookup before we unhashed the socket.
2677 	 * They will reach the receive queue and be purged by the socket destructor.
2678 	 *
2679 	 * Also, we still have packets pending on the receive queue and probably
2680 	 * our own packets waiting in device queues. sock_destroy will drain the
2681 	 * receive queue, but transmitted packets will delay socket destruction
2682 	 * until the last reference is released.
2683 */
2684
2685 sock_orphan(sk);
2686
2687 xfrm_sk_free_policy(sk);
2688
2689 sk_refcnt_debug_release(sk);
2690
2691 sock_put(sk);
2692 }
2693 EXPORT_SYMBOL(sk_common_release);
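/*
 * Illustrative sketch (for exposition only): simple protocols (raw-style
 * sockets, for instance) can point their proto .close handler straight at
 * sk_common_release(). example_close() is hypothetical.
 */
static void example_close(struct sock *sk, long timeout)
{
	/* No protocol-specific teardown needed beyond the generic path. */
	sk_common_release(sk);
}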
2694
2695 #ifdef CONFIG_PROC_FS
2696 #define PROTO_INUSE_NR 64 /* should be enough for the first time */
2697 struct prot_inuse {
2698 int val[PROTO_INUSE_NR];
2699 };
2700
2701 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2702
2703 #ifdef CONFIG_NET_NS
2704 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2705 {
2706 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2707 }
2708 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2709
2710 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2711 {
2712 int cpu, idx = prot->inuse_idx;
2713 int res = 0;
2714
2715 for_each_possible_cpu(cpu)
2716 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2717
2718 return res >= 0 ? res : 0;
2719 }
2720 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2721
2722 static int __net_init sock_inuse_init_net(struct net *net)
2723 {
2724 net->core.inuse = alloc_percpu(struct prot_inuse);
2725 return net->core.inuse ? 0 : -ENOMEM;
2726 }
2727
2728 static void __net_exit sock_inuse_exit_net(struct net *net)
2729 {
2730 free_percpu(net->core.inuse);
2731 }
2732
2733 static struct pernet_operations net_inuse_ops = {
2734 .init = sock_inuse_init_net,
2735 .exit = sock_inuse_exit_net,
2736 };
2737
2738 static __init int net_inuse_init(void)
2739 {
2740 if (register_pernet_subsys(&net_inuse_ops))
2741 panic("Cannot initialize net inuse counters");
2742
2743 return 0;
2744 }
2745
2746 core_initcall(net_inuse_init);
2747 #else
2748 static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2749
2750 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2751 {
2752 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2753 }
2754 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2755
2756 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2757 {
2758 int cpu, idx = prot->inuse_idx;
2759 int res = 0;
2760
2761 for_each_possible_cpu(cpu)
2762 res += per_cpu(prot_inuse, cpu).val[idx];
2763
2764 return res >= 0 ? res : 0;
2765 }
2766 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2767 #endif
2768
2769 static void assign_proto_idx(struct proto *prot)
2770 {
2771 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2772
2773 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2774 pr_err("PROTO_INUSE_NR exhausted\n");
2775 return;
2776 }
2777
2778 set_bit(prot->inuse_idx, proto_inuse_idx);
2779 }
2780
2781 static void release_proto_idx(struct proto *prot)
2782 {
2783 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2784 clear_bit(prot->inuse_idx, proto_inuse_idx);
2785 }
2786 #else
2787 static inline void assign_proto_idx(struct proto *prot)
2788 {
2789 }
2790
2791 static inline void release_proto_idx(struct proto *prot)
2792 {
2793 }
2794 #endif
2795
2796 int proto_register(struct proto *prot, int alloc_slab)
2797 {
2798 if (alloc_slab) {
2799 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
2800 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2801 NULL);
2802
2803 if (prot->slab == NULL) {
2804 pr_crit("%s: Can't create sock SLAB cache!\n",
2805 prot->name);
2806 goto out;
2807 }
2808
2809 if (prot->rsk_prot != NULL) {
2810 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
2811 if (prot->rsk_prot->slab_name == NULL)
2812 goto out_free_sock_slab;
2813
2814 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2815 prot->rsk_prot->obj_size, 0,
2816 SLAB_HWCACHE_ALIGN, NULL);
2817
2818 if (prot->rsk_prot->slab == NULL) {
2819 pr_crit("%s: Can't create request sock SLAB cache!\n",
2820 prot->name);
2821 goto out_free_request_sock_slab_name;
2822 }
2823 }
2824
2825 if (prot->twsk_prot != NULL) {
2826 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2827
2828 if (prot->twsk_prot->twsk_slab_name == NULL)
2829 goto out_free_request_sock_slab;
2830
2831 prot->twsk_prot->twsk_slab =
2832 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2833 prot->twsk_prot->twsk_obj_size,
2834 0,
2835 SLAB_HWCACHE_ALIGN |
2836 prot->slab_flags,
2837 NULL);
2838 if (prot->twsk_prot->twsk_slab == NULL)
2839 goto out_free_timewait_sock_slab_name;
2840 }
2841 }
2842
2843 mutex_lock(&proto_list_mutex);
2844 list_add(&prot->node, &proto_list);
2845 assign_proto_idx(prot);
2846 mutex_unlock(&proto_list_mutex);
2847 return 0;
2848
2849 out_free_timewait_sock_slab_name:
2850 kfree(prot->twsk_prot->twsk_slab_name);
2851 out_free_request_sock_slab:
2852 if (prot->rsk_prot && prot->rsk_prot->slab) {
2853 kmem_cache_destroy(prot->rsk_prot->slab);
2854 prot->rsk_prot->slab = NULL;
2855 }
2856 out_free_request_sock_slab_name:
2857 if (prot->rsk_prot)
2858 kfree(prot->rsk_prot->slab_name);
2859 out_free_sock_slab:
2860 kmem_cache_destroy(prot->slab);
2861 prot->slab = NULL;
2862 out:
2863 return -ENOBUFS;
2864 }
2865 EXPORT_SYMBOL(proto_register);
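/*
 * Illustrative sketch (for exposition only): module init/exit pairing for
 * proto_register()/proto_unregister(). struct example_sock and the
 * example_* names are hypothetical.
 */
#if 0	/* sketch only */
static struct proto example_proto = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct example_sock),
};

static int __init example_proto_init(void)
{
	/* Second argument of 1 asks for a dedicated kmem_cache. */
	return proto_register(&example_proto, 1);
}

static void __exit example_proto_exit(void)
{
	proto_unregister(&example_proto);
}

module_init(example_proto_init);
module_exit(example_proto_exit);
#endif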
2866
2867 void proto_unregister(struct proto *prot)
2868 {
2869 mutex_lock(&proto_list_mutex);
2870 release_proto_idx(prot);
2871 list_del(&prot->node);
2872 mutex_unlock(&proto_list_mutex);
2873
2874 if (prot->slab != NULL) {
2875 kmem_cache_destroy(prot->slab);
2876 prot->slab = NULL;
2877 }
2878
2879 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
2880 kmem_cache_destroy(prot->rsk_prot->slab);
2881 kfree(prot->rsk_prot->slab_name);
2882 prot->rsk_prot->slab = NULL;
2883 }
2884
2885 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
2886 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
2887 kfree(prot->twsk_prot->twsk_slab_name);
2888 prot->twsk_prot->twsk_slab = NULL;
2889 }
2890 }
2891 EXPORT_SYMBOL(proto_unregister);
2892
2893 #ifdef CONFIG_PROC_FS
2894 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2895 __acquires(proto_list_mutex)
2896 {
2897 mutex_lock(&proto_list_mutex);
2898 return seq_list_start_head(&proto_list, *pos);
2899 }
2900
2901 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2902 {
2903 return seq_list_next(v, &proto_list, pos);
2904 }
2905
2906 static void proto_seq_stop(struct seq_file *seq, void *v)
2907 __releases(proto_list_mutex)
2908 {
2909 mutex_unlock(&proto_list_mutex);
2910 }
2911
2912 static char proto_method_implemented(const void *method)
2913 {
2914 return method == NULL ? 'n' : 'y';
2915 }
2916 static long sock_prot_memory_allocated(struct proto *proto)
2917 {
2918 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
2919 }
2920
2921 static char *sock_prot_memory_pressure(struct proto *proto)
2922 {
2923 return proto->memory_pressure != NULL ?
2924 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2925 }
2926
2927 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2928 {
2929
2930 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
2931 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2932 proto->name,
2933 proto->obj_size,
2934 sock_prot_inuse_get(seq_file_net(seq), proto),
2935 sock_prot_memory_allocated(proto),
2936 sock_prot_memory_pressure(proto),
2937 proto->max_header,
2938 proto->slab == NULL ? "no" : "yes",
2939 module_name(proto->owner),
2940 proto_method_implemented(proto->close),
2941 proto_method_implemented(proto->connect),
2942 proto_method_implemented(proto->disconnect),
2943 proto_method_implemented(proto->accept),
2944 proto_method_implemented(proto->ioctl),
2945 proto_method_implemented(proto->init),
2946 proto_method_implemented(proto->destroy),
2947 proto_method_implemented(proto->shutdown),
2948 proto_method_implemented(proto->setsockopt),
2949 proto_method_implemented(proto->getsockopt),
2950 proto_method_implemented(proto->sendmsg),
2951 proto_method_implemented(proto->recvmsg),
2952 proto_method_implemented(proto->sendpage),
2953 proto_method_implemented(proto->bind),
2954 proto_method_implemented(proto->backlog_rcv),
2955 proto_method_implemented(proto->hash),
2956 proto_method_implemented(proto->unhash),
2957 proto_method_implemented(proto->get_port),
2958 proto_method_implemented(proto->enter_memory_pressure));
2959 }
2960
2961 static int proto_seq_show(struct seq_file *seq, void *v)
2962 {
2963 if (v == &proto_list)
2964 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2965 "protocol",
2966 "size",
2967 "sockets",
2968 "memory",
2969 "press",
2970 "maxhdr",
2971 "slab",
2972 "module",
2973 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2974 else
2975 proto_seq_printf(seq, list_entry(v, struct proto, node));
2976 return 0;
2977 }
2978
2979 static const struct seq_operations proto_seq_ops = {
2980 .start = proto_seq_start,
2981 .next = proto_seq_next,
2982 .stop = proto_seq_stop,
2983 .show = proto_seq_show,
2984 };
2985
2986 static int proto_seq_open(struct inode *inode, struct file *file)
2987 {
2988 return seq_open_net(inode, file, &proto_seq_ops,
2989 sizeof(struct seq_net_private));
2990 }
2991
2992 static const struct file_operations proto_seq_fops = {
2993 .owner = THIS_MODULE,
2994 .open = proto_seq_open,
2995 .read = seq_read,
2996 .llseek = seq_lseek,
2997 .release = seq_release_net,
2998 };
2999
3000 static __net_init int proto_init_net(struct net *net)
3001 {
3002 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
3003 return -ENOMEM;
3004
3005 return 0;
3006 }
3007
3008 static __net_exit void proto_exit_net(struct net *net)
3009 {
3010 remove_proc_entry("protocols", net->proc_net);
3011 }
3012
3013
3014 static __net_initdata struct pernet_operations proto_net_ops = {
3015 .init = proto_init_net,
3016 .exit = proto_exit_net,
3017 };
3018
3019 static int __init proto_init(void)
3020 {
3021 return register_pernet_subsys(&proto_net_ops);
3022 }
3023
3024 subsys_initcall(proto_init);
3025
3026 #endif /* PROC_FS */