remove libdss from Makefile
[GitHub/moto-9609/android_kernel_motorola_exynos9610.git] / net / unix / af_unix.c
CommitLineData
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
113aa838 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
1da177e4
LT
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetosv : Repaired (I hope) bugs introduces
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * is been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
5cc208be 83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84
1da177e4 85#include <linux/module.h>
1da177e4 86#include <linux/kernel.h>
1da177e4 87#include <linux/signal.h>
3f07c014 88#include <linux/sched/signal.h>
1da177e4
LT
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
7c0f6ba6 103#include <linux/uaccess.h>
1da177e4
LT
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
457c4cbc 106#include <net/net_namespace.h>
1da177e4 107#include <net/sock.h>
c752f073 108#include <net/tcp_states.h>
1da177e4
LT
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
1da177e4
LT
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
2b15af6f 119#include <linux/freezer.h>
ba94f308 120#include <linux/file.h>
1da177e4 121
/* Global hash table of AF_UNIX sockets.  The first UNIX_HASH_SIZE buckets
 * hold bound sockets (hashed by address); the second half holds unbound
 * sockets (hashed by the sock pointer, see unix_sockets_unbound()).
 * Protected by unix_table_lock.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/* Count of live AF_UNIX socks, bounded at 2 * get_max_files() in unix_create1() */
static atomic_long_t unix_nr_socks;


/* Pick the unbound-half bucket for a socket, keyed on its address so
 * unbound sockets spread across UNIX_HASH_SIZE chains.
 */
static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

/* Abstract-namespace sockets hash into the lower half of the table. */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
1da177e4 140
#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security ID into the skb control block
 * (called on the send path).
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

/* Copy the security ID back out of the skb for delivery to the
 * receiver's scm cookie (receive path).
 */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

/* True if this skb carries the same security ID as the scm being built;
 * used to decide whether a new SCM must be started on stream reads.
 */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
/* !CONFIG_SECURITY_NETWORK: security IDs are not tracked; the eq check
 * must report "same" so receive paths never split messages on it.
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
168
1da177e4
LT
/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

/* Fold a 32-bit partial checksum of a socket name down to a hash-table
 * index in [0, UNIX_HASH_SIZE).
 */
static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}
182
#define unix_peer(sk) (unix_sk(sk)->peer)

/* Does @osk point back at @sk as its connected peer? */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

/* A datagram may be sent to @osk if it is unconnected, or if it is
 * connected back to @sk.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

/* Receive queue length has exceeded sk_max_ack_backlog (flow-control
 * threshold for dgram sends and the accept backlog for listeners).
 */
static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
199
/* Return the connected peer of @s with a reference held, or NULL if
 * unconnected.  Caller must sock_put() the result.  The peer pointer is
 * sampled under the socket state lock, so the reference is safe even if
 * the association is torn down concurrently.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
212
213static inline void unix_release_addr(struct unix_address *addr)
214{
8c9814b9 215 if (refcount_dec_and_test(&addr->refcnt))
1da177e4
LT
216 kfree(addr);
217}
218
/*
 * Check unix socket name:
 *		- should be not zero length.
 *		- if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 *
 * Returns the (possibly adjusted) usable address length on success, or
 * -EINVAL.  For abstract names, *hashp is set to the name's table hash;
 * for filesystem names the hash is left untouched (the inode is used
 * for lookup instead).
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
248
/* Table add/remove primitives.  The __-prefixed variants require the
 * caller to hold unix_table_lock; the plain variants take it themselves.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
273
/* Look up a bound socket by name in the bucket for (hash ^ type),
 * restricted to @net.  Caller must hold unix_table_lock; no reference
 * is taken on the returned sock.  Returns NULL if not found.
 */
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}
294
/* Locked wrapper around __unix_find_socket_byname() that additionally
 * takes a reference on the found socket.  Caller must sock_put().
 */
static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}
309
/* Find the socket bound to the filesystem object @i (connect()/sendto()
 * to a path).  Buckets are indexed by inode number for FS-bound sockets.
 * Takes a reference on the result; returns NULL if no socket is bound
 * to that inode.
 */
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
329
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hit the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */

/* Wake callback installed on the server's peer_wait queue: detach
 * ourselves (each relay is one-shot) and forward the wake-up to the
 * client socket's own wait queue.  Runs under the peer_wait queue lock.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key);

	return 0;
}
374
/* Enqueue @sk's relay entry on @other's peer_wait queue so a future
 * dgram reception on @other wakes @sk.  Returns 1 if the entry was
 * newly added, 0 if it was already queued (idempotent).
 */
static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}
395
/* Remove @sk's relay entry from @other's peer_wait queue if it is
 * currently queued there.  Safe to call when no relay is registered.
 */
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}
412
/* Break the relay association and wake any writers blocked on @sk, so
 * they re-evaluate writeability after the peer change.
 */
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   POLLOUT |
				   POLLWRNORM |
				   POLLWRBAND);
}
422
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 if the sender must wait (peer's queue is full and a relay
 * wake-up has been registered), 0 if it may proceed.  The register-
 * then-recheck order avoids missing a wake-up that races with the
 * queue draining.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
441
1586a587 442static int unix_writable(const struct sock *sk)
1da177e4 443{
1586a587 444 return sk->sk_state != TCP_LISTEN &&
14afee4b 445 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
1da177e4
LT
446}
447
/* sk_write_space callback: invoked when wmem is released.  Wakes POLLOUT
 * sleepers and sends SIGIO if the socket has become writable.  The wait
 * queue is accessed under RCU because the socket may be detached from
 * its struct socket concurrently.
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
462
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
483
/* sk_destruct callback: final teardown once the last sock reference is
 * dropped.  Frees queued skbs and the bound address, and updates the
 * global/per-net socket accounting.  The WARN_ONs assert the socket was
 * properly orphaned and unhashed before destruction.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
510
/* Core of close(): detach @sk from the hash table and its peer, flush
 * pending data, and drop the final reference.  @embrion is non-zero when
 * releasing a not-yet-accepted socket found on a listener's queue; in
 * that case the peer gets ECONNRESET.  May recurse one level for each
 * embryo when @sk was a listener (state == TCP_LISTEN).
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	/* Drop the filesystem reference (taken at bind time) outside all locks */
	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
584
/* Record the current task's pid/credentials on @sk, for SO_PEERCRED.
 * Any previously stored pid/cred references are dropped first.
 */
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

/* Copy peer credentials from @peersk to @sk (used when an accepted
 * socket inherits the listener's stored credentials).
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
602
/* listen(2) handler for stream/seqpacket sockets.  Requires the socket
 * to be bound; moves it to TCP_LISTEN and records the backlog.  Raising
 * the backlog wakes blocked connecters waiting on peer_wait.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);	/* old_pid is always NULL here; kept for symmetry */
out:
	return err;
}
633
/* Forward declarations for the proto_ops method tables defined below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);
1da177e4 660
/* SO_PEEK_OFF setter: update sk_peek_off under the per-socket I/O mutex
 * so it cannot race with an in-progress recv.  Returns -EINTR if the
 * lock wait is interrupted by a signal.
 */
static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}
673
674
/* Method table for SOCK_STREAM AF_UNIX sockets. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

/* Method table for SOCK_DGRAM AF_UNIX sockets (no listen/accept). */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

/* Method table for SOCK_SEQPACKET: connection-oriented like stream,
 * but with dgram-style poll and message-based send/recv.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
747
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * dont trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

/* Allocate and initialise one AF_UNIX sock, enforce the global
 * 2*get_max_files() socket limit, and insert it into the unbound half
 * of the hash table.  Returns NULL on failure (limit hit or OOM).
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	/* on failure, undo the optimistic count increment above */
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
798
/* socket(2) handler for PF_UNIX: select the proto_ops table for the
 * requested type and allocate the sock.  SOCK_RAW is accepted as an
 * alias for SOCK_DGRAM (BSD compatibility).
 */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
829
830static int unix_release(struct socket *sock)
831{
832 struct sock *sk = sock->sk;
833
834 if (!sk)
835 return 0;
836
ded34e0f 837 unix_release_sock(sk, 0);
1da177e4
LT
838 sock->sk = NULL;
839
ded34e0f 840 return 0;
1da177e4
LT
841}
842
/* Assign an automatic abstract-namespace address ("\0XXXXX" hex name)
 * to an unbound socket, as done when sending/connecting without an
 * explicit bind.  Serialised by u->bindlock; retries up to 0xFFFFF
 * times if generated names collide, then fails with -ENOSPC.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;	/* already bound - nothing to do */

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seems to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* rehash from the unbound half into the bucket for the new name */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
903
/* Resolve a destination address to its bound socket.  Filesystem names
 * go through a path lookup plus write-permission check on the inode;
 * abstract names go through the name hash.  On success a referenced
 * sock is returned (caller must sock_put); on failure NULL is returned
 * and *error holds the errno (-ECONNREFUSED if nothing is bound there,
 * -EPROTOTYPE on socket-type mismatch).
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
958
/* Create the filesystem node for a path-bound socket (bind(2) with a
 * pathname).  On success *res holds a reference to the new dentry and
 * vfsmount (caller owns it).  Returns 0 or a negative errno; -EEXIST
 * is translated to -EADDRINUSE by the caller.
 */
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}
1da177e4
LT
987
/*
 * bind(2) for AF_UNIX sockets.
 *
 * An address of just the family triggers autobind; a filesystem path
 * creates a socket inode via unix_mknod() before the socket is hashed;
 * an abstract name is checked for uniqueness under unix_table_lock.
 * u->bindlock serializes against concurrent bind/autobind.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	/* family only, no name: pick an abstract name automatically */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		/* create the on-disk node before taking bindlock */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	/* already bound (possibly by a racing autobind) */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* path-bound sockets hash by inode number, not by name */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	/* release-publish so readers using smp_load_acquire() see a
	 * fully initialized addr and u->path */
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}
1078
278a3de5
DM
/*
 * Lock two unix socket state locks without deadlocking.
 *
 * Locks are always taken in ascending pointer order so that two
 * concurrent double-locks of the same pair cannot deadlock; the
 * second acquisition uses the _nested annotation for lockdep.
 * A NULL or identical sk2 degenerates to a single lock.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}
1093
1094static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1095{
1096 if (unlikely(sk1 == sk2) || !sk2) {
1097 unix_state_unlock(sk1);
1098 return;
1099 }
1100 unix_state_unlock(sk1);
1101 unix_state_unlock(sk2);
1102}
1103
1da177e4
LT
/*
 * connect(2) for SOCK_DGRAM unix sockets: set (or, with AF_UNSPEC,
 * clear) the default peer.  Both socket state locks are taken via
 * unix_state_double_lock() while the peer pointer is switched.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* credential-passing sockets need an address of their own */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1183
/*
 * Sleep until the peer's receive queue may have drained, or @timeo
 * elapses.  Called with other's state lock held; the lock is dropped
 * before sleeping and NOT retaken (callers re-lock and re-check).
 * Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* only sleep if the queue is still genuinely full and the peer
	 * is still able to receive */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1204
/*
 * connect(2) for SOCK_STREAM/SOCK_SEQPACKET unix sockets.
 *
 * Pre-allocates the server-side sock and a notification skb before
 * taking any locks, then latches the listener's state and our own
 * (nested) state lock, wires the two new peers together, and queues
 * the skb on the listener so accept(2) can pick it up.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	/* listener backlog full: wait (or fail for non-blocking) */
	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* state changed while we were unlocked: start over */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* take ten and and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1393
/*
 * socketpair(2): wire two freshly created unix sockets to each other.
 * Each side holds a reference on the other; connection-oriented types
 * are marked established immediately.
 */
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	/* datagram pairs stay unconnected at the TCP-state level */
	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state = SS_CONNECTED;
		sockb->state = SS_CONNECTED;
	}
	return 0;
}
1414
90c6bd34
DB
1415static void unix_sock_inherit_flags(const struct socket *old,
1416 struct socket *new)
1417{
1418 if (test_bit(SOCK_PASSCRED, &old->flags))
1419 set_bit(SOCK_PASSCRED, &new->flags);
1420 if (test_bit(SOCK_PASSSEC, &old->flags))
1421 set_bit(SOCK_PASSSEC, &new->flags);
1422}
1423
cdfbabfb
DH
/*
 * accept(2): dequeue one pending-connection skb from the listener and
 * graft the embedded server-side sock onto @newsock.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	/* the skb from unix_stream_connect() carries the new sock */
	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* a slot freed up in the backlog: wake blocked connectors */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1467
1468
/*
 * getsockname(2)/getpeername(2): copy the socket's (or, if @peer, the
 * connected peer's) bound address to userspace-bound storage.  Unbound
 * sockets report just the family.  Pairs with the smp_store_release()
 * publishing of u->addr in bind/connect paths.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		/* unbound: empty-path AF_UNIX address */
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	sock_put(sk);
out:
	return err;
}
1500
/*
 * Detach the passed-fd list from an skb into @scm, and drop each file
 * from the garbage collector's in-flight accounting.
 */
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	/* transfer ownership of the fp list to the scm cookie */
	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}
1511
/*
 * skb destructor for unix messages: release the attached pid and any
 * in-flight file descriptors, then uncharge the skb from the sender.
 */
static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}
1525
/*
 * The "user->unix_inflight" variable is protected by the garbage
 * collection lock, and we just read it locklessly here. If you go
 * over the limit, there might be a tiny race in actually noticing
 * it across threads. Tough.
 */
static inline bool too_many_unix_fds(struct task_struct *p)
{
	struct user_struct *user = current_user();

	/* over RLIMIT_NOFILE in-flight fds: only privileged users may
	 * keep attaching more */
	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
	return false;
}
1540
/*
 * Attach the fd list from @scm to @skb, duplicating the file references
 * and charging each file to the garbage collector's in-flight count.
 * Returns 0, -ETOOMANYREFS over the per-user limit, or -ENOMEM.
 */
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	if (too_many_unix_fds(current))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_inflight(scm->fp->user, scm->fp->fp[i]);
	return 0;
}
1561
/*
 * Copy control-message state (pid, creds, security label, and — when
 * @send_fds — the file descriptor list) from @scm into the skb's
 * UNIXCB, and install the unix destructor.  Returns 0 or a negative
 * errno from fd attachment.
 */
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}
1577
9490f886
HFS
1578static bool unix_passcred_enabled(const struct socket *sock,
1579 const struct sock *other)
1580{
1581 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1582 !other->sk_socket ||
1583 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1584}
1585
16e57262
ED
/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	/* skb already carries a pid: creds were attached via scm */
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
1601
9490f886
HFS
/*
 * Initialize an scm cookie for paths (e.g. sendpage) that have no
 * msghdr control data, filling in the caller's pid/uid/gid when
 * credential passing is enabled for this socket pair.
 */
static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	/* run the normal scm setup with an empty control block */
	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}
1619
1620static bool unix_skb_scm_eq(struct sk_buff *skb,
1621 struct scm_cookie *scm)
1622{
1623 const struct unix_skb_parms *u = &UNIXCB(skb);
1624
1625 return u->pid == scm->pid &&
1626 uid_eq(u->uid, scm->creds.uid) &&
1627 gid_eq(u->gid, scm->creds.gid) &&
1628 unix_secdata_eq(scm, skb);
1629}
1630
1da177e4
LT
1631/*
1632 * Send AF_UNIX data.
1633 */
1634
1b784140
YX
/*
 * sendmsg(2) for SOCK_DGRAM/SOCK_SEQPACKET unix sockets.
 *
 * Resolves the destination (explicit address or connected peer),
 * builds a paged skb with credentials/fds attached, then queues it on
 * the receiver.  The sk_locked state machine below handles the case
 * where the receiver's queue is full and we must take our own state
 * lock as well to register for a wakeup without racing disconnect.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		/* spill the tail into page fragments */
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			/* our connected peer died: disconnect and fail */
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		/* non-blocking: need both locks to safely register for
		 * a peer wakeup before returning -EAGAIN */
		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
1835
e370a723
ED
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
ac7bfa62 1840
1b784140
YX
/*
 * sendmsg(2) for SOCK_STREAM unix sockets: chop the payload into paged
 * skbs (fds attached to the first only) and queue each on the peer's
 * receive queue, signalling SIGPIPE on a broken pipe per convention.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* stream sockets ignore addresses; report why */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	/* partial writes report the byte count, not the error */
	return sent ? : err;
}
1939
869e7c62
HFS
/*
 * sendpage(2) for SOCK_STREAM unix sockets: append a page fragment to
 * the last skb on the peer's queue when its credentials match, or to a
 * freshly allocated skb otherwise.  The peer's iolock guards against
 * concurrent modification of already-queued skbs.  The `if (false)
 * { alloc_skb: ... }` construct is a backwards-goto target used to
 * allocate outside both locks and then retry.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		/* reached by goto: drop both locks before allocating */
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		/* queue tail unchanged since we allocated: use newskb */
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
2055
1b784140
YX
/*
 * sendmsg(2) for SOCK_SEQPACKET: require an established connection,
 * strip any supplied address, then reuse the datagram send path.
 */
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* seqpacket is connection-oriented: ignore explicit addresses */
	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}
ac7bfa62 2074
1b784140
YX
2075static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2076 size_t size, int flags)
a05d2ad1
EB
2077{
2078 struct sock *sk = sock->sk;
2079
2080 if (sk->sk_state != TCP_ESTABLISHED)
2081 return -ENOTCONN;
2082
1b784140 2083 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2084}
2085
1da177e4
LT
2086static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2087{
727a2619 2088 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2089
727a2619
AV
2090 if (addr) {
2091 msg->msg_namelen = addr->len;
2092 memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2093 }
2094}
2095
1b784140
YX
/* recvmsg() for SOCK_DGRAM (and, via unix_seqpacket_recvmsg, SEQPACKET)
 * AF_UNIX sockets.  Dequeues (or peeks) one datagram under u->iolock,
 * copies it to user space, and transfers attached credentials/FDs via
 * the scm machinery.  Returns bytes copied (or full skb length when
 * MSG_TRUNC is set) or a negative errno.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)		/* no out-of-band data on AF_UNIX */
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* try-receive loop: take iolock only while probing the queue,
	 * drop it before sleeping for more data
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
					      &err, &last);
		if (skb)
			break;		/* got one; iolock still held */

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* we removed a message; a blocked writer may now have room */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						POLLOUT | POLLWRNORM |
						POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* clamp to the datagram size; flag truncation if user buffer is short */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	/* hand the sender's credentials/security label to the receiver */
	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* consuming read: take ownership of any passed FDs */
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2198
/*
 *	Sleep until more data has arrived. But check for races..
 *
 *	Called with u->iolock dropped.  'last'/'last_len' identify the tail
 *	skb the caller last saw; any change to the tail means new data.
 *	Returns the remaining timeout.  If 'freezable', the sleep
 *	participates in system suspend/freeze.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* wake conditions: new/grown tail skb, pending error,
		 * shutdown, signal, or timeout exhausted
		 */
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);	/* sleep without the state lock */
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		/* socket torn down while we slept: stop waiting */
		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2241
e370a723
ED
/* Unread payload remaining in a stream skb: total length minus the part
 * already consumed by previous reads (tracked in the skb's control block).
 */
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}
2246
2b514574
HFS
/* Parameter block for unix_stream_read_generic(): lets recvmsg() and
 * splice() share one stream-read loop, differing only in the actor that
 * moves bytes out of each skb.
 */
struct unix_stream_read_state {
	/* copies 'chunk' bytes of 'skb' starting 'skip' bytes past the
	 * consumed prefix; returns bytes moved or a negative error */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;		/* socket being read */
	struct msghdr *msg;		/* recvmsg destination (NULL for splice) */
	struct pipe_inode_info *pipe;	/* splice destination (NULL for recvmsg) */
	size_t size;			/* total bytes requested */
	int flags;			/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags */
};
2257
06a77b07
WC
/* Core SOCK_STREAM read loop shared by recvmsg() and splice().
 * Walks the receive queue under u->iolock, handing each chunk to
 * state->recv_actor, honouring MSG_PEEK / peek offsets, SCM credential
 * boundaries and passed file descriptors.  Returns bytes delivered, or
 * a negative errno if nothing was copied.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;	/* true once scm holds a sender's creds */
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	/* minimum bytes before returning (SO_RCVLOWAT / MSG_WAITALL) */
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			/* queue empty: stop if satisfied, else wait */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* drop iolock while sleeping so writers can queue */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* advance past skbs fully covered by the peek offset */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* hold a ref: the actor may sleep and a concurrent reader
		 * may consume and unlink this skb meanwhile
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			/* partially read skb stays at the queue head */
			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* stop at an FD-passing boundary */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2454
2b514574
HFS
2455static int unix_stream_read_actor(struct sk_buff *skb,
2456 int skip, int chunk,
2457 struct unix_stream_read_state *state)
2458{
2459 int ret;
2460
2461 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2462 state->msg, chunk);
2463 return ret ?: chunk;
2464}
2465
2466static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2467 size_t size, int flags)
2468{
2469 struct unix_stream_read_state state = {
2470 .recv_actor = unix_stream_read_actor,
2471 .socket = sock,
2472 .msg = msg,
2473 .size = size,
2474 .flags = flags
2475 };
2476
06a77b07 2477 return unix_stream_read_generic(&state, true);
2b514574
HFS
2478}
2479
2b514574
HFS
2480static int unix_stream_splice_actor(struct sk_buff *skb,
2481 int skip, int chunk,
2482 struct unix_stream_read_state *state)
2483{
2484 return skb_splice_bits(skb, state->socket->sk,
2485 UNIXCB(skb).consumed + skip,
25869262 2486 state->pipe, chunk, state->splice_flags);
2b514574
HFS
2487}
2488
2489static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2490 struct pipe_inode_info *pipe,
2491 size_t size, unsigned int flags)
2492{
2493 struct unix_stream_read_state state = {
2494 .recv_actor = unix_stream_splice_actor,
2495 .socket = sock,
2496 .pipe = pipe,
2497 .size = size,
2498 .splice_flags = flags,
2499 };
2500
2501 if (unlikely(*ppos))
2502 return -ESPIPE;
2503
2504 if (sock->file->f_flags & O_NONBLOCK ||
2505 flags & SPLICE_F_NONBLOCK)
2506 state.flags = MSG_DONTWAIT;
2507
06a77b07 2508 return unix_stream_read_generic(&state, false);
2b514574
HFS
2509}
2510
1da177e4
LT
/* shutdown(2) for AF_UNIX.  Marks this socket's shutdown state and, for
 * connection-oriented sockets, mirrors the complementary state onto the
 * peer and wakes its waiters.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	/* grab a reference to the peer under the state lock, then work on
	 * it after dropping our own lock (avoids lock-order issues)
	 */
	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* our RCV shutdown blocks the peer's sends, and vice versa */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2556
885ee74d
PE
2557long unix_inq_len(struct sock *sk)
2558{
2559 struct sk_buff *skb;
2560 long amount = 0;
2561
2562 if (sk->sk_state == TCP_LISTEN)
2563 return -EINVAL;
2564
2565 spin_lock(&sk->sk_receive_queue.lock);
2566 if (sk->sk_type == SOCK_STREAM ||
2567 sk->sk_type == SOCK_SEQPACKET) {
2568 skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 2569 amount += unix_skb_len(skb);
885ee74d
PE
2570 } else {
2571 skb = skb_peek(&sk->sk_receive_queue);
2572 if (skb)
2573 amount = skb->len;
2574 }
2575 spin_unlock(&sk->sk_receive_queue.lock);
2576
2577 return amount;
2578}
2579EXPORT_SYMBOL_GPL(unix_inq_len);
2580
/* Outgoing-byte count used by SIOCOUTQ: bytes sent but not yet consumed
 * by the receiver (charged to this socket's write memory).
 */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2586
ba94f308
AV
/* SIOCUNIXFILE: open the filesystem object a bound socket lives at and
 * return it to the caller as an O_PATH file descriptor.  Requires
 * CAP_NET_ADMIN in the socket's network namespace.  Returns the new fd
 * or a negative errno.
 */
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* unbound socket: nothing to open (acquire pairs with bind) */
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)	/* abstract-namespace socket: no inode */
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);	/* release the reserved slot */
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);	/* dentry_open took its own reference */

	return fd;
}
2622
1da177e4
LT
2623static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2624{
2625 struct sock *sk = sock->sk;
e27dfcea 2626 long amount = 0;
1da177e4
LT
2627 int err;
2628
6eba6a37
ED
2629 switch (cmd) {
2630 case SIOCOUTQ:
885ee74d 2631 amount = unix_outq_len(sk);
6eba6a37
ED
2632 err = put_user(amount, (int __user *)arg);
2633 break;
2634 case SIOCINQ:
885ee74d
PE
2635 amount = unix_inq_len(sk);
2636 if (amount < 0)
2637 err = amount;
2638 else
1da177e4 2639 err = put_user(amount, (int __user *)arg);
885ee74d 2640 break;
ba94f308
AV
2641 case SIOCUNIXFILE:
2642 err = unix_open_file(sk);
2643 break;
6eba6a37
ED
2644 default:
2645 err = -ENOIOCTLCMD;
2646 break;
1da177e4
LT
2647 }
2648 return err;
2649}
2650
6eba6a37 2651static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
2652{
2653 struct sock *sk = sock->sk;
2654 unsigned int mask;
2655
aa395145 2656 sock_poll_wait(file, sk_sleep(sk), wait);
1da177e4
LT
2657 mask = 0;
2658
2659 /* exceptional events? */
2660 if (sk->sk_err)
2661 mask |= POLLERR;
2662 if (sk->sk_shutdown == SHUTDOWN_MASK)
2663 mask |= POLLHUP;
f348d70a 2664 if (sk->sk_shutdown & RCV_SHUTDOWN)
db40980f 2665 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
1da177e4
LT
2666
2667 /* readable? */
db40980f 2668 if (!skb_queue_empty(&sk->sk_receive_queue))
1da177e4
LT
2669 mask |= POLLIN | POLLRDNORM;
2670
2671 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2672 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2673 sk->sk_state == TCP_CLOSE)
1da177e4
LT
2674 mask |= POLLHUP;
2675
2676 /*
2677 * we set writable also when the other side has shut down the
2678 * connection. This prevents stuck sockets.
2679 */
2680 if (unix_writable(sk))
2681 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2682
2683 return mask;
2684}
2685
ec0d215f
RW
/* poll() for SOCK_DGRAM (and connecting SEQPACKET) AF_UNIX sockets.
 * Like unix_poll() but additionally consults the connected peer's
 * receive queue: a full peer makes this socket non-writable, with a
 * wakeup registered so we are re-polled when the peer drains.
 */
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/* a full peer revokes writability; register for its wakeup */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		/* arm "space available" async notification */
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
1da177e4
LT
2742
#ifdef CONFIG_PROC_FS

/* /proc/net/unix iterator position encoding: the seq_file loff_t packs a
 * hash-bucket index in the high bits and a 1-based offset within that
 * bucket in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 2750
/* Return the socket at the (1-based) offset encoded in *pos within its
 * hash bucket, counting only sockets belonging to this seq_file's netns.
 * Returns NULL when the bucket has fewer matching entries.
 */
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		/* pre-increment: offset 1 is the first matching socket */
		if (++count == offset)
			break;
	}

	return sk;
}
2767
/* Advance the /proc/net/unix iterator: continue within the current
 * bucket after 'sk', then move through later buckets until a socket in
 * this netns is found or the table is exhausted.  Called with sk == NULL
 * (or SEQ_START_TOKEN) to start from the position in *pos.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		/* restart at offset 1 of the following bucket */
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2794
/* seq_file .start: take unix_table_lock (held until unix_seq_stop) and
 * position the iterator.  Position 0 yields the header token.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	/* past the last bucket: iteration is complete */
	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}
2808
2809static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2810{
2811 ++*pos;
7123aaa3 2812 return unix_next_socket(seq, v, pos);
1da177e4
LT
2813}
2814
/* seq_file .stop: release the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2820
/* seq_file .show: emit one /proc/net/unix line (or the column header for
 * the start token).  Abstract-namespace names are printed with a leading
 * '@' and embedded NULs shown as '@'.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num RefCount Protocol Flags Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	// under unix_table_lock here
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* name length excludes the sun_family field */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;	/* drop trailing NUL of pathname */
			else {
				seq_putc(seq, '@');
				i++;	/* skip the leading NUL byte */
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2865
/* seq_file operations backing /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2872
1da177e4
LT
/* open() for /proc/net/unix: attach per-netns seq_file iteration state. */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct seq_net_private));
}
2878
/* file operations for /proc/net/unix (standard seq_file plumbing). */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2886
2887#endif
2888
/* Registration record mapping the PF_UNIX family to unix_create(). */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2894
097e66c5 2895
/* Per-network-namespace init: set the default datagram backlog, register
 * the AF_UNIX sysctls, and create /proc/net/unix.  On proc failure the
 * sysctls are rolled back and -ENOMEM is returned.
 */
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
		unix_sysctl_unregister(net);	/* undo on partial failure */
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}
2914
/* Per-network-namespace teardown: reverse of unix_net_init(). */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2920
/* pernet hooks so each network namespace gets its own sysctls/procfs. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2925
1da177e4
LT
/* Module/boot-time init: register the unix_sock proto (with its own slab
 * cache), the PF_UNIX family, and the pernet operations.
 */
static int __init af_unix_init(void)
{
	int rc = -1;

	/* unix_skb_parms must fit in the skb control block */
	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}
2943
/* Module teardown: unregister everything af_unix_init() registered. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2950
3d366960
DW
/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

/* Module metadata: GPL-licensed, auto-loaded for the PF_UNIX family. */
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);