unix_diag: Add the MEMINFO extension
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / unix / af_unix.c
CommitLineData
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
113aa838 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
1da177e4
LT
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetosv : Repaired (I hope) bugs introduces
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * is been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
83#include <linux/module.h>
1da177e4 84#include <linux/kernel.h>
1da177e4
LT
85#include <linux/signal.h>
86#include <linux/sched.h>
87#include <linux/errno.h>
88#include <linux/string.h>
89#include <linux/stat.h>
90#include <linux/dcache.h>
91#include <linux/namei.h>
92#include <linux/socket.h>
93#include <linux/un.h>
94#include <linux/fcntl.h>
95#include <linux/termios.h>
96#include <linux/sockios.h>
97#include <linux/net.h>
98#include <linux/in.h>
99#include <linux/fs.h>
100#include <linux/slab.h>
101#include <asm/uaccess.h>
102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
457c4cbc 104#include <net/net_namespace.h>
1da177e4 105#include <net/sock.h>
c752f073 106#include <net/tcp_states.h>
1da177e4
LT
107#include <net/af_unix.h>
108#include <linux/proc_fs.h>
109#include <linux/seq_file.h>
110#include <net/scm.h>
111#include <linux/init.h>
112#include <linux/poll.h>
1da177e4
LT
113#include <linux/rtnetlink.h>
114#include <linux/mount.h>
115#include <net/checksum.h>
116#include <linux/security.h>
117
fa7ff56f
PE
118struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119EXPORT_SYMBOL_GPL(unix_socket_table);
120DEFINE_SPINLOCK(unix_table_lock);
121EXPORT_SYMBOL_GPL(unix_table_lock);
518de9b3 122static atomic_long_t unix_nr_socks;
1da177e4
LT
123
124#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
125
126#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
127
877ce7c1 128#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 129static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 130{
dc49c1f9 131 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
877ce7c1
CZ
132}
133
134static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
135{
dc49c1f9 136 scm->secid = *UNIXSID(skb);
877ce7c1
CZ
137}
138#else
dc49c1f9 139static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
140{ }
141
142static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
143{ }
144#endif /* CONFIG_SECURITY_NETWORK */
145
1da177e4
LT
146/*
147 * SMP locking strategy:
fbe9cc4a 148 * hash table is protected with spinlock unix_table_lock
663717f6 149 * each socket state is protected by separate spin lock.
1da177e4
LT
150 */
151
44bb9363 152static inline unsigned unix_hash_fold(__wsum n)
1da177e4 153{
44bb9363 154 unsigned hash = (__force unsigned)n;
1da177e4
LT
155 hash ^= hash>>16;
156 hash ^= hash>>8;
157 return hash&(UNIX_HASH_SIZE-1);
158}
159
160#define unix_peer(sk) (unix_sk(sk)->peer)
161
162static inline int unix_our_peer(struct sock *sk, struct sock *osk)
163{
164 return unix_peer(osk) == sk;
165}
166
167static inline int unix_may_send(struct sock *sk, struct sock *osk)
168{
6eba6a37 169 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
170}
171
3c73419c
RW
172static inline int unix_recvq_full(struct sock const *sk)
173{
174 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
175}
176
fa7ff56f 177struct sock *unix_peer_get(struct sock *s)
1da177e4
LT
178{
179 struct sock *peer;
180
1c92b4e5 181 unix_state_lock(s);
1da177e4
LT
182 peer = unix_peer(s);
183 if (peer)
184 sock_hold(peer);
1c92b4e5 185 unix_state_unlock(s);
1da177e4
LT
186 return peer;
187}
fa7ff56f 188EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
189
190static inline void unix_release_addr(struct unix_address *addr)
191{
192 if (atomic_dec_and_test(&addr->refcnt))
193 kfree(addr);
194}
195
196/*
197 * Check unix socket name:
198 * - should be not zero length.
199 * - if started by not zero, should be NULL terminated (FS object)
200 * - if started by zero, it is abstract name.
201 */
ac7bfa62 202
6eba6a37 203static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
1da177e4
LT
204{
205 if (len <= sizeof(short) || len > sizeof(*sunaddr))
206 return -EINVAL;
207 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
208 return -EINVAL;
209 if (sunaddr->sun_path[0]) {
210 /*
211 * This may look like an off by one error but it is a bit more
212 * subtle. 108 is the longest valid AF_UNIX path for a binding.
25985edc 213 * sun_path[108] doesn't as such exist. However in kernel space
1da177e4
LT
214 * we are guaranteed that it is a valid memory location in our
215 * kernel address buffer.
216 */
e27dfcea 217 ((char *)sunaddr)[len] = 0;
1da177e4
LT
218 len = strlen(sunaddr->sun_path)+1+sizeof(short);
219 return len;
220 }
221
07f0757a 222 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
1da177e4
LT
223 return len;
224}
225
226static void __unix_remove_socket(struct sock *sk)
227{
228 sk_del_node_init(sk);
229}
230
231static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
232{
547b792c 233 WARN_ON(!sk_unhashed(sk));
1da177e4
LT
234 sk_add_node(sk, list);
235}
236
237static inline void unix_remove_socket(struct sock *sk)
238{
fbe9cc4a 239 spin_lock(&unix_table_lock);
1da177e4 240 __unix_remove_socket(sk);
fbe9cc4a 241 spin_unlock(&unix_table_lock);
1da177e4
LT
242}
243
244static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
245{
fbe9cc4a 246 spin_lock(&unix_table_lock);
1da177e4 247 __unix_insert_socket(list, sk);
fbe9cc4a 248 spin_unlock(&unix_table_lock);
1da177e4
LT
249}
250
097e66c5
DL
251static struct sock *__unix_find_socket_byname(struct net *net,
252 struct sockaddr_un *sunname,
1da177e4
LT
253 int len, int type, unsigned hash)
254{
255 struct sock *s;
256 struct hlist_node *node;
257
258 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
259 struct unix_sock *u = unix_sk(s);
260
878628fb 261 if (!net_eq(sock_net(s), net))
097e66c5
DL
262 continue;
263
1da177e4
LT
264 if (u->addr->len == len &&
265 !memcmp(u->addr->name, sunname, len))
266 goto found;
267 }
268 s = NULL;
269found:
270 return s;
271}
272
097e66c5
DL
273static inline struct sock *unix_find_socket_byname(struct net *net,
274 struct sockaddr_un *sunname,
1da177e4
LT
275 int len, int type,
276 unsigned hash)
277{
278 struct sock *s;
279
fbe9cc4a 280 spin_lock(&unix_table_lock);
097e66c5 281 s = __unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
282 if (s)
283 sock_hold(s);
fbe9cc4a 284 spin_unlock(&unix_table_lock);
1da177e4
LT
285 return s;
286}
287
6616f788 288static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4
LT
289{
290 struct sock *s;
291 struct hlist_node *node;
292
fbe9cc4a 293 spin_lock(&unix_table_lock);
1da177e4
LT
294 sk_for_each(s, node,
295 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
296 struct dentry *dentry = unix_sk(s)->dentry;
297
6eba6a37 298 if (dentry && dentry->d_inode == i) {
1da177e4
LT
299 sock_hold(s);
300 goto found;
301 }
302 }
303 s = NULL;
304found:
fbe9cc4a 305 spin_unlock(&unix_table_lock);
1da177e4
LT
306 return s;
307}
308
309static inline int unix_writable(struct sock *sk)
310{
311 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312}
313
314static void unix_write_space(struct sock *sk)
315{
43815482
ED
316 struct socket_wq *wq;
317
318 rcu_read_lock();
1da177e4 319 if (unix_writable(sk)) {
43815482
ED
320 wq = rcu_dereference(sk->sk_wq);
321 if (wq_has_sleeper(wq))
67426b75
ED
322 wake_up_interruptible_sync_poll(&wq->wait,
323 POLLOUT | POLLWRNORM | POLLWRBAND);
8d8ad9d7 324 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 325 }
43815482 326 rcu_read_unlock();
1da177e4
LT
327}
328
329/* When dgram socket disconnects (or changes its peer), we clear its receive
330 * queue of packets arrived from previous peer. First, it allows to do
331 * flow control based only on wmem_alloc; second, sk connected to peer
332 * may receive messages only from that peer. */
333static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
334{
b03efcfb 335 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
336 skb_queue_purge(&sk->sk_receive_queue);
337 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
338
339 /* If one link of bidirectional dgram pipe is disconnected,
340 * we signal error. Messages are lost. Do not make this,
341 * when peer was not connected to us.
342 */
343 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
344 other->sk_err = ECONNRESET;
345 other->sk_error_report(other);
346 }
347 }
348}
349
350static void unix_sock_destructor(struct sock *sk)
351{
352 struct unix_sock *u = unix_sk(sk);
353
354 skb_queue_purge(&sk->sk_receive_queue);
355
547b792c
IJ
356 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
357 WARN_ON(!sk_unhashed(sk));
358 WARN_ON(sk->sk_socket);
1da177e4 359 if (!sock_flag(sk, SOCK_DEAD)) {
6b41e7dd 360 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
361 return;
362 }
363
364 if (u->addr)
365 unix_release_addr(u->addr);
366
518de9b3 367 atomic_long_dec(&unix_nr_socks);
6f756a8c 368 local_bh_disable();
a8076d8d 369 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
6f756a8c 370 local_bh_enable();
1da177e4 371#ifdef UNIX_REFCNT_DEBUG
518de9b3
ED
372 printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
373 atomic_long_read(&unix_nr_socks));
1da177e4
LT
374#endif
375}
376
6eba6a37 377static int unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
378{
379 struct unix_sock *u = unix_sk(sk);
380 struct dentry *dentry;
381 struct vfsmount *mnt;
382 struct sock *skpair;
383 struct sk_buff *skb;
384 int state;
385
386 unix_remove_socket(sk);
387
388 /* Clear state */
1c92b4e5 389 unix_state_lock(sk);
1da177e4
LT
390 sock_orphan(sk);
391 sk->sk_shutdown = SHUTDOWN_MASK;
392 dentry = u->dentry;
393 u->dentry = NULL;
394 mnt = u->mnt;
395 u->mnt = NULL;
396 state = sk->sk_state;
397 sk->sk_state = TCP_CLOSE;
1c92b4e5 398 unix_state_unlock(sk);
1da177e4
LT
399
400 wake_up_interruptible_all(&u->peer_wait);
401
e27dfcea 402 skpair = unix_peer(sk);
1da177e4 403
e27dfcea 404 if (skpair != NULL) {
1da177e4 405 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 406 unix_state_lock(skpair);
1da177e4
LT
407 /* No more writes */
408 skpair->sk_shutdown = SHUTDOWN_MASK;
409 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
410 skpair->sk_err = ECONNRESET;
1c92b4e5 411 unix_state_unlock(skpair);
1da177e4 412 skpair->sk_state_change(skpair);
8d8ad9d7 413 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4
LT
414 }
415 sock_put(skpair); /* It may now die */
416 unix_peer(sk) = NULL;
417 }
418
419 /* Try to flush out this socket. Throw out buffers at least */
420
421 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 422 if (state == TCP_LISTEN)
1da177e4
LT
423 unix_release_sock(skb->sk, 1);
424 /* passed fds are erased in the kfree_skb hook */
425 kfree_skb(skb);
426 }
427
428 if (dentry) {
429 dput(dentry);
430 mntput(mnt);
431 }
432
433 sock_put(sk);
434
435 /* ---- Socket is dead now and most probably destroyed ---- */
436
437 /*
438 * Fixme: BSD difference: In BSD all sockets connected to use get
439 * ECONNRESET and we die on the spot. In Linux we behave
440 * like files and pipes do and wait for the last
441 * dereference.
442 *
443 * Can't we simply set sock->err?
444 *
445 * What the above comment does talk about? --ANK(980817)
446 */
447
9305cfa4 448 if (unix_tot_inflight)
ac7bfa62 449 unix_gc(); /* Garbage collect fds */
1da177e4
LT
450
451 return 0;
452}
453
109f6e39
EB
454static void init_peercred(struct sock *sk)
455{
456 put_pid(sk->sk_peer_pid);
457 if (sk->sk_peer_cred)
458 put_cred(sk->sk_peer_cred);
459 sk->sk_peer_pid = get_pid(task_tgid(current));
460 sk->sk_peer_cred = get_current_cred();
461}
462
463static void copy_peercred(struct sock *sk, struct sock *peersk)
464{
465 put_pid(sk->sk_peer_pid);
466 if (sk->sk_peer_cred)
467 put_cred(sk->sk_peer_cred);
468 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
469 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
470}
471
1da177e4
LT
472static int unix_listen(struct socket *sock, int backlog)
473{
474 int err;
475 struct sock *sk = sock->sk;
476 struct unix_sock *u = unix_sk(sk);
109f6e39
EB
477 struct pid *old_pid = NULL;
478 const struct cred *old_cred = NULL;
1da177e4
LT
479
480 err = -EOPNOTSUPP;
6eba6a37
ED
481 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
482 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
483 err = -EINVAL;
484 if (!u->addr)
6eba6a37 485 goto out; /* No listens on an unbound socket */
1c92b4e5 486 unix_state_lock(sk);
1da177e4
LT
487 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
488 goto out_unlock;
489 if (backlog > sk->sk_max_ack_backlog)
490 wake_up_interruptible_all(&u->peer_wait);
491 sk->sk_max_ack_backlog = backlog;
492 sk->sk_state = TCP_LISTEN;
493 /* set credentials so connect can copy them */
109f6e39 494 init_peercred(sk);
1da177e4
LT
495 err = 0;
496
497out_unlock:
1c92b4e5 498 unix_state_unlock(sk);
109f6e39
EB
499 put_pid(old_pid);
500 if (old_cred)
501 put_cred(old_cred);
1da177e4
LT
502out:
503 return err;
504}
505
506static int unix_release(struct socket *);
507static int unix_bind(struct socket *, struct sockaddr *, int);
508static int unix_stream_connect(struct socket *, struct sockaddr *,
509 int addr_len, int flags);
510static int unix_socketpair(struct socket *, struct socket *);
511static int unix_accept(struct socket *, struct socket *, int);
512static int unix_getname(struct socket *, struct sockaddr *, int *, int);
513static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
ec0d215f
RW
514static unsigned int unix_dgram_poll(struct file *, struct socket *,
515 poll_table *);
1da177e4
LT
516static int unix_ioctl(struct socket *, unsigned int, unsigned long);
517static int unix_shutdown(struct socket *, int);
518static int unix_stream_sendmsg(struct kiocb *, struct socket *,
519 struct msghdr *, size_t);
520static int unix_stream_recvmsg(struct kiocb *, struct socket *,
521 struct msghdr *, size_t, int);
522static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
523 struct msghdr *, size_t);
524static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
525 struct msghdr *, size_t, int);
526static int unix_dgram_connect(struct socket *, struct sockaddr *,
527 int, int);
528static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
529 struct msghdr *, size_t);
a05d2ad1
EB
530static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
531 struct msghdr *, size_t, int);
1da177e4 532
90ddc4f0 533static const struct proto_ops unix_stream_ops = {
1da177e4
LT
534 .family = PF_UNIX,
535 .owner = THIS_MODULE,
536 .release = unix_release,
537 .bind = unix_bind,
538 .connect = unix_stream_connect,
539 .socketpair = unix_socketpair,
540 .accept = unix_accept,
541 .getname = unix_getname,
542 .poll = unix_poll,
543 .ioctl = unix_ioctl,
544 .listen = unix_listen,
545 .shutdown = unix_shutdown,
546 .setsockopt = sock_no_setsockopt,
547 .getsockopt = sock_no_getsockopt,
548 .sendmsg = unix_stream_sendmsg,
549 .recvmsg = unix_stream_recvmsg,
550 .mmap = sock_no_mmap,
551 .sendpage = sock_no_sendpage,
552};
553
90ddc4f0 554static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
555 .family = PF_UNIX,
556 .owner = THIS_MODULE,
557 .release = unix_release,
558 .bind = unix_bind,
559 .connect = unix_dgram_connect,
560 .socketpair = unix_socketpair,
561 .accept = sock_no_accept,
562 .getname = unix_getname,
ec0d215f 563 .poll = unix_dgram_poll,
1da177e4
LT
564 .ioctl = unix_ioctl,
565 .listen = sock_no_listen,
566 .shutdown = unix_shutdown,
567 .setsockopt = sock_no_setsockopt,
568 .getsockopt = sock_no_getsockopt,
569 .sendmsg = unix_dgram_sendmsg,
570 .recvmsg = unix_dgram_recvmsg,
571 .mmap = sock_no_mmap,
572 .sendpage = sock_no_sendpage,
573};
574
90ddc4f0 575static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
576 .family = PF_UNIX,
577 .owner = THIS_MODULE,
578 .release = unix_release,
579 .bind = unix_bind,
580 .connect = unix_stream_connect,
581 .socketpair = unix_socketpair,
582 .accept = unix_accept,
583 .getname = unix_getname,
ec0d215f 584 .poll = unix_dgram_poll,
1da177e4
LT
585 .ioctl = unix_ioctl,
586 .listen = unix_listen,
587 .shutdown = unix_shutdown,
588 .setsockopt = sock_no_setsockopt,
589 .getsockopt = sock_no_getsockopt,
590 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 591 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
592 .mmap = sock_no_mmap,
593 .sendpage = sock_no_sendpage,
594};
595
596static struct proto unix_proto = {
248969ae
ED
597 .name = "UNIX",
598 .owner = THIS_MODULE,
248969ae 599 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
600};
601
a09785a2
IM
602/*
603 * AF_UNIX sockets do not interact with hardware, hence they
604 * dont trigger interrupts - so it's safe for them to have
605 * bh-unsafe locking for their sk_receive_queue.lock. Split off
606 * this special lock-class by reinitializing the spinlock key:
607 */
608static struct lock_class_key af_unix_sk_receive_queue_lock_key;
609
6eba6a37 610static struct sock *unix_create1(struct net *net, struct socket *sock)
1da177e4
LT
611{
612 struct sock *sk = NULL;
613 struct unix_sock *u;
614
518de9b3
ED
615 atomic_long_inc(&unix_nr_socks);
616 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
617 goto out;
618
6257ff21 619 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
1da177e4
LT
620 if (!sk)
621 goto out;
622
6eba6a37 623 sock_init_data(sock, sk);
a09785a2
IM
624 lockdep_set_class(&sk->sk_receive_queue.lock,
625 &af_unix_sk_receive_queue_lock_key);
1da177e4
LT
626
627 sk->sk_write_space = unix_write_space;
a0a53c8b 628 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
629 sk->sk_destruct = unix_sock_destructor;
630 u = unix_sk(sk);
631 u->dentry = NULL;
632 u->mnt = NULL;
fd19f329 633 spin_lock_init(&u->lock);
516e0cc5 634 atomic_long_set(&u->inflight, 0);
1fd05ba5 635 INIT_LIST_HEAD(&u->link);
57b47a53 636 mutex_init(&u->readlock); /* single task reading lock */
1da177e4
LT
637 init_waitqueue_head(&u->peer_wait);
638 unix_insert_socket(unix_sockets_unbound, sk);
639out:
284b327b 640 if (sk == NULL)
518de9b3 641 atomic_long_dec(&unix_nr_socks);
920de804
ED
642 else {
643 local_bh_disable();
a8076d8d 644 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
645 local_bh_enable();
646 }
1da177e4
LT
647 return sk;
648}
649
3f378b68
EP
650static int unix_create(struct net *net, struct socket *sock, int protocol,
651 int kern)
1da177e4
LT
652{
653 if (protocol && protocol != PF_UNIX)
654 return -EPROTONOSUPPORT;
655
656 sock->state = SS_UNCONNECTED;
657
658 switch (sock->type) {
659 case SOCK_STREAM:
660 sock->ops = &unix_stream_ops;
661 break;
662 /*
663 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
664 * nothing uses it.
665 */
666 case SOCK_RAW:
e27dfcea 667 sock->type = SOCK_DGRAM;
1da177e4
LT
668 case SOCK_DGRAM:
669 sock->ops = &unix_dgram_ops;
670 break;
671 case SOCK_SEQPACKET:
672 sock->ops = &unix_seqpacket_ops;
673 break;
674 default:
675 return -ESOCKTNOSUPPORT;
676 }
677
1b8d7ae4 678 return unix_create1(net, sock) ? 0 : -ENOMEM;
1da177e4
LT
679}
680
681static int unix_release(struct socket *sock)
682{
683 struct sock *sk = sock->sk;
684
685 if (!sk)
686 return 0;
687
688 sock->sk = NULL;
689
6eba6a37 690 return unix_release_sock(sk, 0);
1da177e4
LT
691}
692
693static int unix_autobind(struct socket *sock)
694{
695 struct sock *sk = sock->sk;
3b1e0a65 696 struct net *net = sock_net(sk);
1da177e4
LT
697 struct unix_sock *u = unix_sk(sk);
698 static u32 ordernum = 1;
6eba6a37 699 struct unix_address *addr;
1da177e4 700 int err;
8df73ff9 701 unsigned int retries = 0;
1da177e4 702
57b47a53 703 mutex_lock(&u->readlock);
1da177e4
LT
704
705 err = 0;
706 if (u->addr)
707 goto out;
708
709 err = -ENOMEM;
0da974f4 710 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
711 if (!addr)
712 goto out;
713
1da177e4
LT
714 addr->name->sun_family = AF_UNIX;
715 atomic_set(&addr->refcnt, 1);
716
717retry:
718 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 719 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 720
fbe9cc4a 721 spin_lock(&unix_table_lock);
1da177e4
LT
722 ordernum = (ordernum+1)&0xFFFFF;
723
097e66c5 724 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 725 addr->hash)) {
fbe9cc4a 726 spin_unlock(&unix_table_lock);
8df73ff9
TH
727 /*
728 * __unix_find_socket_byname() may take long time if many names
729 * are already in use.
730 */
731 cond_resched();
732 /* Give up if all names seems to be in use. */
733 if (retries++ == 0xFFFFF) {
734 err = -ENOSPC;
735 kfree(addr);
736 goto out;
737 }
1da177e4
LT
738 goto retry;
739 }
740 addr->hash ^= sk->sk_type;
741
742 __unix_remove_socket(sk);
743 u->addr = addr;
744 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 745 spin_unlock(&unix_table_lock);
1da177e4
LT
746 err = 0;
747
57b47a53 748out: mutex_unlock(&u->readlock);
1da177e4
LT
749 return err;
750}
751
097e66c5
DL
752static struct sock *unix_find_other(struct net *net,
753 struct sockaddr_un *sunname, int len,
1da177e4
LT
754 int type, unsigned hash, int *error)
755{
756 struct sock *u;
421748ec 757 struct path path;
1da177e4 758 int err = 0;
ac7bfa62 759
1da177e4 760 if (sunname->sun_path[0]) {
421748ec
AV
761 struct inode *inode;
762 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
1da177e4
LT
763 if (err)
764 goto fail;
421748ec
AV
765 inode = path.dentry->d_inode;
766 err = inode_permission(inode, MAY_WRITE);
1da177e4
LT
767 if (err)
768 goto put_fail;
769
770 err = -ECONNREFUSED;
421748ec 771 if (!S_ISSOCK(inode->i_mode))
1da177e4 772 goto put_fail;
6616f788 773 u = unix_find_socket_byinode(inode);
1da177e4
LT
774 if (!u)
775 goto put_fail;
776
777 if (u->sk_type == type)
421748ec 778 touch_atime(path.mnt, path.dentry);
1da177e4 779
421748ec 780 path_put(&path);
1da177e4 781
e27dfcea 782 err = -EPROTOTYPE;
1da177e4
LT
783 if (u->sk_type != type) {
784 sock_put(u);
785 goto fail;
786 }
787 } else {
788 err = -ECONNREFUSED;
e27dfcea 789 u = unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
790 if (u) {
791 struct dentry *dentry;
792 dentry = unix_sk(u)->dentry;
793 if (dentry)
794 touch_atime(unix_sk(u)->mnt, dentry);
795 } else
796 goto fail;
797 }
798 return u;
799
800put_fail:
421748ec 801 path_put(&path);
1da177e4 802fail:
e27dfcea 803 *error = err;
1da177e4
LT
804 return NULL;
805}
806
807
808static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
809{
810 struct sock *sk = sock->sk;
3b1e0a65 811 struct net *net = sock_net(sk);
1da177e4 812 struct unix_sock *u = unix_sk(sk);
e27dfcea 813 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
dae6ad8f 814 char *sun_path = sunaddr->sun_path;
6eba6a37 815 struct dentry *dentry = NULL;
dae6ad8f 816 struct path path;
1da177e4
LT
817 int err;
818 unsigned hash;
819 struct unix_address *addr;
820 struct hlist_head *list;
821
822 err = -EINVAL;
823 if (sunaddr->sun_family != AF_UNIX)
824 goto out;
825
e27dfcea 826 if (addr_len == sizeof(short)) {
1da177e4
LT
827 err = unix_autobind(sock);
828 goto out;
829 }
830
831 err = unix_mkname(sunaddr, addr_len, &hash);
832 if (err < 0)
833 goto out;
834 addr_len = err;
835
57b47a53 836 mutex_lock(&u->readlock);
1da177e4
LT
837
838 err = -EINVAL;
839 if (u->addr)
840 goto out_up;
841
842 err = -ENOMEM;
843 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
844 if (!addr)
845 goto out_up;
846
847 memcpy(addr->name, sunaddr, addr_len);
848 addr->len = addr_len;
849 addr->hash = hash ^ sk->sk_type;
850 atomic_set(&addr->refcnt, 1);
851
dae6ad8f 852 if (sun_path[0]) {
1da177e4
LT
853 unsigned int mode;
854 err = 0;
855 /*
856 * Get the parent directory, calculate the hash for last
857 * component.
858 */
dae6ad8f 859 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
1da177e4
LT
860 err = PTR_ERR(dentry);
861 if (IS_ERR(dentry))
dae6ad8f 862 goto out_mknod_parent;
f81a0bff 863
1da177e4
LT
864 /*
865 * All right, let's create it.
866 */
867 mode = S_IFSOCK |
ce3b0f8d 868 (SOCK_INODE(sock)->i_mode & ~current_umask());
dae6ad8f 869 err = mnt_want_write(path.mnt);
463c3197
DH
870 if (err)
871 goto out_mknod_dput;
dae6ad8f 872 err = security_path_mknod(&path, dentry, mode, 0);
be6d3e56
KT
873 if (err)
874 goto out_mknod_drop_write;
dae6ad8f 875 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
be6d3e56 876out_mknod_drop_write:
dae6ad8f 877 mnt_drop_write(path.mnt);
1da177e4
LT
878 if (err)
879 goto out_mknod_dput;
dae6ad8f
AV
880 mutex_unlock(&path.dentry->d_inode->i_mutex);
881 dput(path.dentry);
882 path.dentry = dentry;
1da177e4
LT
883
884 addr->hash = UNIX_HASH_SIZE;
885 }
886
fbe9cc4a 887 spin_lock(&unix_table_lock);
1da177e4 888
dae6ad8f 889 if (!sun_path[0]) {
1da177e4 890 err = -EADDRINUSE;
097e66c5 891 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1da177e4
LT
892 sk->sk_type, hash)) {
893 unix_release_addr(addr);
894 goto out_unlock;
895 }
896
897 list = &unix_socket_table[addr->hash];
898 } else {
899 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
dae6ad8f
AV
900 u->dentry = path.dentry;
901 u->mnt = path.mnt;
1da177e4
LT
902 }
903
904 err = 0;
905 __unix_remove_socket(sk);
906 u->addr = addr;
907 __unix_insert_socket(list, sk);
908
909out_unlock:
fbe9cc4a 910 spin_unlock(&unix_table_lock);
1da177e4 911out_up:
57b47a53 912 mutex_unlock(&u->readlock);
1da177e4
LT
913out:
914 return err;
915
916out_mknod_dput:
917 dput(dentry);
dae6ad8f
AV
918 mutex_unlock(&path.dentry->d_inode->i_mutex);
919 path_put(&path);
1da177e4 920out_mknod_parent:
e27dfcea
JK
921 if (err == -EEXIST)
922 err = -EADDRINUSE;
1da177e4
LT
923 unix_release_addr(addr);
924 goto out_up;
925}
926
278a3de5
DM
927static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
928{
929 if (unlikely(sk1 == sk2) || !sk2) {
930 unix_state_lock(sk1);
931 return;
932 }
933 if (sk1 < sk2) {
934 unix_state_lock(sk1);
935 unix_state_lock_nested(sk2);
936 } else {
937 unix_state_lock(sk2);
938 unix_state_lock_nested(sk1);
939 }
940}
941
942static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
943{
944 if (unlikely(sk1 == sk2) || !sk2) {
945 unix_state_unlock(sk1);
946 return;
947 }
948 unix_state_unlock(sk1);
949 unix_state_unlock(sk2);
950}
951
1da177e4
LT
952static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
953 int alen, int flags)
954{
955 struct sock *sk = sock->sk;
3b1e0a65 956 struct net *net = sock_net(sk);
e27dfcea 957 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1da177e4
LT
958 struct sock *other;
959 unsigned hash;
960 int err;
961
962 if (addr->sa_family != AF_UNSPEC) {
963 err = unix_mkname(sunaddr, alen, &hash);
964 if (err < 0)
965 goto out;
966 alen = err;
967
968 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
969 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
970 goto out;
971
278a3de5 972restart:
e27dfcea 973 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1da177e4
LT
974 if (!other)
975 goto out;
976
278a3de5
DM
977 unix_state_double_lock(sk, other);
978
979 /* Apparently VFS overslept socket death. Retry. */
980 if (sock_flag(other, SOCK_DEAD)) {
981 unix_state_double_unlock(sk, other);
982 sock_put(other);
983 goto restart;
984 }
1da177e4
LT
985
986 err = -EPERM;
987 if (!unix_may_send(sk, other))
988 goto out_unlock;
989
990 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
991 if (err)
992 goto out_unlock;
993
994 } else {
995 /*
996 * 1003.1g breaking connected state with AF_UNSPEC
997 */
998 other = NULL;
278a3de5 999 unix_state_double_lock(sk, other);
1da177e4
LT
1000 }
1001
1002 /*
1003 * If it was connected, reconnect.
1004 */
1005 if (unix_peer(sk)) {
1006 struct sock *old_peer = unix_peer(sk);
e27dfcea 1007 unix_peer(sk) = other;
278a3de5 1008 unix_state_double_unlock(sk, other);
1da177e4
LT
1009
1010 if (other != old_peer)
1011 unix_dgram_disconnected(sk, old_peer);
1012 sock_put(old_peer);
1013 } else {
e27dfcea 1014 unix_peer(sk) = other;
278a3de5 1015 unix_state_double_unlock(sk, other);
1da177e4 1016 }
ac7bfa62 1017 return 0;
1da177e4
LT
1018
1019out_unlock:
278a3de5 1020 unix_state_double_unlock(sk, other);
1da177e4
LT
1021 sock_put(other);
1022out:
1023 return err;
1024}
1025
1026static long unix_wait_for_peer(struct sock *other, long timeo)
1027{
1028 struct unix_sock *u = unix_sk(other);
1029 int sched;
1030 DEFINE_WAIT(wait);
1031
1032 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1033
1034 sched = !sock_flag(other, SOCK_DEAD) &&
1035 !(other->sk_shutdown & RCV_SHUTDOWN) &&
3c73419c 1036 unix_recvq_full(other);
1da177e4 1037
1c92b4e5 1038 unix_state_unlock(other);
1da177e4
LT
1039
1040 if (sched)
1041 timeo = schedule_timeout(timeo);
1042
1043 finish_wait(&u->peer_wait, &wait);
1044 return timeo;
1045}
1046
1047static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1048 int addr_len, int flags)
1049{
e27dfcea 1050 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1da177e4 1051 struct sock *sk = sock->sk;
3b1e0a65 1052 struct net *net = sock_net(sk);
1da177e4
LT
1053 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1054 struct sock *newsk = NULL;
1055 struct sock *other = NULL;
1056 struct sk_buff *skb = NULL;
1057 unsigned hash;
1058 int st;
1059 int err;
1060 long timeo;
1061
1062 err = unix_mkname(sunaddr, addr_len, &hash);
1063 if (err < 0)
1064 goto out;
1065 addr_len = err;
1066
f64f9e71
JP
1067 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1068 (err = unix_autobind(sock)) != 0)
1da177e4
LT
1069 goto out;
1070
1071 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1072
1073 /* First of all allocate resources.
1074 If we will make it after state is locked,
1075 we will have to recheck all again in any case.
1076 */
1077
1078 err = -ENOMEM;
1079
1080 /* create new sock for complete connection */
3b1e0a65 1081 newsk = unix_create1(sock_net(sk), NULL);
1da177e4
LT
1082 if (newsk == NULL)
1083 goto out;
1084
1085 /* Allocate skb for sending to listening sock */
1086 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1087 if (skb == NULL)
1088 goto out;
1089
1090restart:
1091 /* Find listening sock. */
097e66c5 1092 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1da177e4
LT
1093 if (!other)
1094 goto out;
1095
1096 /* Latch state of peer */
1c92b4e5 1097 unix_state_lock(other);
1da177e4
LT
1098
1099 /* Apparently VFS overslept socket death. Retry. */
1100 if (sock_flag(other, SOCK_DEAD)) {
1c92b4e5 1101 unix_state_unlock(other);
1da177e4
LT
1102 sock_put(other);
1103 goto restart;
1104 }
1105
1106 err = -ECONNREFUSED;
1107 if (other->sk_state != TCP_LISTEN)
1108 goto out_unlock;
77238f2b
TS
1109 if (other->sk_shutdown & RCV_SHUTDOWN)
1110 goto out_unlock;
1da177e4 1111
3c73419c 1112 if (unix_recvq_full(other)) {
1da177e4
LT
1113 err = -EAGAIN;
1114 if (!timeo)
1115 goto out_unlock;
1116
1117 timeo = unix_wait_for_peer(other, timeo);
1118
1119 err = sock_intr_errno(timeo);
1120 if (signal_pending(current))
1121 goto out;
1122 sock_put(other);
1123 goto restart;
ac7bfa62 1124 }
1da177e4
LT
1125
1126 /* Latch our state.
1127
e5537bfc 1128 It is tricky place. We need to grab our state lock and cannot
1da177e4
LT
1129 drop lock on peer. It is dangerous because deadlock is
1130 possible. Connect to self case and simultaneous
1131 attempt to connect are eliminated by checking socket
1132 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1133 check this before attempt to grab lock.
1134
1135 Well, and we have to recheck the state after socket locked.
1136 */
1137 st = sk->sk_state;
1138
1139 switch (st) {
1140 case TCP_CLOSE:
1141 /* This is ok... continue with connect */
1142 break;
1143 case TCP_ESTABLISHED:
1144 /* Socket is already connected */
1145 err = -EISCONN;
1146 goto out_unlock;
1147 default:
1148 err = -EINVAL;
1149 goto out_unlock;
1150 }
1151
1c92b4e5 1152 unix_state_lock_nested(sk);
1da177e4
LT
1153
1154 if (sk->sk_state != st) {
1c92b4e5
DM
1155 unix_state_unlock(sk);
1156 unix_state_unlock(other);
1da177e4
LT
1157 sock_put(other);
1158 goto restart;
1159 }
1160
3610cda5 1161 err = security_unix_stream_connect(sk, other, newsk);
1da177e4 1162 if (err) {
1c92b4e5 1163 unix_state_unlock(sk);
1da177e4
LT
1164 goto out_unlock;
1165 }
1166
1167 /* The way is open! Fastly set all the necessary fields... */
1168
1169 sock_hold(sk);
1170 unix_peer(newsk) = sk;
1171 newsk->sk_state = TCP_ESTABLISHED;
1172 newsk->sk_type = sk->sk_type;
109f6e39 1173 init_peercred(newsk);
1da177e4 1174 newu = unix_sk(newsk);
eaefd110 1175 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1da177e4
LT
1176 otheru = unix_sk(other);
1177
1178 /* copy address information from listening to new sock*/
1179 if (otheru->addr) {
1180 atomic_inc(&otheru->addr->refcnt);
1181 newu->addr = otheru->addr;
1182 }
1183 if (otheru->dentry) {
1184 newu->dentry = dget(otheru->dentry);
1185 newu->mnt = mntget(otheru->mnt);
1186 }
1187
1188 /* Set credentials */
109f6e39 1189 copy_peercred(sk, other);
1da177e4 1190
1da177e4
LT
1191 sock->state = SS_CONNECTED;
1192 sk->sk_state = TCP_ESTABLISHED;
830a1e5c
BL
1193 sock_hold(newsk);
1194
1195 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1196 unix_peer(sk) = newsk;
1da177e4 1197
1c92b4e5 1198 unix_state_unlock(sk);
1da177e4
LT
1199
1200 /* take ten and and send info to listening sock */
1201 spin_lock(&other->sk_receive_queue.lock);
1202 __skb_queue_tail(&other->sk_receive_queue, skb);
1da177e4 1203 spin_unlock(&other->sk_receive_queue.lock);
1c92b4e5 1204 unix_state_unlock(other);
1da177e4
LT
1205 other->sk_data_ready(other, 0);
1206 sock_put(other);
1207 return 0;
1208
1209out_unlock:
1210 if (other)
1c92b4e5 1211 unix_state_unlock(other);
1da177e4
LT
1212
1213out:
40d44446 1214 kfree_skb(skb);
1da177e4
LT
1215 if (newsk)
1216 unix_release_sock(newsk, 0);
1217 if (other)
1218 sock_put(other);
1219 return err;
1220}
1221
1222static int unix_socketpair(struct socket *socka, struct socket *sockb)
1223{
e27dfcea 1224 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1225
1226 /* Join our sockets back to back */
1227 sock_hold(ska);
1228 sock_hold(skb);
e27dfcea
JK
1229 unix_peer(ska) = skb;
1230 unix_peer(skb) = ska;
109f6e39
EB
1231 init_peercred(ska);
1232 init_peercred(skb);
1da177e4
LT
1233
1234 if (ska->sk_type != SOCK_DGRAM) {
1235 ska->sk_state = TCP_ESTABLISHED;
1236 skb->sk_state = TCP_ESTABLISHED;
1237 socka->state = SS_CONNECTED;
1238 sockb->state = SS_CONNECTED;
1239 }
1240 return 0;
1241}
1242
1243static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1244{
1245 struct sock *sk = sock->sk;
1246 struct sock *tsk;
1247 struct sk_buff *skb;
1248 int err;
1249
1250 err = -EOPNOTSUPP;
6eba6a37 1251 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1252 goto out;
1253
1254 err = -EINVAL;
1255 if (sk->sk_state != TCP_LISTEN)
1256 goto out;
1257
1258 /* If socket state is TCP_LISTEN it cannot change (for now...),
1259 * so that no locks are necessary.
1260 */
1261
1262 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1263 if (!skb) {
1264 /* This means receive shutdown. */
1265 if (err == 0)
1266 err = -EINVAL;
1267 goto out;
1268 }
1269
1270 tsk = skb->sk;
1271 skb_free_datagram(sk, skb);
1272 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1273
1274 /* attach accepted sock to socket */
1c92b4e5 1275 unix_state_lock(tsk);
1da177e4
LT
1276 newsock->state = SS_CONNECTED;
1277 sock_graft(tsk, newsock);
1c92b4e5 1278 unix_state_unlock(tsk);
1da177e4
LT
1279 return 0;
1280
1281out:
1282 return err;
1283}
1284
1285
1286static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1287{
1288 struct sock *sk = sock->sk;
1289 struct unix_sock *u;
13cfa97b 1290 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1291 int err = 0;
1292
1293 if (peer) {
1294 sk = unix_peer_get(sk);
1295
1296 err = -ENOTCONN;
1297 if (!sk)
1298 goto out;
1299 err = 0;
1300 } else {
1301 sock_hold(sk);
1302 }
1303
1304 u = unix_sk(sk);
1c92b4e5 1305 unix_state_lock(sk);
1da177e4
LT
1306 if (!u->addr) {
1307 sunaddr->sun_family = AF_UNIX;
1308 sunaddr->sun_path[0] = 0;
1309 *uaddr_len = sizeof(short);
1310 } else {
1311 struct unix_address *addr = u->addr;
1312
1313 *uaddr_len = addr->len;
1314 memcpy(sunaddr, addr->name, *uaddr_len);
1315 }
1c92b4e5 1316 unix_state_unlock(sk);
1da177e4
LT
1317 sock_put(sk);
1318out:
1319 return err;
1320}
1321
1322static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1323{
1324 int i;
1325
1326 scm->fp = UNIXCB(skb).fp;
1da177e4
LT
1327 UNIXCB(skb).fp = NULL;
1328
6eba6a37 1329 for (i = scm->fp->count-1; i >= 0; i--)
1da177e4
LT
1330 unix_notinflight(scm->fp->fp[i]);
1331}
1332
7361c36c 1333static void unix_destruct_scm(struct sk_buff *skb)
1da177e4
LT
1334{
1335 struct scm_cookie scm;
1336 memset(&scm, 0, sizeof(scm));
7361c36c
EB
1337 scm.pid = UNIXCB(skb).pid;
1338 scm.cred = UNIXCB(skb).cred;
1339 if (UNIXCB(skb).fp)
1340 unix_detach_fds(&scm, skb);
1da177e4
LT
1341
1342 /* Alas, it calls VFS */
1343 /* So fscking what? fput() had been SMP-safe since the last Summer */
1344 scm_destroy(&scm);
1345 sock_wfree(skb);
1346}
1347
25888e30
ED
1348#define MAX_RECURSION_LEVEL 4
1349
6209344f 1350static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1da177e4
LT
1351{
1352 int i;
25888e30
ED
1353 unsigned char max_level = 0;
1354 int unix_sock_count = 0;
1355
1356 for (i = scm->fp->count - 1; i >= 0; i--) {
1357 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1358
1359 if (sk) {
1360 unix_sock_count++;
1361 max_level = max(max_level,
1362 unix_sk(sk)->recursion_level);
1363 }
1364 }
1365 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1366 return -ETOOMANYREFS;
6209344f
MS
1367
1368 /*
1369 * Need to duplicate file references for the sake of garbage
1370 * collection. Otherwise a socket in the fps might become a
1371 * candidate for GC while the skb is not yet queued.
1372 */
1373 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1374 if (!UNIXCB(skb).fp)
1375 return -ENOMEM;
1376
25888e30
ED
1377 if (unix_sock_count) {
1378 for (i = scm->fp->count - 1; i >= 0; i--)
1379 unix_inflight(scm->fp->fp[i]);
1380 }
1381 return max_level;
1da177e4
LT
1382}
1383
f78a5fda 1384static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1385{
1386 int err = 0;
16e57262 1387
f78a5fda 1388 UNIXCB(skb).pid = get_pid(scm->pid);
16e57262
ED
1389 if (scm->cred)
1390 UNIXCB(skb).cred = get_cred(scm->cred);
7361c36c
EB
1391 UNIXCB(skb).fp = NULL;
1392 if (scm->fp && send_fds)
1393 err = unix_attach_fds(scm, skb);
1394
1395 skb->destructor = unix_destruct_scm;
1396 return err;
1397}
1398
16e57262
ED
1399/*
1400 * Some apps rely on write() giving SCM_CREDENTIALS
1401 * We include credentials if source or destination socket
1402 * asserted SOCK_PASSCRED.
1403 */
1404static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1405 const struct sock *other)
1406{
1407 if (UNIXCB(skb).cred)
1408 return;
1409 if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1410 !other->sk_socket ||
1411 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1412 UNIXCB(skb).pid = get_pid(task_tgid(current));
1413 UNIXCB(skb).cred = get_current_cred();
1414 }
1415}
1416
1da177e4
LT
1417/*
1418 * Send AF_UNIX data.
1419 */
1420
1421static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1422 struct msghdr *msg, size_t len)
1423{
1424 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1425 struct sock *sk = sock->sk;
3b1e0a65 1426 struct net *net = sock_net(sk);
1da177e4 1427 struct unix_sock *u = unix_sk(sk);
e27dfcea 1428 struct sockaddr_un *sunaddr = msg->msg_name;
1da177e4
LT
1429 struct sock *other = NULL;
1430 int namelen = 0; /* fake GCC */
1431 int err;
1432 unsigned hash;
f78a5fda 1433 struct sk_buff *skb;
1da177e4
LT
1434 long timeo;
1435 struct scm_cookie tmp_scm;
25888e30 1436 int max_level;
1da177e4
LT
1437
1438 if (NULL == siocb->scm)
1439 siocb->scm = &tmp_scm;
5f23b734 1440 wait_for_unix_gc();
1da177e4
LT
1441 err = scm_send(sock, msg, siocb->scm);
1442 if (err < 0)
1443 return err;
1444
1445 err = -EOPNOTSUPP;
1446 if (msg->msg_flags&MSG_OOB)
1447 goto out;
1448
1449 if (msg->msg_namelen) {
1450 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1451 if (err < 0)
1452 goto out;
1453 namelen = err;
1454 } else {
1455 sunaddr = NULL;
1456 err = -ENOTCONN;
1457 other = unix_peer_get(sk);
1458 if (!other)
1459 goto out;
1460 }
1461
f64f9e71
JP
1462 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1463 && (err = unix_autobind(sock)) != 0)
1da177e4
LT
1464 goto out;
1465
1466 err = -EMSGSIZE;
1467 if (len > sk->sk_sndbuf - 32)
1468 goto out;
1469
1470 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
e27dfcea 1471 if (skb == NULL)
1da177e4
LT
1472 goto out;
1473
f78a5fda 1474 err = unix_scm_to_skb(siocb->scm, skb, true);
25888e30 1475 if (err < 0)
7361c36c 1476 goto out_free;
25888e30 1477 max_level = err + 1;
dc49c1f9 1478 unix_get_secdata(siocb->scm, skb);
877ce7c1 1479
badff6d0 1480 skb_reset_transport_header(skb);
6eba6a37 1481 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1da177e4
LT
1482 if (err)
1483 goto out_free;
1484
1485 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1486
1487restart:
1488 if (!other) {
1489 err = -ECONNRESET;
1490 if (sunaddr == NULL)
1491 goto out_free;
1492
097e66c5 1493 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1da177e4 1494 hash, &err);
e27dfcea 1495 if (other == NULL)
1da177e4
LT
1496 goto out_free;
1497 }
1498
d6ae3bae
AC
1499 if (sk_filter(other, skb) < 0) {
1500 /* Toss the packet but do not return any error to the sender */
1501 err = len;
1502 goto out_free;
1503 }
1504
1c92b4e5 1505 unix_state_lock(other);
1da177e4
LT
1506 err = -EPERM;
1507 if (!unix_may_send(sk, other))
1508 goto out_unlock;
1509
1510 if (sock_flag(other, SOCK_DEAD)) {
1511 /*
1512 * Check with 1003.1g - what should
1513 * datagram error
1514 */
1c92b4e5 1515 unix_state_unlock(other);
1da177e4
LT
1516 sock_put(other);
1517
1518 err = 0;
1c92b4e5 1519 unix_state_lock(sk);
1da177e4 1520 if (unix_peer(sk) == other) {
e27dfcea 1521 unix_peer(sk) = NULL;
1c92b4e5 1522 unix_state_unlock(sk);
1da177e4
LT
1523
1524 unix_dgram_disconnected(sk, other);
1525 sock_put(other);
1526 err = -ECONNREFUSED;
1527 } else {
1c92b4e5 1528 unix_state_unlock(sk);
1da177e4
LT
1529 }
1530
1531 other = NULL;
1532 if (err)
1533 goto out_free;
1534 goto restart;
1535 }
1536
1537 err = -EPIPE;
1538 if (other->sk_shutdown & RCV_SHUTDOWN)
1539 goto out_unlock;
1540
1541 if (sk->sk_type != SOCK_SEQPACKET) {
1542 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1543 if (err)
1544 goto out_unlock;
1545 }
1546
3c73419c 1547 if (unix_peer(other) != sk && unix_recvq_full(other)) {
1da177e4
LT
1548 if (!timeo) {
1549 err = -EAGAIN;
1550 goto out_unlock;
1551 }
1552
1553 timeo = unix_wait_for_peer(other, timeo);
1554
1555 err = sock_intr_errno(timeo);
1556 if (signal_pending(current))
1557 goto out_free;
1558
1559 goto restart;
1560 }
1561
3f66116e
AC
1562 if (sock_flag(other, SOCK_RCVTSTAMP))
1563 __net_timestamp(skb);
16e57262 1564 maybe_add_creds(skb, sock, other);
1da177e4 1565 skb_queue_tail(&other->sk_receive_queue, skb);
25888e30
ED
1566 if (max_level > unix_sk(other)->recursion_level)
1567 unix_sk(other)->recursion_level = max_level;
1c92b4e5 1568 unix_state_unlock(other);
1da177e4
LT
1569 other->sk_data_ready(other, len);
1570 sock_put(other);
f78a5fda 1571 scm_destroy(siocb->scm);
1da177e4
LT
1572 return len;
1573
1574out_unlock:
1c92b4e5 1575 unix_state_unlock(other);
1da177e4
LT
1576out_free:
1577 kfree_skb(skb);
1578out:
1579 if (other)
1580 sock_put(other);
f78a5fda 1581 scm_destroy(siocb->scm);
1da177e4
LT
1582 return err;
1583}
1584
ac7bfa62 1585
1da177e4
LT
1586static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1587 struct msghdr *msg, size_t len)
1588{
1589 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1590 struct sock *sk = sock->sk;
1591 struct sock *other = NULL;
6eba6a37 1592 int err, size;
f78a5fda 1593 struct sk_buff *skb;
e27dfcea 1594 int sent = 0;
1da177e4 1595 struct scm_cookie tmp_scm;
8ba69ba6 1596 bool fds_sent = false;
25888e30 1597 int max_level;
1da177e4
LT
1598
1599 if (NULL == siocb->scm)
1600 siocb->scm = &tmp_scm;
5f23b734 1601 wait_for_unix_gc();
1da177e4
LT
1602 err = scm_send(sock, msg, siocb->scm);
1603 if (err < 0)
1604 return err;
1605
1606 err = -EOPNOTSUPP;
1607 if (msg->msg_flags&MSG_OOB)
1608 goto out_err;
1609
1610 if (msg->msg_namelen) {
1611 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1612 goto out_err;
1613 } else {
1da177e4 1614 err = -ENOTCONN;
830a1e5c 1615 other = unix_peer(sk);
1da177e4
LT
1616 if (!other)
1617 goto out_err;
1618 }
1619
1620 if (sk->sk_shutdown & SEND_SHUTDOWN)
1621 goto pipe_err;
1622
6eba6a37 1623 while (sent < len) {
1da177e4 1624 /*
e9df7d7f
BL
1625 * Optimisation for the fact that under 0.01% of X
1626 * messages typically need breaking up.
1da177e4
LT
1627 */
1628
e9df7d7f 1629 size = len-sent;
1da177e4
LT
1630
1631 /* Keep two messages in the pipe so it schedules better */
e9df7d7f
BL
1632 if (size > ((sk->sk_sndbuf >> 1) - 64))
1633 size = (sk->sk_sndbuf >> 1) - 64;
1da177e4
LT
1634
1635 if (size > SKB_MAX_ALLOC)
1636 size = SKB_MAX_ALLOC;
ac7bfa62 1637
1da177e4
LT
1638 /*
1639 * Grab a buffer
1640 */
ac7bfa62 1641
6eba6a37
ED
1642 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1643 &err);
1da177e4 1644
e27dfcea 1645 if (skb == NULL)
1da177e4
LT
1646 goto out_err;
1647
1648 /*
1649 * If you pass two values to the sock_alloc_send_skb
1650 * it tries to grab the large buffer with GFP_NOFS
1651 * (which can fail easily), and if it fails grab the
1652 * fallback size buffer which is under a page and will
1653 * succeed. [Alan]
1654 */
1655 size = min_t(int, size, skb_tailroom(skb));
1656
7361c36c 1657
f78a5fda
DM
1658 /* Only send the fds in the first buffer */
1659 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
25888e30 1660 if (err < 0) {
7361c36c 1661 kfree_skb(skb);
f78a5fda 1662 goto out_err;
6209344f 1663 }
25888e30 1664 max_level = err + 1;
7361c36c 1665 fds_sent = true;
1da177e4 1666
6eba6a37
ED
1667 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1668 if (err) {
1da177e4 1669 kfree_skb(skb);
f78a5fda 1670 goto out_err;
1da177e4
LT
1671 }
1672
1c92b4e5 1673 unix_state_lock(other);
1da177e4
LT
1674
1675 if (sock_flag(other, SOCK_DEAD) ||
1676 (other->sk_shutdown & RCV_SHUTDOWN))
1677 goto pipe_err_free;
1678
16e57262 1679 maybe_add_creds(skb, sock, other);
1da177e4 1680 skb_queue_tail(&other->sk_receive_queue, skb);
25888e30
ED
1681 if (max_level > unix_sk(other)->recursion_level)
1682 unix_sk(other)->recursion_level = max_level;
1c92b4e5 1683 unix_state_unlock(other);
1da177e4 1684 other->sk_data_ready(other, size);
e27dfcea 1685 sent += size;
1da177e4 1686 }
1da177e4 1687
f78a5fda 1688 scm_destroy(siocb->scm);
1da177e4
LT
1689 siocb->scm = NULL;
1690
1691 return sent;
1692
1693pipe_err_free:
1c92b4e5 1694 unix_state_unlock(other);
1da177e4
LT
1695 kfree_skb(skb);
1696pipe_err:
6eba6a37
ED
1697 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1698 send_sig(SIGPIPE, current, 0);
1da177e4
LT
1699 err = -EPIPE;
1700out_err:
f78a5fda 1701 scm_destroy(siocb->scm);
1da177e4
LT
1702 siocb->scm = NULL;
1703 return sent ? : err;
1704}
1705
1706static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1707 struct msghdr *msg, size_t len)
1708{
1709 int err;
1710 struct sock *sk = sock->sk;
ac7bfa62 1711
1da177e4
LT
1712 err = sock_error(sk);
1713 if (err)
1714 return err;
1715
1716 if (sk->sk_state != TCP_ESTABLISHED)
1717 return -ENOTCONN;
1718
1719 if (msg->msg_namelen)
1720 msg->msg_namelen = 0;
1721
1722 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1723}
ac7bfa62 1724
a05d2ad1
EB
1725static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1726 struct msghdr *msg, size_t size,
1727 int flags)
1728{
1729 struct sock *sk = sock->sk;
1730
1731 if (sk->sk_state != TCP_ESTABLISHED)
1732 return -ENOTCONN;
1733
1734 return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1735}
1736
1da177e4
LT
1737static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1738{
1739 struct unix_sock *u = unix_sk(sk);
1740
1741 msg->msg_namelen = 0;
1742 if (u->addr) {
1743 msg->msg_namelen = u->addr->len;
1744 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1745 }
1746}
1747
1748static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1749 struct msghdr *msg, size_t size,
1750 int flags)
1751{
1752 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1753 struct scm_cookie tmp_scm;
1754 struct sock *sk = sock->sk;
1755 struct unix_sock *u = unix_sk(sk);
1756 int noblock = flags & MSG_DONTWAIT;
1757 struct sk_buff *skb;
1758 int err;
1759
1760 err = -EOPNOTSUPP;
1761 if (flags&MSG_OOB)
1762 goto out;
1763
1764 msg->msg_namelen = 0;
1765
b3ca9b02
RW
1766 err = mutex_lock_interruptible(&u->readlock);
1767 if (err) {
1768 err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1769 goto out;
1770 }
1da177e4
LT
1771
1772 skb = skb_recv_datagram(sk, flags, noblock, &err);
0a112258
FZ
1773 if (!skb) {
1774 unix_state_lock(sk);
1775 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1776 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1777 (sk->sk_shutdown & RCV_SHUTDOWN))
1778 err = 0;
1779 unix_state_unlock(sk);
1da177e4 1780 goto out_unlock;
0a112258 1781 }
1da177e4 1782
67426b75
ED
1783 wake_up_interruptible_sync_poll(&u->peer_wait,
1784 POLLOUT | POLLWRNORM | POLLWRBAND);
1da177e4
LT
1785
1786 if (msg->msg_name)
1787 unix_copy_addr(msg, skb->sk);
1788
1789 if (size > skb->len)
1790 size = skb->len;
1791 else if (size < skb->len)
1792 msg->msg_flags |= MSG_TRUNC;
1793
1794 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1795 if (err)
1796 goto out_free;
1797
3f66116e
AC
1798 if (sock_flag(sk, SOCK_RCVTSTAMP))
1799 __sock_recv_timestamp(msg, sk, skb);
1800
1da177e4
LT
1801 if (!siocb->scm) {
1802 siocb->scm = &tmp_scm;
1803 memset(&tmp_scm, 0, sizeof(tmp_scm));
1804 }
f78a5fda 1805 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
877ce7c1 1806 unix_set_secdata(siocb->scm, skb);
1da177e4 1807
6eba6a37 1808 if (!(flags & MSG_PEEK)) {
1da177e4
LT
1809 if (UNIXCB(skb).fp)
1810 unix_detach_fds(siocb->scm, skb);
6eba6a37 1811 } else {
1da177e4
LT
1812 /* It is questionable: on PEEK we could:
1813 - do not return fds - good, but too simple 8)
1814 - return fds, and do not return them on read (old strategy,
1815 apparently wrong)
1816 - clone fds (I chose it for now, it is the most universal
1817 solution)
ac7bfa62
YH
1818
1819 POSIX 1003.1g does not actually define this clearly
1820 at all. POSIX 1003.1g doesn't define a lot of things
1821 clearly however!
1822
1da177e4
LT
1823 */
1824 if (UNIXCB(skb).fp)
1825 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1826 }
1827 err = size;
1828
1829 scm_recv(sock, msg, siocb->scm, flags);
1830
1831out_free:
6eba6a37 1832 skb_free_datagram(sk, skb);
1da177e4 1833out_unlock:
57b47a53 1834 mutex_unlock(&u->readlock);
1da177e4
LT
1835out:
1836 return err;
1837}
1838
1839/*
1840 * Sleep until data has arrive. But check for races..
1841 */
ac7bfa62 1842
6eba6a37 1843static long unix_stream_data_wait(struct sock *sk, long timeo)
1da177e4
LT
1844{
1845 DEFINE_WAIT(wait);
1846
1c92b4e5 1847 unix_state_lock(sk);
1da177e4
LT
1848
1849 for (;;) {
aa395145 1850 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4 1851
b03efcfb 1852 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1da177e4
LT
1853 sk->sk_err ||
1854 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1855 signal_pending(current) ||
1856 !timeo)
1857 break;
1858
1859 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1c92b4e5 1860 unix_state_unlock(sk);
1da177e4 1861 timeo = schedule_timeout(timeo);
1c92b4e5 1862 unix_state_lock(sk);
1da177e4
LT
1863 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1864 }
1865
aa395145 1866 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 1867 unix_state_unlock(sk);
1da177e4
LT
1868 return timeo;
1869}
1870
1871
1872
1873static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1874 struct msghdr *msg, size_t size,
1875 int flags)
1876{
1877 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1878 struct scm_cookie tmp_scm;
1879 struct sock *sk = sock->sk;
1880 struct unix_sock *u = unix_sk(sk);
e27dfcea 1881 struct sockaddr_un *sunaddr = msg->msg_name;
1da177e4
LT
1882 int copied = 0;
1883 int check_creds = 0;
1884 int target;
1885 int err = 0;
1886 long timeo;
1887
1888 err = -EINVAL;
1889 if (sk->sk_state != TCP_ESTABLISHED)
1890 goto out;
1891
1892 err = -EOPNOTSUPP;
1893 if (flags&MSG_OOB)
1894 goto out;
1895
1896 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1897 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1898
1899 msg->msg_namelen = 0;
1900
1901 /* Lock the socket to prevent queue disordering
1902 * while sleeps in memcpy_tomsg
1903 */
1904
1905 if (!siocb->scm) {
1906 siocb->scm = &tmp_scm;
1907 memset(&tmp_scm, 0, sizeof(tmp_scm));
1908 }
1909
b3ca9b02
RW
1910 err = mutex_lock_interruptible(&u->readlock);
1911 if (err) {
1912 err = sock_intr_errno(timeo);
1913 goto out;
1914 }
1da177e4 1915
6eba6a37 1916 do {
1da177e4
LT
1917 int chunk;
1918 struct sk_buff *skb;
1919
3c0d2f37 1920 unix_state_lock(sk);
1da177e4 1921 skb = skb_dequeue(&sk->sk_receive_queue);
6eba6a37 1922 if (skb == NULL) {
25888e30 1923 unix_sk(sk)->recursion_level = 0;
1da177e4 1924 if (copied >= target)
3c0d2f37 1925 goto unlock;
1da177e4
LT
1926
1927 /*
1928 * POSIX 1003.1g mandates this order.
1929 */
ac7bfa62 1930
6eba6a37
ED
1931 err = sock_error(sk);
1932 if (err)
3c0d2f37 1933 goto unlock;
1da177e4 1934 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
1935 goto unlock;
1936
1937 unix_state_unlock(sk);
1da177e4
LT
1938 err = -EAGAIN;
1939 if (!timeo)
1940 break;
57b47a53 1941 mutex_unlock(&u->readlock);
1da177e4
LT
1942
1943 timeo = unix_stream_data_wait(sk, timeo);
1944
b3ca9b02
RW
1945 if (signal_pending(current)
1946 || mutex_lock_interruptible(&u->readlock)) {
1da177e4
LT
1947 err = sock_intr_errno(timeo);
1948 goto out;
1949 }
b3ca9b02 1950
1da177e4 1951 continue;
3c0d2f37
MS
1952 unlock:
1953 unix_state_unlock(sk);
1954 break;
1da177e4 1955 }
3c0d2f37 1956 unix_state_unlock(sk);
1da177e4
LT
1957
1958 if (check_creds) {
1959 /* Never glue messages from different writers */
7361c36c
EB
1960 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
1961 (UNIXCB(skb).cred != siocb->scm->cred)) {
1da177e4 1962 skb_queue_head(&sk->sk_receive_queue, skb);
0884d7aa 1963 sk->sk_data_ready(sk, skb->len);
1da177e4
LT
1964 break;
1965 }
1966 } else {
1967 /* Copy credentials */
f78a5fda 1968 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1da177e4
LT
1969 check_creds = 1;
1970 }
1971
1972 /* Copy address just once */
6eba6a37 1973 if (sunaddr) {
1da177e4
LT
1974 unix_copy_addr(msg, skb->sk);
1975 sunaddr = NULL;
1976 }
1977
1978 chunk = min_t(unsigned int, skb->len, size);
1979 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1980 skb_queue_head(&sk->sk_receive_queue, skb);
0884d7aa 1981 sk->sk_data_ready(sk, skb->len);
1da177e4
LT
1982 if (copied == 0)
1983 copied = -EFAULT;
1984 break;
1985 }
1986 copied += chunk;
1987 size -= chunk;
1988
1989 /* Mark read part of skb as used */
6eba6a37 1990 if (!(flags & MSG_PEEK)) {
1da177e4
LT
1991 skb_pull(skb, chunk);
1992
1993 if (UNIXCB(skb).fp)
1994 unix_detach_fds(siocb->scm, skb);
1995
1996 /* put the skb back if we didn't use it up.. */
6eba6a37 1997 if (skb->len) {
1da177e4 1998 skb_queue_head(&sk->sk_receive_queue, skb);
0884d7aa 1999 sk->sk_data_ready(sk, skb->len);
1da177e4
LT
2000 break;
2001 }
2002
70d4bf6d 2003 consume_skb(skb);
1da177e4
LT
2004
2005 if (siocb->scm->fp)
2006 break;
6eba6a37 2007 } else {
1da177e4
LT
2008 /* It is questionable, see note in unix_dgram_recvmsg.
2009 */
2010 if (UNIXCB(skb).fp)
2011 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2012
2013 /* put message back and return */
2014 skb_queue_head(&sk->sk_receive_queue, skb);
0884d7aa 2015 sk->sk_data_ready(sk, skb->len);
1da177e4
LT
2016 break;
2017 }
2018 } while (size);
2019
57b47a53 2020 mutex_unlock(&u->readlock);
1da177e4
LT
2021 scm_recv(sock, msg, siocb->scm, flags);
2022out:
2023 return copied ? : err;
2024}
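The target computed from sock_rcvlowat() above means a blocking stream read without MSG_WAITALL returns once at least SO_RCVLOWAT bytes have been copied, not necessarily the full buffer. A small user-space sketch of that effect (illustrative, not part of this file; error handling omitted):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>

int main(void)
{
	int sv[2], lowat = 4;
	char buf[8];
	ssize_t n;

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	setsockopt(sv[1], SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat));

	if (fork() == 0) {			/* writer */
		write(sv[0], "ab", 2);		/* below the low-water mark */
		sleep(1);
		write(sv[0], "cd", 2);		/* now the target is reachable */
		_exit(0);
	}

	/* Blocks until copied >= target (4 bytes here), then returns what has
	 * been gathered so far rather than waiting for all 8 requested bytes. */
	n = recv(sv[1], buf, sizeof(buf), 0);
	printf("recv returned %zd\n", n);	/* expected: 4 */
	wait(NULL);
	return 0;
}

With MSG_WAITALL the target would instead be the full request size, since sock_rcvlowat(sk, 1, size) evaluates to size.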
2025
2026static int unix_shutdown(struct socket *sock, int mode)
2027{
2028 struct sock *sk = sock->sk;
2029 struct sock *other;
2030
2031 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
2032
7180a031
AC
2033 if (!mode)
2034 return 0;
2035
2036 unix_state_lock(sk);
2037 sk->sk_shutdown |= mode;
2038 other = unix_peer(sk);
2039 if (other)
2040 sock_hold(other);
2041 unix_state_unlock(sk);
2042 sk->sk_state_change(sk);
2043
2044 if (other &&
2045 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2046
2047 int peer_mode = 0;
2048
2049 if (mode&RCV_SHUTDOWN)
2050 peer_mode |= SEND_SHUTDOWN;
2051 if (mode&SEND_SHUTDOWN)
2052 peer_mode |= RCV_SHUTDOWN;
2053 unix_state_lock(other);
2054 other->sk_shutdown |= peer_mode;
2055 unix_state_unlock(other);
2056 other->sk_state_change(other);
2057 if (peer_mode == SHUTDOWN_MASK)
2058 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2059 else if (peer_mode & RCV_SHUTDOWN)
2060 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2061 }
7180a031
AC
2062 if (other)
2063 sock_put(other);
2064
1da177e4
LT
2065 return 0;
2066}
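unix_shutdown() mirrors the shutdown onto the peer: SHUT_WR on one end sets SEND_SHUTDOWN locally and RCV_SHUTDOWN on the other, so the peer reads EOF immediately while further writes on the shut-down end fail. A minimal user-space illustration on a stream socketpair (error handling omitted):

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>

int main(void)
{
	int sv[2];
	char c;
	ssize_t n;

	signal(SIGPIPE, SIG_IGN);	/* observe EPIPE instead of dying */
	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

	shutdown(sv[0], SHUT_WR);	/* SEND_SHUTDOWN here, RCV_SHUTDOWN on the peer */

	n = read(sv[1], &c, 1);		/* peer sees end of file at once */
	printf("read: %zd\n", n);	/* 0 */

	n = write(sv[0], "x", 1);	/* our sending side is closed */
	printf("write: %zd (%s)\n", n, strerror(errno));	/* -1 (EPIPE) */
	return 0;
}

The sk_wake_async() calls at the end of unix_shutdown() are what turn the peer's state change into POLL_HUP/POLL_IN notifications for SIGIO users.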
2067
2068static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2069{
2070 struct sock *sk = sock->sk;
e27dfcea 2071 long amount = 0;
1da177e4
LT
2072 int err;
2073
6eba6a37
ED
2074 switch (cmd) {
2075 case SIOCOUTQ:
31e6d363 2076 amount = sk_wmem_alloc_get(sk);
6eba6a37
ED
2077 err = put_user(amount, (int __user *)arg);
2078 break;
2079 case SIOCINQ:
1da177e4
LT
2080 {
2081 struct sk_buff *skb;
2082
2083 if (sk->sk_state == TCP_LISTEN) {
2084 err = -EINVAL;
2085 break;
2086 }
2087
2088 spin_lock(&sk->sk_receive_queue.lock);
2089 if (sk->sk_type == SOCK_STREAM ||
2090 sk->sk_type == SOCK_SEQPACKET) {
2091 skb_queue_walk(&sk->sk_receive_queue, skb)
2092 amount += skb->len;
2093 } else {
2094 skb = skb_peek(&sk->sk_receive_queue);
2095 if (skb)
e27dfcea 2096 amount = skb->len;
1da177e4
LT
2097 }
2098 spin_unlock(&sk->sk_receive_queue.lock);
2099 err = put_user(amount, (int __user *)arg);
2100 break;
2101 }
2102
6eba6a37
ED
2103 default:
2104 err = -ENOIOCTLCMD;
2105 break;
1da177e4
LT
2106 }
2107 return err;
2108}
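SIOCINQ and SIOCOUTQ are thus the only socket-specific ioctls here: SIOCINQ reports queued receive bytes (summed over the whole queue for stream and seqpacket sockets, the head datagram's length otherwise), while SIOCOUTQ reports send-buffer memory still charged to the writer via sk_wmem_alloc_get(). A user-space sketch (illustrative; note that SIOCOUTQ counts skb truesize, so it exceeds the raw payload size):

#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>	/* SIOCINQ, SIOCOUTQ */

int main(void)
{
	int sv[2], inq = 0, outq = 0;

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	write(sv[0], "hello", 5);

	ioctl(sv[1], SIOCINQ, &inq);	/* unread bytes on the receive queue: 5 */
	ioctl(sv[0], SIOCOUTQ, &outq);	/* buffer memory still owned by the sender */
	printf("SIOCINQ=%d SIOCOUTQ=%d\n", inq, outq);
	return 0;
}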
2109
6eba6a37 2110static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
2111{
2112 struct sock *sk = sock->sk;
2113 unsigned int mask;
2114
aa395145 2115 sock_poll_wait(file, sk_sleep(sk), wait);
1da177e4
LT
2116 mask = 0;
2117
2118 /* exceptional events? */
2119 if (sk->sk_err)
2120 mask |= POLLERR;
2121 if (sk->sk_shutdown == SHUTDOWN_MASK)
2122 mask |= POLLHUP;
f348d70a 2123 if (sk->sk_shutdown & RCV_SHUTDOWN)
db40980f 2124 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
1da177e4
LT
2125
2126 /* readable? */
db40980f 2127 if (!skb_queue_empty(&sk->sk_receive_queue))
1da177e4
LT
2128 mask |= POLLIN | POLLRDNORM;
2129
2130 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2131 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2132 sk->sk_state == TCP_CLOSE)
1da177e4
LT
2133 mask |= POLLHUP;
2134
2135 /*
2136 * we set writable also when the other side has shut down the
2137 * connection. This prevents stuck sockets.
2138 */
2139 if (unix_writable(sk))
2140 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2141
2142 return mask;
2143}
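The RCV_SHUTDOWN branch above is what lets poll() notice a half-closed peer: after the other end does shutdown(SHUT_WR), this socket reports POLLRDHUP (plus POLLIN/POLLRDNORM) even though nothing is queued. A short user-space sketch (illustrative):

#define _GNU_SOURCE		/* for POLLRDHUP */
#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>

int main(void)
{
	int sv[2];
	struct pollfd pfd;

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	shutdown(sv[0], SHUT_WR);	/* sets RCV_SHUTDOWN on sv[1] */

	pfd.fd = sv[1];
	pfd.events = POLLIN | POLLRDNORM | POLLRDHUP;
	poll(&pfd, 1, 0);
	printf("revents=%#x\n", pfd.revents);	/* POLLIN|POLLRDNORM|POLLRDHUP */
	return 0;
}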
2144
ec0d215f
RW
2145static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2146 poll_table *wait)
3c73419c 2147{
ec0d215f
RW
2148 struct sock *sk = sock->sk, *other;
2149 unsigned int mask, writable;
3c73419c 2150
aa395145 2151 sock_poll_wait(file, sk_sleep(sk), wait);
3c73419c
RW
2152 mask = 0;
2153
2154 /* exceptional events? */
2155 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2156 mask |= POLLERR;
2157 if (sk->sk_shutdown & RCV_SHUTDOWN)
5456f09a 2158 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
3c73419c
RW
2159 if (sk->sk_shutdown == SHUTDOWN_MASK)
2160 mask |= POLLHUP;
2161
2162 /* readable? */
5456f09a 2163 if (!skb_queue_empty(&sk->sk_receive_queue))
3c73419c
RW
2164 mask |= POLLIN | POLLRDNORM;
2165
2166 /* Connection-based need to check for termination and startup */
2167 if (sk->sk_type == SOCK_SEQPACKET) {
2168 if (sk->sk_state == TCP_CLOSE)
2169 mask |= POLLHUP;
2170 /* connection hasn't started yet? */
2171 if (sk->sk_state == TCP_SYN_SENT)
2172 return mask;
2173 }
2174
973a34aa
ED
2175 /* No write status requested, avoid expensive OUT tests. */
2176 if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
2177 return mask;
2178
ec0d215f 2179 writable = unix_writable(sk);
5456f09a
ED
2180 other = unix_peer_get(sk);
2181 if (other) {
2182 if (unix_peer(other) != sk) {
2183 sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2184 if (unix_recvq_full(other))
2185 writable = 0;
ec0d215f 2186 }
5456f09a 2187 sock_put(other);
ec0d215f
RW
2188 }
2189
2190 if (writable)
3c73419c
RW
2191 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2192 else
2193 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2194
3c73419c
RW
2195 return mask;
2196}
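Unlike unix_poll(), the datagram variant also consults the peer: for an asymmetrically connected sender (connect() to a bound receiver that has not connected back), POLLOUT is withheld while the receiver's queue is full, i.e. once roughly net.unix.max_dgram_qlen datagrams are pending (10 by default, as set in unix_net_init() below). A user-space sketch of that back-pressure (illustrative; the abstract socket name is arbitrary and error handling is omitted):

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>

int main(void)
{
	int srv, cli;
	struct sockaddr_un sun;
	socklen_t len;
	char buf[64] = "ping";
	struct pollfd pfd = { .events = POLLOUT };

	/* Receiver bound to an abstract name (leading NUL in sun_path). */
	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	strcpy(sun.sun_path + 1, "dgram-poll-demo");
	len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen("dgram-poll-demo");

	srv = socket(AF_UNIX, SOCK_DGRAM, 0);
	bind(srv, (struct sockaddr *)&sun, len);

	cli = socket(AF_UNIX, SOCK_DGRAM, 0);
	connect(cli, (struct sockaddr *)&sun, len);
	fcntl(cli, F_SETFL, O_NONBLOCK);

	while (send(cli, buf, sizeof(buf), 0) > 0)	/* fill the receive queue */
		;
	printf("send stopped: %s\n", strerror(errno));	/* EAGAIN */

	pfd.fd = cli;
	poll(&pfd, 1, 0);
	printf("POLLOUT while peer queue full: %d\n", !!(pfd.revents & POLLOUT)); /* 0 */

	recv(srv, buf, sizeof(buf), 0);			/* drain one datagram */
	poll(&pfd, 1, 0);
	printf("POLLOUT after draining one:    %d\n", !!(pfd.revents & POLLOUT)); /* 1 */
	return 0;
}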
1da177e4
LT
2197
2198#ifdef CONFIG_PROC_FS
a53eb3fe
PE
2199static struct sock *first_unix_socket(int *i)
2200{
2201 for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2202 if (!hlist_empty(&unix_socket_table[*i]))
2203 return __sk_head(&unix_socket_table[*i]);
2204 }
2205 return NULL;
2206}
2207
2208static struct sock *next_unix_socket(int *i, struct sock *s)
2209{
2210 struct sock *next = sk_next(s);
2211 /* More in this chain? */
2212 if (next)
2213 return next;
2214 /* Look for next non-empty chain. */
2215 for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2216 if (!hlist_empty(&unix_socket_table[*i]))
2217 return __sk_head(&unix_socket_table[*i]);
2218 }
2219 return NULL;
2220}
2221
097e66c5 2222struct unix_iter_state {
e372c414 2223 struct seq_net_private p;
097e66c5
DL
2224 int i;
2225};
e27dfcea 2226
1218854a 2227static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
1da177e4 2228{
1218854a 2229 struct unix_iter_state *iter = seq->private;
1da177e4
LT
2230 loff_t off = 0;
2231 struct sock *s;
2232
097e66c5 2233 for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
1218854a 2234 if (sock_net(s) != seq_file_net(seq))
097e66c5 2235 continue;
ac7bfa62 2236 if (off == pos)
1da177e4
LT
2237 return s;
2238 ++off;
2239 }
2240 return NULL;
2241}
2242
1da177e4 2243static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2244 __acquires(unix_table_lock)
1da177e4 2245{
fbe9cc4a 2246 spin_lock(&unix_table_lock);
b9f3124f 2247 return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1da177e4
LT
2248}
2249
2250static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2251{
097e66c5
DL
2252 struct unix_iter_state *iter = seq->private;
2253 struct sock *sk = v;
1da177e4
LT
2254 ++*pos;
2255
b9f3124f 2256 if (v == SEQ_START_TOKEN)
097e66c5
DL
2257 sk = first_unix_socket(&iter->i);
2258 else
2259 sk = next_unix_socket(&iter->i, sk);
1218854a 2260 while (sk && (sock_net(sk) != seq_file_net(seq)))
097e66c5
DL
2261 sk = next_unix_socket(&iter->i, sk);
2262 return sk;
1da177e4
LT
2263}
2264
2265static void unix_seq_stop(struct seq_file *seq, void *v)
9a429c49 2266 __releases(unix_table_lock)
1da177e4 2267{
fbe9cc4a 2268 spin_unlock(&unix_table_lock);
1da177e4
LT
2269}
2270
2271static int unix_seq_show(struct seq_file *seq, void *v)
2272{
ac7bfa62 2273
b9f3124f 2274 if (v == SEQ_START_TOKEN)
1da177e4
LT
2275 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2276 "Inode Path\n");
2277 else {
2278 struct sock *s = v;
2279 struct unix_sock *u = unix_sk(s);
1c92b4e5 2280 unix_state_lock(s);
1da177e4 2281
71338aa7 2282 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4
LT
2283 s,
2284 atomic_read(&s->sk_refcnt),
2285 0,
2286 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2287 s->sk_type,
2288 s->sk_socket ?
2289 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2290 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2291 sock_i_ino(s));
2292
2293 if (u->addr) {
2294 int i, len;
2295 seq_putc(seq, ' ');
2296
2297 i = 0;
2298 len = u->addr->len - sizeof(short);
2299 if (!UNIX_ABSTRACT(s))
2300 len--;
2301 else {
2302 seq_putc(seq, '@');
2303 i++;
2304 }
2305 for ( ; i < len; i++)
2306 seq_putc(seq, u->addr->name->sun_path[i]);
2307 }
1c92b4e5 2308 unix_state_unlock(s);
1da177e4
LT
2309 seq_putc(seq, '\n');
2310 }
2311
2312 return 0;
2313}
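Together with the header emitted for SEQ_START_TOKEN, each socket becomes one line of /proc/net/unix. As a rough illustration (all values below are made up), a listening stream socket bound to a filesystem path would appear as:

Num               RefCount Protocol Flags    Type St Inode Path
ffff8800348bb000: 00000002 00000000 00010000 0001 01 16163 /run/demo.sock

Flags shows __SO_ACCEPTCON (0x00010000) for listeners, Type is the SOCK_* value, St the SS_* state, and abstract names are printed with a leading '@'.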
2314
56b3d975 2315static const struct seq_operations unix_seq_ops = {
1da177e4
LT
2316 .start = unix_seq_start,
2317 .next = unix_seq_next,
2318 .stop = unix_seq_stop,
2319 .show = unix_seq_show,
2320};
2321
1da177e4
LT
2322static int unix_seq_open(struct inode *inode, struct file *file)
2323{
e372c414
DL
2324 return seq_open_net(inode, file, &unix_seq_ops,
2325 sizeof(struct unix_iter_state));
1da177e4
LT
2326}
2327
da7071d7 2328static const struct file_operations unix_seq_fops = {
1da177e4
LT
2329 .owner = THIS_MODULE,
2330 .open = unix_seq_open,
2331 .read = seq_read,
2332 .llseek = seq_lseek,
e372c414 2333 .release = seq_release_net,
1da177e4
LT
2334};
2335
2336#endif
2337
ec1b4cf7 2338static const struct net_proto_family unix_family_ops = {
1da177e4
LT
2339 .family = PF_UNIX,
2340 .create = unix_create,
2341 .owner = THIS_MODULE,
2342};
2343
097e66c5 2344
2c8c1e72 2345static int __net_init unix_net_init(struct net *net)
097e66c5
DL
2346{
2347 int error = -ENOMEM;
2348
a0a53c8b 2349 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
2350 if (unix_sysctl_register(net))
2351 goto out;
d392e497 2352
097e66c5 2353#ifdef CONFIG_PROC_FS
1597fbc0
PE
2354 if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2355 unix_sysctl_unregister(net);
097e66c5 2356 goto out;
1597fbc0 2357 }
097e66c5
DL
2358#endif
2359 error = 0;
2360out:
48dcc33e 2361 return error;
097e66c5
DL
2362}
2363
2c8c1e72 2364static void __net_exit unix_net_exit(struct net *net)
097e66c5 2365{
1597fbc0 2366 unix_sysctl_unregister(net);
097e66c5
DL
2367 proc_net_remove(net, "unix");
2368}
2369
2370static struct pernet_operations unix_net_ops = {
2371 .init = unix_net_init,
2372 .exit = unix_net_exit,
2373};
2374
1da177e4
LT
2375static int __init af_unix_init(void)
2376{
2377 int rc = -1;
2378 struct sk_buff *dummy_skb;
2379
ef047f5e 2380 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
1da177e4
LT
2381
2382 rc = proto_register(&unix_proto, 1);
ac7bfa62
YH
2383 if (rc != 0) {
2384 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
0dc47877 2385 __func__);
1da177e4
LT
2386 goto out;
2387 }
2388
2389 sock_register(&unix_family_ops);
097e66c5 2390 register_pernet_subsys(&unix_net_ops);
1da177e4
LT
2391out:
2392 return rc;
2393}
2394
2395static void __exit af_unix_exit(void)
2396{
2397 sock_unregister(PF_UNIX);
1da177e4 2398 proto_unregister(&unix_proto);
097e66c5 2399 unregister_pernet_subsys(&unix_net_ops);
1da177e4
LT
2400}
2401
3d366960
DW
2402/* Earlier than device_initcall() so that other drivers invoking
2403 request_module() don't end up in a loop when modprobe tries
2404 to use a UNIX socket. But later than subsys_initcall() because
2405 we depend on stuff initialised there */
2406fs_initcall(af_unix_init);
1da177e4
LT
2407module_exit(af_unix_exit);
2408
2409MODULE_LICENSE("GPL");
2410MODULE_ALIAS_NETPROTO(PF_UNIX);