connector: bump skb->users before callback invocation
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / unix / af_unix.c
CommitLineData
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
113aa838 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
1da177e4
LT
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetosv : Repaired (I hope) bugs introduces
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 *					has been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
83#include <linux/module.h>
1da177e4 84#include <linux/kernel.h>
1da177e4
LT
85#include <linux/signal.h>
86#include <linux/sched.h>
87#include <linux/errno.h>
88#include <linux/string.h>
89#include <linux/stat.h>
90#include <linux/dcache.h>
91#include <linux/namei.h>
92#include <linux/socket.h>
93#include <linux/un.h>
94#include <linux/fcntl.h>
95#include <linux/termios.h>
96#include <linux/sockios.h>
97#include <linux/net.h>
98#include <linux/in.h>
99#include <linux/fs.h>
100#include <linux/slab.h>
101#include <asm/uaccess.h>
102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
457c4cbc 104#include <net/net_namespace.h>
1da177e4 105#include <net/sock.h>
c752f073 106#include <net/tcp_states.h>
1da177e4
LT
107#include <net/af_unix.h>
108#include <linux/proc_fs.h>
109#include <linux/seq_file.h>
110#include <net/scm.h>
111#include <linux/init.h>
112#include <linux/poll.h>
1da177e4
LT
113#include <linux/rtnetlink.h>
114#include <linux/mount.h>
115#include <net/checksum.h>
116#include <linux/security.h>
117
7123aaa3 118struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
fa7ff56f
PE
119EXPORT_SYMBOL_GPL(unix_socket_table);
120DEFINE_SPINLOCK(unix_table_lock);
121EXPORT_SYMBOL_GPL(unix_table_lock);
518de9b3 122static atomic_long_t unix_nr_socks;
1da177e4 123
1da177e4 124
7123aaa3
ED
125static struct hlist_head *unix_sockets_unbound(void *addr)
126{
127 unsigned long hash = (unsigned long)addr;
128
129 hash ^= hash >> 16;
130 hash ^= hash >> 8;
131 hash %= UNIX_HASH_SIZE;
132 return &unix_socket_table[UNIX_HASH_SIZE + hash];
133}
134
135#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
1da177e4 136
877ce7c1 137#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 138static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 139{
dc49c1f9 140 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
877ce7c1
CZ
141}
142
143static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
144{
dc49c1f9 145 scm->secid = *UNIXSID(skb);
877ce7c1
CZ
146}
147#else
dc49c1f9 148static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
149{ }
150
151static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
152{ }
153#endif /* CONFIG_SECURITY_NETWORK */
154
1da177e4
LT
155/*
156 * SMP locking strategy:
fbe9cc4a 157 * hash table is protected with spinlock unix_table_lock
663717f6 158 * each socket state is protected by separate spin lock.
1da177e4
LT
159 */
160
95c96174 161static inline unsigned int unix_hash_fold(__wsum n)
1da177e4 162{
83bb80f4 163 unsigned int hash = (__force unsigned int)csum_fold(n);
95c96174 164
1da177e4
LT
165 hash ^= hash>>8;
166 return hash&(UNIX_HASH_SIZE-1);
167}
168
169#define unix_peer(sk) (unix_sk(sk)->peer)
170
171static inline int unix_our_peer(struct sock *sk, struct sock *osk)
172{
173 return unix_peer(osk) == sk;
174}
175
176static inline int unix_may_send(struct sock *sk, struct sock *osk)
177{
6eba6a37 178 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
179}
180
3c73419c
RW
181static inline int unix_recvq_full(struct sock const *sk)
182{
183 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
184}
185
fa7ff56f 186struct sock *unix_peer_get(struct sock *s)
1da177e4
LT
187{
188 struct sock *peer;
189
1c92b4e5 190 unix_state_lock(s);
1da177e4
LT
191 peer = unix_peer(s);
192 if (peer)
193 sock_hold(peer);
1c92b4e5 194 unix_state_unlock(s);
1da177e4
LT
195 return peer;
196}
fa7ff56f 197EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
198
199static inline void unix_release_addr(struct unix_address *addr)
200{
201 if (atomic_dec_and_test(&addr->refcnt))
202 kfree(addr);
203}
204
205/*
206 * Check unix socket name:
207 * - should be not zero length.
208 * - if started by not zero, should be NULL terminated (FS object)
209 * - if started by zero, it is abstract name.
210 */
ac7bfa62 211
95c96174 212static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
1da177e4
LT
213{
214 if (len <= sizeof(short) || len > sizeof(*sunaddr))
215 return -EINVAL;
216 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
217 return -EINVAL;
218 if (sunaddr->sun_path[0]) {
219 /*
220 * This may look like an off by one error but it is a bit more
221 * subtle. 108 is the longest valid AF_UNIX path for a binding.
25985edc 222 * sun_path[108] doesn't as such exist. However in kernel space
1da177e4
LT
223 * we are guaranteed that it is a valid memory location in our
224 * kernel address buffer.
225 */
e27dfcea 226 ((char *)sunaddr)[len] = 0;
1da177e4
LT
227 len = strlen(sunaddr->sun_path)+1+sizeof(short);
228 return len;
229 }
230
07f0757a 231 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
1da177e4
LT
232 return len;
233}
234
235static void __unix_remove_socket(struct sock *sk)
236{
237 sk_del_node_init(sk);
238}
239
240static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
241{
547b792c 242 WARN_ON(!sk_unhashed(sk));
1da177e4
LT
243 sk_add_node(sk, list);
244}
245
246static inline void unix_remove_socket(struct sock *sk)
247{
fbe9cc4a 248 spin_lock(&unix_table_lock);
1da177e4 249 __unix_remove_socket(sk);
fbe9cc4a 250 spin_unlock(&unix_table_lock);
1da177e4
LT
251}
252
253static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
254{
fbe9cc4a 255 spin_lock(&unix_table_lock);
1da177e4 256 __unix_insert_socket(list, sk);
fbe9cc4a 257 spin_unlock(&unix_table_lock);
1da177e4
LT
258}
259
097e66c5
DL
260static struct sock *__unix_find_socket_byname(struct net *net,
261 struct sockaddr_un *sunname,
95c96174 262 int len, int type, unsigned int hash)
1da177e4
LT
263{
264 struct sock *s;
1da177e4 265
b67bfe0d 266 sk_for_each(s, &unix_socket_table[hash ^ type]) {
1da177e4
LT
267 struct unix_sock *u = unix_sk(s);
268
878628fb 269 if (!net_eq(sock_net(s), net))
097e66c5
DL
270 continue;
271
1da177e4
LT
272 if (u->addr->len == len &&
273 !memcmp(u->addr->name, sunname, len))
274 goto found;
275 }
276 s = NULL;
277found:
278 return s;
279}
280
097e66c5
DL
281static inline struct sock *unix_find_socket_byname(struct net *net,
282 struct sockaddr_un *sunname,
1da177e4 283 int len, int type,
95c96174 284 unsigned int hash)
1da177e4
LT
285{
286 struct sock *s;
287
fbe9cc4a 288 spin_lock(&unix_table_lock);
097e66c5 289 s = __unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
290 if (s)
291 sock_hold(s);
fbe9cc4a 292 spin_unlock(&unix_table_lock);
1da177e4
LT
293 return s;
294}
295
6616f788 296static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4
LT
297{
298 struct sock *s;
1da177e4 299
fbe9cc4a 300 spin_lock(&unix_table_lock);
b67bfe0d 301 sk_for_each(s,
1da177e4 302 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
40ffe67d 303 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 304
6eba6a37 305 if (dentry && dentry->d_inode == i) {
1da177e4
LT
306 sock_hold(s);
307 goto found;
308 }
309 }
310 s = NULL;
311found:
fbe9cc4a 312 spin_unlock(&unix_table_lock);
1da177e4
LT
313 return s;
314}
315
da8db083
RW
316/* Support code for asymmetrically connected dgram sockets
317 *
318 * If a datagram socket is connected to a socket not itself connected
319 * to the first socket (eg, /dev/log), clients may only enqueue more
320 * messages if the present receive queue of the server socket is not
321 * "too large". This means there's a second writeability condition
322 * poll and sendmsg need to test. The dgram recv code will do a wake
323 * up on the peer_wait wait queue of a socket upon reception of a
324 * datagram which needs to be propagated to sleeping would-be writers
325 * since these might not have sent anything so far. This can't be
326 * accomplished via poll_wait because the lifetime of the server
327 * socket might be less than that of its clients if these break their
328 * association with it or if the server socket is closed while clients
329 * are still connected to it and there's no way to inform "a polling
330 * implementation" that it should let go of a certain wait queue
331 *
332 * In order to propagate a wake up, a wait_queue_t of the client
333 * socket is enqueued on the peer_wait queue of the server socket
334 * whose wake function does a wake_up on the ordinary client socket
335 * wait queue. This connection is established whenever a write (or
336 * poll for write) hit the flow control condition and broken when the
337 * association to the server socket is dissolved or after a wake up
338 * was relayed.
339 */
340
341static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
342 void *key)
343{
344 struct unix_sock *u;
345 wait_queue_head_t *u_sleep;
346
347 u = container_of(q, struct unix_sock, peer_wake);
348
349 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
350 q);
351 u->peer_wake.private = NULL;
352
353 /* relaying can only happen while the wq still exists */
354 u_sleep = sk_sleep(&u->sk);
355 if (u_sleep)
356 wake_up_interruptible_poll(u_sleep, key);
357
358 return 0;
359}
360
361static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
362{
363 struct unix_sock *u, *u_other;
364 int rc;
365
366 u = unix_sk(sk);
367 u_other = unix_sk(other);
368 rc = 0;
369 spin_lock(&u_other->peer_wait.lock);
370
371 if (!u->peer_wake.private) {
372 u->peer_wake.private = other;
373 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
374
375 rc = 1;
376 }
377
378 spin_unlock(&u_other->peer_wait.lock);
379 return rc;
380}
381
382static void unix_dgram_peer_wake_disconnect(struct sock *sk,
383 struct sock *other)
384{
385 struct unix_sock *u, *u_other;
386
387 u = unix_sk(sk);
388 u_other = unix_sk(other);
389 spin_lock(&u_other->peer_wait.lock);
390
391 if (u->peer_wake.private == other) {
392 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
393 u->peer_wake.private = NULL;
394 }
395
396 spin_unlock(&u_other->peer_wait.lock);
397}
398
399static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
400 struct sock *other)
401{
402 unix_dgram_peer_wake_disconnect(sk, other);
403 wake_up_interruptible_poll(sk_sleep(sk),
404 POLLOUT |
405 POLLWRNORM |
406 POLLWRBAND);
407}
408
409/* preconditions:
410 * - unix_peer(sk) == other
411 * - association is stable
412 */
413static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
414{
415 int connected;
416
417 connected = unix_dgram_peer_wake_connect(sk, other);
418
419 if (unix_recvq_full(other))
420 return 1;
421
422 if (connected)
423 unix_dgram_peer_wake_disconnect(sk, other);
424
425 return 0;
426}
427
1da177e4
LT
428static inline int unix_writable(struct sock *sk)
429{
430 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
431}
432
433static void unix_write_space(struct sock *sk)
434{
43815482
ED
435 struct socket_wq *wq;
436
437 rcu_read_lock();
1da177e4 438 if (unix_writable(sk)) {
43815482
ED
439 wq = rcu_dereference(sk->sk_wq);
440 if (wq_has_sleeper(wq))
67426b75
ED
441 wake_up_interruptible_sync_poll(&wq->wait,
442 POLLOUT | POLLWRNORM | POLLWRBAND);
8d8ad9d7 443 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 444 }
43815482 445 rcu_read_unlock();
1da177e4
LT
446}
447
448/* When dgram socket disconnects (or changes its peer), we clear its receive
449 * queue of packets arrived from previous peer. First, it allows to do
450 * flow control based only on wmem_alloc; second, sk connected to peer
451 * may receive messages only from that peer. */
452static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
453{
b03efcfb 454 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
455 skb_queue_purge(&sk->sk_receive_queue);
456 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
457
458 /* If one link of bidirectional dgram pipe is disconnected,
459 * we signal error. Messages are lost. Do not make this,
460 * when peer was not connected to us.
461 */
462 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
463 other->sk_err = ECONNRESET;
464 other->sk_error_report(other);
465 }
466 }
467}
468
469static void unix_sock_destructor(struct sock *sk)
470{
471 struct unix_sock *u = unix_sk(sk);
472
473 skb_queue_purge(&sk->sk_receive_queue);
474
547b792c
IJ
475 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
476 WARN_ON(!sk_unhashed(sk));
477 WARN_ON(sk->sk_socket);
1da177e4 478 if (!sock_flag(sk, SOCK_DEAD)) {
6b41e7dd 479 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
480 return;
481 }
482
483 if (u->addr)
484 unix_release_addr(u->addr);
485
518de9b3 486 atomic_long_dec(&unix_nr_socks);
6f756a8c 487 local_bh_disable();
a8076d8d 488 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
6f756a8c 489 local_bh_enable();
1da177e4 490#ifdef UNIX_REFCNT_DEBUG
518de9b3
ED
491 printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
492 atomic_long_read(&unix_nr_socks));
1da177e4
LT
493#endif
494}
495
ded34e0f 496static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
497{
498 struct unix_sock *u = unix_sk(sk);
40ffe67d 499 struct path path;
1da177e4
LT
500 struct sock *skpair;
501 struct sk_buff *skb;
502 int state;
503
504 unix_remove_socket(sk);
505
506 /* Clear state */
1c92b4e5 507 unix_state_lock(sk);
1da177e4
LT
508 sock_orphan(sk);
509 sk->sk_shutdown = SHUTDOWN_MASK;
40ffe67d
AV
510 path = u->path;
511 u->path.dentry = NULL;
512 u->path.mnt = NULL;
1da177e4
LT
513 state = sk->sk_state;
514 sk->sk_state = TCP_CLOSE;
1c92b4e5 515 unix_state_unlock(sk);
1da177e4
LT
516
517 wake_up_interruptible_all(&u->peer_wait);
518
e27dfcea 519 skpair = unix_peer(sk);
1da177e4 520
e27dfcea 521 if (skpair != NULL) {
1da177e4 522 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 523 unix_state_lock(skpair);
1da177e4
LT
524 /* No more writes */
525 skpair->sk_shutdown = SHUTDOWN_MASK;
526 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
527 skpair->sk_err = ECONNRESET;
1c92b4e5 528 unix_state_unlock(skpair);
1da177e4 529 skpair->sk_state_change(skpair);
8d8ad9d7 530 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4 531 }
da8db083
RW
532
533 unix_dgram_peer_wake_disconnect(sk, skpair);
1da177e4
LT
534 sock_put(skpair); /* It may now die */
535 unix_peer(sk) = NULL;
536 }
537
538 /* Try to flush out this socket. Throw out buffers at least */
539
540 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 541 if (state == TCP_LISTEN)
1da177e4
LT
542 unix_release_sock(skb->sk, 1);
543 /* passed fds are erased in the kfree_skb hook */
544 kfree_skb(skb);
545 }
546
40ffe67d
AV
547 if (path.dentry)
548 path_put(&path);
1da177e4
LT
549
550 sock_put(sk);
551
552 /* ---- Socket is dead now and most probably destroyed ---- */
553
554 /*
e04dae84 555 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
556 * ECONNRESET and we die on the spot. In Linux we behave
557 * like files and pipes do and wait for the last
558 * dereference.
559 *
560 * Can't we simply set sock->err?
561 *
562 * What the above comment does talk about? --ANK(980817)
563 */
564
9305cfa4 565 if (unix_tot_inflight)
ac7bfa62 566 unix_gc(); /* Garbage collect fds */
1da177e4
LT
567}
568
109f6e39
EB
569static void init_peercred(struct sock *sk)
570{
571 put_pid(sk->sk_peer_pid);
572 if (sk->sk_peer_cred)
573 put_cred(sk->sk_peer_cred);
574 sk->sk_peer_pid = get_pid(task_tgid(current));
575 sk->sk_peer_cred = get_current_cred();
576}
577
578static void copy_peercred(struct sock *sk, struct sock *peersk)
579{
580 put_pid(sk->sk_peer_pid);
581 if (sk->sk_peer_cred)
582 put_cred(sk->sk_peer_cred);
583 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
584 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
585}
586
1da177e4
LT
587static int unix_listen(struct socket *sock, int backlog)
588{
589 int err;
590 struct sock *sk = sock->sk;
591 struct unix_sock *u = unix_sk(sk);
109f6e39 592 struct pid *old_pid = NULL;
1da177e4
LT
593
594 err = -EOPNOTSUPP;
6eba6a37
ED
595 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
596 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
597 err = -EINVAL;
598 if (!u->addr)
6eba6a37 599 goto out; /* No listens on an unbound socket */
1c92b4e5 600 unix_state_lock(sk);
1da177e4
LT
601 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
602 goto out_unlock;
603 if (backlog > sk->sk_max_ack_backlog)
604 wake_up_interruptible_all(&u->peer_wait);
605 sk->sk_max_ack_backlog = backlog;
606 sk->sk_state = TCP_LISTEN;
607 /* set credentials so connect can copy them */
109f6e39 608 init_peercred(sk);
1da177e4
LT
609 err = 0;
610
611out_unlock:
1c92b4e5 612 unix_state_unlock(sk);
109f6e39 613 put_pid(old_pid);
1da177e4
LT
614out:
615 return err;
616}
617
618static int unix_release(struct socket *);
619static int unix_bind(struct socket *, struct sockaddr *, int);
620static int unix_stream_connect(struct socket *, struct sockaddr *,
621 int addr_len, int flags);
622static int unix_socketpair(struct socket *, struct socket *);
623static int unix_accept(struct socket *, struct socket *, int);
624static int unix_getname(struct socket *, struct sockaddr *, int *, int);
625static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
ec0d215f
RW
626static unsigned int unix_dgram_poll(struct file *, struct socket *,
627 poll_table *);
1da177e4
LT
628static int unix_ioctl(struct socket *, unsigned int, unsigned long);
629static int unix_shutdown(struct socket *, int);
630static int unix_stream_sendmsg(struct kiocb *, struct socket *,
631 struct msghdr *, size_t);
632static int unix_stream_recvmsg(struct kiocb *, struct socket *,
633 struct msghdr *, size_t, int);
634static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
635 struct msghdr *, size_t);
636static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
637 struct msghdr *, size_t, int);
638static int unix_dgram_connect(struct socket *, struct sockaddr *,
639 int, int);
640static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
641 struct msghdr *, size_t);
a05d2ad1
EB
642static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
643 struct msghdr *, size_t, int);
1da177e4 644
d90d9ff6 645static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
646{
647 struct unix_sock *u = unix_sk(sk);
648
d90d9ff6
SL
649 if (mutex_lock_interruptible(&u->readlock))
650 return -EINTR;
651
f55bb7f9
PE
652 sk->sk_peek_off = val;
653 mutex_unlock(&u->readlock);
d90d9ff6
SL
654
655 return 0;
f55bb7f9
PE
656}
657
658
90ddc4f0 659static const struct proto_ops unix_stream_ops = {
1da177e4
LT
660 .family = PF_UNIX,
661 .owner = THIS_MODULE,
662 .release = unix_release,
663 .bind = unix_bind,
664 .connect = unix_stream_connect,
665 .socketpair = unix_socketpair,
666 .accept = unix_accept,
667 .getname = unix_getname,
668 .poll = unix_poll,
669 .ioctl = unix_ioctl,
670 .listen = unix_listen,
671 .shutdown = unix_shutdown,
672 .setsockopt = sock_no_setsockopt,
673 .getsockopt = sock_no_getsockopt,
674 .sendmsg = unix_stream_sendmsg,
675 .recvmsg = unix_stream_recvmsg,
676 .mmap = sock_no_mmap,
677 .sendpage = sock_no_sendpage,
fc0d7536 678 .set_peek_off = unix_set_peek_off,
1da177e4
LT
679};
680
90ddc4f0 681static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
682 .family = PF_UNIX,
683 .owner = THIS_MODULE,
684 .release = unix_release,
685 .bind = unix_bind,
686 .connect = unix_dgram_connect,
687 .socketpair = unix_socketpair,
688 .accept = sock_no_accept,
689 .getname = unix_getname,
ec0d215f 690 .poll = unix_dgram_poll,
1da177e4
LT
691 .ioctl = unix_ioctl,
692 .listen = sock_no_listen,
693 .shutdown = unix_shutdown,
694 .setsockopt = sock_no_setsockopt,
695 .getsockopt = sock_no_getsockopt,
696 .sendmsg = unix_dgram_sendmsg,
697 .recvmsg = unix_dgram_recvmsg,
698 .mmap = sock_no_mmap,
699 .sendpage = sock_no_sendpage,
f55bb7f9 700 .set_peek_off = unix_set_peek_off,
1da177e4
LT
701};
702
90ddc4f0 703static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
704 .family = PF_UNIX,
705 .owner = THIS_MODULE,
706 .release = unix_release,
707 .bind = unix_bind,
708 .connect = unix_stream_connect,
709 .socketpair = unix_socketpair,
710 .accept = unix_accept,
711 .getname = unix_getname,
ec0d215f 712 .poll = unix_dgram_poll,
1da177e4
LT
713 .ioctl = unix_ioctl,
714 .listen = unix_listen,
715 .shutdown = unix_shutdown,
716 .setsockopt = sock_no_setsockopt,
717 .getsockopt = sock_no_getsockopt,
718 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 719 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
720 .mmap = sock_no_mmap,
721 .sendpage = sock_no_sendpage,
f55bb7f9 722 .set_peek_off = unix_set_peek_off,
1da177e4
LT
723};
724
725static struct proto unix_proto = {
248969ae
ED
726 .name = "UNIX",
727 .owner = THIS_MODULE,
248969ae 728 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
729};
730
a09785a2
IM
731/*
732 * AF_UNIX sockets do not interact with hardware, hence they
733 * dont trigger interrupts - so it's safe for them to have
734 * bh-unsafe locking for their sk_receive_queue.lock. Split off
735 * this special lock-class by reinitializing the spinlock key:
736 */
737static struct lock_class_key af_unix_sk_receive_queue_lock_key;
738
6eba6a37 739static struct sock *unix_create1(struct net *net, struct socket *sock)
1da177e4
LT
740{
741 struct sock *sk = NULL;
742 struct unix_sock *u;
743
518de9b3
ED
744 atomic_long_inc(&unix_nr_socks);
745 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
746 goto out;
747
6257ff21 748 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
1da177e4
LT
749 if (!sk)
750 goto out;
751
6eba6a37 752 sock_init_data(sock, sk);
a09785a2
IM
753 lockdep_set_class(&sk->sk_receive_queue.lock,
754 &af_unix_sk_receive_queue_lock_key);
1da177e4
LT
755
756 sk->sk_write_space = unix_write_space;
a0a53c8b 757 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
758 sk->sk_destruct = unix_sock_destructor;
759 u = unix_sk(sk);
40ffe67d
AV
760 u->path.dentry = NULL;
761 u->path.mnt = NULL;
fd19f329 762 spin_lock_init(&u->lock);
516e0cc5 763 atomic_long_set(&u->inflight, 0);
1fd05ba5 764 INIT_LIST_HEAD(&u->link);
57b47a53 765 mutex_init(&u->readlock); /* single task reading lock */
1da177e4 766 init_waitqueue_head(&u->peer_wait);
da8db083 767 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
7123aaa3 768 unix_insert_socket(unix_sockets_unbound(sk), sk);
1da177e4 769out:
284b327b 770 if (sk == NULL)
518de9b3 771 atomic_long_dec(&unix_nr_socks);
920de804
ED
772 else {
773 local_bh_disable();
a8076d8d 774 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
775 local_bh_enable();
776 }
1da177e4
LT
777 return sk;
778}
779
3f378b68
EP
780static int unix_create(struct net *net, struct socket *sock, int protocol,
781 int kern)
1da177e4
LT
782{
783 if (protocol && protocol != PF_UNIX)
784 return -EPROTONOSUPPORT;
785
786 sock->state = SS_UNCONNECTED;
787
788 switch (sock->type) {
789 case SOCK_STREAM:
790 sock->ops = &unix_stream_ops;
791 break;
792 /*
793 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
794 * nothing uses it.
795 */
796 case SOCK_RAW:
e27dfcea 797 sock->type = SOCK_DGRAM;
1da177e4
LT
798 case SOCK_DGRAM:
799 sock->ops = &unix_dgram_ops;
800 break;
801 case SOCK_SEQPACKET:
802 sock->ops = &unix_seqpacket_ops;
803 break;
804 default:
805 return -ESOCKTNOSUPPORT;
806 }
807
1b8d7ae4 808 return unix_create1(net, sock) ? 0 : -ENOMEM;
1da177e4
LT
809}
810
811static int unix_release(struct socket *sock)
812{
813 struct sock *sk = sock->sk;
814
815 if (!sk)
816 return 0;
817
ded34e0f 818 unix_release_sock(sk, 0);
1da177e4
LT
819 sock->sk = NULL;
820
ded34e0f 821 return 0;
1da177e4
LT
822}
823
824static int unix_autobind(struct socket *sock)
825{
826 struct sock *sk = sock->sk;
3b1e0a65 827 struct net *net = sock_net(sk);
1da177e4
LT
828 struct unix_sock *u = unix_sk(sk);
829 static u32 ordernum = 1;
6eba6a37 830 struct unix_address *addr;
1da177e4 831 int err;
8df73ff9 832 unsigned int retries = 0;
1da177e4 833
57bc52eb
SL
834 err = mutex_lock_interruptible(&u->readlock);
835 if (err)
836 return err;
1da177e4
LT
837
838 err = 0;
839 if (u->addr)
840 goto out;
841
842 err = -ENOMEM;
0da974f4 843 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
844 if (!addr)
845 goto out;
846
1da177e4
LT
847 addr->name->sun_family = AF_UNIX;
848 atomic_set(&addr->refcnt, 1);
849
850retry:
851 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 852 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 853
fbe9cc4a 854 spin_lock(&unix_table_lock);
1da177e4
LT
855 ordernum = (ordernum+1)&0xFFFFF;
856
097e66c5 857 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 858 addr->hash)) {
fbe9cc4a 859 spin_unlock(&unix_table_lock);
8df73ff9
TH
860 /*
861 * __unix_find_socket_byname() may take long time if many names
862 * are already in use.
863 */
864 cond_resched();
865 /* Give up if all names seems to be in use. */
866 if (retries++ == 0xFFFFF) {
867 err = -ENOSPC;
868 kfree(addr);
869 goto out;
870 }
1da177e4
LT
871 goto retry;
872 }
873 addr->hash ^= sk->sk_type;
874
875 __unix_remove_socket(sk);
876 u->addr = addr;
877 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 878 spin_unlock(&unix_table_lock);
1da177e4
LT
879 err = 0;
880
57b47a53 881out: mutex_unlock(&u->readlock);
1da177e4
LT
882 return err;
883}
884
097e66c5
DL
885static struct sock *unix_find_other(struct net *net,
886 struct sockaddr_un *sunname, int len,
95c96174 887 int type, unsigned int hash, int *error)
1da177e4
LT
888{
889 struct sock *u;
421748ec 890 struct path path;
1da177e4 891 int err = 0;
ac7bfa62 892
1da177e4 893 if (sunname->sun_path[0]) {
421748ec
AV
894 struct inode *inode;
895 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
1da177e4
LT
896 if (err)
897 goto fail;
421748ec
AV
898 inode = path.dentry->d_inode;
899 err = inode_permission(inode, MAY_WRITE);
1da177e4
LT
900 if (err)
901 goto put_fail;
902
903 err = -ECONNREFUSED;
421748ec 904 if (!S_ISSOCK(inode->i_mode))
1da177e4 905 goto put_fail;
6616f788 906 u = unix_find_socket_byinode(inode);
1da177e4
LT
907 if (!u)
908 goto put_fail;
909
910 if (u->sk_type == type)
68ac1234 911 touch_atime(&path);
1da177e4 912
421748ec 913 path_put(&path);
1da177e4 914
e27dfcea 915 err = -EPROTOTYPE;
1da177e4
LT
916 if (u->sk_type != type) {
917 sock_put(u);
918 goto fail;
919 }
920 } else {
921 err = -ECONNREFUSED;
e27dfcea 922 u = unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
923 if (u) {
924 struct dentry *dentry;
40ffe67d 925 dentry = unix_sk(u)->path.dentry;
1da177e4 926 if (dentry)
68ac1234 927 touch_atime(&unix_sk(u)->path);
1da177e4
LT
928 } else
929 goto fail;
930 }
931 return u;
932
933put_fail:
421748ec 934 path_put(&path);
1da177e4 935fail:
e27dfcea 936 *error = err;
1da177e4
LT
937 return NULL;
938}
939
faf02010
AV
940static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
941{
942 struct dentry *dentry;
943 struct path path;
944 int err = 0;
945 /*
946 * Get the parent directory, calculate the hash for last
947 * component.
948 */
949 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
950 err = PTR_ERR(dentry);
951 if (IS_ERR(dentry))
952 return err;
953
954 /*
955 * All right, let's create it.
956 */
957 err = security_path_mknod(&path, dentry, mode, 0);
958 if (!err) {
959 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
960 if (!err) {
961 res->mnt = mntget(path.mnt);
962 res->dentry = dget(dentry);
963 }
964 }
965 done_path_create(&path, dentry);
966 return err;
967}
1da177e4
LT
968
/*
 * Bind an AF_UNIX socket to a pathname or abstract address.
 *
 * An addr_len of sizeof(short) (family only) requests autobind.  A
 * pathname address (sun_path[0] != 0) creates a socket inode on disk;
 * an abstract address (sun_path[0] == 0) is looked up purely in the
 * unix_socket_table hash.  u->readlock serializes against concurrent
 * bind/recv on this socket; unix_table_lock protects the global hash.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Family only, no name: pick an abstract address automatically. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		goto out;

	/* Already bound? A socket may only be named once. */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* Pathname address: create the on-disk socket inode. */
		struct path path;
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			unix_release_addr(addr);
			goto out_up;
		}
		/* Filesystem sockets hash by inode number, not by name. */
		addr->hash = UNIX_HASH_SIZE;
		hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		/* Abstract address: name must be unique within the netns. */
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	/* Move the socket from its old hash chain to the bound one. */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1053
278a3de5
DM
/*
 * Lock the state of two unix sockets at once.  Locks are always taken
 * in ascending address order so that concurrent double-locks of the
 * same pair cannot deadlock (ABBA).  @sk2 may be NULL or equal @sk1,
 * in which case only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (!sk2 || sk1 == sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}
	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1068
/*
 * Release the locks taken by unix_state_double_lock().  Unlock order
 * does not matter, so both are dropped in argument order.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (!sk2 || sk1 == sk2) {
		unix_state_unlock(sk1);
		return;
	}

	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
1078
1da177e4
LT
/*
 * connect(2) for SOCK_DGRAM unix sockets: set (or, with AF_UNSPEC,
 * clear) the default peer.  Both sockets' state locks are taken via
 * unix_state_double_lock() so the peer pointer swap is atomic with
 * respect to concurrent senders.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SOCK_PASSCRED requires us to have an address of our own. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		/* Stop waiting for space on the old peer's queue. */
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1154
/*
 * Sleep on @other's peer_wait queue until its receive queue drains (or
 * it dies / shuts down receiving).  Called with @other's state lock
 * held; the lock is released before sleeping and NOT re-taken.
 * Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Decide whether to sleep while still holding the state lock. */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1175
/*
 * connect(2) for SOCK_STREAM/SOCK_SEQPACKET unix sockets.
 *
 * Allocates an embryonic server-side socket (newsk) and a 1-byte skb
 * up front, finds the listener, and - once both state locks are held -
 * splices newsk in as our peer and queues the skb on the listener's
 * receive queue for accept(2) to pick up.  Lock order: the listener
 * ("other", TCP_LISTEN) is locked first, then our own sk with the
 * nested annotation; see the comment at the state latch below.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		/* Backlog full: fail non-blocking connects, else wait. */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* State changed while we were unlocked: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* take it and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1350
1351static int unix_socketpair(struct socket *socka, struct socket *sockb)
1352{
e27dfcea 1353 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1354
1355 /* Join our sockets back to back */
1356 sock_hold(ska);
1357 sock_hold(skb);
e27dfcea
JK
1358 unix_peer(ska) = skb;
1359 unix_peer(skb) = ska;
109f6e39
EB
1360 init_peercred(ska);
1361 init_peercred(skb);
1da177e4
LT
1362
1363 if (ska->sk_type != SOCK_DGRAM) {
1364 ska->sk_state = TCP_ESTABLISHED;
1365 skb->sk_state = TCP_ESTABLISHED;
1366 socka->state = SS_CONNECTED;
1367 sockb->state = SS_CONNECTED;
1368 }
1369 return 0;
1370}
1371
a769ad65
DB
1372static void unix_sock_inherit_flags(const struct socket *old,
1373 struct socket *new)
1374{
1375 if (test_bit(SOCK_PASSCRED, &old->flags))
1376 set_bit(SOCK_PASSCRED, &new->flags);
1377 if (test_bit(SOCK_PASSSEC, &old->flags))
1378 set_bit(SOCK_PASSSEC, &new->flags);
1379}
1380
1da177e4
LT
/*
 * accept(2) for unix stream/seqpacket sockets.  The embryonic peer
 * created by unix_stream_connect() travels as the owning sock of the
 * skb queued on the listener's receive queue; dequeue one and graft
 * that sock onto @newsock.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* A backlog slot freed up: wake blocked connectors. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1423
1424
/*
 * getsockname(2)/getpeername(2).  With @peer set, report the connected
 * peer's address (-ENOTCONN if there is none); otherwise our own.
 * Unbound sockets report just the family with an empty path.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		/* Balance the sock_put() below for the non-peer case. */
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		/* Unbound: family only, no pathname. */
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}
1460
/*
 * Move the passed-fd list from @skb to @scm and drop the in-flight
 * accounting taken by unix_attach_fds(), so the garbage collector no
 * longer considers these files in transit.
 */
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}
1471
/*
 * skb destructor for AF_UNIX skbs carrying SCM data: release the pid
 * reference, detach any passed file descriptors, and return write
 * memory to the sending socket.
 */
static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}
1485
25888e30
ED
/* Cap on SCM_RIGHTS nesting (sockets passed inside sockets ...). */
#define MAX_RECURSION_LEVEL 4

/*
 * Attach the file descriptors in @scm to @skb for passing.
 *
 * Returns the maximum recursion level seen among any unix sockets
 * being passed (>= 0), or a negative errno: -ETOOMANYREFS if nesting
 * exceeds MAX_RECURSION_LEVEL, -ENOMEM if the fp list cannot be
 * duplicated.  Each passed file is marked in-flight for the GC.
 */
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	if (unix_sock_count) {
		for (i = scm->fp->count - 1; i >= 0; i--)
			unix_inflight(scm->fp->fp[i]);
	}
	return max_level;
}
1521
/*
 * Copy SCM metadata (pid, credentials, and optionally passed fds) from
 * @scm into @skb's control block and install the destructor that will
 * release them.  Returns unix_attach_fds()'s result (max recursion
 * level) or 0 when no fds are sent; negative errno on failure.
 */
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}
1536
16e57262
ED
/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	/* Credentials already attached by scm_send(): nothing to do. */
	if (UNIXCB(skb).pid)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
1554
1da177e4
LT
1555/*
1556 * Send AF_UNIX data.
1557 */
1558
/*
 * sendmsg(2) for SOCK_DGRAM (and, via unix_seqpacket_sendmsg, for
 * SOCK_SEQPACKET).  The destination is either msg_name or the
 * connected peer.  When the receiver's queue is full we either block
 * in unix_wait_for_peer() or, for non-blocking senders, register on
 * the peer-wake mechanism and return -EAGAIN.  sk_locked tracks
 * whether our own state lock is held alongside the peer's (taken in
 * address order by unix_state_double_lock()).
 */
static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;
	int max_level;
	int data_len = 0;
	int sk_locked;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No explicit destination: use the connected peer. */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	/* Large datagrams go partly into page frags. */
	if (len > SKB_MAX_ALLOC)
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;
	unix_get_secdata(siocb->scm, skb);

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			/* Our connected peer died: disconnect from it. */
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* Receiver queue full (and we are not its connected peer). */
	if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		/* Non-blocking: take both locks to arm the peer wakeup. */
		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		/* Queue may have drained while relocking: recheck. */
		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1757
ac7bfa62 1758
1da177e4
LT
/*
 * sendmsg(2) for SOCK_STREAM unix sockets.  The payload is chopped
 * into skbs of at most half the send buffer and queued directly on the
 * peer's receive queue.  Passed fds travel only in the first skb.
 * Note: no reference is taken on @other here; the connected peer
 * pointer is stable for the lifetime of the connection.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* Explicit destinations make no sense on a stream socket. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 * Optimisation for the fact that under 0.01% of X
		 * messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 * Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 * If you pass two values to the sock_alloc_send_skb
		 * it tries to grab the large buffer with GFP_NOFS
		 * (which can fail easily), and if it fails grab the
		 * fallback size buffer which is under a page and will
		 * succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));


		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Peer died or stopped reading: raise EPIPE/SIGPIPE. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* Report partial progress if any data was queued before the error. */
	return sent ? : err;
}
1878
1879static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1880 struct msghdr *msg, size_t len)
1881{
1882 int err;
1883 struct sock *sk = sock->sk;
ac7bfa62 1884
1da177e4
LT
1885 err = sock_error(sk);
1886 if (err)
1887 return err;
1888
1889 if (sk->sk_state != TCP_ESTABLISHED)
1890 return -ENOTCONN;
1891
1892 if (msg->msg_namelen)
1893 msg->msg_namelen = 0;
1894
1895 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1896}
ac7bfa62 1897
a05d2ad1
EB
1898static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1899 struct msghdr *msg, size_t size,
1900 int flags)
1901{
1902 struct sock *sk = sock->sk;
1903
1904 if (sk->sk_state != TCP_ESTABLISHED)
1905 return -ENOTCONN;
1906
1907 return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1908}
1909
1da177e4
LT
1910static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1911{
1912 struct unix_sock *u = unix_sk(sk);
1913
1da177e4
LT
1914 if (u->addr) {
1915 msg->msg_namelen = u->addr->len;
1916 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1917 }
1918}
1919
/*
 * recvmsg(2) for SOCK_DGRAM/SOCK_SEQPACKET.  u->readlock serializes
 * readers so SCM data and the datagram itself are consumed atomically.
 * Honors MSG_PEEK (fds are cloned rather than detached) and the
 * SO_PEEK_OFF offset.
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	err = mutex_lock_interruptible(&u->readlock);
	if (unlikely(err)) {
		/* recvmsg() in non blocking mode is supposed to return -EAGAIN
		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
		 */
		err = noblock ? -EAGAIN : -ERESTARTSYS;
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* A queue slot freed: wake senders blocked on a full queue. */
	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	/* With MSG_TRUNC, report the full datagram length past the offset. */
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
2019
/*
 * Sleep until more data has arrived. But check for races..
 *
 * @last is the receive-queue tail the caller last saw; any change to
 * it means new data.  Also wakes on error, shutdown, signal, expired
 * timeout or socket death.  Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (skb_peek_tail(&sk->sk_receive_queue) != last ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		/* Drop the state lock around the actual sleep. */
		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		/* sk_socket may be gone if the socket died: bail out. */
		if (sock_flag(sk, SOCK_DEAD))
			break;

		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2055
1da177e4
LT
2056static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
2057 struct msghdr *msg, size_t size,
2058 int flags)
2059{
2060 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
2061 struct scm_cookie tmp_scm;
2062 struct sock *sk = sock->sk;
2063 struct unix_sock *u = unix_sk(sk);
e27dfcea 2064 struct sockaddr_un *sunaddr = msg->msg_name;
1da177e4 2065 int copied = 0;
f423fefe 2066 int noblock = flags & MSG_DONTWAIT;
1da177e4
LT
2067 int check_creds = 0;
2068 int target;
2069 int err = 0;
2070 long timeo;
fc0d7536 2071 int skip;
1da177e4
LT
2072
2073 err = -EINVAL;
2074 if (sk->sk_state != TCP_ESTABLISHED)
2075 goto out;
2076
2077 err = -EOPNOTSUPP;
2078 if (flags&MSG_OOB)
2079 goto out;
2080
2081 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
f423fefe 2082 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2083
1da177e4
LT
2084 /* Lock the socket to prevent queue disordering
2085 * while sleeps in memcpy_tomsg
2086 */
2087
2088 if (!siocb->scm) {
2089 siocb->scm = &tmp_scm;
2090 memset(&tmp_scm, 0, sizeof(tmp_scm));
2091 }
2092
85ec9232 2093 mutex_lock(&u->readlock);
1da177e4 2094
6eba6a37 2095 do {
1da177e4 2096 int chunk;
79f632c7 2097 struct sk_buff *skb, *last;
1da177e4 2098
3c0d2f37 2099 unix_state_lock(sk);
7659c934
MS
2100 if (sock_flag(sk, SOCK_DEAD)) {
2101 err = -ECONNRESET;
2102 goto unlock;
2103 }
79f632c7 2104 last = skb = skb_peek(&sk->sk_receive_queue);
fc0d7536 2105again:
6eba6a37 2106 if (skb == NULL) {
25888e30 2107 unix_sk(sk)->recursion_level = 0;
1da177e4 2108 if (copied >= target)
3c0d2f37 2109 goto unlock;
1da177e4
LT
2110
2111 /*
2112 * POSIX 1003.1g mandates this order.
2113 */
ac7bfa62 2114
6eba6a37
ED
2115 err = sock_error(sk);
2116 if (err)
3c0d2f37 2117 goto unlock;
1da177e4 2118 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
2119 goto unlock;
2120
2121 unix_state_unlock(sk);
1da177e4
LT
2122 err = -EAGAIN;
2123 if (!timeo)
2124 break;
57b47a53 2125 mutex_unlock(&u->readlock);
1da177e4 2126
79f632c7 2127 timeo = unix_stream_data_wait(sk, timeo, last);
1da177e4 2128
3a57e783 2129 if (signal_pending(current)) {
1da177e4
LT
2130 err = sock_intr_errno(timeo);
2131 goto out;
2132 }
b3ca9b02 2133
3a57e783 2134 mutex_lock(&u->readlock);
1da177e4 2135 continue;
3c0d2f37
MS
2136 unlock:
2137 unix_state_unlock(sk);
2138 break;
1da177e4 2139 }
fc0d7536 2140
79f632c7
BP
2141 skip = sk_peek_offset(sk, flags);
2142 while (skip >= skb->len) {
fc0d7536 2143 skip -= skb->len;
79f632c7 2144 last = skb;
fc0d7536 2145 skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2146 if (!skb)
2147 goto again;
fc0d7536
PE
2148 }
2149
3c0d2f37 2150 unix_state_unlock(sk);
1da177e4
LT
2151
2152 if (check_creds) {
2153 /* Never glue messages from different writers */
7361c36c 2154 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
6b0ee8c0
EB
2155 !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
2156 !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
1da177e4 2157 break;
0e82e7f6 2158 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1da177e4 2159 /* Copy credentials */
6b0ee8c0 2160 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1da177e4
LT
2161 check_creds = 1;
2162 }
2163
2164 /* Copy address just once */
6eba6a37 2165 if (sunaddr) {
1da177e4
LT
2166 unix_copy_addr(msg, skb->sk);
2167 sunaddr = NULL;
2168 }
2169
fc0d7536
PE
2170 chunk = min_t(unsigned int, skb->len - skip, size);
2171 if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
1da177e4
LT
2172 if (copied == 0)
2173 copied = -EFAULT;
2174 break;
2175 }
2176 copied += chunk;
2177 size -= chunk;
2178
2179 /* Mark read part of skb as used */
6eba6a37 2180 if (!(flags & MSG_PEEK)) {
1da177e4
LT
2181 skb_pull(skb, chunk);
2182
fc0d7536
PE
2183 sk_peek_offset_bwd(sk, chunk);
2184
1da177e4
LT
2185 if (UNIXCB(skb).fp)
2186 unix_detach_fds(siocb->scm, skb);
2187
6f01fd6e 2188 if (skb->len)
1da177e4 2189 break;
1da177e4 2190
6f01fd6e 2191 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2192 consume_skb(skb);
1da177e4
LT
2193
2194 if (siocb->scm->fp)
2195 break;
6eba6a37 2196 } else {
1da177e4
LT
2197 /* It is questionable, see note in unix_dgram_recvmsg.
2198 */
2199 if (UNIXCB(skb).fp)
2200 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2201
1f21dc67
AC
2202 if (skip) {
2203 sk_peek_offset_fwd(sk, chunk);
2204 skip -= chunk;
2205 }
2206
2207 if (UNIXCB(skb).fp)
2208 break;
fc0d7536 2209
1f21dc67
AC
2210 last = skb;
2211 unix_state_lock(sk);
2212 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2213 if (skb)
2214 goto again;
2215 unix_state_unlock(sk);
1da177e4
LT
2216 break;
2217 }
2218 } while (size);
2219
57b47a53 2220 mutex_unlock(&u->readlock);
1da177e4
LT
2221 scm_recv(sock, msg, siocb->scm, flags);
2222out:
2223 return copied ? : err;
2224}
2225
2226static int unix_shutdown(struct socket *sock, int mode)
2227{
2228 struct sock *sk = sock->sk;
2229 struct sock *other;
2230
fc61b928
XW
2231 if (mode < SHUT_RD || mode > SHUT_RDWR)
2232 return -EINVAL;
2233 /* This maps:
2234 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2235 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2236 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2237 */
2238 ++mode;
7180a031
AC
2239
2240 unix_state_lock(sk);
2241 sk->sk_shutdown |= mode;
2242 other = unix_peer(sk);
2243 if (other)
2244 sock_hold(other);
2245 unix_state_unlock(sk);
2246 sk->sk_state_change(sk);
2247
2248 if (other &&
2249 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2250
2251 int peer_mode = 0;
2252
2253 if (mode&RCV_SHUTDOWN)
2254 peer_mode |= SEND_SHUTDOWN;
2255 if (mode&SEND_SHUTDOWN)
2256 peer_mode |= RCV_SHUTDOWN;
2257 unix_state_lock(other);
2258 other->sk_shutdown |= peer_mode;
2259 unix_state_unlock(other);
2260 other->sk_state_change(other);
2261 if (peer_mode == SHUTDOWN_MASK)
2262 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2263 else if (peer_mode & RCV_SHUTDOWN)
2264 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2265 }
7180a031
AC
2266 if (other)
2267 sock_put(other);
2268
1da177e4
LT
2269 return 0;
2270}
2271
885ee74d
PE
2272long unix_inq_len(struct sock *sk)
2273{
2274 struct sk_buff *skb;
2275 long amount = 0;
2276
2277 if (sk->sk_state == TCP_LISTEN)
2278 return -EINVAL;
2279
2280 spin_lock(&sk->sk_receive_queue.lock);
2281 if (sk->sk_type == SOCK_STREAM ||
2282 sk->sk_type == SOCK_SEQPACKET) {
2283 skb_queue_walk(&sk->sk_receive_queue, skb)
2284 amount += skb->len;
2285 } else {
2286 skb = skb_peek(&sk->sk_receive_queue);
2287 if (skb)
2288 amount = skb->len;
2289 }
2290 spin_unlock(&sk->sk_receive_queue.lock);
2291
2292 return amount;
2293}
2294EXPORT_SYMBOL_GPL(unix_inq_len);
2295
/*
 * unix_outq_len - bytes queued but not yet consumed by the peer (SIOCOUTQ).
 */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

2302static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2303{
2304 struct sock *sk = sock->sk;
e27dfcea 2305 long amount = 0;
1da177e4
LT
2306 int err;
2307
6eba6a37
ED
2308 switch (cmd) {
2309 case SIOCOUTQ:
885ee74d 2310 amount = unix_outq_len(sk);
6eba6a37
ED
2311 err = put_user(amount, (int __user *)arg);
2312 break;
2313 case SIOCINQ:
885ee74d
PE
2314 amount = unix_inq_len(sk);
2315 if (amount < 0)
2316 err = amount;
2317 else
1da177e4 2318 err = put_user(amount, (int __user *)arg);
885ee74d 2319 break;
6eba6a37
ED
2320 default:
2321 err = -ENOIOCTLCMD;
2322 break;
1da177e4
LT
2323 }
2324 return err;
2325}
2326
6eba6a37 2327static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
2328{
2329 struct sock *sk = sock->sk;
2330 unsigned int mask;
2331
aa395145 2332 sock_poll_wait(file, sk_sleep(sk), wait);
1da177e4
LT
2333 mask = 0;
2334
2335 /* exceptional events? */
2336 if (sk->sk_err)
2337 mask |= POLLERR;
2338 if (sk->sk_shutdown == SHUTDOWN_MASK)
2339 mask |= POLLHUP;
f348d70a 2340 if (sk->sk_shutdown & RCV_SHUTDOWN)
db40980f 2341 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
1da177e4
LT
2342
2343 /* readable? */
db40980f 2344 if (!skb_queue_empty(&sk->sk_receive_queue))
1da177e4
LT
2345 mask |= POLLIN | POLLRDNORM;
2346
2347 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2348 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2349 sk->sk_state == TCP_CLOSE)
1da177e4
LT
2350 mask |= POLLHUP;
2351
2352 /*
2353 * we set writable also when the other side has shut down the
2354 * connection. This prevents stuck sockets.
2355 */
2356 if (unix_writable(sk))
2357 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2358
2359 return mask;
2360}
2361
ec0d215f
RW
2362static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2363 poll_table *wait)
3c73419c 2364{
ec0d215f
RW
2365 struct sock *sk = sock->sk, *other;
2366 unsigned int mask, writable;
3c73419c 2367
aa395145 2368 sock_poll_wait(file, sk_sleep(sk), wait);
3c73419c
RW
2369 mask = 0;
2370
2371 /* exceptional events? */
2372 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
7d4c04fc 2373 mask |= POLLERR |
8facd5fb 2374 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
7d4c04fc 2375
3c73419c 2376 if (sk->sk_shutdown & RCV_SHUTDOWN)
5456f09a 2377 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
3c73419c
RW
2378 if (sk->sk_shutdown == SHUTDOWN_MASK)
2379 mask |= POLLHUP;
2380
2381 /* readable? */
5456f09a 2382 if (!skb_queue_empty(&sk->sk_receive_queue))
3c73419c
RW
2383 mask |= POLLIN | POLLRDNORM;
2384
2385 /* Connection-based need to check for termination and startup */
2386 if (sk->sk_type == SOCK_SEQPACKET) {
2387 if (sk->sk_state == TCP_CLOSE)
2388 mask |= POLLHUP;
2389 /* connection hasn't started yet? */
2390 if (sk->sk_state == TCP_SYN_SENT)
2391 return mask;
2392 }
2393
973a34aa 2394 /* No write status requested, avoid expensive OUT tests. */
626cf236 2395 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
973a34aa
ED
2396 return mask;
2397
ec0d215f 2398 writable = unix_writable(sk);
da8db083
RW
2399 if (writable) {
2400 unix_state_lock(sk);
2401
2402 other = unix_peer(sk);
2403 if (other && unix_peer(other) != sk &&
2404 unix_recvq_full(other) &&
2405 unix_dgram_peer_wake_me(sk, other))
2406 writable = 0;
2407
2408 unix_state_unlock(sk);
ec0d215f
RW
2409 }
2410
2411 if (writable)
3c73419c
RW
2412 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2413 else
2414 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2415
3c73419c
RW
2416 return mask;
2417}
1da177e4
LT
2418
2419#ifdef CONFIG_PROC_FS
a53eb3fe 2420
7123aaa3
ED
2421#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2422
2423#define get_bucket(x) ((x) >> BUCKET_SPACE)
2424#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2425#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 2426
7123aaa3 2427static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 2428{
7123aaa3
ED
2429 unsigned long offset = get_offset(*pos);
2430 unsigned long bucket = get_bucket(*pos);
2431 struct sock *sk;
2432 unsigned long count = 0;
1da177e4 2433
7123aaa3
ED
2434 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2435 if (sock_net(sk) != seq_file_net(seq))
097e66c5 2436 continue;
7123aaa3
ED
2437 if (++count == offset)
2438 break;
2439 }
2440
2441 return sk;
2442}
2443
2444static struct sock *unix_next_socket(struct seq_file *seq,
2445 struct sock *sk,
2446 loff_t *pos)
2447{
2448 unsigned long bucket;
2449
2450 while (sk > (struct sock *)SEQ_START_TOKEN) {
2451 sk = sk_next(sk);
2452 if (!sk)
2453 goto next_bucket;
2454 if (sock_net(sk) == seq_file_net(seq))
2455 return sk;
1da177e4 2456 }
7123aaa3
ED
2457
2458 do {
2459 sk = unix_from_bucket(seq, pos);
2460 if (sk)
2461 return sk;
2462
2463next_bucket:
2464 bucket = get_bucket(*pos) + 1;
2465 *pos = set_bucket_offset(bucket, 1);
2466 } while (bucket < ARRAY_SIZE(unix_socket_table));
2467
1da177e4
LT
2468 return NULL;
2469}
2470
1da177e4 2471static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2472 __acquires(unix_table_lock)
1da177e4 2473{
fbe9cc4a 2474 spin_lock(&unix_table_lock);
7123aaa3
ED
2475
2476 if (!*pos)
2477 return SEQ_START_TOKEN;
2478
2479 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2480 return NULL;
2481
2482 return unix_next_socket(seq, NULL, pos);
1da177e4
LT
2483}
2484
2485static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2486{
2487 ++*pos;
7123aaa3 2488 return unix_next_socket(seq, v, pos);
1da177e4
LT
2489}
2490
2491static void unix_seq_stop(struct seq_file *seq, void *v)
9a429c49 2492 __releases(unix_table_lock)
1da177e4 2493{
fbe9cc4a 2494 spin_unlock(&unix_table_lock);
1da177e4
LT
2495}
2496
2497static int unix_seq_show(struct seq_file *seq, void *v)
2498{
ac7bfa62 2499
b9f3124f 2500 if (v == SEQ_START_TOKEN)
1da177e4
LT
2501 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2502 "Inode Path\n");
2503 else {
2504 struct sock *s = v;
2505 struct unix_sock *u = unix_sk(s);
1c92b4e5 2506 unix_state_lock(s);
1da177e4 2507
71338aa7 2508 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4
LT
2509 s,
2510 atomic_read(&s->sk_refcnt),
2511 0,
2512 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2513 s->sk_type,
2514 s->sk_socket ?
2515 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2516 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2517 sock_i_ino(s));
2518
2519 if (u->addr) {
2520 int i, len;
2521 seq_putc(seq, ' ');
2522
2523 i = 0;
2524 len = u->addr->len - sizeof(short);
2525 if (!UNIX_ABSTRACT(s))
2526 len--;
2527 else {
2528 seq_putc(seq, '@');
2529 i++;
2530 }
2531 for ( ; i < len; i++)
2532 seq_putc(seq, u->addr->name->sun_path[i]);
2533 }
1c92b4e5 2534 unix_state_unlock(s);
1da177e4
LT
2535 seq_putc(seq, '\n');
2536 }
2537
2538 return 0;
2539}
2540
56b3d975 2541static const struct seq_operations unix_seq_ops = {
1da177e4
LT
2542 .start = unix_seq_start,
2543 .next = unix_seq_next,
2544 .stop = unix_seq_stop,
2545 .show = unix_seq_show,
2546};
2547
1da177e4
LT
2548static int unix_seq_open(struct inode *inode, struct file *file)
2549{
e372c414 2550 return seq_open_net(inode, file, &unix_seq_ops,
8b51b064 2551 sizeof(struct seq_net_private));
1da177e4
LT
2552}
2553
da7071d7 2554static const struct file_operations unix_seq_fops = {
1da177e4
LT
2555 .owner = THIS_MODULE,
2556 .open = unix_seq_open,
2557 .read = seq_read,
2558 .llseek = seq_lseek,
e372c414 2559 .release = seq_release_net,
1da177e4
LT
2560};
2561
2562#endif
2563
ec1b4cf7 2564static const struct net_proto_family unix_family_ops = {
1da177e4
LT
2565 .family = PF_UNIX,
2566 .create = unix_create,
2567 .owner = THIS_MODULE,
2568};
2569
097e66c5 2570
2c8c1e72 2571static int __net_init unix_net_init(struct net *net)
097e66c5
DL
2572{
2573 int error = -ENOMEM;
2574
a0a53c8b 2575 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
2576 if (unix_sysctl_register(net))
2577 goto out;
d392e497 2578
097e66c5 2579#ifdef CONFIG_PROC_FS
d4beaa66 2580 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
1597fbc0 2581 unix_sysctl_unregister(net);
097e66c5 2582 goto out;
1597fbc0 2583 }
097e66c5
DL
2584#endif
2585 error = 0;
2586out:
48dcc33e 2587 return error;
097e66c5
DL
2588}
2589
2c8c1e72 2590static void __net_exit unix_net_exit(struct net *net)
097e66c5 2591{
1597fbc0 2592 unix_sysctl_unregister(net);
ece31ffd 2593 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
2594}
2595
2596static struct pernet_operations unix_net_ops = {
2597 .init = unix_net_init,
2598 .exit = unix_net_exit,
2599};
2600
1da177e4
LT
2601static int __init af_unix_init(void)
2602{
2603 int rc = -1;
1da177e4 2604
b4fff5f8 2605 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
1da177e4
LT
2606
2607 rc = proto_register(&unix_proto, 1);
ac7bfa62
YH
2608 if (rc != 0) {
2609 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
0dc47877 2610 __func__);
1da177e4
LT
2611 goto out;
2612 }
2613
2614 sock_register(&unix_family_ops);
097e66c5 2615 register_pernet_subsys(&unix_net_ops);
1da177e4
LT
2616out:
2617 return rc;
2618}
2619
2620static void __exit af_unix_exit(void)
2621{
2622 sock_unregister(PF_UNIX);
1da177e4 2623 proto_unregister(&unix_proto);
097e66c5 2624 unregister_pernet_subsys(&unix_net_ops);
1da177e4
LT
2625}
2626
3d366960
DW
2627/* Earlier than device_initcall() so that other drivers invoking
2628 request_module() don't end up in a loop when modprobe tries
2629 to use a UNIX socket. But later than subsys_initcall() because
2630 we depend on stuff initialised there */
2631fs_initcall(af_unix_init);
1da177e4
LT
2632module_exit(af_unix_exit);
2633
2634MODULE_LICENSE("GPL");
2635MODULE_ALIAS_NETPROTO(PF_UNIX);