Linux 3.10.94
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / unix / af_unix.c
CommitLineData
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
113aa838 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
1da177e4
LT
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
 *		Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *						by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
83#include <linux/module.h>
1da177e4 84#include <linux/kernel.h>
1da177e4
LT
85#include <linux/signal.h>
86#include <linux/sched.h>
87#include <linux/errno.h>
88#include <linux/string.h>
89#include <linux/stat.h>
90#include <linux/dcache.h>
91#include <linux/namei.h>
92#include <linux/socket.h>
93#include <linux/un.h>
94#include <linux/fcntl.h>
95#include <linux/termios.h>
96#include <linux/sockios.h>
97#include <linux/net.h>
98#include <linux/in.h>
99#include <linux/fs.h>
100#include <linux/slab.h>
101#include <asm/uaccess.h>
102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
457c4cbc 104#include <net/net_namespace.h>
1da177e4 105#include <net/sock.h>
c752f073 106#include <net/tcp_states.h>
1da177e4
LT
107#include <net/af_unix.h>
108#include <linux/proc_fs.h>
109#include <linux/seq_file.h>
110#include <net/scm.h>
111#include <linux/init.h>
112#include <linux/poll.h>
1da177e4
LT
113#include <linux/rtnetlink.h>
114#include <linux/mount.h>
115#include <net/checksum.h>
116#include <linux/security.h>
117
7123aaa3 118struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
fa7ff56f
PE
119EXPORT_SYMBOL_GPL(unix_socket_table);
120DEFINE_SPINLOCK(unix_table_lock);
121EXPORT_SYMBOL_GPL(unix_table_lock);
518de9b3 122static atomic_long_t unix_nr_socks;
1da177e4 123
1da177e4 124
7123aaa3
ED
125static struct hlist_head *unix_sockets_unbound(void *addr)
126{
127 unsigned long hash = (unsigned long)addr;
128
129 hash ^= hash >> 16;
130 hash ^= hash >> 8;
131 hash %= UNIX_HASH_SIZE;
132 return &unix_socket_table[UNIX_HASH_SIZE + hash];
133}
134
/*
 * True when the socket's address hash lies below UNIX_HASH_SIZE.
 * unix_bind() stores exactly UNIX_HASH_SIZE as the hash of
 * filesystem-bound sockets, so those evaluate to false here.
 * The socket must have an address: ->addr is dereferenced unchecked.
 */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
1da177e4 136
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the security id carried by the scm cookie into the skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Load the security id stored in the skb back into the scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are empty stubs. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
154
1da177e4
LT
/*
 *  SMP locking strategy:
 *    - the hash table is protected by the spinlock unix_table_lock;
 *    - each socket's state is protected by its own separate spinlock.
 */
160
95c96174 161static inline unsigned int unix_hash_fold(__wsum n)
1da177e4 162{
83bb80f4 163 unsigned int hash = (__force unsigned int)csum_fold(n);
95c96174 164
1da177e4
LT
165 hash ^= hash>>8;
166 return hash&(UNIX_HASH_SIZE-1);
167}
168
/*
 * Accessor for the peer pointer of an AF_UNIX sock. It expands to the
 * struct member itself, so it is also used as an assignment target.
 */
#define unix_peer(sk)	(unix_sk(sk)->peer)
170
/* Non-zero when @osk currently has @sk recorded as its peer. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
175
176static inline int unix_may_send(struct sock *sk, struct sock *osk)
177{
6eba6a37 178 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
179}
180
3c73419c
RW
181static inline int unix_recvq_full(struct sock const *sk)
182{
183 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
184}
185
fa7ff56f 186struct sock *unix_peer_get(struct sock *s)
1da177e4
LT
187{
188 struct sock *peer;
189
1c92b4e5 190 unix_state_lock(s);
1da177e4
LT
191 peer = unix_peer(s);
192 if (peer)
193 sock_hold(peer);
1c92b4e5 194 unix_state_unlock(s);
1da177e4
LT
195 return peer;
196}
fa7ff56f 197EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
198
199static inline void unix_release_addr(struct unix_address *addr)
200{
201 if (atomic_dec_and_test(&addr->refcnt))
202 kfree(addr);
203}
204
205/*
206 * Check unix socket name:
207 * - should be not zero length.
208 * - if started by not zero, should be NULL terminated (FS object)
209 * - if started by zero, it is abstract name.
210 */
ac7bfa62 211
95c96174 212static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
1da177e4
LT
213{
214 if (len <= sizeof(short) || len > sizeof(*sunaddr))
215 return -EINVAL;
216 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
217 return -EINVAL;
218 if (sunaddr->sun_path[0]) {
219 /*
220 * This may look like an off by one error but it is a bit more
221 * subtle. 108 is the longest valid AF_UNIX path for a binding.
25985edc 222 * sun_path[108] doesn't as such exist. However in kernel space
1da177e4
LT
223 * we are guaranteed that it is a valid memory location in our
224 * kernel address buffer.
225 */
e27dfcea 226 ((char *)sunaddr)[len] = 0;
1da177e4
LT
227 len = strlen(sunaddr->sun_path)+1+sizeof(short);
228 return len;
229 }
230
07f0757a 231 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
1da177e4
LT
232 return len;
233}
234
/*
 * Unlink @sk from the chain it is on. The locked wrapper
 * unix_remove_socket() calls this with unix_table_lock held.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
239
/*
 * Link @sk onto the chain @list. Warns if @sk is still hashed elsewhere.
 * The locked wrapper unix_insert_socket() calls this with unix_table_lock
 * held.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
245
246static inline void unix_remove_socket(struct sock *sk)
247{
fbe9cc4a 248 spin_lock(&unix_table_lock);
1da177e4 249 __unix_remove_socket(sk);
fbe9cc4a 250 spin_unlock(&unix_table_lock);
1da177e4
LT
251}
252
253static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
254{
fbe9cc4a 255 spin_lock(&unix_table_lock);
1da177e4 256 __unix_insert_socket(list, sk);
fbe9cc4a 257 spin_unlock(&unix_table_lock);
1da177e4
LT
258}
259
097e66c5
DL
260static struct sock *__unix_find_socket_byname(struct net *net,
261 struct sockaddr_un *sunname,
95c96174 262 int len, int type, unsigned int hash)
1da177e4
LT
263{
264 struct sock *s;
1da177e4 265
b67bfe0d 266 sk_for_each(s, &unix_socket_table[hash ^ type]) {
1da177e4
LT
267 struct unix_sock *u = unix_sk(s);
268
878628fb 269 if (!net_eq(sock_net(s), net))
097e66c5
DL
270 continue;
271
1da177e4
LT
272 if (u->addr->len == len &&
273 !memcmp(u->addr->name, sunname, len))
274 goto found;
275 }
276 s = NULL;
277found:
278 return s;
279}
280
097e66c5
DL
281static inline struct sock *unix_find_socket_byname(struct net *net,
282 struct sockaddr_un *sunname,
1da177e4 283 int len, int type,
95c96174 284 unsigned int hash)
1da177e4
LT
285{
286 struct sock *s;
287
fbe9cc4a 288 spin_lock(&unix_table_lock);
097e66c5 289 s = __unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
290 if (s)
291 sock_hold(s);
fbe9cc4a 292 spin_unlock(&unix_table_lock);
1da177e4
LT
293 return s;
294}
295
6616f788 296static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4
LT
297{
298 struct sock *s;
1da177e4 299
fbe9cc4a 300 spin_lock(&unix_table_lock);
b67bfe0d 301 sk_for_each(s,
1da177e4 302 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
40ffe67d 303 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 304
6eba6a37 305 if (dentry && dentry->d_inode == i) {
1da177e4
LT
306 sock_hold(s);
307 goto found;
308 }
309 }
310 s = NULL;
311found:
fbe9cc4a 312 spin_unlock(&unix_table_lock);
1da177e4
LT
313 return s;
314}
315
316static inline int unix_writable(struct sock *sk)
317{
318 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
319}
320
321static void unix_write_space(struct sock *sk)
322{
43815482
ED
323 struct socket_wq *wq;
324
325 rcu_read_lock();
1da177e4 326 if (unix_writable(sk)) {
43815482
ED
327 wq = rcu_dereference(sk->sk_wq);
328 if (wq_has_sleeper(wq))
67426b75
ED
329 wake_up_interruptible_sync_poll(&wq->wait,
330 POLLOUT | POLLWRNORM | POLLWRBAND);
8d8ad9d7 331 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 332 }
43815482 333 rcu_read_unlock();
1da177e4
LT
334}
335
336/* When dgram socket disconnects (or changes its peer), we clear its receive
337 * queue of packets arrived from previous peer. First, it allows to do
338 * flow control based only on wmem_alloc; second, sk connected to peer
339 * may receive messages only from that peer. */
340static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
341{
b03efcfb 342 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
343 skb_queue_purge(&sk->sk_receive_queue);
344 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
345
346 /* If one link of bidirectional dgram pipe is disconnected,
347 * we signal error. Messages are lost. Do not make this,
348 * when peer was not connected to us.
349 */
350 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
351 other->sk_err = ECONNRESET;
352 other->sk_error_report(other);
353 }
354 }
355}
356
357static void unix_sock_destructor(struct sock *sk)
358{
359 struct unix_sock *u = unix_sk(sk);
360
361 skb_queue_purge(&sk->sk_receive_queue);
362
547b792c
IJ
363 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
364 WARN_ON(!sk_unhashed(sk));
365 WARN_ON(sk->sk_socket);
1da177e4 366 if (!sock_flag(sk, SOCK_DEAD)) {
6b41e7dd 367 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
368 return;
369 }
370
371 if (u->addr)
372 unix_release_addr(u->addr);
373
518de9b3 374 atomic_long_dec(&unix_nr_socks);
6f756a8c 375 local_bh_disable();
a8076d8d 376 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
6f756a8c 377 local_bh_enable();
1da177e4 378#ifdef UNIX_REFCNT_DEBUG
518de9b3
ED
379 printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
380 atomic_long_read(&unix_nr_socks));
1da177e4
LT
381#endif
382}
383
ded34e0f 384static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
385{
386 struct unix_sock *u = unix_sk(sk);
40ffe67d 387 struct path path;
1da177e4
LT
388 struct sock *skpair;
389 struct sk_buff *skb;
390 int state;
391
392 unix_remove_socket(sk);
393
394 /* Clear state */
1c92b4e5 395 unix_state_lock(sk);
1da177e4
LT
396 sock_orphan(sk);
397 sk->sk_shutdown = SHUTDOWN_MASK;
40ffe67d
AV
398 path = u->path;
399 u->path.dentry = NULL;
400 u->path.mnt = NULL;
1da177e4
LT
401 state = sk->sk_state;
402 sk->sk_state = TCP_CLOSE;
1c92b4e5 403 unix_state_unlock(sk);
1da177e4
LT
404
405 wake_up_interruptible_all(&u->peer_wait);
406
e27dfcea 407 skpair = unix_peer(sk);
1da177e4 408
e27dfcea 409 if (skpair != NULL) {
1da177e4 410 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 411 unix_state_lock(skpair);
1da177e4
LT
412 /* No more writes */
413 skpair->sk_shutdown = SHUTDOWN_MASK;
414 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
415 skpair->sk_err = ECONNRESET;
1c92b4e5 416 unix_state_unlock(skpair);
1da177e4 417 skpair->sk_state_change(skpair);
8d8ad9d7 418 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4
LT
419 }
420 sock_put(skpair); /* It may now die */
421 unix_peer(sk) = NULL;
422 }
423
424 /* Try to flush out this socket. Throw out buffers at least */
425
426 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 427 if (state == TCP_LISTEN)
1da177e4
LT
428 unix_release_sock(skb->sk, 1);
429 /* passed fds are erased in the kfree_skb hook */
430 kfree_skb(skb);
431 }
432
40ffe67d
AV
433 if (path.dentry)
434 path_put(&path);
1da177e4
LT
435
436 sock_put(sk);
437
438 /* ---- Socket is dead now and most probably destroyed ---- */
439
440 /*
e04dae84 441 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
442 * ECONNRESET and we die on the spot. In Linux we behave
443 * like files and pipes do and wait for the last
444 * dereference.
445 *
446 * Can't we simply set sock->err?
447 *
448 * What the above comment does talk about? --ANK(980817)
449 */
450
9305cfa4 451 if (unix_tot_inflight)
ac7bfa62 452 unix_gc(); /* Garbage collect fds */
1da177e4
LT
453}
454
109f6e39
EB
455static void init_peercred(struct sock *sk)
456{
457 put_pid(sk->sk_peer_pid);
458 if (sk->sk_peer_cred)
459 put_cred(sk->sk_peer_cred);
460 sk->sk_peer_pid = get_pid(task_tgid(current));
461 sk->sk_peer_cred = get_current_cred();
462}
463
464static void copy_peercred(struct sock *sk, struct sock *peersk)
465{
466 put_pid(sk->sk_peer_pid);
467 if (sk->sk_peer_cred)
468 put_cred(sk->sk_peer_cred);
469 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
470 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
471}
472
1da177e4
LT
473static int unix_listen(struct socket *sock, int backlog)
474{
475 int err;
476 struct sock *sk = sock->sk;
477 struct unix_sock *u = unix_sk(sk);
109f6e39 478 struct pid *old_pid = NULL;
1da177e4
LT
479
480 err = -EOPNOTSUPP;
6eba6a37
ED
481 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
482 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
483 err = -EINVAL;
484 if (!u->addr)
6eba6a37 485 goto out; /* No listens on an unbound socket */
1c92b4e5 486 unix_state_lock(sk);
1da177e4
LT
487 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
488 goto out_unlock;
489 if (backlog > sk->sk_max_ack_backlog)
490 wake_up_interruptible_all(&u->peer_wait);
491 sk->sk_max_ack_backlog = backlog;
492 sk->sk_state = TCP_LISTEN;
493 /* set credentials so connect can copy them */
109f6e39 494 init_peercred(sk);
1da177e4
LT
495 err = 0;
496
497out_unlock:
1c92b4e5 498 unix_state_unlock(sk);
109f6e39 499 put_pid(old_pid);
1da177e4
LT
500out:
501 return err;
502}
503
504static int unix_release(struct socket *);
505static int unix_bind(struct socket *, struct sockaddr *, int);
506static int unix_stream_connect(struct socket *, struct sockaddr *,
507 int addr_len, int flags);
508static int unix_socketpair(struct socket *, struct socket *);
509static int unix_accept(struct socket *, struct socket *, int);
510static int unix_getname(struct socket *, struct sockaddr *, int *, int);
511static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
ec0d215f
RW
512static unsigned int unix_dgram_poll(struct file *, struct socket *,
513 poll_table *);
1da177e4
LT
514static int unix_ioctl(struct socket *, unsigned int, unsigned long);
515static int unix_shutdown(struct socket *, int);
516static int unix_stream_sendmsg(struct kiocb *, struct socket *,
517 struct msghdr *, size_t);
518static int unix_stream_recvmsg(struct kiocb *, struct socket *,
519 struct msghdr *, size_t, int);
520static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
521 struct msghdr *, size_t);
522static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
523 struct msghdr *, size_t, int);
524static int unix_dgram_connect(struct socket *, struct sockaddr *,
525 int, int);
526static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
527 struct msghdr *, size_t);
a05d2ad1
EB
528static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
529 struct msghdr *, size_t, int);
1da177e4 530
d90d9ff6 531static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
532{
533 struct unix_sock *u = unix_sk(sk);
534
d90d9ff6
SL
535 if (mutex_lock_interruptible(&u->readlock))
536 return -EINTR;
537
f55bb7f9
PE
538 sk->sk_peek_off = val;
539 mutex_unlock(&u->readlock);
d90d9ff6
SL
540
541 return 0;
f55bb7f9
PE
542}
543
544
90ddc4f0 545static const struct proto_ops unix_stream_ops = {
1da177e4
LT
546 .family = PF_UNIX,
547 .owner = THIS_MODULE,
548 .release = unix_release,
549 .bind = unix_bind,
550 .connect = unix_stream_connect,
551 .socketpair = unix_socketpair,
552 .accept = unix_accept,
553 .getname = unix_getname,
554 .poll = unix_poll,
555 .ioctl = unix_ioctl,
556 .listen = unix_listen,
557 .shutdown = unix_shutdown,
558 .setsockopt = sock_no_setsockopt,
559 .getsockopt = sock_no_getsockopt,
560 .sendmsg = unix_stream_sendmsg,
561 .recvmsg = unix_stream_recvmsg,
562 .mmap = sock_no_mmap,
563 .sendpage = sock_no_sendpage,
fc0d7536 564 .set_peek_off = unix_set_peek_off,
1da177e4
LT
565};
566
90ddc4f0 567static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
568 .family = PF_UNIX,
569 .owner = THIS_MODULE,
570 .release = unix_release,
571 .bind = unix_bind,
572 .connect = unix_dgram_connect,
573 .socketpair = unix_socketpair,
574 .accept = sock_no_accept,
575 .getname = unix_getname,
ec0d215f 576 .poll = unix_dgram_poll,
1da177e4
LT
577 .ioctl = unix_ioctl,
578 .listen = sock_no_listen,
579 .shutdown = unix_shutdown,
580 .setsockopt = sock_no_setsockopt,
581 .getsockopt = sock_no_getsockopt,
582 .sendmsg = unix_dgram_sendmsg,
583 .recvmsg = unix_dgram_recvmsg,
584 .mmap = sock_no_mmap,
585 .sendpage = sock_no_sendpage,
f55bb7f9 586 .set_peek_off = unix_set_peek_off,
1da177e4
LT
587};
588
90ddc4f0 589static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
590 .family = PF_UNIX,
591 .owner = THIS_MODULE,
592 .release = unix_release,
593 .bind = unix_bind,
594 .connect = unix_stream_connect,
595 .socketpair = unix_socketpair,
596 .accept = unix_accept,
597 .getname = unix_getname,
ec0d215f 598 .poll = unix_dgram_poll,
1da177e4
LT
599 .ioctl = unix_ioctl,
600 .listen = unix_listen,
601 .shutdown = unix_shutdown,
602 .setsockopt = sock_no_setsockopt,
603 .getsockopt = sock_no_getsockopt,
604 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 605 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
606 .mmap = sock_no_mmap,
607 .sendpage = sock_no_sendpage,
f55bb7f9 608 .set_peek_off = unix_set_peek_off,
1da177e4
LT
609};
610
611static struct proto unix_proto = {
248969ae
ED
612 .name = "UNIX",
613 .owner = THIS_MODULE,
248969ae 614 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
615};
616
a09785a2
IM
617/*
618 * AF_UNIX sockets do not interact with hardware, hence they
619 * dont trigger interrupts - so it's safe for them to have
620 * bh-unsafe locking for their sk_receive_queue.lock. Split off
621 * this special lock-class by reinitializing the spinlock key:
622 */
623static struct lock_class_key af_unix_sk_receive_queue_lock_key;
624
6eba6a37 625static struct sock *unix_create1(struct net *net, struct socket *sock)
1da177e4
LT
626{
627 struct sock *sk = NULL;
628 struct unix_sock *u;
629
518de9b3
ED
630 atomic_long_inc(&unix_nr_socks);
631 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
632 goto out;
633
6257ff21 634 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
1da177e4
LT
635 if (!sk)
636 goto out;
637
6eba6a37 638 sock_init_data(sock, sk);
a09785a2
IM
639 lockdep_set_class(&sk->sk_receive_queue.lock,
640 &af_unix_sk_receive_queue_lock_key);
1da177e4
LT
641
642 sk->sk_write_space = unix_write_space;
a0a53c8b 643 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
644 sk->sk_destruct = unix_sock_destructor;
645 u = unix_sk(sk);
40ffe67d
AV
646 u->path.dentry = NULL;
647 u->path.mnt = NULL;
fd19f329 648 spin_lock_init(&u->lock);
516e0cc5 649 atomic_long_set(&u->inflight, 0);
1fd05ba5 650 INIT_LIST_HEAD(&u->link);
57b47a53 651 mutex_init(&u->readlock); /* single task reading lock */
1da177e4 652 init_waitqueue_head(&u->peer_wait);
7123aaa3 653 unix_insert_socket(unix_sockets_unbound(sk), sk);
1da177e4 654out:
284b327b 655 if (sk == NULL)
518de9b3 656 atomic_long_dec(&unix_nr_socks);
920de804
ED
657 else {
658 local_bh_disable();
a8076d8d 659 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
660 local_bh_enable();
661 }
1da177e4
LT
662 return sk;
663}
664
3f378b68
EP
665static int unix_create(struct net *net, struct socket *sock, int protocol,
666 int kern)
1da177e4
LT
667{
668 if (protocol && protocol != PF_UNIX)
669 return -EPROTONOSUPPORT;
670
671 sock->state = SS_UNCONNECTED;
672
673 switch (sock->type) {
674 case SOCK_STREAM:
675 sock->ops = &unix_stream_ops;
676 break;
677 /*
678 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
679 * nothing uses it.
680 */
681 case SOCK_RAW:
e27dfcea 682 sock->type = SOCK_DGRAM;
1da177e4
LT
683 case SOCK_DGRAM:
684 sock->ops = &unix_dgram_ops;
685 break;
686 case SOCK_SEQPACKET:
687 sock->ops = &unix_seqpacket_ops;
688 break;
689 default:
690 return -ESOCKTNOSUPPORT;
691 }
692
1b8d7ae4 693 return unix_create1(net, sock) ? 0 : -ENOMEM;
1da177e4
LT
694}
695
696static int unix_release(struct socket *sock)
697{
698 struct sock *sk = sock->sk;
699
700 if (!sk)
701 return 0;
702
ded34e0f 703 unix_release_sock(sk, 0);
1da177e4
LT
704 sock->sk = NULL;
705
ded34e0f 706 return 0;
1da177e4
LT
707}
708
709static int unix_autobind(struct socket *sock)
710{
711 struct sock *sk = sock->sk;
3b1e0a65 712 struct net *net = sock_net(sk);
1da177e4
LT
713 struct unix_sock *u = unix_sk(sk);
714 static u32 ordernum = 1;
6eba6a37 715 struct unix_address *addr;
1da177e4 716 int err;
8df73ff9 717 unsigned int retries = 0;
1da177e4 718
57bc52eb
SL
719 err = mutex_lock_interruptible(&u->readlock);
720 if (err)
721 return err;
1da177e4
LT
722
723 err = 0;
724 if (u->addr)
725 goto out;
726
727 err = -ENOMEM;
0da974f4 728 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
729 if (!addr)
730 goto out;
731
1da177e4
LT
732 addr->name->sun_family = AF_UNIX;
733 atomic_set(&addr->refcnt, 1);
734
735retry:
736 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 737 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 738
fbe9cc4a 739 spin_lock(&unix_table_lock);
1da177e4
LT
740 ordernum = (ordernum+1)&0xFFFFF;
741
097e66c5 742 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 743 addr->hash)) {
fbe9cc4a 744 spin_unlock(&unix_table_lock);
8df73ff9
TH
745 /*
746 * __unix_find_socket_byname() may take long time if many names
747 * are already in use.
748 */
749 cond_resched();
750 /* Give up if all names seems to be in use. */
751 if (retries++ == 0xFFFFF) {
752 err = -ENOSPC;
753 kfree(addr);
754 goto out;
755 }
1da177e4
LT
756 goto retry;
757 }
758 addr->hash ^= sk->sk_type;
759
760 __unix_remove_socket(sk);
761 u->addr = addr;
762 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 763 spin_unlock(&unix_table_lock);
1da177e4
LT
764 err = 0;
765
57b47a53 766out: mutex_unlock(&u->readlock);
1da177e4
LT
767 return err;
768}
769
097e66c5
DL
/*
 * Look up the socket addressed by @sunname.
 *
 * A name starting with a non-zero byte is resolved through the
 * filesystem: the path must be writable by the caller, be a socket
 * inode, and belong to a socket of type @type. Any other name is looked
 * up in the abstract name hash.
 *
 * Returns the socket with a reference held, or NULL with the reason
 * stored in *@error (a lookup/permission error, -ECONNREFUSED or
 * -EPROTOTYPE). *@error is not written on success.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	struct inode *inode;
	int err = 0;

	if (!sunname->sun_path[0]) {
		/* Abstract name. */
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!u)
			goto fail;

		if (unix_sk(u)->path.dentry)
			touch_atime(&unix_sk(u)->path);
		return u;
	}

	/* Filesystem name. */
	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;
	u = unix_find_socket_byinode(inode);
	if (!u)
		goto put_fail;

	if (u->sk_type != type) {
		/* Wrong socket type: drop both the path and the socket. */
		path_put(&path);
		sock_put(u);
		err = -EPROTOTYPE;
		goto fail;
	}

	touch_atime(&path);
	path_put(&path);
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
824
faf02010
AV
825static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
826{
827 struct dentry *dentry;
828 struct path path;
829 int err = 0;
830 /*
831 * Get the parent directory, calculate the hash for last
832 * component.
833 */
834 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
835 err = PTR_ERR(dentry);
836 if (IS_ERR(dentry))
837 return err;
838
839 /*
840 * All right, let's create it.
841 */
842 err = security_path_mknod(&path, dentry, mode, 0);
843 if (!err) {
844 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
845 if (!err) {
846 res->mnt = mntget(path.mnt);
847 res->dentry = dget(dentry);
848 }
849 }
850 done_path_create(&path, dentry);
851 return err;
852}
1da177e4
LT
853
854static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
855{
856 struct sock *sk = sock->sk;
3b1e0a65 857 struct net *net = sock_net(sk);
1da177e4 858 struct unix_sock *u = unix_sk(sk);
e27dfcea 859 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
dae6ad8f 860 char *sun_path = sunaddr->sun_path;
1da177e4 861 int err;
95c96174 862 unsigned int hash;
1da177e4
LT
863 struct unix_address *addr;
864 struct hlist_head *list;
865
866 err = -EINVAL;
867 if (sunaddr->sun_family != AF_UNIX)
868 goto out;
869
e27dfcea 870 if (addr_len == sizeof(short)) {
1da177e4
LT
871 err = unix_autobind(sock);
872 goto out;
873 }
874
875 err = unix_mkname(sunaddr, addr_len, &hash);
876 if (err < 0)
877 goto out;
878 addr_len = err;
879
57bc52eb
SL
880 err = mutex_lock_interruptible(&u->readlock);
881 if (err)
882 goto out;
1da177e4
LT
883
884 err = -EINVAL;
885 if (u->addr)
886 goto out_up;
887
888 err = -ENOMEM;
889 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
890 if (!addr)
891 goto out_up;
892
893 memcpy(addr->name, sunaddr, addr_len);
894 addr->len = addr_len;
895 addr->hash = hash ^ sk->sk_type;
896 atomic_set(&addr->refcnt, 1);
897
dae6ad8f 898 if (sun_path[0]) {
faf02010
AV
899 struct path path;
900 umode_t mode = S_IFSOCK |
ce3b0f8d 901 (SOCK_INODE(sock)->i_mode & ~current_umask());
faf02010
AV
902 err = unix_mknod(sun_path, mode, &path);
903 if (err) {
904 if (err == -EEXIST)
905 err = -EADDRINUSE;
906 unix_release_addr(addr);
907 goto out_up;
908 }
1da177e4 909 addr->hash = UNIX_HASH_SIZE;
faf02010
AV
910 hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
911 spin_lock(&unix_table_lock);
912 u->path = path;
913 list = &unix_socket_table[hash];
914 } else {
915 spin_lock(&unix_table_lock);
1da177e4 916 err = -EADDRINUSE;
097e66c5 917 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1da177e4
LT
918 sk->sk_type, hash)) {
919 unix_release_addr(addr);
920 goto out_unlock;
921 }
922
923 list = &unix_socket_table[addr->hash];
1da177e4
LT
924 }
925
926 err = 0;
927 __unix_remove_socket(sk);
928 u->addr = addr;
929 __unix_insert_socket(list, sk);
930
931out_unlock:
fbe9cc4a 932 spin_unlock(&unix_table_lock);
1da177e4 933out_up:
57b47a53 934 mutex_unlock(&u->readlock);
1da177e4
LT
935out:
936 return err;
1da177e4
LT
937}
938
278a3de5
DM
/*
 * Take the state locks of two sockets. When @sk2 is NULL or the same
 * socket as @sk1, only @sk1 is locked. Otherwise the socket at the lower
 * address is locked first and the other one with the nested variant, so
 * every caller acquires the pair in the same order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
953
/*
 * Counterpart of unix_state_double_lock(): release @sk1 and, unless @sk2
 * is NULL or the same socket, @sk2 as well.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
963
1da177e4
LT
964static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
965 int alen, int flags)
966{
967 struct sock *sk = sock->sk;
3b1e0a65 968 struct net *net = sock_net(sk);
e27dfcea 969 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1da177e4 970 struct sock *other;
95c96174 971 unsigned int hash;
1da177e4
LT
972 int err;
973
974 if (addr->sa_family != AF_UNSPEC) {
975 err = unix_mkname(sunaddr, alen, &hash);
976 if (err < 0)
977 goto out;
978 alen = err;
979
980 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
981 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
982 goto out;
983
278a3de5 984restart:
e27dfcea 985 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1da177e4
LT
986 if (!other)
987 goto out;
988
278a3de5
DM
989 unix_state_double_lock(sk, other);
990
991 /* Apparently VFS overslept socket death. Retry. */
992 if (sock_flag(other, SOCK_DEAD)) {
993 unix_state_double_unlock(sk, other);
994 sock_put(other);
995 goto restart;
996 }
1da177e4
LT
997
998 err = -EPERM;
999 if (!unix_may_send(sk, other))
1000 goto out_unlock;
1001
1002 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1003 if (err)
1004 goto out_unlock;
1005
1006 } else {
1007 /*
1008 * 1003.1g breaking connected state with AF_UNSPEC
1009 */
1010 other = NULL;
278a3de5 1011 unix_state_double_lock(sk, other);
1da177e4
LT
1012 }
1013
1014 /*
1015 * If it was connected, reconnect.
1016 */
1017 if (unix_peer(sk)) {
1018 struct sock *old_peer = unix_peer(sk);
e27dfcea 1019 unix_peer(sk) = other;
278a3de5 1020 unix_state_double_unlock(sk, other);
1da177e4
LT
1021
1022 if (other != old_peer)
1023 unix_dgram_disconnected(sk, old_peer);
1024 sock_put(old_peer);
1025 } else {
e27dfcea 1026 unix_peer(sk) = other;
278a3de5 1027 unix_state_double_unlock(sk, other);
1da177e4 1028 }
ac7bfa62 1029 return 0;
1da177e4
LT
1030
1031out_unlock:
278a3de5 1032 unix_state_double_unlock(sk, other);
1da177e4
LT
1033 sock_put(other);
1034out:
1035 return err;
1036}
1037
1038static long unix_wait_for_peer(struct sock *other, long timeo)
1039{
1040 struct unix_sock *u = unix_sk(other);
1041 int sched;
1042 DEFINE_WAIT(wait);
1043
1044 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1045
1046 sched = !sock_flag(other, SOCK_DEAD) &&
1047 !(other->sk_shutdown & RCV_SHUTDOWN) &&
3c73419c 1048 unix_recvq_full(other);
1da177e4 1049
1c92b4e5 1050 unix_state_unlock(other);
1da177e4
LT
1051
1052 if (sched)
1053 timeo = schedule_timeout(timeo);
1054
1055 finish_wait(&u->peer_wait, &wait);
1056 return timeo;
1057}
1058
/*
 * connect() for stream and seqpacket sockets.
 *
 * A fresh sock (newsk) that will become the accepted end of the
 * connection is created up front, together with a one byte skb owned by
 * it.  Once the listener is found and both sockets pass the state
 * checks, sk and newsk are made peers of each other and the skb is
 * queued on the listener, where unix_accept() picks newsk up through
 * skb->sk.
 *
 * Returns 0 on success or a negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *new_u, *listener_u;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int own_state;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err != 0)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/*
	 * Allocate everything before any state is locked; doing it later
	 * would force us to recheck all the state again anyway.
	 */
	err = -ENOMEM;

	/* The sock that will represent the server side of the connection. */
	newsk = unix_create1(net, NULL);
	if (!newsk)
		goto out;

	/* The skb that announces the connection to the listener. */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (!skb)
		goto out;

restart:
	/* Find the listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch the state of the peer. */
	unix_state_lock(other);

	/* The socket we found died in the meantime: look it up again. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN ||
	    (other->sk_shutdown & RCV_SHUTDOWN))
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops other's state lock. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/*
	 * Latch our own state.
	 *
	 * This is the tricky part: we must take our own state lock
	 * without dropping the peer's, which could deadlock.  Connecting
	 * to ourselves and simultaneous connect attempts are ruled out by
	 * looking at the socket state first: other is TCP_LISTEN, and if
	 * sk were TCP_LISTEN too we bail out before trying to lock it.
	 *
	 * The state has to be rechecked once the lock is held.
	 */
	own_state = sk->sk_state;

	if (own_state == TCP_ESTABLISHED) {
		/* Socket is already connected. */
		err = -EISCONN;
		goto out_unlock;
	}
	if (own_state != TCP_CLOSE) {
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != own_state) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* Nothing can fail any more: wire up all the necessary fields. */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	new_u = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &new_u->peer_wq);
	listener_u = unix_sk(other);

	/* Copy address information from the listening to the new sock. */
	if (listener_u->addr) {
		atomic_inc(&listener_u->addr->refcnt);
		new_u->addr = listener_u->addr;
	}
	if (listener_u->path.dentry) {
		path_get(&listener_u->path);
		new_u->path = listener_u->path;
	}

	/* Set credentials. */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* Take ten and send the info to the listening sock. */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1233
1234static int unix_socketpair(struct socket *socka, struct socket *sockb)
1235{
e27dfcea 1236 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1237
1238 /* Join our sockets back to back */
1239 sock_hold(ska);
1240 sock_hold(skb);
e27dfcea
JK
1241 unix_peer(ska) = skb;
1242 unix_peer(skb) = ska;
109f6e39
EB
1243 init_peercred(ska);
1244 init_peercred(skb);
1da177e4
LT
1245
1246 if (ska->sk_type != SOCK_DGRAM) {
1247 ska->sk_state = TCP_ESTABLISHED;
1248 skb->sk_state = TCP_ESTABLISHED;
1249 socka->state = SS_CONNECTED;
1250 sockb->state = SS_CONNECTED;
1251 }
1252 return 0;
1253}
1254
a769ad65
DB
1255static void unix_sock_inherit_flags(const struct socket *old,
1256 struct socket *new)
1257{
1258 if (test_bit(SOCK_PASSCRED, &old->flags))
1259 set_bit(SOCK_PASSCRED, &new->flags);
1260 if (test_bit(SOCK_PASSSEC, &old->flags))
1261 set_bit(SOCK_PASSSEC, &new->flags);
1262}
1263
1da177e4
LT
1264static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1265{
1266 struct sock *sk = sock->sk;
1267 struct sock *tsk;
1268 struct sk_buff *skb;
1269 int err;
1270
1271 err = -EOPNOTSUPP;
6eba6a37 1272 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1273 goto out;
1274
1275 err = -EINVAL;
1276 if (sk->sk_state != TCP_LISTEN)
1277 goto out;
1278
1279 /* If socket state is TCP_LISTEN it cannot change (for now...),
1280 * so that no locks are necessary.
1281 */
1282
1283 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1284 if (!skb) {
1285 /* This means receive shutdown. */
1286 if (err == 0)
1287 err = -EINVAL;
1288 goto out;
1289 }
1290
1291 tsk = skb->sk;
1292 skb_free_datagram(sk, skb);
1293 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1294
1295 /* attach accepted sock to socket */
1c92b4e5 1296 unix_state_lock(tsk);
1da177e4 1297 newsock->state = SS_CONNECTED;
a769ad65 1298 unix_sock_inherit_flags(sock, newsock);
1da177e4 1299 sock_graft(tsk, newsock);
1c92b4e5 1300 unix_state_unlock(tsk);
1da177e4
LT
1301 return 0;
1302
1303out:
1304 return err;
1305}
1306
1307
1308static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1309{
1310 struct sock *sk = sock->sk;
1311 struct unix_sock *u;
13cfa97b 1312 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1313 int err = 0;
1314
1315 if (peer) {
1316 sk = unix_peer_get(sk);
1317
1318 err = -ENOTCONN;
1319 if (!sk)
1320 goto out;
1321 err = 0;
1322 } else {
1323 sock_hold(sk);
1324 }
1325
1326 u = unix_sk(sk);
1c92b4e5 1327 unix_state_lock(sk);
1da177e4
LT
1328 if (!u->addr) {
1329 sunaddr->sun_family = AF_UNIX;
1330 sunaddr->sun_path[0] = 0;
1331 *uaddr_len = sizeof(short);
1332 } else {
1333 struct unix_address *addr = u->addr;
1334
1335 *uaddr_len = addr->len;
1336 memcpy(sunaddr, addr->name, *uaddr_len);
1337 }
1c92b4e5 1338 unix_state_unlock(sk);
1da177e4
LT
1339 sock_put(sk);
1340out:
1341 return err;
1342}
1343
1344static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1345{
1346 int i;
1347
1348 scm->fp = UNIXCB(skb).fp;
1da177e4
LT
1349 UNIXCB(skb).fp = NULL;
1350
6eba6a37 1351 for (i = scm->fp->count-1; i >= 0; i--)
1da177e4
LT
1352 unix_notinflight(scm->fp->fp[i]);
1353}
1354
7361c36c 1355static void unix_destruct_scm(struct sk_buff *skb)
1da177e4
LT
1356{
1357 struct scm_cookie scm;
1358 memset(&scm, 0, sizeof(scm));
7361c36c 1359 scm.pid = UNIXCB(skb).pid;
7361c36c
EB
1360 if (UNIXCB(skb).fp)
1361 unix_detach_fds(&scm, skb);
1da177e4
LT
1362
1363 /* Alas, it calls VFS */
1364 /* So fscking what? fput() had been SMP-safe since the last Summer */
1365 scm_destroy(&scm);
1366 sock_wfree(skb);
1367}
1368
25888e30
ED
1369#define MAX_RECURSION_LEVEL 4
1370
6209344f 1371static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1da177e4
LT
1372{
1373 int i;
25888e30
ED
1374 unsigned char max_level = 0;
1375 int unix_sock_count = 0;
1376
1377 for (i = scm->fp->count - 1; i >= 0; i--) {
1378 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1379
1380 if (sk) {
1381 unix_sock_count++;
1382 max_level = max(max_level,
1383 unix_sk(sk)->recursion_level);
1384 }
1385 }
1386 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1387 return -ETOOMANYREFS;
6209344f
MS
1388
1389 /*
1390 * Need to duplicate file references for the sake of garbage
1391 * collection. Otherwise a socket in the fps might become a
1392 * candidate for GC while the skb is not yet queued.
1393 */
1394 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1395 if (!UNIXCB(skb).fp)
1396 return -ENOMEM;
1397
25888e30
ED
1398 if (unix_sock_count) {
1399 for (i = scm->fp->count - 1; i >= 0; i--)
1400 unix_inflight(scm->fp->fp[i]);
1401 }
1402 return max_level;
1da177e4
LT
1403}
1404
f78a5fda 1405static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1406{
1407 int err = 0;
16e57262 1408
f78a5fda 1409 UNIXCB(skb).pid = get_pid(scm->pid);
6b0ee8c0
EB
1410 UNIXCB(skb).uid = scm->creds.uid;
1411 UNIXCB(skb).gid = scm->creds.gid;
7361c36c
EB
1412 UNIXCB(skb).fp = NULL;
1413 if (scm->fp && send_fds)
1414 err = unix_attach_fds(scm, skb);
1415
1416 skb->destructor = unix_destruct_scm;
1417 return err;
1418}
1419
16e57262
ED
1420/*
1421 * Some apps rely on write() giving SCM_CREDENTIALS
1422 * We include credentials if source or destination socket
1423 * asserted SOCK_PASSCRED.
1424 */
1425static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1426 const struct sock *other)
1427{
6b0ee8c0 1428 if (UNIXCB(skb).pid)
16e57262
ED
1429 return;
1430 if (test_bit(SOCK_PASSCRED, &sock->flags) ||
25da0e3e
EB
1431 !other->sk_socket ||
1432 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
16e57262 1433 UNIXCB(skb).pid = get_pid(task_tgid(current));
6e0895c2 1434 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
16e57262
ED
1435 }
1436}
1437
1da177e4
LT
1438/*
1439 * Send AF_UNIX data.
1440 */
1441
1442static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1443 struct msghdr *msg, size_t len)
1444{
1445 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1446 struct sock *sk = sock->sk;
3b1e0a65 1447 struct net *net = sock_net(sk);
1da177e4 1448 struct unix_sock *u = unix_sk(sk);
e27dfcea 1449 struct sockaddr_un *sunaddr = msg->msg_name;
1da177e4
LT
1450 struct sock *other = NULL;
1451 int namelen = 0; /* fake GCC */
1452 int err;
95c96174 1453 unsigned int hash;
f78a5fda 1454 struct sk_buff *skb;
1da177e4
LT
1455 long timeo;
1456 struct scm_cookie tmp_scm;
25888e30 1457 int max_level;
eb6a2481 1458 int data_len = 0;
1da177e4
LT
1459
1460 if (NULL == siocb->scm)
1461 siocb->scm = &tmp_scm;
5f23b734 1462 wait_for_unix_gc();
e0e3cea4 1463 err = scm_send(sock, msg, siocb->scm, false);
1da177e4
LT
1464 if (err < 0)
1465 return err;
1466
1467 err = -EOPNOTSUPP;
1468 if (msg->msg_flags&MSG_OOB)
1469 goto out;
1470
1471 if (msg->msg_namelen) {
1472 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1473 if (err < 0)
1474 goto out;
1475 namelen = err;
1476 } else {
1477 sunaddr = NULL;
1478 err = -ENOTCONN;
1479 other = unix_peer_get(sk);
1480 if (!other)
1481 goto out;
1482 }
1483
f64f9e71
JP
1484 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1485 && (err = unix_autobind(sock)) != 0)
1da177e4
LT
1486 goto out;
1487
1488 err = -EMSGSIZE;
1489 if (len > sk->sk_sndbuf - 32)
1490 goto out;
1491
eb6a2481
ED
1492 if (len > SKB_MAX_ALLOC)
1493 data_len = min_t(size_t,
1494 len - SKB_MAX_ALLOC,
1495 MAX_SKB_FRAGS * PAGE_SIZE);
1496
1497 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1498 msg->msg_flags & MSG_DONTWAIT, &err);
e27dfcea 1499 if (skb == NULL)
1da177e4
LT
1500 goto out;
1501
f78a5fda 1502 err = unix_scm_to_skb(siocb->scm, skb, true);
25888e30 1503 if (err < 0)
7361c36c 1504 goto out_free;
25888e30 1505 max_level = err + 1;
dc49c1f9 1506 unix_get_secdata(siocb->scm, skb);
877ce7c1 1507
eb6a2481
ED
1508 skb_put(skb, len - data_len);
1509 skb->data_len = data_len;
1510 skb->len = len;
1511 err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1da177e4
LT
1512 if (err)
1513 goto out_free;
1514
1515 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1516
1517restart:
1518 if (!other) {
1519 err = -ECONNRESET;
1520 if (sunaddr == NULL)
1521 goto out_free;
1522
097e66c5 1523 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1da177e4 1524 hash, &err);
e27dfcea 1525 if (other == NULL)
1da177e4
LT
1526 goto out_free;
1527 }
1528
d6ae3bae
AC
1529 if (sk_filter(other, skb) < 0) {
1530 /* Toss the packet but do not return any error to the sender */
1531 err = len;
1532 goto out_free;
1533 }
1534
1c92b4e5 1535 unix_state_lock(other);
1da177e4
LT
1536 err = -EPERM;
1537 if (!unix_may_send(sk, other))
1538 goto out_unlock;
1539
1540 if (sock_flag(other, SOCK_DEAD)) {
1541 /*
1542 * Check with 1003.1g - what should
1543 * datagram error
1544 */
1c92b4e5 1545 unix_state_unlock(other);
1da177e4
LT
1546 sock_put(other);
1547
1548 err = 0;
1c92b4e5 1549 unix_state_lock(sk);
1da177e4 1550 if (unix_peer(sk) == other) {
e27dfcea 1551 unix_peer(sk) = NULL;
1c92b4e5 1552 unix_state_unlock(sk);
1da177e4
LT
1553
1554 unix_dgram_disconnected(sk, other);
1555 sock_put(other);
1556 err = -ECONNREFUSED;
1557 } else {
1c92b4e5 1558 unix_state_unlock(sk);
1da177e4
LT
1559 }
1560
1561 other = NULL;
1562 if (err)
1563 goto out_free;
1564 goto restart;
1565 }
1566
1567 err = -EPIPE;
1568 if (other->sk_shutdown & RCV_SHUTDOWN)
1569 goto out_unlock;
1570
1571 if (sk->sk_type != SOCK_SEQPACKET) {
1572 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1573 if (err)
1574 goto out_unlock;
1575 }
1576
3c73419c 1577 if (unix_peer(other) != sk && unix_recvq_full(other)) {
1da177e4
LT
1578 if (!timeo) {
1579 err = -EAGAIN;
1580 goto out_unlock;
1581 }
1582
1583 timeo = unix_wait_for_peer(other, timeo);
1584
1585 err = sock_intr_errno(timeo);
1586 if (signal_pending(current))
1587 goto out_free;
1588
1589 goto restart;
1590 }
1591
3f66116e
AC
1592 if (sock_flag(other, SOCK_RCVTSTAMP))
1593 __net_timestamp(skb);
16e57262 1594 maybe_add_creds(skb, sock, other);
1da177e4 1595 skb_queue_tail(&other->sk_receive_queue, skb);
25888e30
ED
1596 if (max_level > unix_sk(other)->recursion_level)
1597 unix_sk(other)->recursion_level = max_level;
1c92b4e5 1598 unix_state_unlock(other);
1da177e4
LT
1599 other->sk_data_ready(other, len);
1600 sock_put(other);
f78a5fda 1601 scm_destroy(siocb->scm);
1da177e4
LT
1602 return len;
1603
1604out_unlock:
1c92b4e5 1605 unix_state_unlock(other);
1da177e4
LT
1606out_free:
1607 kfree_skb(skb);
1608out:
1609 if (other)
1610 sock_put(other);
f78a5fda 1611 scm_destroy(siocb->scm);
1da177e4
LT
1612 return err;
1613}
1614
ac7bfa62 1615
1da177e4
LT
1616static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1617 struct msghdr *msg, size_t len)
1618{
1619 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1620 struct sock *sk = sock->sk;
1621 struct sock *other = NULL;
6eba6a37 1622 int err, size;
f78a5fda 1623 struct sk_buff *skb;
e27dfcea 1624 int sent = 0;
1da177e4 1625 struct scm_cookie tmp_scm;
8ba69ba6 1626 bool fds_sent = false;
25888e30 1627 int max_level;
1da177e4
LT
1628
1629 if (NULL == siocb->scm)
1630 siocb->scm = &tmp_scm;
5f23b734 1631 wait_for_unix_gc();
e0e3cea4 1632 err = scm_send(sock, msg, siocb->scm, false);
1da177e4
LT
1633 if (err < 0)
1634 return err;
1635
1636 err = -EOPNOTSUPP;
1637 if (msg->msg_flags&MSG_OOB)
1638 goto out_err;
1639
1640 if (msg->msg_namelen) {
1641 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1642 goto out_err;
1643 } else {
1da177e4 1644 err = -ENOTCONN;
830a1e5c 1645 other = unix_peer(sk);
1da177e4
LT
1646 if (!other)
1647 goto out_err;
1648 }
1649
1650 if (sk->sk_shutdown & SEND_SHUTDOWN)
1651 goto pipe_err;
1652
6eba6a37 1653 while (sent < len) {
1da177e4 1654 /*
e9df7d7f
BL
1655 * Optimisation for the fact that under 0.01% of X
1656 * messages typically need breaking up.
1da177e4
LT
1657 */
1658
e9df7d7f 1659 size = len-sent;
1da177e4
LT
1660
1661 /* Keep two messages in the pipe so it schedules better */
e9df7d7f
BL
1662 if (size > ((sk->sk_sndbuf >> 1) - 64))
1663 size = (sk->sk_sndbuf >> 1) - 64;
1da177e4
LT
1664
1665 if (size > SKB_MAX_ALLOC)
1666 size = SKB_MAX_ALLOC;
ac7bfa62 1667
1da177e4
LT
1668 /*
1669 * Grab a buffer
1670 */
ac7bfa62 1671
6eba6a37
ED
1672 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1673 &err);
1da177e4 1674
e27dfcea 1675 if (skb == NULL)
1da177e4
LT
1676 goto out_err;
1677
1678 /*
1679 * If you pass two values to the sock_alloc_send_skb
1680 * it tries to grab the large buffer with GFP_NOFS
1681 * (which can fail easily), and if it fails grab the
1682 * fallback size buffer which is under a page and will
1683 * succeed. [Alan]
1684 */
1685 size = min_t(int, size, skb_tailroom(skb));
1686
7361c36c 1687
f78a5fda
DM
1688 /* Only send the fds in the first buffer */
1689 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
25888e30 1690 if (err < 0) {
7361c36c 1691 kfree_skb(skb);
f78a5fda 1692 goto out_err;
6209344f 1693 }
25888e30 1694 max_level = err + 1;
7361c36c 1695 fds_sent = true;
1da177e4 1696
6eba6a37
ED
1697 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1698 if (err) {
1da177e4 1699 kfree_skb(skb);
f78a5fda 1700 goto out_err;
1da177e4
LT
1701 }
1702
1c92b4e5 1703 unix_state_lock(other);
1da177e4
LT
1704
1705 if (sock_flag(other, SOCK_DEAD) ||
1706 (other->sk_shutdown & RCV_SHUTDOWN))
1707 goto pipe_err_free;
1708
16e57262 1709 maybe_add_creds(skb, sock, other);
1da177e4 1710 skb_queue_tail(&other->sk_receive_queue, skb);
25888e30
ED
1711 if (max_level > unix_sk(other)->recursion_level)
1712 unix_sk(other)->recursion_level = max_level;
1c92b4e5 1713 unix_state_unlock(other);
1da177e4 1714 other->sk_data_ready(other, size);
e27dfcea 1715 sent += size;
1da177e4 1716 }
1da177e4 1717
f78a5fda 1718 scm_destroy(siocb->scm);
1da177e4
LT
1719 siocb->scm = NULL;
1720
1721 return sent;
1722
1723pipe_err_free:
1c92b4e5 1724 unix_state_unlock(other);
1da177e4
LT
1725 kfree_skb(skb);
1726pipe_err:
6eba6a37
ED
1727 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1728 send_sig(SIGPIPE, current, 0);
1da177e4
LT
1729 err = -EPIPE;
1730out_err:
f78a5fda 1731 scm_destroy(siocb->scm);
1da177e4
LT
1732 siocb->scm = NULL;
1733 return sent ? : err;
1734}
1735
1736static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1737 struct msghdr *msg, size_t len)
1738{
1739 int err;
1740 struct sock *sk = sock->sk;
ac7bfa62 1741
1da177e4
LT
1742 err = sock_error(sk);
1743 if (err)
1744 return err;
1745
1746 if (sk->sk_state != TCP_ESTABLISHED)
1747 return -ENOTCONN;
1748
1749 if (msg->msg_namelen)
1750 msg->msg_namelen = 0;
1751
1752 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1753}
ac7bfa62 1754
a05d2ad1
EB
1755static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1756 struct msghdr *msg, size_t size,
1757 int flags)
1758{
1759 struct sock *sk = sock->sk;
1760
1761 if (sk->sk_state != TCP_ESTABLISHED)
1762 return -ENOTCONN;
1763
1764 return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1765}
1766
1da177e4
LT
1767static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1768{
1769 struct unix_sock *u = unix_sk(sk);
1770
1da177e4
LT
1771 if (u->addr) {
1772 msg->msg_namelen = u->addr->len;
1773 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1774 }
1775}
1776
1777static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1778 struct msghdr *msg, size_t size,
1779 int flags)
1780{
1781 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1782 struct scm_cookie tmp_scm;
1783 struct sock *sk = sock->sk;
1784 struct unix_sock *u = unix_sk(sk);
1785 int noblock = flags & MSG_DONTWAIT;
1786 struct sk_buff *skb;
1787 int err;
f55bb7f9 1788 int peeked, skip;
1da177e4
LT
1789
1790 err = -EOPNOTSUPP;
1791 if (flags&MSG_OOB)
1792 goto out;
1793
b3ca9b02 1794 err = mutex_lock_interruptible(&u->readlock);
f423fefe
ED
1795 if (unlikely(err)) {
1796 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1797 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1798 */
1799 err = noblock ? -EAGAIN : -ERESTARTSYS;
b3ca9b02
RW
1800 goto out;
1801 }
1da177e4 1802
f55bb7f9
PE
1803 skip = sk_peek_offset(sk, flags);
1804
1805 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
0a112258
FZ
1806 if (!skb) {
1807 unix_state_lock(sk);
1808 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1809 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1810 (sk->sk_shutdown & RCV_SHUTDOWN))
1811 err = 0;
1812 unix_state_unlock(sk);
1da177e4 1813 goto out_unlock;
0a112258 1814 }
1da177e4 1815
67426b75
ED
1816 wake_up_interruptible_sync_poll(&u->peer_wait,
1817 POLLOUT | POLLWRNORM | POLLWRBAND);
1da177e4
LT
1818
1819 if (msg->msg_name)
1820 unix_copy_addr(msg, skb->sk);
1821
f55bb7f9
PE
1822 if (size > skb->len - skip)
1823 size = skb->len - skip;
1824 else if (size < skb->len - skip)
1da177e4
LT
1825 msg->msg_flags |= MSG_TRUNC;
1826
f55bb7f9 1827 err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1da177e4
LT
1828 if (err)
1829 goto out_free;
1830
3f66116e
AC
1831 if (sock_flag(sk, SOCK_RCVTSTAMP))
1832 __sock_recv_timestamp(msg, sk, skb);
1833
1da177e4
LT
1834 if (!siocb->scm) {
1835 siocb->scm = &tmp_scm;
1836 memset(&tmp_scm, 0, sizeof(tmp_scm));
1837 }
6b0ee8c0 1838 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
877ce7c1 1839 unix_set_secdata(siocb->scm, skb);
1da177e4 1840
6eba6a37 1841 if (!(flags & MSG_PEEK)) {
1da177e4
LT
1842 if (UNIXCB(skb).fp)
1843 unix_detach_fds(siocb->scm, skb);
f55bb7f9
PE
1844
1845 sk_peek_offset_bwd(sk, skb->len);
6eba6a37 1846 } else {
1da177e4
LT
1847 /* It is questionable: on PEEK we could:
1848 - do not return fds - good, but too simple 8)
1849 - return fds, and do not return them on read (old strategy,
1850 apparently wrong)
1851 - clone fds (I chose it for now, it is the most universal
1852 solution)
ac7bfa62
YH
1853
1854 POSIX 1003.1g does not actually define this clearly
1855 at all. POSIX 1003.1g doesn't define a lot of things
1856 clearly however!
1857
1da177e4 1858 */
f55bb7f9
PE
1859
1860 sk_peek_offset_fwd(sk, size);
1861
1da177e4
LT
1862 if (UNIXCB(skb).fp)
1863 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1864 }
9f6f9af7 1865 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4
LT
1866
1867 scm_recv(sock, msg, siocb->scm, flags);
1868
1869out_free:
6eba6a37 1870 skb_free_datagram(sk, skb);
1da177e4 1871out_unlock:
57b47a53 1872 mutex_unlock(&u->readlock);
1da177e4
LT
1873out:
1874 return err;
1875}
1876
1877/*
79f632c7 1878 * Sleep until more data has arrived. But check for races..
1da177e4 1879 */
79f632c7
BP
1880static long unix_stream_data_wait(struct sock *sk, long timeo,
1881 struct sk_buff *last)
1da177e4
LT
1882{
1883 DEFINE_WAIT(wait);
1884
1c92b4e5 1885 unix_state_lock(sk);
1da177e4
LT
1886
1887 for (;;) {
aa395145 1888 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4 1889
79f632c7 1890 if (skb_peek_tail(&sk->sk_receive_queue) != last ||
1da177e4
LT
1891 sk->sk_err ||
1892 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1893 signal_pending(current) ||
1894 !timeo)
1895 break;
1896
1897 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1c92b4e5 1898 unix_state_unlock(sk);
1da177e4 1899 timeo = schedule_timeout(timeo);
1c92b4e5 1900 unix_state_lock(sk);
7659c934
MS
1901
1902 if (sock_flag(sk, SOCK_DEAD))
1903 break;
1904
1da177e4
LT
1905 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1906 }
1907
aa395145 1908 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 1909 unix_state_unlock(sk);
1da177e4
LT
1910 return timeo;
1911}
1912
1da177e4
LT
1913static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1914 struct msghdr *msg, size_t size,
1915 int flags)
1916{
1917 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1918 struct scm_cookie tmp_scm;
1919 struct sock *sk = sock->sk;
1920 struct unix_sock *u = unix_sk(sk);
e27dfcea 1921 struct sockaddr_un *sunaddr = msg->msg_name;
1da177e4 1922 int copied = 0;
f423fefe 1923 int noblock = flags & MSG_DONTWAIT;
1da177e4
LT
1924 int check_creds = 0;
1925 int target;
1926 int err = 0;
1927 long timeo;
fc0d7536 1928 int skip;
1da177e4
LT
1929
1930 err = -EINVAL;
1931 if (sk->sk_state != TCP_ESTABLISHED)
1932 goto out;
1933
1934 err = -EOPNOTSUPP;
1935 if (flags&MSG_OOB)
1936 goto out;
1937
1938 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
f423fefe 1939 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 1940
1da177e4
LT
1941 /* Lock the socket to prevent queue disordering
1942 * while sleeps in memcpy_tomsg
1943 */
1944
1945 if (!siocb->scm) {
1946 siocb->scm = &tmp_scm;
1947 memset(&tmp_scm, 0, sizeof(tmp_scm));
1948 }
1949
b3ca9b02 1950 err = mutex_lock_interruptible(&u->readlock);
f423fefe
ED
1951 if (unlikely(err)) {
1952 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1953 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1954 */
1955 err = noblock ? -EAGAIN : -ERESTARTSYS;
b3ca9b02
RW
1956 goto out;
1957 }
1da177e4 1958
6eba6a37 1959 do {
1da177e4 1960 int chunk;
79f632c7 1961 struct sk_buff *skb, *last;
1da177e4 1962
3c0d2f37 1963 unix_state_lock(sk);
7659c934
MS
1964 if (sock_flag(sk, SOCK_DEAD)) {
1965 err = -ECONNRESET;
1966 goto unlock;
1967 }
79f632c7 1968 last = skb = skb_peek(&sk->sk_receive_queue);
fc0d7536 1969again:
6eba6a37 1970 if (skb == NULL) {
25888e30 1971 unix_sk(sk)->recursion_level = 0;
1da177e4 1972 if (copied >= target)
3c0d2f37 1973 goto unlock;
1da177e4
LT
1974
1975 /*
1976 * POSIX 1003.1g mandates this order.
1977 */
ac7bfa62 1978
6eba6a37
ED
1979 err = sock_error(sk);
1980 if (err)
3c0d2f37 1981 goto unlock;
1da177e4 1982 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
1983 goto unlock;
1984
1985 unix_state_unlock(sk);
1da177e4
LT
1986 err = -EAGAIN;
1987 if (!timeo)
1988 break;
57b47a53 1989 mutex_unlock(&u->readlock);
1da177e4 1990
79f632c7 1991 timeo = unix_stream_data_wait(sk, timeo, last);
1da177e4 1992
b3ca9b02
RW
1993 if (signal_pending(current)
1994 || mutex_lock_interruptible(&u->readlock)) {
1da177e4
LT
1995 err = sock_intr_errno(timeo);
1996 goto out;
1997 }
b3ca9b02 1998
1da177e4 1999 continue;
3c0d2f37
MS
2000 unlock:
2001 unix_state_unlock(sk);
2002 break;
1da177e4 2003 }
fc0d7536 2004
79f632c7
BP
2005 skip = sk_peek_offset(sk, flags);
2006 while (skip >= skb->len) {
fc0d7536 2007 skip -= skb->len;
79f632c7 2008 last = skb;
fc0d7536 2009 skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2010 if (!skb)
2011 goto again;
fc0d7536
PE
2012 }
2013
3c0d2f37 2014 unix_state_unlock(sk);
1da177e4
LT
2015
2016 if (check_creds) {
2017 /* Never glue messages from different writers */
7361c36c 2018 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
6b0ee8c0
EB
2019 !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
2020 !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
1da177e4 2021 break;
0e82e7f6 2022 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1da177e4 2023 /* Copy credentials */
6b0ee8c0 2024 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1da177e4
LT
2025 check_creds = 1;
2026 }
2027
2028 /* Copy address just once */
6eba6a37 2029 if (sunaddr) {
1da177e4
LT
2030 unix_copy_addr(msg, skb->sk);
2031 sunaddr = NULL;
2032 }
2033
fc0d7536
PE
2034 chunk = min_t(unsigned int, skb->len - skip, size);
2035 if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
1da177e4
LT
2036 if (copied == 0)
2037 copied = -EFAULT;
2038 break;
2039 }
2040 copied += chunk;
2041 size -= chunk;
2042
2043 /* Mark read part of skb as used */
6eba6a37 2044 if (!(flags & MSG_PEEK)) {
1da177e4
LT
2045 skb_pull(skb, chunk);
2046
fc0d7536
PE
2047 sk_peek_offset_bwd(sk, chunk);
2048
1da177e4
LT
2049 if (UNIXCB(skb).fp)
2050 unix_detach_fds(siocb->scm, skb);
2051
6f01fd6e 2052 if (skb->len)
1da177e4 2053 break;
1da177e4 2054
6f01fd6e 2055 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2056 consume_skb(skb);
1da177e4
LT
2057
2058 if (siocb->scm->fp)
2059 break;
6eba6a37 2060 } else {
1da177e4
LT
2061 /* It is questionable, see note in unix_dgram_recvmsg.
2062 */
2063 if (UNIXCB(skb).fp)
2064 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2065
1f21dc67
AC
2066 if (skip) {
2067 sk_peek_offset_fwd(sk, chunk);
2068 skip -= chunk;
2069 }
2070
2071 if (UNIXCB(skb).fp)
2072 break;
fc0d7536 2073
1f21dc67
AC
2074 last = skb;
2075 unix_state_lock(sk);
2076 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2077 if (skb)
2078 goto again;
2079 unix_state_unlock(sk);
1da177e4
LT
2080 break;
2081 }
2082 } while (size);
2083
57b47a53 2084 mutex_unlock(&u->readlock);
1da177e4
LT
2085 scm_recv(sock, msg, siocb->scm, flags);
2086out:
2087 return copied ? : err;
2088}
2089
2090static int unix_shutdown(struct socket *sock, int mode)
2091{
2092 struct sock *sk = sock->sk;
2093 struct sock *other;
2094
fc61b928
XW
2095 if (mode < SHUT_RD || mode > SHUT_RDWR)
2096 return -EINVAL;
2097 /* This maps:
2098 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2099 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2100 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2101 */
2102 ++mode;
7180a031
AC
2103
2104 unix_state_lock(sk);
2105 sk->sk_shutdown |= mode;
2106 other = unix_peer(sk);
2107 if (other)
2108 sock_hold(other);
2109 unix_state_unlock(sk);
2110 sk->sk_state_change(sk);
2111
2112 if (other &&
2113 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2114
2115 int peer_mode = 0;
2116
2117 if (mode&RCV_SHUTDOWN)
2118 peer_mode |= SEND_SHUTDOWN;
2119 if (mode&SEND_SHUTDOWN)
2120 peer_mode |= RCV_SHUTDOWN;
2121 unix_state_lock(other);
2122 other->sk_shutdown |= peer_mode;
2123 unix_state_unlock(other);
2124 other->sk_state_change(other);
2125 if (peer_mode == SHUTDOWN_MASK)
2126 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2127 else if (peer_mode & RCV_SHUTDOWN)
2128 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2129 }
7180a031
AC
2130 if (other)
2131 sock_put(other);
2132
1da177e4
LT
2133 return 0;
2134}
2135
885ee74d
PE
2136long unix_inq_len(struct sock *sk)
2137{
2138 struct sk_buff *skb;
2139 long amount = 0;
2140
2141 if (sk->sk_state == TCP_LISTEN)
2142 return -EINVAL;
2143
2144 spin_lock(&sk->sk_receive_queue.lock);
2145 if (sk->sk_type == SOCK_STREAM ||
2146 sk->sk_type == SOCK_SEQPACKET) {
2147 skb_queue_walk(&sk->sk_receive_queue, skb)
2148 amount += skb->len;
2149 } else {
2150 skb = skb_peek(&sk->sk_receive_queue);
2151 if (skb)
2152 amount = skb->len;
2153 }
2154 spin_unlock(&sk->sk_receive_queue.lock);
2155
2156 return amount;
2157}
2158EXPORT_SYMBOL_GPL(unix_inq_len);
2159
/* Report the socket's outstanding send-side allocation, as given by
 * sk_wmem_alloc_get().
 */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2165
1da177e4
LT
2166static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2167{
2168 struct sock *sk = sock->sk;
e27dfcea 2169 long amount = 0;
1da177e4
LT
2170 int err;
2171
6eba6a37
ED
2172 switch (cmd) {
2173 case SIOCOUTQ:
885ee74d 2174 amount = unix_outq_len(sk);
6eba6a37
ED
2175 err = put_user(amount, (int __user *)arg);
2176 break;
2177 case SIOCINQ:
885ee74d
PE
2178 amount = unix_inq_len(sk);
2179 if (amount < 0)
2180 err = amount;
2181 else
1da177e4 2182 err = put_user(amount, (int __user *)arg);
885ee74d 2183 break;
6eba6a37
ED
2184 default:
2185 err = -ENOIOCTLCMD;
2186 break;
1da177e4
LT
2187 }
2188 return err;
2189}
2190
6eba6a37 2191static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
2192{
2193 struct sock *sk = sock->sk;
2194 unsigned int mask;
2195
aa395145 2196 sock_poll_wait(file, sk_sleep(sk), wait);
1da177e4
LT
2197 mask = 0;
2198
2199 /* exceptional events? */
2200 if (sk->sk_err)
2201 mask |= POLLERR;
2202 if (sk->sk_shutdown == SHUTDOWN_MASK)
2203 mask |= POLLHUP;
f348d70a 2204 if (sk->sk_shutdown & RCV_SHUTDOWN)
db40980f 2205 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
1da177e4
LT
2206
2207 /* readable? */
db40980f 2208 if (!skb_queue_empty(&sk->sk_receive_queue))
1da177e4
LT
2209 mask |= POLLIN | POLLRDNORM;
2210
2211 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2212 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2213 sk->sk_state == TCP_CLOSE)
1da177e4
LT
2214 mask |= POLLHUP;
2215
2216 /*
2217 * we set writable also when the other side has shut down the
2218 * connection. This prevents stuck sockets.
2219 */
2220 if (unix_writable(sk))
2221 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2222
2223 return mask;
2224}
2225
ec0d215f
RW
2226static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2227 poll_table *wait)
3c73419c 2228{
ec0d215f
RW
2229 struct sock *sk = sock->sk, *other;
2230 unsigned int mask, writable;
3c73419c 2231
aa395145 2232 sock_poll_wait(file, sk_sleep(sk), wait);
3c73419c
RW
2233 mask = 0;
2234
2235 /* exceptional events? */
2236 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
7d4c04fc 2237 mask |= POLLERR |
8facd5fb 2238 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
7d4c04fc 2239
3c73419c 2240 if (sk->sk_shutdown & RCV_SHUTDOWN)
5456f09a 2241 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
3c73419c
RW
2242 if (sk->sk_shutdown == SHUTDOWN_MASK)
2243 mask |= POLLHUP;
2244
2245 /* readable? */
5456f09a 2246 if (!skb_queue_empty(&sk->sk_receive_queue))
3c73419c
RW
2247 mask |= POLLIN | POLLRDNORM;
2248
2249 /* Connection-based need to check for termination and startup */
2250 if (sk->sk_type == SOCK_SEQPACKET) {
2251 if (sk->sk_state == TCP_CLOSE)
2252 mask |= POLLHUP;
2253 /* connection hasn't started yet? */
2254 if (sk->sk_state == TCP_SYN_SENT)
2255 return mask;
2256 }
2257
973a34aa 2258 /* No write status requested, avoid expensive OUT tests. */
626cf236 2259 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
973a34aa
ED
2260 return mask;
2261
ec0d215f 2262 writable = unix_writable(sk);
5456f09a
ED
2263 other = unix_peer_get(sk);
2264 if (other) {
2265 if (unix_peer(other) != sk) {
2266 sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2267 if (unix_recvq_full(other))
2268 writable = 0;
ec0d215f 2269 }
5456f09a 2270 sock_put(other);
ec0d215f
RW
2271 }
2272
2273 if (writable)
3c73419c
RW
2274 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2275 else
2276 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2277
3c73419c
RW
2278 return mask;
2279}
1da177e4
LT
2280
2281#ifdef CONFIG_PROC_FS
a53eb3fe 2282
7123aaa3
ED
/*
 * A seq_file position for the socket table packs two values into one
 * number: the hash-bucket index in the bits above BUCKET_SPACE and the
 * offset of the socket within that bucket in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
a53eb3fe 2288
7123aaa3 2289static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 2290{
7123aaa3
ED
2291 unsigned long offset = get_offset(*pos);
2292 unsigned long bucket = get_bucket(*pos);
2293 struct sock *sk;
2294 unsigned long count = 0;
1da177e4 2295
7123aaa3
ED
2296 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2297 if (sock_net(sk) != seq_file_net(seq))
097e66c5 2298 continue;
7123aaa3
ED
2299 if (++count == offset)
2300 break;
2301 }
2302
2303 return sk;
2304}
2305
2306static struct sock *unix_next_socket(struct seq_file *seq,
2307 struct sock *sk,
2308 loff_t *pos)
2309{
2310 unsigned long bucket;
2311
2312 while (sk > (struct sock *)SEQ_START_TOKEN) {
2313 sk = sk_next(sk);
2314 if (!sk)
2315 goto next_bucket;
2316 if (sock_net(sk) == seq_file_net(seq))
2317 return sk;
1da177e4 2318 }
7123aaa3
ED
2319
2320 do {
2321 sk = unix_from_bucket(seq, pos);
2322 if (sk)
2323 return sk;
2324
2325next_bucket:
2326 bucket = get_bucket(*pos) + 1;
2327 *pos = set_bucket_offset(bucket, 1);
2328 } while (bucket < ARRAY_SIZE(unix_socket_table));
2329
1da177e4
LT
2330 return NULL;
2331}
2332
1da177e4 2333static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2334 __acquires(unix_table_lock)
1da177e4 2335{
fbe9cc4a 2336 spin_lock(&unix_table_lock);
7123aaa3
ED
2337
2338 if (!*pos)
2339 return SEQ_START_TOKEN;
2340
2341 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2342 return NULL;
2343
2344 return unix_next_socket(seq, NULL, pos);
1da177e4
LT
2345}
2346
2347static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2348{
2349 ++*pos;
7123aaa3 2350 return unix_next_socket(seq, v, pos);
1da177e4
LT
2351}
2352
2353static void unix_seq_stop(struct seq_file *seq, void *v)
9a429c49 2354 __releases(unix_table_lock)
1da177e4 2355{
fbe9cc4a 2356 spin_unlock(&unix_table_lock);
1da177e4
LT
2357}
2358
2359static int unix_seq_show(struct seq_file *seq, void *v)
2360{
ac7bfa62 2361
b9f3124f 2362 if (v == SEQ_START_TOKEN)
1da177e4
LT
2363 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2364 "Inode Path\n");
2365 else {
2366 struct sock *s = v;
2367 struct unix_sock *u = unix_sk(s);
1c92b4e5 2368 unix_state_lock(s);
1da177e4 2369
71338aa7 2370 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4
LT
2371 s,
2372 atomic_read(&s->sk_refcnt),
2373 0,
2374 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2375 s->sk_type,
2376 s->sk_socket ?
2377 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2378 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2379 sock_i_ino(s));
2380
2381 if (u->addr) {
2382 int i, len;
2383 seq_putc(seq, ' ');
2384
2385 i = 0;
2386 len = u->addr->len - sizeof(short);
2387 if (!UNIX_ABSTRACT(s))
2388 len--;
2389 else {
2390 seq_putc(seq, '@');
2391 i++;
2392 }
2393 for ( ; i < len; i++)
2394 seq_putc(seq, u->addr->name->sun_path[i]);
2395 }
1c92b4e5 2396 unix_state_unlock(s);
1da177e4
LT
2397 seq_putc(seq, '\n');
2398 }
2399
2400 return 0;
2401}
2402
56b3d975 2403static const struct seq_operations unix_seq_ops = {
1da177e4
LT
2404 .start = unix_seq_start,
2405 .next = unix_seq_next,
2406 .stop = unix_seq_stop,
2407 .show = unix_seq_show,
2408};
2409
1da177e4
LT
2410static int unix_seq_open(struct inode *inode, struct file *file)
2411{
e372c414 2412 return seq_open_net(inode, file, &unix_seq_ops,
8b51b064 2413 sizeof(struct seq_net_private));
1da177e4
LT
2414}
2415
da7071d7 2416static const struct file_operations unix_seq_fops = {
1da177e4
LT
2417 .owner = THIS_MODULE,
2418 .open = unix_seq_open,
2419 .read = seq_read,
2420 .llseek = seq_lseek,
e372c414 2421 .release = seq_release_net,
1da177e4
LT
2422};
2423
2424#endif
2425
ec1b4cf7 2426static const struct net_proto_family unix_family_ops = {
1da177e4
LT
2427 .family = PF_UNIX,
2428 .create = unix_create,
2429 .owner = THIS_MODULE,
2430};
2431
097e66c5 2432
2c8c1e72 2433static int __net_init unix_net_init(struct net *net)
097e66c5
DL
2434{
2435 int error = -ENOMEM;
2436
a0a53c8b 2437 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
2438 if (unix_sysctl_register(net))
2439 goto out;
d392e497 2440
097e66c5 2441#ifdef CONFIG_PROC_FS
d4beaa66 2442 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
1597fbc0 2443 unix_sysctl_unregister(net);
097e66c5 2444 goto out;
1597fbc0 2445 }
097e66c5
DL
2446#endif
2447 error = 0;
2448out:
48dcc33e 2449 return error;
097e66c5
DL
2450}
2451
2c8c1e72 2452static void __net_exit unix_net_exit(struct net *net)
097e66c5 2453{
1597fbc0 2454 unix_sysctl_unregister(net);
ece31ffd 2455 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
2456}
2457
2458static struct pernet_operations unix_net_ops = {
2459 .init = unix_net_init,
2460 .exit = unix_net_exit,
2461};
2462
1da177e4
LT
2463static int __init af_unix_init(void)
2464{
2465 int rc = -1;
1da177e4 2466
b4fff5f8 2467 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
1da177e4
LT
2468
2469 rc = proto_register(&unix_proto, 1);
ac7bfa62
YH
2470 if (rc != 0) {
2471 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
0dc47877 2472 __func__);
1da177e4
LT
2473 goto out;
2474 }
2475
2476 sock_register(&unix_family_ops);
097e66c5 2477 register_pernet_subsys(&unix_net_ops);
1da177e4
LT
2478out:
2479 return rc;
2480}
2481
2482static void __exit af_unix_exit(void)
2483{
2484 sock_unregister(PF_UNIX);
1da177e4 2485 proto_unregister(&unix_proto);
097e66c5 2486 unregister_pernet_subsys(&unix_net_ops);
1da177e4
LT
2487}
2488
3d366960
DW
2489/* Earlier than device_initcall() so that other drivers invoking
2490 request_module() don't end up in a loop when modprobe tries
2491 to use a UNIX socket. But later than subsys_initcall() because
2492 we depend on stuff initialised there */
2493fs_initcall(af_unix_init);
1da177e4
LT
2494module_exit(af_unix_exit);
2495
2496MODULE_LICENSE("GPL");
2497MODULE_ALIAS_NETPROTO(PF_UNIX);