net/unix/af_unix.c
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko Eißfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by the above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid a huge amount
38 * of hashed socks (this is for unix_gc()
39 * performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lots of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and gives the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * that start with a 0 byte, so that this name space does not
80 * intersect with BSD names.
81 */
82
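/* Illustrative userspace sketch (an editorial addition, not part of the
 * original file): how the abstract namespace described above looks from
 * user space.  The name "example" and the helper are invented for the
 * demo; the leading NUL in sun_path is what selects the abstract
 * namespace, and the address length must count only the bytes used.
 */
#if 0	/* example only, never compiled */
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static int bind_abstract_example(void)
{
	struct sockaddr_un sun;
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	/* sun_path[0] stays '\0': abstract name, no filesystem object */
	memcpy(sun.sun_path + 1, "example", 7);

	/* length covers family field + NUL + name, not sizeof(sun) */
	return bind(fd, (struct sockaddr *)&sun,
		    offsetof(struct sockaddr_un, sun_path) + 1 + 7);
}
#endif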
83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84
85#include <linux/module.h>
86#include <linux/kernel.h>
87#include <linux/signal.h>
88#include <linux/sched/signal.h>
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
103#include <linux/uaccess.h>
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
106#include <net/net_namespace.h>
107#include <net/sock.h>
108#include <net/tcp_states.h>
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
119#include <linux/freezer.h>
120#include <linux/file.h>
121
122struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
123EXPORT_SYMBOL_GPL(unix_socket_table);
124DEFINE_SPINLOCK(unix_table_lock);
125EXPORT_SYMBOL_GPL(unix_table_lock);
126static atomic_long_t unix_nr_socks;
127
128
129static struct hlist_head *unix_sockets_unbound(void *addr)
130{
131 unsigned long hash = (unsigned long)addr;
132
133 hash ^= hash >> 16;
134 hash ^= hash >> 8;
135 hash %= UNIX_HASH_SIZE;
136 return &unix_socket_table[UNIX_HASH_SIZE + hash];
137}
138
139#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
140
141#ifdef CONFIG_SECURITY_NETWORK
142static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
143{
144 UNIXCB(skb).secid = scm->secid;
145}
146
147static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
148{
149 scm->secid = UNIXCB(skb).secid;
150}
151
152static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
153{
154 return (scm->secid == UNIXCB(skb).secid);
155}
156#else
157static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158{ }
159
160static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
161{ }
162
163static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
164{
165 return true;
166}
167#endif /* CONFIG_SECURITY_NETWORK */
168
169/*
170 * SMP locking strategy:
171 * hash table is protected with spinlock unix_table_lock
172 * each socket state is protected by separate spin lock.
173 */
174
175static inline unsigned int unix_hash_fold(__wsum n)
176{
177 unsigned int hash = (__force unsigned int)csum_fold(n);
178
179 hash ^= hash>>8;
180 return hash&(UNIX_HASH_SIZE-1);
181}
182
183#define unix_peer(sk) (unix_sk(sk)->peer)
184
185static inline int unix_our_peer(struct sock *sk, struct sock *osk)
186{
187 return unix_peer(osk) == sk;
188}
189
190static inline int unix_may_send(struct sock *sk, struct sock *osk)
191{
192 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
193}
194
195static inline int unix_recvq_full(struct sock const *sk)
196{
197 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
198}
199
200struct sock *unix_peer_get(struct sock *s)
201{
202 struct sock *peer;
203
204 unix_state_lock(s);
205 peer = unix_peer(s);
206 if (peer)
207 sock_hold(peer);
208 unix_state_unlock(s);
209 return peer;
210}
211EXPORT_SYMBOL_GPL(unix_peer_get);
212
213static inline void unix_release_addr(struct unix_address *addr)
214{
215 if (refcount_dec_and_test(&addr->refcnt))
216 kfree(addr);
217}
218
219/*
220 * Check unix socket name:
221 * - should not be zero length.
222 * - if it does not start with a zero byte, it should be NUL terminated (FS object)
223 * - if it starts with a zero byte, it is an abstract name.
224 */
225
226static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
227{
228 if (len <= sizeof(short) || len > sizeof(*sunaddr))
229 return -EINVAL;
230 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
231 return -EINVAL;
232 if (sunaddr->sun_path[0]) {
233 /*
234 * This may look like an off by one error but it is a bit more
235 * subtle. 108 is the longest valid AF_UNIX path for a binding.
236 * sun_path[108] doesn't exist as such. However, in kernel space
237 * we are guaranteed that it is a valid memory location in our
238 * kernel address buffer.
239 */
240 ((char *)sunaddr)[len] = 0;
241 len = strlen(sunaddr->sun_path)+1+sizeof(short);
242 return len;
243 }
244
245 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
246 return len;
247}
248
249static void __unix_remove_socket(struct sock *sk)
250{
251 sk_del_node_init(sk);
252}
253
254static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
255{
256 WARN_ON(!sk_unhashed(sk));
257 sk_add_node(sk, list);
258}
259
260static inline void unix_remove_socket(struct sock *sk)
261{
262 spin_lock(&unix_table_lock);
263 __unix_remove_socket(sk);
264 spin_unlock(&unix_table_lock);
265}
266
267static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
268{
269 spin_lock(&unix_table_lock);
270 __unix_insert_socket(list, sk);
271 spin_unlock(&unix_table_lock);
272}
273
274static struct sock *__unix_find_socket_byname(struct net *net,
275 struct sockaddr_un *sunname,
276 int len, int type, unsigned int hash)
277{
278 struct sock *s;
279
280 sk_for_each(s, &unix_socket_table[hash ^ type]) {
281 struct unix_sock *u = unix_sk(s);
282
283 if (!net_eq(sock_net(s), net))
284 continue;
285
286 if (u->addr->len == len &&
287 !memcmp(u->addr->name, sunname, len))
288 goto found;
289 }
290 s = NULL;
291found:
292 return s;
293}
294
295static inline struct sock *unix_find_socket_byname(struct net *net,
296 struct sockaddr_un *sunname,
297 int len, int type,
298 unsigned int hash)
299{
300 struct sock *s;
301
302 spin_lock(&unix_table_lock);
303 s = __unix_find_socket_byname(net, sunname, len, type, hash);
304 if (s)
305 sock_hold(s);
306 spin_unlock(&unix_table_lock);
307 return s;
308}
309
310static struct sock *unix_find_socket_byinode(struct inode *i)
311{
312 struct sock *s;
313
314 spin_lock(&unix_table_lock);
315 sk_for_each(s,
316 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
317 struct dentry *dentry = unix_sk(s)->path.dentry;
318
319 if (dentry && d_backing_inode(dentry) == i) {
320 sock_hold(s);
321 goto found;
322 }
323 }
324 s = NULL;
325found:
326 spin_unlock(&unix_table_lock);
327 return s;
328}
329
330/* Support code for asymmetrically connected dgram sockets
331 *
332 * If a datagram socket is connected to a socket not itself connected
333 * to the first socket (eg, /dev/log), clients may only enqueue more
334 * messages if the present receive queue of the server socket is not
335 * "too large". This means there's a second writeability condition
336 * poll and sendmsg need to test. The dgram recv code will do a wake
337 * up on the peer_wait wait queue of a socket upon reception of a
338 * datagram which needs to be propagated to sleeping would-be writers
339 * since these might not have sent anything so far. This can't be
340 * accomplished via poll_wait because the lifetime of the server
341 * socket might be less than that of its clients if these break their
342 * association with it or if the server socket is closed while clients
343 * are still connected to it and there's no way to inform "a polling
344 * implementation" that it should let go of a certain wait queue.
345 *
346 * In order to propagate a wake up, a wait_queue_entry_t of the client
347 * socket is enqueued on the peer_wait queue of the server socket
348 * whose wake function does a wake_up on the ordinary client socket
349 * wait queue. This connection is established whenever a write (or
350 * poll for write) hits the flow control condition and is broken when
351 * the association to the server socket is dissolved or after a wake
352 * up was relayed.
353 */
354
355static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
356 void *key)
357{
358 struct unix_sock *u;
359 wait_queue_head_t *u_sleep;
360
361 u = container_of(q, struct unix_sock, peer_wake);
362
363 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
364 q);
365 u->peer_wake.private = NULL;
366
367 /* relaying can only happen while the wq still exists */
368 u_sleep = sk_sleep(&u->sk);
369 if (u_sleep)
370 wake_up_interruptible_poll(u_sleep, key);
371
372 return 0;
373}
374
375static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
376{
377 struct unix_sock *u, *u_other;
378 int rc;
379
380 u = unix_sk(sk);
381 u_other = unix_sk(other);
382 rc = 0;
383 spin_lock(&u_other->peer_wait.lock);
384
385 if (!u->peer_wake.private) {
386 u->peer_wake.private = other;
387 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
388
389 rc = 1;
390 }
391
392 spin_unlock(&u_other->peer_wait.lock);
393 return rc;
394}
395
396static void unix_dgram_peer_wake_disconnect(struct sock *sk,
397 struct sock *other)
398{
399 struct unix_sock *u, *u_other;
400
401 u = unix_sk(sk);
402 u_other = unix_sk(other);
403 spin_lock(&u_other->peer_wait.lock);
404
405 if (u->peer_wake.private == other) {
406 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
407 u->peer_wake.private = NULL;
408 }
409
410 spin_unlock(&u_other->peer_wait.lock);
411}
412
413static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
414 struct sock *other)
415{
416 unix_dgram_peer_wake_disconnect(sk, other);
417 wake_up_interruptible_poll(sk_sleep(sk),
418 POLLOUT |
419 POLLWRNORM |
420 POLLWRBAND);
421}
422
423/* preconditions:
424 * - unix_peer(sk) == other
425 * - association is stable
426 */
427static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
428{
429 int connected;
430
431 connected = unix_dgram_peer_wake_connect(sk, other);
432
433 if (unix_recvq_full(other))
434 return 1;
435
436 if (connected)
437 unix_dgram_peer_wake_disconnect(sk, other);
438
439 return 0;
440}
441
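/* Hedged userspace illustration (editorial sketch; the helper name and
 * sizes are invented): the "second writeability condition" implemented
 * above.  cli is connected to srv, but srv is not connected back, so
 * whether cli is writable depends on srv's receive queue length rather
 * than on cli's own sndbuf; a sleeping poller would be woken through
 * the peer_wait relay once srv reads.
 */
#if 0	/* example only, never compiled */
#include <poll.h>
#include <sys/socket.h>

static void dgram_flow_control_demo(int srv /* bound SOCK_DGRAM fd */,
				    int cli /* connect()ed to srv */)
{
	struct pollfd pfd = { .fd = cli, .events = POLLOUT };
	char buf[64] = "x";

	/* fill the receiver's queue until the kernel pushes back */
	while (send(cli, buf, sizeof(buf), MSG_DONTWAIT) >= 0)
		;
	poll(&pfd, 1, 0);		/* returns 0: not writable now */

	recv(srv, buf, sizeof(buf), 0);	/* receiver drains one datagram */
	poll(&pfd, 1, -1);		/* now reports POLLOUT again */
}
#endif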
442static int unix_writable(const struct sock *sk)
443{
444 return sk->sk_state != TCP_LISTEN &&
445 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
446}
447
448static void unix_write_space(struct sock *sk)
449{
450 struct socket_wq *wq;
451
452 rcu_read_lock();
453 if (unix_writable(sk)) {
454 wq = rcu_dereference(sk->sk_wq);
455 if (skwq_has_sleeper(wq))
456 wake_up_interruptible_sync_poll(&wq->wait,
457 POLLOUT | POLLWRNORM | POLLWRBAND);
458 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
459 }
460 rcu_read_unlock();
461}
462
463/* When a dgram socket disconnects (or changes its peer), we clear its receive
464 * queue of packets arrived from the previous peer. First, this allows flow
465 * control based only on wmem_alloc; second, an sk connected to a peer
466 * may receive messages only from that peer. */
467static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
468{
469 if (!skb_queue_empty(&sk->sk_receive_queue)) {
470 skb_queue_purge(&sk->sk_receive_queue);
471 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
472
473 /* If one link of a bidirectional dgram pipe is disconnected,
474 * we signal an error. Messages are lost. Do not signal this
475 * when the peer was not connected to us.
476 */
477 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
478 other->sk_err = ECONNRESET;
479 other->sk_error_report(other);
480 }
481 }
482}
483
484static void unix_sock_destructor(struct sock *sk)
485{
486 struct unix_sock *u = unix_sk(sk);
487
488 skb_queue_purge(&sk->sk_receive_queue);
489
490 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
491 WARN_ON(!sk_unhashed(sk));
492 WARN_ON(sk->sk_socket);
493 if (!sock_flag(sk, SOCK_DEAD)) {
494 pr_info("Attempt to release alive unix socket: %p\n", sk);
495 return;
496 }
497
498 if (u->addr)
499 unix_release_addr(u->addr);
500
501 atomic_long_dec(&unix_nr_socks);
502 local_bh_disable();
503 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
504 local_bh_enable();
505#ifdef UNIX_REFCNT_DEBUG
506 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
507 atomic_long_read(&unix_nr_socks));
508#endif
509}
510
511static void unix_release_sock(struct sock *sk, int embrion)
512{
513 struct unix_sock *u = unix_sk(sk);
514 struct path path;
515 struct sock *skpair;
516 struct sk_buff *skb;
517 int state;
518
519 unix_remove_socket(sk);
520
521 /* Clear state */
522 unix_state_lock(sk);
523 sock_orphan(sk);
524 sk->sk_shutdown = SHUTDOWN_MASK;
525 path = u->path;
526 u->path.dentry = NULL;
527 u->path.mnt = NULL;
528 state = sk->sk_state;
529 sk->sk_state = TCP_CLOSE;
530 unix_state_unlock(sk);
531
532 wake_up_interruptible_all(&u->peer_wait);
533
534 skpair = unix_peer(sk);
535
536 if (skpair != NULL) {
537 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
538 unix_state_lock(skpair);
539 /* No more writes */
540 skpair->sk_shutdown = SHUTDOWN_MASK;
541 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
542 skpair->sk_err = ECONNRESET;
543 unix_state_unlock(skpair);
544 skpair->sk_state_change(skpair);
545 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
546 }
547
548 unix_dgram_peer_wake_disconnect(sk, skpair);
549 sock_put(skpair); /* It may now die */
550 unix_peer(sk) = NULL;
551 }
552
553 /* Try to flush out this socket. Throw out buffers at least */
554
555 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
556 if (state == TCP_LISTEN)
557 unix_release_sock(skb->sk, 1);
558 /* passed fds are erased in the kfree_skb hook */
559 UNIXCB(skb).consumed = skb->len;
560 kfree_skb(skb);
561 }
562
563 if (path.dentry)
564 path_put(&path);
565
566 sock_put(sk);
567
568 /* ---- Socket is dead now and most probably destroyed ---- */
569
570 /*
571 * Fixme: BSD difference: In BSD all sockets connected to us get
572 * ECONNRESET and we die on the spot. In Linux we behave
573 * like files and pipes do and wait for the last
574 * dereference.
575 *
576 * Can't we simply set sock->err?
577 *
578 * What does the above comment talk about? --ANK(980817)
579 */
580
581 if (unix_tot_inflight)
582 unix_gc(); /* Garbage collect fds */
583}
584
585static void init_peercred(struct sock *sk)
586{
587 put_pid(sk->sk_peer_pid);
588 if (sk->sk_peer_cred)
589 put_cred(sk->sk_peer_cred);
590 sk->sk_peer_pid = get_pid(task_tgid(current));
591 sk->sk_peer_cred = get_current_cred();
592}
593
594static void copy_peercred(struct sock *sk, struct sock *peersk)
595{
596 put_pid(sk->sk_peer_pid);
597 if (sk->sk_peer_cred)
598 put_cred(sk->sk_peer_cred);
599 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
600 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
601}
602
603static int unix_listen(struct socket *sock, int backlog)
604{
605 int err;
606 struct sock *sk = sock->sk;
607 struct unix_sock *u = unix_sk(sk);
608 struct pid *old_pid = NULL;
609
610 err = -EOPNOTSUPP;
611 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
612 goto out; /* Only stream/seqpacket sockets accept */
613 err = -EINVAL;
614 if (!u->addr)
615 goto out; /* No listens on an unbound socket */
616 unix_state_lock(sk);
617 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
618 goto out_unlock;
619 if (backlog > sk->sk_max_ack_backlog)
620 wake_up_interruptible_all(&u->peer_wait);
621 sk->sk_max_ack_backlog = backlog;
622 sk->sk_state = TCP_LISTEN;
623 /* set credentials so connect can copy them */
624 init_peercred(sk);
625 err = 0;
626
627out_unlock:
628 unix_state_unlock(sk);
629 put_pid(old_pid);
630out:
631 return err;
632}
633
634static int unix_release(struct socket *);
635static int unix_bind(struct socket *, struct sockaddr *, int);
636static int unix_stream_connect(struct socket *, struct sockaddr *,
637 int addr_len, int flags);
638static int unix_socketpair(struct socket *, struct socket *);
639static int unix_accept(struct socket *, struct socket *, int, bool);
640static int unix_getname(struct socket *, struct sockaddr *, int *, int);
641static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
642static unsigned int unix_dgram_poll(struct file *, struct socket *,
643 poll_table *);
644static int unix_ioctl(struct socket *, unsigned int, unsigned long);
645static int unix_shutdown(struct socket *, int);
646static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
647static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
648static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
649 size_t size, int flags);
650static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
651 struct pipe_inode_info *, size_t size,
652 unsigned int flags);
653static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
654static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
655static int unix_dgram_connect(struct socket *, struct sockaddr *,
656 int, int);
657static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
658static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
659 int);
660
661static int unix_set_peek_off(struct sock *sk, int val)
662{
663 struct unix_sock *u = unix_sk(sk);
664
665 if (mutex_lock_interruptible(&u->iolock))
666 return -EINTR;
667
668 sk->sk_peek_off = val;
669 mutex_unlock(&u->iolock);
670
671 return 0;
672}
673
674
675static const struct proto_ops unix_stream_ops = {
676 .family = PF_UNIX,
677 .owner = THIS_MODULE,
678 .release = unix_release,
679 .bind = unix_bind,
680 .connect = unix_stream_connect,
681 .socketpair = unix_socketpair,
682 .accept = unix_accept,
683 .getname = unix_getname,
684 .poll = unix_poll,
685 .ioctl = unix_ioctl,
686 .listen = unix_listen,
687 .shutdown = unix_shutdown,
688 .setsockopt = sock_no_setsockopt,
689 .getsockopt = sock_no_getsockopt,
690 .sendmsg = unix_stream_sendmsg,
691 .recvmsg = unix_stream_recvmsg,
692 .mmap = sock_no_mmap,
693 .sendpage = unix_stream_sendpage,
694 .splice_read = unix_stream_splice_read,
695 .set_peek_off = unix_set_peek_off,
696};
697
698static const struct proto_ops unix_dgram_ops = {
699 .family = PF_UNIX,
700 .owner = THIS_MODULE,
701 .release = unix_release,
702 .bind = unix_bind,
703 .connect = unix_dgram_connect,
704 .socketpair = unix_socketpair,
705 .accept = sock_no_accept,
706 .getname = unix_getname,
707 .poll = unix_dgram_poll,
708 .ioctl = unix_ioctl,
709 .listen = sock_no_listen,
710 .shutdown = unix_shutdown,
711 .setsockopt = sock_no_setsockopt,
712 .getsockopt = sock_no_getsockopt,
713 .sendmsg = unix_dgram_sendmsg,
714 .recvmsg = unix_dgram_recvmsg,
715 .mmap = sock_no_mmap,
716 .sendpage = sock_no_sendpage,
717 .set_peek_off = unix_set_peek_off,
718};
719
720static const struct proto_ops unix_seqpacket_ops = {
721 .family = PF_UNIX,
722 .owner = THIS_MODULE,
723 .release = unix_release,
724 .bind = unix_bind,
725 .connect = unix_stream_connect,
726 .socketpair = unix_socketpair,
727 .accept = unix_accept,
728 .getname = unix_getname,
729 .poll = unix_dgram_poll,
730 .ioctl = unix_ioctl,
731 .listen = unix_listen,
732 .shutdown = unix_shutdown,
733 .setsockopt = sock_no_setsockopt,
734 .getsockopt = sock_no_getsockopt,
735 .sendmsg = unix_seqpacket_sendmsg,
736 .recvmsg = unix_seqpacket_recvmsg,
737 .mmap = sock_no_mmap,
738 .sendpage = sock_no_sendpage,
739 .set_peek_off = unix_set_peek_off,
740};
741
742static struct proto unix_proto = {
743 .name = "UNIX",
744 .owner = THIS_MODULE,
745 .obj_size = sizeof(struct unix_sock),
746};
747
748/*
749 * AF_UNIX sockets do not interact with hardware, hence they
750 * don't trigger interrupts - so it's safe for them to have
751 * bh-unsafe locking for their sk_receive_queue.lock. Split off
752 * this special lock-class by reinitializing the spinlock key:
753 */
754static struct lock_class_key af_unix_sk_receive_queue_lock_key;
755
756static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
757{
758 struct sock *sk = NULL;
759 struct unix_sock *u;
760
761 atomic_long_inc(&unix_nr_socks);
762 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
763 goto out;
764
765 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
766 if (!sk)
767 goto out;
768
769 sock_init_data(sock, sk);
770 lockdep_set_class(&sk->sk_receive_queue.lock,
771 &af_unix_sk_receive_queue_lock_key);
772
773 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
774 sk->sk_write_space = unix_write_space;
775 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
776 sk->sk_destruct = unix_sock_destructor;
777 u = unix_sk(sk);
778 u->path.dentry = NULL;
779 u->path.mnt = NULL;
780 spin_lock_init(&u->lock);
781 atomic_long_set(&u->inflight, 0);
782 INIT_LIST_HEAD(&u->link);
783 mutex_init(&u->iolock); /* single task reading lock */
784 mutex_init(&u->bindlock); /* single task binding lock */
785 init_waitqueue_head(&u->peer_wait);
786 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
787 unix_insert_socket(unix_sockets_unbound(sk), sk);
788out:
789 if (sk == NULL)
790 atomic_long_dec(&unix_nr_socks);
791 else {
792 local_bh_disable();
793 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
794 local_bh_enable();
795 }
796 return sk;
797}
798
799static int unix_create(struct net *net, struct socket *sock, int protocol,
800 int kern)
801{
802 if (protocol && protocol != PF_UNIX)
803 return -EPROTONOSUPPORT;
804
805 sock->state = SS_UNCONNECTED;
806
807 switch (sock->type) {
808 case SOCK_STREAM:
809 sock->ops = &unix_stream_ops;
810 break;
811 /*
812 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
813 * nothing uses it.
814 */
815 case SOCK_RAW:
816 sock->type = SOCK_DGRAM; /* fall through */
817 case SOCK_DGRAM:
818 sock->ops = &unix_dgram_ops;
819 break;
820 case SOCK_SEQPACKET:
821 sock->ops = &unix_seqpacket_ops;
822 break;
823 default:
824 return -ESOCKTNOSUPPORT;
825 }
826
827 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
828}
829
830static int unix_release(struct socket *sock)
831{
832 struct sock *sk = sock->sk;
833
834 if (!sk)
835 return 0;
836
837 unix_release_sock(sk, 0);
838 sock->sk = NULL;
839
840 return 0;
841}
842
843static int unix_autobind(struct socket *sock)
844{
845 struct sock *sk = sock->sk;
846 struct net *net = sock_net(sk);
847 struct unix_sock *u = unix_sk(sk);
848 static u32 ordernum = 1;
849 struct unix_address *addr;
850 int err;
851 unsigned int retries = 0;
852
853 err = mutex_lock_interruptible(&u->bindlock);
854 if (err)
855 return err;
856
857 err = 0;
858 if (u->addr)
859 goto out;
860
861 err = -ENOMEM;
862 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
863 if (!addr)
864 goto out;
865
866 addr->name->sun_family = AF_UNIX;
867 refcount_set(&addr->refcnt, 1);
868
869retry:
870 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
871 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
872
873 spin_lock(&unix_table_lock);
874 ordernum = (ordernum+1)&0xFFFFF;
875
876 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
877 addr->hash)) {
878 spin_unlock(&unix_table_lock);
879 /*
880 * __unix_find_socket_byname() may take a long time if many names
881 * are already in use.
882 */
883 cond_resched();
884 /* Give up if all names seem to be in use. */
885 if (retries++ == 0xFFFFF) {
886 err = -ENOSPC;
887 kfree(addr);
888 goto out;
889 }
890 goto retry;
891 }
892 addr->hash ^= sk->sk_type;
893
894 __unix_remove_socket(sk);
895 smp_store_release(&u->addr, addr);
896 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
897 spin_unlock(&unix_table_lock);
898 err = 0;
899
900out: mutex_unlock(&u->bindlock);
901 return err;
902}
903
904static struct sock *unix_find_other(struct net *net,
905 struct sockaddr_un *sunname, int len,
906 int type, unsigned int hash, int *error)
907{
908 struct sock *u;
909 struct path path;
910 int err = 0;
911
912 if (sunname->sun_path[0]) {
913 struct inode *inode;
914 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
915 if (err)
916 goto fail;
917 inode = d_backing_inode(path.dentry);
918 err = inode_permission(inode, MAY_WRITE);
919 if (err)
920 goto put_fail;
921
922 err = -ECONNREFUSED;
923 if (!S_ISSOCK(inode->i_mode))
924 goto put_fail;
925 u = unix_find_socket_byinode(inode);
926 if (!u)
927 goto put_fail;
928
929 if (u->sk_type == type)
930 touch_atime(&path);
931
932 path_put(&path);
933
934 err = -EPROTOTYPE;
935 if (u->sk_type != type) {
936 sock_put(u);
937 goto fail;
938 }
939 } else {
940 err = -ECONNREFUSED;
941 u = unix_find_socket_byname(net, sunname, len, type, hash);
942 if (u) {
943 struct dentry *dentry;
944 dentry = unix_sk(u)->path.dentry;
945 if (dentry)
946 touch_atime(&unix_sk(u)->path);
947 } else
948 goto fail;
949 }
950 return u;
951
952put_fail:
953 path_put(&path);
954fail:
955 *error = err;
956 return NULL;
957}
958
959static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
960{
961 struct dentry *dentry;
962 struct path path;
963 int err = 0;
964 /*
965 * Get the parent directory, calculate the hash for last
966 * component.
967 */
968 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
969 err = PTR_ERR(dentry);
970 if (IS_ERR(dentry))
971 return err;
972
973 /*
974 * All right, let's create it.
975 */
976 err = security_path_mknod(&path, dentry, mode, 0);
977 if (!err) {
978 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
979 if (!err) {
980 res->mnt = mntget(path.mnt);
981 res->dentry = dget(dentry);
982 }
983 }
984 done_path_create(&path, dentry);
985 return err;
986}
987
988static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
989{
990 struct sock *sk = sock->sk;
991 struct net *net = sock_net(sk);
992 struct unix_sock *u = unix_sk(sk);
993 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
994 char *sun_path = sunaddr->sun_path;
995 int err;
996 unsigned int hash;
997 struct unix_address *addr;
998 struct hlist_head *list;
999 struct path path = { };
1000
1001 err = -EINVAL;
1002 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1003 sunaddr->sun_family != AF_UNIX)
1004 goto out;
1005
1006 if (addr_len == sizeof(short)) {
1007 err = unix_autobind(sock);
1008 goto out;
1009 }
1010
1011 err = unix_mkname(sunaddr, addr_len, &hash);
1012 if (err < 0)
1013 goto out;
1014 addr_len = err;
1015
1016 if (sun_path[0]) {
1017 umode_t mode = S_IFSOCK |
1018 (SOCK_INODE(sock)->i_mode & ~current_umask());
1019 err = unix_mknod(sun_path, mode, &path);
1020 if (err) {
1021 if (err == -EEXIST)
1022 err = -EADDRINUSE;
1023 goto out;
1024 }
1025 }
1026
1027 err = mutex_lock_interruptible(&u->bindlock);
1028 if (err)
1029 goto out_put;
1030
1031 err = -EINVAL;
1032 if (u->addr)
1033 goto out_up;
1034
1035 err = -ENOMEM;
1036 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1037 if (!addr)
1038 goto out_up;
1039
1040 memcpy(addr->name, sunaddr, addr_len);
1041 addr->len = addr_len;
1042 addr->hash = hash ^ sk->sk_type;
1043 refcount_set(&addr->refcnt, 1);
1044
1045 if (sun_path[0]) {
1046 addr->hash = UNIX_HASH_SIZE;
1047 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1048 spin_lock(&unix_table_lock);
1049 u->path = path;
1050 list = &unix_socket_table[hash];
1051 } else {
1052 spin_lock(&unix_table_lock);
1053 err = -EADDRINUSE;
1054 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1055 sk->sk_type, hash)) {
1056 unix_release_addr(addr);
1057 goto out_unlock;
1058 }
1059
1060 list = &unix_socket_table[addr->hash];
1061 }
1062
1063 err = 0;
1064 __unix_remove_socket(sk);
1065 smp_store_release(&u->addr, addr);
1066 __unix_insert_socket(list, sk);
1067
1068out_unlock:
1069 spin_unlock(&unix_table_lock);
1070out_up:
1071 mutex_unlock(&u->bindlock);
1072out_put:
1073 if (err)
1074 path_put(&path);
1075out:
1076 return err;
1077}
1078
1079static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1080{
1081 if (unlikely(sk1 == sk2) || !sk2) {
1082 unix_state_lock(sk1);
1083 return;
1084 }
1085 if (sk1 < sk2) {
1086 unix_state_lock(sk1);
1087 unix_state_lock_nested(sk2);
1088 } else {
1089 unix_state_lock(sk2);
1090 unix_state_lock_nested(sk1);
1091 }
1092}
1093
1094static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1095{
1096 if (unlikely(sk1 == sk2) || !sk2) {
1097 unix_state_unlock(sk1);
1098 return;
1099 }
1100 unix_state_unlock(sk1);
1101 unix_state_unlock(sk2);
1102}
1103
1104static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1105 int alen, int flags)
1106{
1107 struct sock *sk = sock->sk;
1108 struct net *net = sock_net(sk);
1109 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1110 struct sock *other;
1111 unsigned int hash;
1112 int err;
1113
1114 err = -EINVAL;
1115 if (alen < offsetofend(struct sockaddr, sa_family))
1116 goto out;
1117
1118 if (addr->sa_family != AF_UNSPEC) {
1119 err = unix_mkname(sunaddr, alen, &hash);
1120 if (err < 0)
1121 goto out;
1122 alen = err;
1123
1124 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1125 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1126 goto out;
1127
1128restart:
1129 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1130 if (!other)
1131 goto out;
1132
1133 unix_state_double_lock(sk, other);
1134
1135 /* Apparently VFS overslept socket death. Retry. */
1136 if (sock_flag(other, SOCK_DEAD)) {
1137 unix_state_double_unlock(sk, other);
1138 sock_put(other);
1139 goto restart;
1140 }
1141
1142 err = -EPERM;
1143 if (!unix_may_send(sk, other))
1144 goto out_unlock;
1145
1146 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1147 if (err)
1148 goto out_unlock;
1149
1150 } else {
1151 /*
1152 * 1003.1g breaking connected state with AF_UNSPEC
1153 */
1154 other = NULL;
1155 unix_state_double_lock(sk, other);
1156 }
1157
1158 /*
1159 * If it was connected, reconnect.
1160 */
1161 if (unix_peer(sk)) {
1162 struct sock *old_peer = unix_peer(sk);
1163 unix_peer(sk) = other;
1164 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1165
1166 unix_state_double_unlock(sk, other);
1167
1168 if (other != old_peer)
1169 unix_dgram_disconnected(sk, old_peer);
1170 sock_put(old_peer);
1171 } else {
1172 unix_peer(sk) = other;
1173 unix_state_double_unlock(sk, other);
1174 }
1175 return 0;
1176
1177out_unlock:
1178 unix_state_double_unlock(sk, other);
1179 sock_put(other);
1180out:
1181 return err;
1182}
1183
1184static long unix_wait_for_peer(struct sock *other, long timeo)
1185{
1186 struct unix_sock *u = unix_sk(other);
1187 int sched;
1188 DEFINE_WAIT(wait);
1189
1190 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1191
1192 sched = !sock_flag(other, SOCK_DEAD) &&
1193 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1194 unix_recvq_full(other);
1195
1196 unix_state_unlock(other);
1197
1198 if (sched)
1199 timeo = schedule_timeout(timeo);
1200
1201 finish_wait(&u->peer_wait, &wait);
1202 return timeo;
1203}
1204
1205static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1206 int addr_len, int flags)
1207{
1208 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1209 struct sock *sk = sock->sk;
1210 struct net *net = sock_net(sk);
1211 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1212 struct sock *newsk = NULL;
1213 struct sock *other = NULL;
1214 struct sk_buff *skb = NULL;
1215 unsigned int hash;
1216 int st;
1217 int err;
1218 long timeo;
1219
1220 err = unix_mkname(sunaddr, addr_len, &hash);
1221 if (err < 0)
1222 goto out;
1223 addr_len = err;
1224
1225 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1226 (err = unix_autobind(sock)) != 0)
1227 goto out;
1228
1229 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1230
1231 /* First of all allocate resources.
1232 If we do it after the state is locked,
1233 we will have to recheck everything again in any case.
1234 */
1235
1236 err = -ENOMEM;
1237
1238 /* create new sock for complete connection */
1239 newsk = unix_create1(sock_net(sk), NULL, 0);
1240 if (newsk == NULL)
1241 goto out;
1242
1243 /* Allocate skb for sending to listening sock */
1244 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1245 if (skb == NULL)
1246 goto out;
1247
1248restart:
1249 /* Find listening sock. */
1250 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1251 if (!other)
1252 goto out;
1253
1254 /* Latch state of peer */
1255 unix_state_lock(other);
1256
1257 /* Apparently VFS overslept socket death. Retry. */
1258 if (sock_flag(other, SOCK_DEAD)) {
1259 unix_state_unlock(other);
1260 sock_put(other);
1261 goto restart;
1262 }
1263
1264 err = -ECONNREFUSED;
1265 if (other->sk_state != TCP_LISTEN)
1266 goto out_unlock;
1267 if (other->sk_shutdown & RCV_SHUTDOWN)
1268 goto out_unlock;
1269
1270 if (unix_recvq_full(other)) {
1271 err = -EAGAIN;
1272 if (!timeo)
1273 goto out_unlock;
1274
1275 timeo = unix_wait_for_peer(other, timeo);
1276
1277 err = sock_intr_errno(timeo);
1278 if (signal_pending(current))
1279 goto out;
1280 sock_put(other);
1281 goto restart;
1282 }
1283
1284 /* Latch our state.
1285
1286 This is a tricky place. We need to grab our state lock and cannot
1287 drop the lock on the peer. It is dangerous because deadlock is
1288 possible. The connect-to-self case and a simultaneous
1289 attempt to connect are eliminated by checking the socket
1290 state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1291 check this before attempting to grab the lock.
1292
1293 Well, and we have to recheck the state after the socket is locked.
1294 */
1295 st = sk->sk_state;
1296
1297 switch (st) {
1298 case TCP_CLOSE:
1299 /* This is ok... continue with connect */
1300 break;
1301 case TCP_ESTABLISHED:
1302 /* Socket is already connected */
1303 err = -EISCONN;
1304 goto out_unlock;
1305 default:
1306 err = -EINVAL;
1307 goto out_unlock;
1308 }
1309
1310 unix_state_lock_nested(sk);
1311
1312 if (sk->sk_state != st) {
1313 unix_state_unlock(sk);
1314 unix_state_unlock(other);
1315 sock_put(other);
1316 goto restart;
1317 }
1318
1319 err = security_unix_stream_connect(sk, other, newsk);
1320 if (err) {
1321 unix_state_unlock(sk);
1322 goto out_unlock;
1323 }
1324
1325 /* The way is open! Quickly set all the necessary fields... */
1326
1327 sock_hold(sk);
1328 unix_peer(newsk) = sk;
1329 newsk->sk_state = TCP_ESTABLISHED;
1330 newsk->sk_type = sk->sk_type;
1331 init_peercred(newsk);
1332 newu = unix_sk(newsk);
1333 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1334 otheru = unix_sk(other);
1335
1336 /* copy address information from listening to new sock
1337 *
1338 * The contents of *(otheru->addr) and otheru->path
1339 * are seen fully set up here, since we have found
1340 * otheru in hash under unix_table_lock. Insertion
1341 * into the hash chain we'd found it in had been done
1342 * in an earlier critical section protected by unix_table_lock,
1343 * the same one where we'd set *(otheru->addr) contents,
1344 * as well as otheru->path and otheru->addr itself.
1345 *
1346 * Using smp_store_release() here to set newu->addr
1347 * is enough to make those stores, as well as stores
1348 * to newu->path visible to anyone who gets newu->addr
1349 * by smp_load_acquire(). IOW, the same guarantees
1350 * as for unix_sock instances bound in unix_bind() or
1351 * in unix_autobind().
1352 */
1353 if (otheru->path.dentry) {
1354 path_get(&otheru->path);
1355 newu->path = otheru->path;
1356 }
1357 refcount_inc(&otheru->addr->refcnt);
1358 smp_store_release(&newu->addr, otheru->addr);
1359
1360 /* Set credentials */
1361 copy_peercred(sk, other);
1362
1363 sock->state = SS_CONNECTED;
1364 sk->sk_state = TCP_ESTABLISHED;
1365 sock_hold(newsk);
1366
1367 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1368 unix_peer(sk) = newsk;
1369
1370 unix_state_unlock(sk);
1371
1372 /* take ten and send info to the listening sock */
1373 spin_lock(&other->sk_receive_queue.lock);
1374 __skb_queue_tail(&other->sk_receive_queue, skb);
1375 spin_unlock(&other->sk_receive_queue.lock);
1376 unix_state_unlock(other);
1377 other->sk_data_ready(other);
1378 sock_put(other);
1379 return 0;
1380
1381out_unlock:
1382 if (other)
1383 unix_state_unlock(other);
1384
1385out:
1386 kfree_skb(skb);
1387 if (newsk)
1388 unix_release_sock(newsk, 0);
1389 if (other)
1390 sock_put(other);
1391 return err;
1392}
1393
1394static int unix_socketpair(struct socket *socka, struct socket *sockb)
1395{
1396 struct sock *ska = socka->sk, *skb = sockb->sk;
1397
1398 /* Join our sockets back to back */
1399 sock_hold(ska);
1400 sock_hold(skb);
1401 unix_peer(ska) = skb;
1402 unix_peer(skb) = ska;
1403 init_peercred(ska);
1404 init_peercred(skb);
1405
1406 if (ska->sk_type != SOCK_DGRAM) {
1407 ska->sk_state = TCP_ESTABLISHED;
1408 skb->sk_state = TCP_ESTABLISHED;
1409 socka->state = SS_CONNECTED;
1410 sockb->state = SS_CONNECTED;
1411 }
1412 return 0;
1413}
1414
1415static void unix_sock_inherit_flags(const struct socket *old,
1416 struct socket *new)
1417{
1418 if (test_bit(SOCK_PASSCRED, &old->flags))
1419 set_bit(SOCK_PASSCRED, &new->flags);
1420 if (test_bit(SOCK_PASSSEC, &old->flags))
1421 set_bit(SOCK_PASSSEC, &new->flags);
1422}
1423
1424static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1425 bool kern)
1426{
1427 struct sock *sk = sock->sk;
1428 struct sock *tsk;
1429 struct sk_buff *skb;
1430 int err;
1431
1432 err = -EOPNOTSUPP;
1433 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1434 goto out;
1435
1436 err = -EINVAL;
1437 if (sk->sk_state != TCP_LISTEN)
1438 goto out;
1439
1440 /* If socket state is TCP_LISTEN it cannot change (for now...),
1441 * so no locks are necessary.
1442 */
1443
1444 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1445 if (!skb) {
1446 /* This means receive shutdown. */
1447 if (err == 0)
1448 err = -EINVAL;
1449 goto out;
1450 }
1451
1452 tsk = skb->sk;
1453 skb_free_datagram(sk, skb);
1454 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1455
1456 /* attach accepted sock to socket */
1457 unix_state_lock(tsk);
1458 newsock->state = SS_CONNECTED;
1459 unix_sock_inherit_flags(sock, newsock);
1460 sock_graft(tsk, newsock);
1461 unix_state_unlock(tsk);
1462 return 0;
1463
1464out:
1465 return err;
1466}
1467
1468
1469static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1470{
1471 struct sock *sk = sock->sk;
1472 struct unix_address *addr;
1473 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1474 int err = 0;
1475
1476 if (peer) {
1477 sk = unix_peer_get(sk);
1478
1479 err = -ENOTCONN;
1480 if (!sk)
1481 goto out;
1482 err = 0;
1483 } else {
1484 sock_hold(sk);
1485 }
1486
1487 addr = smp_load_acquire(&unix_sk(sk)->addr);
1488 if (!addr) {
1489 sunaddr->sun_family = AF_UNIX;
1490 sunaddr->sun_path[0] = 0;
1491 *uaddr_len = sizeof(short);
1492 } else {
1493 *uaddr_len = addr->len;
1494 memcpy(sunaddr, addr->name, *uaddr_len);
1495 }
1496 sock_put(sk);
1497out:
1498 return err;
1499}
1500
1501static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1502{
1503 int i;
1504
1505 scm->fp = UNIXCB(skb).fp;
1506 UNIXCB(skb).fp = NULL;
1507
1508 for (i = scm->fp->count-1; i >= 0; i--)
1509 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1510}
1511
1512static void unix_destruct_scm(struct sk_buff *skb)
1513{
1514 struct scm_cookie scm;
1515 memset(&scm, 0, sizeof(scm));
1516 scm.pid = UNIXCB(skb).pid;
1517 if (UNIXCB(skb).fp)
1518 unix_detach_fds(&scm, skb);
1519
1520 /* Alas, it calls VFS */
1521 /* So fscking what? fput() had been SMP-safe since the last Summer */
1522 scm_destroy(&scm);
1523 sock_wfree(skb);
1524}
1525
1526/*
1527 * The "user->unix_inflight" variable is protected by the garbage
1528 * collection lock, and we just read it locklessly here. If you go
1529 * over the limit, there might be a tiny race in actually noticing
1530 * it across threads. Tough.
1531 */
1532static inline bool too_many_unix_fds(struct task_struct *p)
1533{
1534 struct user_struct *user = current_user();
1535
1536 if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1537 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1538 return false;
1539}
1540
1541static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1542{
1543 int i;
1544
1545 if (too_many_unix_fds(current))
1546 return -ETOOMANYREFS;
1547
1548 /*
1549 * Need to duplicate file references for the sake of garbage
1550 * collection. Otherwise a socket in the fps might become a
1551 * candidate for GC while the skb is not yet queued.
1552 */
1553 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1554 if (!UNIXCB(skb).fp)
1555 return -ENOMEM;
1556
1557 for (i = scm->fp->count - 1; i >= 0; i--)
1558 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1559 return 0;
1560}
1561
1562static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1563{
1564 int err = 0;
1565
1566 UNIXCB(skb).pid = get_pid(scm->pid);
1567 UNIXCB(skb).uid = scm->creds.uid;
1568 UNIXCB(skb).gid = scm->creds.gid;
1569 UNIXCB(skb).fp = NULL;
1570 unix_get_secdata(scm, skb);
1571 if (scm->fp && send_fds)
1572 err = unix_attach_fds(scm, skb);
1573
1574 skb->destructor = unix_destruct_scm;
1575 return err;
1576}
1577
1578static bool unix_passcred_enabled(const struct socket *sock,
1579 const struct sock *other)
1580{
1581 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1582 !other->sk_socket ||
1583 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1584}
1585
1586/*
1587 * Some apps rely on write() giving SCM_CREDENTIALS.
1588 * We include credentials if the source or destination socket
1589 * asserted SOCK_PASSCRED.
1590 */
1591static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1592 const struct sock *other)
1593{
1594 if (UNIXCB(skb).pid)
1595 return;
1596 if (unix_passcred_enabled(sock, other)) {
1597 UNIXCB(skb).pid = get_pid(task_tgid(current));
1598 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1599 }
1600}
1601
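/* Hedged userspace sketch (editorial addition): what the credentials
 * attached above look like to a receiver that opted in with
 * SO_PASSCRED.  The function name is invented; the cmsg handling
 * follows the standard SCM_CREDENTIALS layout.
 */
#if 0	/* example only, never compiled */
#define _GNU_SOURCE		/* for struct ucred / SCM_CREDENTIALS */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void recv_creds_example(int fd)
{
	char data[256], cbuf[CMSG_SPACE(sizeof(struct ucred))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	struct ucred uc;
	int on = 1;

	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
	if (recvmsg(fd, &msg, 0) < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level != SOL_SOCKET ||
		    cmsg->cmsg_type != SCM_CREDENTIALS)
			continue;
		memcpy(&uc, CMSG_DATA(cmsg), sizeof(uc));
		printf("pid=%d uid=%d gid=%d\n",
		       (int)uc.pid, (int)uc.uid, (int)uc.gid);
	}
}
#endif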
1602static int maybe_init_creds(struct scm_cookie *scm,
1603 struct socket *socket,
1604 const struct sock *other)
1605{
1606 int err;
1607 struct msghdr msg = { .msg_controllen = 0 };
1608
1609 err = scm_send(socket, &msg, scm, false);
1610 if (err)
1611 return err;
1612
1613 if (unix_passcred_enabled(socket, other)) {
1614 scm->pid = get_pid(task_tgid(current));
1615 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1616 }
1617 return err;
1618}
1619
1620static bool unix_skb_scm_eq(struct sk_buff *skb,
1621 struct scm_cookie *scm)
1622{
1623 const struct unix_skb_parms *u = &UNIXCB(skb);
1624
1625 return u->pid == scm->pid &&
1626 uid_eq(u->uid, scm->creds.uid) &&
1627 gid_eq(u->gid, scm->creds.gid) &&
1628 unix_secdata_eq(scm, skb);
1629}
1630
1631/*
1632 * Send AF_UNIX data.
1633 */
1634
1635static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1636 size_t len)
1637{
1638 struct sock *sk = sock->sk;
1639 struct net *net = sock_net(sk);
1640 struct unix_sock *u = unix_sk(sk);
1641 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1642 struct sock *other = NULL;
1643 int namelen = 0; /* fake GCC */
1644 int err;
1645 unsigned int hash;
1646 struct sk_buff *skb;
1647 long timeo;
1648 struct scm_cookie scm;
1649 int data_len = 0;
1650 int sk_locked;
1651
1652 wait_for_unix_gc();
1653 err = scm_send(sock, msg, &scm, false);
1654 if (err < 0)
1655 return err;
1656
1657 err = -EOPNOTSUPP;
1658 if (msg->msg_flags&MSG_OOB)
1659 goto out;
1660
1661 if (msg->msg_namelen) {
1662 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1663 if (err < 0)
1664 goto out;
1665 namelen = err;
1666 } else {
1667 sunaddr = NULL;
1668 err = -ENOTCONN;
1669 other = unix_peer_get(sk);
1670 if (!other)
1671 goto out;
1672 }
1673
1674 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1675 && (err = unix_autobind(sock)) != 0)
1676 goto out;
1677
1678 err = -EMSGSIZE;
1679 if (len > sk->sk_sndbuf - 32)
1680 goto out;
1681
1682 if (len > SKB_MAX_ALLOC) {
1683 data_len = min_t(size_t,
1684 len - SKB_MAX_ALLOC,
1685 MAX_SKB_FRAGS * PAGE_SIZE);
1686 data_len = PAGE_ALIGN(data_len);
1687
1688 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1689 }
1690
1691 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1692 msg->msg_flags & MSG_DONTWAIT, &err,
1693 PAGE_ALLOC_COSTLY_ORDER);
1694 if (skb == NULL)
1695 goto out;
1696
1697 err = unix_scm_to_skb(&scm, skb, true);
1698 if (err < 0)
1699 goto out_free;
1700
1701 skb_put(skb, len - data_len);
1702 skb->data_len = data_len;
1703 skb->len = len;
1704 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1705 if (err)
1706 goto out_free;
1707
1708 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1709
1710restart:
1711 if (!other) {
1712 err = -ECONNRESET;
1713 if (sunaddr == NULL)
1714 goto out_free;
1715
1716 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1717 hash, &err);
1718 if (other == NULL)
1719 goto out_free;
1720 }
1721
1722 if (sk_filter(other, skb) < 0) {
1723 /* Toss the packet but do not return any error to the sender */
1724 err = len;
1725 goto out_free;
1726 }
1727
1728 sk_locked = 0;
1729 unix_state_lock(other);
1730restart_locked:
1731 err = -EPERM;
1732 if (!unix_may_send(sk, other))
1733 goto out_unlock;
1734
1735 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1736 /*
1737 * Check with 1003.1g - what should a
1738 * datagram error return here?
1739 */
1740 unix_state_unlock(other);
1741 sock_put(other);
1742
1743 if (!sk_locked)
1744 unix_state_lock(sk);
1745
1746 err = 0;
1747 if (unix_peer(sk) == other) {
1748 unix_peer(sk) = NULL;
1749 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1750
1751 unix_state_unlock(sk);
1752
1753 unix_dgram_disconnected(sk, other);
1754 sock_put(other);
1755 err = -ECONNREFUSED;
1756 } else {
1757 unix_state_unlock(sk);
1758 }
1759
1760 other = NULL;
1761 if (err)
1762 goto out_free;
1763 goto restart;
1764 }
1765
1766 err = -EPIPE;
1767 if (other->sk_shutdown & RCV_SHUTDOWN)
1768 goto out_unlock;
1769
1770 if (sk->sk_type != SOCK_SEQPACKET) {
1771 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1772 if (err)
1773 goto out_unlock;
1774 }
1775
1776 /* other == sk && unix_peer(other) != sk if
1777 * - unix_peer(sk) == NULL, destination address bound to sk
1778 * - unix_peer(sk) == sk by time of get but disconnected before lock
1779 */
1780 if (other != sk &&
1781 unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1782 if (timeo) {
1783 timeo = unix_wait_for_peer(other, timeo);
1784
1785 err = sock_intr_errno(timeo);
1786 if (signal_pending(current))
1787 goto out_free;
1788
1789 goto restart;
1790 }
1791
1792 if (!sk_locked) {
1793 unix_state_unlock(other);
1794 unix_state_double_lock(sk, other);
1795 }
1796
1797 if (unix_peer(sk) != other ||
1798 unix_dgram_peer_wake_me(sk, other)) {
1799 err = -EAGAIN;
1800 sk_locked = 1;
1801 goto out_unlock;
1802 }
1803
1804 if (!sk_locked) {
1805 sk_locked = 1;
1806 goto restart_locked;
1807 }
1808 }
1809
1810 if (unlikely(sk_locked))
1811 unix_state_unlock(sk);
1812
1813 if (sock_flag(other, SOCK_RCVTSTAMP))
1814 __net_timestamp(skb);
1815 maybe_add_creds(skb, sock, other);
1816 skb_queue_tail(&other->sk_receive_queue, skb);
1817 unix_state_unlock(other);
1818 other->sk_data_ready(other);
1819 sock_put(other);
1820 scm_destroy(&scm);
1821 return len;
1822
1823out_unlock:
1824 if (sk_locked)
1825 unix_state_unlock(sk);
1826 unix_state_unlock(other);
1827out_free:
1828 kfree_skb(skb);
1829out:
1830 if (other)
1831 sock_put(other);
1832 scm_destroy(&scm);
1833 return err;
1834}
1835
1836/* We use paged skbs for stream sockets, and limit occupancy to 32768
1837 * bytes, and a minimum of a full page.
1838 */
1839#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1840
1841static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1842 size_t len)
1843{
1844 struct sock *sk = sock->sk;
1845 struct sock *other = NULL;
1846 int err, size;
1847 struct sk_buff *skb;
1848 int sent = 0;
1849 struct scm_cookie scm;
1850 bool fds_sent = false;
1851 int data_len;
1852
1853 wait_for_unix_gc();
1854 err = scm_send(sock, msg, &scm, false);
1855 if (err < 0)
1856 return err;
1857
1858 err = -EOPNOTSUPP;
1859 if (msg->msg_flags&MSG_OOB)
1860 goto out_err;
1861
1862 if (msg->msg_namelen) {
1863 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1864 goto out_err;
1865 } else {
1866 err = -ENOTCONN;
1867 other = unix_peer(sk);
1868 if (!other)
1869 goto out_err;
1870 }
1871
1872 if (sk->sk_shutdown & SEND_SHUTDOWN)
1873 goto pipe_err;
1874
1875 while (sent < len) {
1876 size = len - sent;
1877
1878 /* Keep two messages in the pipe so it schedules better */
1879 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1880
1881 /* allow fallback to order-0 allocations */
1882 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1883
1884 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1885
1886 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1887
1888 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1889 msg->msg_flags & MSG_DONTWAIT, &err,
1890 get_order(UNIX_SKB_FRAGS_SZ));
1891 if (!skb)
1892 goto out_err;
1893
1894 /* Only send the fds in the first buffer */
1895 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1896 if (err < 0) {
1897 kfree_skb(skb);
1898 goto out_err;
1899 }
1900 fds_sent = true;
1901
1902 skb_put(skb, size - data_len);
1903 skb->data_len = data_len;
1904 skb->len = size;
1905 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1906 if (err) {
1907 kfree_skb(skb);
1908 goto out_err;
1909 }
1910
1911 unix_state_lock(other);
1912
1913 if (sock_flag(other, SOCK_DEAD) ||
1914 (other->sk_shutdown & RCV_SHUTDOWN))
1915 goto pipe_err_free;
1916
1917 maybe_add_creds(skb, sock, other);
1918 skb_queue_tail(&other->sk_receive_queue, skb);
1919 unix_state_unlock(other);
1920 other->sk_data_ready(other);
1921 sent += size;
1922 }
1923
1924 scm_destroy(&scm);
1925
1926 return sent;
1927
1928pipe_err_free:
1929 unix_state_unlock(other);
1930 kfree_skb(skb);
1931pipe_err:
1932 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1933 send_sig(SIGPIPE, current, 0);
1934 err = -EPIPE;
1935out_err:
1936 scm_destroy(&scm);
1937 return sent ? : err;
1938}
1939
1940static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1941 int offset, size_t size, int flags)
1942{
1943 int err;
1944 bool send_sigpipe = false;
1945 bool init_scm = true;
1946 struct scm_cookie scm;
1947 struct sock *other, *sk = socket->sk;
1948 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1949
1950 if (flags & MSG_OOB)
1951 return -EOPNOTSUPP;
1952
1953 other = unix_peer(sk);
1954 if (!other || sk->sk_state != TCP_ESTABLISHED)
1955 return -ENOTCONN;
1956
1957 if (false) {
1958alloc_skb:
1959 unix_state_unlock(other);
1960 mutex_unlock(&unix_sk(other)->iolock);
1961 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1962 &err, 0);
1963 if (!newskb)
1964 goto err;
1965 }
1966
1967 /* we must acquire iolock as we modify already present
1968 * skbs in the sk_receive_queue and mess with skb->len
1969 */
1970 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1971 if (err) {
1972 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1973 goto err;
1974 }
1975
1976 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1977 err = -EPIPE;
1978 send_sigpipe = true;
1979 goto err_unlock;
1980 }
1981
1982 unix_state_lock(other);
1983
1984 if (sock_flag(other, SOCK_DEAD) ||
1985 other->sk_shutdown & RCV_SHUTDOWN) {
1986 err = -EPIPE;
1987 send_sigpipe = true;
1988 goto err_state_unlock;
1989 }
1990
1991 if (init_scm) {
1992 err = maybe_init_creds(&scm, socket, other);
1993 if (err)
1994 goto err_state_unlock;
1995 init_scm = false;
1996 }
1997
1998 skb = skb_peek_tail(&other->sk_receive_queue);
1999 if (tail && tail == skb) {
2000 skb = newskb;
2001 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2002 if (newskb) {
2003 skb = newskb;
2004 } else {
2005 tail = skb;
2006 goto alloc_skb;
2007 }
2008 } else if (newskb) {
2009 /* this is the fast path; we don't necessarily need to
2010 * call kfree_skb() even though with newskb == NULL
2011 * this does no harm
2012 */
2013 consume_skb(newskb);
2014 newskb = NULL;
2015 }
2016
2017 if (skb_append_pagefrags(skb, page, offset, size)) {
2018 tail = skb;
2019 goto alloc_skb;
2020 }
2021
2022 skb->len += size;
2023 skb->data_len += size;
2024 skb->truesize += size;
2025 refcount_add(size, &sk->sk_wmem_alloc);
2026
2027 if (newskb) {
2028 err = unix_scm_to_skb(&scm, skb, false);
2029 if (err)
2030 goto err_state_unlock;
2031 spin_lock(&other->sk_receive_queue.lock);
2032 __skb_queue_tail(&other->sk_receive_queue, newskb);
2033 spin_unlock(&other->sk_receive_queue.lock);
2034 }
2035
2036 unix_state_unlock(other);
2037 mutex_unlock(&unix_sk(other)->iolock);
2038
2039 other->sk_data_ready(other);
2040 scm_destroy(&scm);
2041 return size;
2042
2043err_state_unlock:
2044 unix_state_unlock(other);
2045err_unlock:
2046 mutex_unlock(&unix_sk(other)->iolock);
2047err:
2048 kfree_skb(newskb);
2049 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2050 send_sig(SIGPIPE, current, 0);
2051 if (!init_scm)
2052 scm_destroy(&scm);
2053 return err;
2054}
2055
2056static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2057 size_t len)
2058{
2059 int err;
2060 struct sock *sk = sock->sk;
2061
2062 err = sock_error(sk);
2063 if (err)
2064 return err;
2065
2066 if (sk->sk_state != TCP_ESTABLISHED)
2067 return -ENOTCONN;
2068
2069 if (msg->msg_namelen)
2070 msg->msg_namelen = 0;
2071
2072 return unix_dgram_sendmsg(sock, msg, len);
2073}
2074
2075static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2076 size_t size, int flags)
2077{
2078 struct sock *sk = sock->sk;
2079
2080 if (sk->sk_state != TCP_ESTABLISHED)
2081 return -ENOTCONN;
2082
2083 return unix_dgram_recvmsg(sock, msg, size, flags);
2084}
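
/*
 * Userspace illustration (not part of this file): SOCK_SEQPACKET on
 * AF_UNIX reuses the datagram send/receive paths above but requires a
 * connection and preserves record boundaries. A minimal sketch.
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char buf[64];

	if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv) < 0)
		return 1;

	/* Two writes stay two distinct records; they are never coalesced. */
	send(sv[0], "first", 5, 0);
	send(sv[0], "second", 6, 0);

	printf("record 1: %zd bytes\n", recv(sv[1], buf, sizeof(buf), 0));
	printf("record 2: %zd bytes\n", recv(sv[1], buf, sizeof(buf), 0));
	return 0;
}
#endif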
2085
2086static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2087{
2088 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2089
2090 if (addr) {
2091 msg->msg_namelen = addr->len;
2092 memcpy(msg->msg_name, addr->name, addr->len);
2093 }
2094}
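
/*
 * Userspace illustration (not part of this file): the address copied
 * out above is exactly addr->len bytes, so for "abstract" sockets the
 * leading NUL and the bound length are both visible to recvmsg()
 * callers. A minimal sketch of binding such a name.
 */
#if 0
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

int main(void)
{
	struct sockaddr_un sun;
	socklen_t len;
	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	/* Abstract name: leading NUL, not NUL-terminated; the length
	 * passed to bind() determines where the name ends.
	 */
	memcpy(sun.sun_path, "\0demo", 5);
	len = offsetof(struct sockaddr_un, sun_path) + 5;
	return bind(fd, (struct sockaddr *)&sun, len) ? 1 : 0;
}
#endif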
2095
2096static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2097 size_t size, int flags)
2098{
2099 struct scm_cookie scm;
2100 struct sock *sk = sock->sk;
2101 struct unix_sock *u = unix_sk(sk);
2102 struct sk_buff *skb, *last;
2103 long timeo;
2104 int err;
2105 int peeked, skip;
2106
2107 err = -EOPNOTSUPP;
2108 if (flags&MSG_OOB)
2109 goto out;
2110
2111 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2112
2113 do {
2114 mutex_lock(&u->iolock);
2115
2116 skip = sk_peek_offset(sk, flags);
2117 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2118 &err, &last);
2119 if (skb)
2120 break;
2121
2122 mutex_unlock(&u->iolock);
2123
2124 if (err != -EAGAIN)
2125 break;
2126 } while (timeo &&
2127 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2128
2129 if (!skb) { /* implies iolock unlocked */
2130 unix_state_lock(sk);
2131 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2132 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2133 (sk->sk_shutdown & RCV_SHUTDOWN))
2134 err = 0;
2135 unix_state_unlock(sk);
2136 goto out;
2137 }
2138
2139 if (wq_has_sleeper(&u->peer_wait))
2140 wake_up_interruptible_sync_poll(&u->peer_wait,
2141 POLLOUT | POLLWRNORM |
2142 POLLWRBAND);
2143
2144 if (msg->msg_name)
2145 unix_copy_addr(msg, skb->sk);
2146
2147 if (size > skb->len - skip)
2148 size = skb->len - skip;
2149 else if (size < skb->len - skip)
2150 msg->msg_flags |= MSG_TRUNC;
2151
2152 err = skb_copy_datagram_msg(skb, skip, msg, size);
2153 if (err)
2154 goto out_free;
2155
2156 if (sock_flag(sk, SOCK_RCVTSTAMP))
2157 __sock_recv_timestamp(msg, sk, skb);
2158
2159 memset(&scm, 0, sizeof(scm));
2160
2161 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2162 unix_set_secdata(&scm, skb);
2163
2164 if (!(flags & MSG_PEEK)) {
2165 if (UNIXCB(skb).fp)
2166 unix_detach_fds(&scm, skb);
2167
2168 sk_peek_offset_bwd(sk, skb->len);
2169 } else {
		/* It is questionable what to do on PEEK. We could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (chosen here, as the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly, however!
		*/
2182
2183 sk_peek_offset_fwd(sk, size);
2184
2185 if (UNIXCB(skb).fp)
2186 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2187 }
2188 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2189
2190 scm_recv(sock, msg, &scm, flags);
2191
2192out_free:
2193 skb_free_datagram(sk, skb);
2194 mutex_unlock(&u->iolock);
2195out:
2196 return err;
2197}
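
/*
 * Userspace illustration (not part of this file) of the PEEK semantics
 * chosen above: peeking a datagram that carries SCM_RIGHTS clones the
 * file descriptors, so every MSG_PEEK installs a fresh fd. A minimal
 * sketch with most error handling elided.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

static void send_one_fd(int sock, int fd)
{
	char byte = 'x';
	struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_RIGHTS;
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cm), &fd, sizeof(int));
	sendmsg(sock, &msg, 0);
}

static int recv_one_fd(int sock, int flags)
{
	char byte;
	struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;
	int fd = -1;

	if (recvmsg(sock, &msg, flags) < 0)
		return -1;
	cm = CMSG_FIRSTHDR(&msg);
	if (cm && cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS)
		memcpy(&fd, CMSG_DATA(cm), sizeof(int));
	return fd;
}

int main(void)
{
	int sv[2];

	socketpair(AF_UNIX, SOCK_DGRAM, 0, sv);
	send_one_fd(sv[0], 0 /* stdin */);

	/* Each peek clones UNIXCB(skb).fp, yielding a new descriptor. */
	printf("peek 1 -> fd %d\n", recv_one_fd(sv[1], MSG_PEEK));
	printf("peek 2 -> fd %d\n", recv_one_fd(sv[1], MSG_PEEK));
	printf("read   -> fd %d\n", recv_one_fd(sv[1], 0));
	return 0;
}
#endif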
2198
/*
 * Sleep until more data has arrived. But check for races.
 */
2202static long unix_stream_data_wait(struct sock *sk, long timeo,
2203 struct sk_buff *last, unsigned int last_len,
2204 bool freezable)
2205{
2206 struct sk_buff *tail;
2207 DEFINE_WAIT(wait);
2208
2209 unix_state_lock(sk);
2210
2211 for (;;) {
2212 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2213
2214 tail = skb_peek_tail(&sk->sk_receive_queue);
2215 if (tail != last ||
2216 (tail && tail->len != last_len) ||
2217 sk->sk_err ||
2218 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2219 signal_pending(current) ||
2220 !timeo)
2221 break;
2222
2223 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2224 unix_state_unlock(sk);
2225 if (freezable)
2226 timeo = freezable_schedule_timeout(timeo);
2227 else
2228 timeo = schedule_timeout(timeo);
2229 unix_state_lock(sk);
2230
2231 if (sock_flag(sk, SOCK_DEAD))
2232 break;
2233
2234 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2235 }
2236
2237 finish_wait(sk_sleep(sk), &wait);
2238 unix_state_unlock(sk);
2239 return timeo;
2240}
2241
2242static unsigned int unix_skb_len(const struct sk_buff *skb)
2243{
2244 return skb->len - UNIXCB(skb).consumed;
2245}
2246
2247struct unix_stream_read_state {
2248 int (*recv_actor)(struct sk_buff *, int, int,
2249 struct unix_stream_read_state *);
2250 struct socket *socket;
2251 struct msghdr *msg;
2252 struct pipe_inode_info *pipe;
2253 size_t size;
2254 int flags;
2255 unsigned int splice_flags;
2256};
2257
2258static int unix_stream_read_generic(struct unix_stream_read_state *state,
2259 bool freezable)
2260{
2261 struct scm_cookie scm;
2262 struct socket *sock = state->socket;
2263 struct sock *sk = sock->sk;
2264 struct unix_sock *u = unix_sk(sk);
2265 int copied = 0;
2266 int flags = state->flags;
2267 int noblock = flags & MSG_DONTWAIT;
2268 bool check_creds = false;
2269 int target;
2270 int err = 0;
2271 long timeo;
2272 int skip;
2273 size_t size = state->size;
2274 unsigned int last_len;
2275
2276 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2277 err = -EINVAL;
2278 goto out;
2279 }
2280
2281 if (unlikely(flags & MSG_OOB)) {
2282 err = -EOPNOTSUPP;
2283 goto out;
2284 }
2285
2286 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2287 timeo = sock_rcvtimeo(sk, noblock);
2288
2289 memset(&scm, 0, sizeof(scm));
2290
	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg().
	 */
2294 mutex_lock(&u->iolock);
2295
2296 skip = max(sk_peek_offset(sk, flags), 0);
2297
2298 do {
2299 int chunk;
2300 bool drop_skb;
2301 struct sk_buff *skb, *last;
2302
2303redo:
2304 unix_state_lock(sk);
2305 if (sock_flag(sk, SOCK_DEAD)) {
2306 err = -ECONNRESET;
2307 goto unlock;
2308 }
2309 last = skb = skb_peek(&sk->sk_receive_queue);
2310 last_len = last ? last->len : 0;
2311again:
2312 if (skb == NULL) {
2313 if (copied >= target)
2314 goto unlock;
2315
2316 /*
2317 * POSIX 1003.1g mandates this order.
2318 */
2319
2320 err = sock_error(sk);
2321 if (err)
2322 goto unlock;
2323 if (sk->sk_shutdown & RCV_SHUTDOWN)
2324 goto unlock;
2325
2326 unix_state_unlock(sk);
2327 if (!timeo) {
2328 err = -EAGAIN;
2329 break;
2330 }
2331
2332 mutex_unlock(&u->iolock);
2333
2334 timeo = unix_stream_data_wait(sk, timeo, last,
2335 last_len, freezable);
2336
2337 if (signal_pending(current)) {
2338 err = sock_intr_errno(timeo);
2339 scm_destroy(&scm);
2340 goto out;
2341 }
2342
2343 mutex_lock(&u->iolock);
2344 goto redo;
2345unlock:
2346 unix_state_unlock(sk);
2347 break;
2348 }
2349
2350 while (skip >= unix_skb_len(skb)) {
2351 skip -= unix_skb_len(skb);
2352 last = skb;
2353 last_len = skb->len;
2354 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2355 if (!skb)
2356 goto again;
2357 }
2358
2359 unix_state_unlock(sk);
2360
2361 if (check_creds) {
2362 /* Never glue messages from different writers */
2363 if (!unix_skb_scm_eq(skb, &scm))
2364 break;
2365 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2366 /* Copy credentials */
2367 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2368 unix_set_secdata(&scm, skb);
2369 check_creds = true;
2370 }
2371
2372 /* Copy address just once */
2373 if (state->msg && state->msg->msg_name) {
2374 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2375 state->msg->msg_name);
2376 unix_copy_addr(state->msg, skb->sk);
2377 sunaddr = NULL;
2378 }
2379
2380 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2381 skb_get(skb);
2382 chunk = state->recv_actor(skb, skip, chunk, state);
2383 drop_skb = !unix_skb_len(skb);
2384 /* skb is only safe to use if !drop_skb */
2385 consume_skb(skb);
2386 if (chunk < 0) {
2387 if (copied == 0)
2388 copied = -EFAULT;
2389 break;
2390 }
2391 copied += chunk;
2392 size -= chunk;
2393
2394 if (drop_skb) {
			/* The skb was consumed by a concurrent reader, so
			 * nothing more can be expected from it; treat it
			 * as invalid - it has certainly been dropped from
			 * the socket queue by now.
			 *
			 * Report a short read.
			 */
2402 err = 0;
2403 break;
2404 }
2405
2406 /* Mark read part of skb as used */
2407 if (!(flags & MSG_PEEK)) {
2408 UNIXCB(skb).consumed += chunk;
2409
2410 sk_peek_offset_bwd(sk, chunk);
2411
2412 if (UNIXCB(skb).fp)
2413 unix_detach_fds(&scm, skb);
2414
2415 if (unix_skb_len(skb))
2416 break;
2417
2418 skb_unlink(skb, &sk->sk_receive_queue);
2419 consume_skb(skb);
2420
2421 if (scm.fp)
2422 break;
2423 } else {
2424 /* It is questionable, see note in unix_dgram_recvmsg.
2425 */
2426 if (UNIXCB(skb).fp)
2427 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2428
2429 sk_peek_offset_fwd(sk, chunk);
2430
2431 if (UNIXCB(skb).fp)
2432 break;
2433
2434 skip = 0;
2435 last = skb;
2436 last_len = skb->len;
2437 unix_state_lock(sk);
2438 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2439 if (skb)
2440 goto again;
2441 unix_state_unlock(sk);
2442 break;
2443 }
2444 } while (size);
2445
2446 mutex_unlock(&u->iolock);
2447 if (state->msg)
2448 scm_recv(sock, state->msg, &scm, flags);
2449 else
2450 scm_destroy(&scm);
2451out:
2452 return copied ? : err;
2453}
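
/*
 * Userspace illustration (not part of this file): the sk_peek_offset()
 * handling above is what SO_PEEK_OFF exposes - successive MSG_PEEKs
 * walk forward through the stream, and a real read moves the offset
 * back by the amount consumed. A minimal sketch.
 */
#if 0
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2], off = 0;
	char buf[4];

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	send(sv[0], "abcdef", 6, 0);

	/* Ask the kernel to maintain a peek offset for this socket. */
	setsockopt(sv[1], SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));

	recv(sv[1], buf, 3, MSG_PEEK);	/* "abc", offset becomes 3 */
	recv(sv[1], buf, 3, MSG_PEEK);	/* "def", offset becomes 6 */
	recv(sv[1], buf, 3, 0);		/* consumes "abc", offset back to 3 */
	return 0;
}
#endif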
2454
2455static int unix_stream_read_actor(struct sk_buff *skb,
2456 int skip, int chunk,
2457 struct unix_stream_read_state *state)
2458{
2459 int ret;
2460
2461 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2462 state->msg, chunk);
2463 return ret ?: chunk;
2464}
2465
2466static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2467 size_t size, int flags)
2468{
2469 struct unix_stream_read_state state = {
2470 .recv_actor = unix_stream_read_actor,
2471 .socket = sock,
2472 .msg = msg,
2473 .size = size,
2474 .flags = flags
2475 };
2476
2477 return unix_stream_read_generic(&state, true);
2478}
2479
2480static int unix_stream_splice_actor(struct sk_buff *skb,
2481 int skip, int chunk,
2482 struct unix_stream_read_state *state)
2483{
2484 return skb_splice_bits(skb, state->socket->sk,
2485 UNIXCB(skb).consumed + skip,
2486 state->pipe, chunk, state->splice_flags);
2487}
2488
2489static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2490 struct pipe_inode_info *pipe,
2491 size_t size, unsigned int flags)
2492{
2493 struct unix_stream_read_state state = {
2494 .recv_actor = unix_stream_splice_actor,
2495 .socket = sock,
2496 .pipe = pipe,
2497 .size = size,
2498 .splice_flags = flags,
2499 };
2500
2501 if (unlikely(*ppos))
2502 return -ESPIPE;
2503
2504 if (sock->file->f_flags & O_NONBLOCK ||
2505 flags & SPLICE_F_NONBLOCK)
2506 state.flags = MSG_DONTWAIT;
2507
2508 return unix_stream_read_generic(&state, false);
2509}
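
/*
 * Userspace illustration (not part of this file): splice(2) with an
 * AF_UNIX stream socket as input is serviced by
 * unix_stream_splice_read() above. A minimal sketch moving queued
 * bytes into a pipe without a userspace copy.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2], pfd[2];
	ssize_t n;

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	pipe(pfd);
	send(sv[0], "spliced!", 8, 0);

	/* Offsets must be NULL: sockets have no file position (-ESPIPE). */
	n = splice(sv[1], NULL, pfd[1], NULL, 8, SPLICE_F_NONBLOCK);
	printf("spliced %zd bytes\n", n);
	return 0;
}
#endif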
2510
2511static int unix_shutdown(struct socket *sock, int mode)
2512{
2513 struct sock *sk = sock->sk;
2514 struct sock *other;
2515
2516 if (mode < SHUT_RD || mode > SHUT_RDWR)
2517 return -EINVAL;
2518 /* This maps:
2519 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2520 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2521 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2522 */
2523 ++mode;
2524
2525 unix_state_lock(sk);
2526 sk->sk_shutdown |= mode;
2527 other = unix_peer(sk);
2528 if (other)
2529 sock_hold(other);
2530 unix_state_unlock(sk);
2531 sk->sk_state_change(sk);
2532
2533 if (other &&
2534 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2535
2536 int peer_mode = 0;
2537
2538 if (mode&RCV_SHUTDOWN)
2539 peer_mode |= SEND_SHUTDOWN;
2540 if (mode&SEND_SHUTDOWN)
2541 peer_mode |= RCV_SHUTDOWN;
2542 unix_state_lock(other);
2543 other->sk_shutdown |= peer_mode;
2544 unix_state_unlock(other);
2545 other->sk_state_change(other);
2546 if (peer_mode == SHUTDOWN_MASK)
2547 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2548 else if (peer_mode & RCV_SHUTDOWN)
2549 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2550 }
2551 if (other)
2552 sock_put(other);
2553
2554 return 0;
2555}
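
/*
 * Userspace illustration (not part of this file): shutdown(SHUT_WR) on
 * one end sets SEND_SHUTDOWN locally and, as unix_shutdown() above
 * propagates it, RCV_SHUTDOWN on the peer - which then sees POLLRDHUP
 * and EOF. A minimal sketch.
 */
#if 0
#define _GNU_SOURCE
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char c;
	struct pollfd pfd;

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	shutdown(sv[0], SHUT_WR);

	pfd.fd = sv[1];
	pfd.events = POLLIN | POLLRDHUP;
	poll(&pfd, 1, 0);
	printf("revents=%#x (POLLRDHUP expected)\n", pfd.revents);
	printf("read -> %zd (0 == EOF)\n", read(sv[1], &c, 1));
	return 0;
}
#endif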
2556
2557long unix_inq_len(struct sock *sk)
2558{
2559 struct sk_buff *skb;
2560 long amount = 0;
2561
2562 if (sk->sk_state == TCP_LISTEN)
2563 return -EINVAL;
2564
2565 spin_lock(&sk->sk_receive_queue.lock);
2566 if (sk->sk_type == SOCK_STREAM ||
2567 sk->sk_type == SOCK_SEQPACKET) {
2568 skb_queue_walk(&sk->sk_receive_queue, skb)
2569 amount += unix_skb_len(skb);
2570 } else {
2571 skb = skb_peek(&sk->sk_receive_queue);
2572 if (skb)
2573 amount = skb->len;
2574 }
2575 spin_unlock(&sk->sk_receive_queue.lock);
2576
2577 return amount;
2578}
2579EXPORT_SYMBOL_GPL(unix_inq_len);
2580
2581long unix_outq_len(struct sock *sk)
2582{
2583 return sk_wmem_alloc_get(sk);
2584}
2585EXPORT_SYMBOL_GPL(unix_outq_len);
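
/*
 * Userspace illustration (not part of this file): unix_inq_len() and
 * unix_outq_len() back the SIOCINQ and SIOCOUTQ ioctls. A minimal
 * sketch.
 */
#if 0
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>	/* SIOCINQ, SIOCOUTQ */

int main(void)
{
	int sv[2], inq = 0, outq = 0;

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	send(sv[0], "hello", 5, 0);

	ioctl(sv[1], SIOCINQ, &inq);	/* unread bytes: 5 */
	ioctl(sv[0], SIOCOUTQ, &outq);	/* sk_wmem_alloc of the sender */
	printf("inq=%d outq=%d\n", inq, outq);
	return 0;
}
#endif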
2586
2587static int unix_open_file(struct sock *sk)
2588{
2589 struct path path;
2590 struct file *f;
2591 int fd;
2592
2593 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2594 return -EPERM;
2595
2596 if (!smp_load_acquire(&unix_sk(sk)->addr))
2597 return -ENOENT;
2598
2599 path = unix_sk(sk)->path;
2600 if (!path.dentry)
2601 return -ENOENT;
2602
2603 path_get(&path);
2604
2605 fd = get_unused_fd_flags(O_CLOEXEC);
2606 if (fd < 0)
2607 goto out;
2608
2609 f = dentry_open(&path, O_PATH, current_cred());
2610 if (IS_ERR(f)) {
2611 put_unused_fd(fd);
2612 fd = PTR_ERR(f);
2613 goto out;
2614 }
2615
2616 fd_install(fd, f);
2617out:
2618 path_put(&path);
2619
2620 return fd;
2621}
2622
2623static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2624{
2625 struct sock *sk = sock->sk;
2626 long amount = 0;
2627 int err;
2628
2629 switch (cmd) {
2630 case SIOCOUTQ:
2631 amount = unix_outq_len(sk);
2632 err = put_user(amount, (int __user *)arg);
2633 break;
2634 case SIOCINQ:
2635 amount = unix_inq_len(sk);
2636 if (amount < 0)
2637 err = amount;
2638 else
2639 err = put_user(amount, (int __user *)arg);
2640 break;
2641 case SIOCUNIXFILE:
2642 err = unix_open_file(sk);
2643 break;
2644 default:
2645 err = -ENOIOCTLCMD;
2646 break;
2647 }
2648 return err;
2649}
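
/*
 * Userspace illustration (not part of this file): SIOCUNIXFILE, handled
 * by unix_open_file() above, returns an O_PATH descriptor for the inode
 * a socket is bound to; it requires CAP_NET_ADMIN. A minimal sketch
 * with error handling elided.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/sockios.h>	/* SIOCUNIXFILE */
#include <unistd.h>

int main(void)
{
	struct sockaddr_un sun = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
	int pathfd;

	strcpy(sun.sun_path, "/tmp/demo.sock");
	unlink(sun.sun_path);
	bind(fd, (struct sockaddr *)&sun, sizeof(sun));

	pathfd = ioctl(fd, SIOCUNIXFILE);	/* O_PATH fd, or -1 */
	if (pathfd < 0)
		perror("SIOCUNIXFILE");
	else
		printf("O_PATH fd %d -> %s\n", pathfd, sun.sun_path);
	return 0;
}
#endif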
2650
2651static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2652{
2653 struct sock *sk = sock->sk;
2654 unsigned int mask;
2655
2656 sock_poll_wait(file, sk_sleep(sk), wait);
2657 mask = 0;
2658
2659 /* exceptional events? */
2660 if (sk->sk_err)
2661 mask |= POLLERR;
2662 if (sk->sk_shutdown == SHUTDOWN_MASK)
2663 mask |= POLLHUP;
2664 if (sk->sk_shutdown & RCV_SHUTDOWN)
2665 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2666
2667 /* readable? */
2668 if (!skb_queue_empty(&sk->sk_receive_queue))
2669 mask |= POLLIN | POLLRDNORM;
2670
2671 /* Connection-based need to check for termination and startup */
2672 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2673 sk->sk_state == TCP_CLOSE)
2674 mask |= POLLHUP;
2675
2676 /*
2677 * we set writable also when the other side has shut down the
2678 * connection. This prevents stuck sockets.
2679 */
2680 if (unix_writable(sk))
2681 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2682
2683 return mask;
2684}
2685
2686static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2687 poll_table *wait)
2688{
2689 struct sock *sk = sock->sk, *other;
2690 unsigned int mask, writable;
2691
2692 sock_poll_wait(file, sk_sleep(sk), wait);
2693 mask = 0;
2694
2695 /* exceptional events? */
2696 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2697 mask |= POLLERR |
2698 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2699
2700 if (sk->sk_shutdown & RCV_SHUTDOWN)
2701 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2702 if (sk->sk_shutdown == SHUTDOWN_MASK)
2703 mask |= POLLHUP;
2704
2705 /* readable? */
2706 if (!skb_queue_empty(&sk->sk_receive_queue))
2707 mask |= POLLIN | POLLRDNORM;
2708
2709 /* Connection-based need to check for termination and startup */
2710 if (sk->sk_type == SOCK_SEQPACKET) {
2711 if (sk->sk_state == TCP_CLOSE)
2712 mask |= POLLHUP;
2713 /* connection hasn't started yet? */
2714 if (sk->sk_state == TCP_SYN_SENT)
2715 return mask;
2716 }
2717
2718 /* No write status requested, avoid expensive OUT tests. */
2719 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2720 return mask;
2721
2722 writable = unix_writable(sk);
2723 if (writable) {
2724 unix_state_lock(sk);
2725
2726 other = unix_peer(sk);
2727 if (other && unix_peer(other) != sk &&
2728 unix_recvq_full(other) &&
2729 unix_dgram_peer_wake_me(sk, other))
2730 writable = 0;
2731
2732 unix_state_unlock(sk);
2733 }
2734
2735 if (writable)
2736 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2737 else
2738 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2739
2740 return mask;
2741}
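
/*
 * Userspace illustration (not part of this file): for a connected
 * datagram socket, writability tracks the *peer's* receive queue via
 * unix_recvq_full()/unix_dgram_peer_wake_me() above. A minimal sketch:
 * once the peer queue is full, POLLOUT is no longer reported.
 */
#if 0
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int sv[2];
	struct pollfd pfd = { .events = POLLOUT };

	socketpair(AF_UNIX, SOCK_DGRAM, 0, sv);
	pfd.fd = sv[0];

	/* Flood the peer until the non-blocking send backs off. */
	while (send(sv[0], "x", 1, MSG_DONTWAIT) == 1)
		;

	poll(&pfd, 1, 0);
	printf("peer queue full, revents=%#x\n", pfd.revents);
	return 0;
}
#endif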
2742
2743#ifdef CONFIG_PROC_FS
2744
2745#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2746
2747#define get_bucket(x) ((x) >> BUCKET_SPACE)
2748#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2749#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
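
/*
 * Worked example of the encoding above (a sketch: the concrete widths
 * assume BITS_PER_LONG == 64 and UNIX_HASH_BITS == 8, giving
 * BUCKET_SPACE == 64 - 9 - 1 == 54):
 *
 *	pos = set_bucket_offset(3, 17) == (3UL << 54) | 17
 *	get_bucket(pos) == pos >> 54 == 3
 *	get_offset(pos) == pos & ((1L << 54) - 1) == 17
 *
 * i.e. the seq_file position packs the hash bucket into the high bits
 * and the 1-based offset within that bucket into the low bits.
 */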
2750
2751static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2752{
2753 unsigned long offset = get_offset(*pos);
2754 unsigned long bucket = get_bucket(*pos);
2755 struct sock *sk;
2756 unsigned long count = 0;
2757
2758 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2759 if (sock_net(sk) != seq_file_net(seq))
2760 continue;
2761 if (++count == offset)
2762 break;
2763 }
2764
2765 return sk;
2766}
2767
2768static struct sock *unix_next_socket(struct seq_file *seq,
2769 struct sock *sk,
2770 loff_t *pos)
2771{
2772 unsigned long bucket;
2773
2774 while (sk > (struct sock *)SEQ_START_TOKEN) {
2775 sk = sk_next(sk);
2776 if (!sk)
2777 goto next_bucket;
2778 if (sock_net(sk) == seq_file_net(seq))
2779 return sk;
2780 }
2781
2782 do {
2783 sk = unix_from_bucket(seq, pos);
2784 if (sk)
2785 return sk;
2786
2787next_bucket:
2788 bucket = get_bucket(*pos) + 1;
2789 *pos = set_bucket_offset(bucket, 1);
2790 } while (bucket < ARRAY_SIZE(unix_socket_table));
2791
2792 return NULL;
2793}
2794
2795static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2796 __acquires(unix_table_lock)
2797{
2798 spin_lock(&unix_table_lock);
2799
2800 if (!*pos)
2801 return SEQ_START_TOKEN;
2802
2803 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2804 return NULL;
2805
2806 return unix_next_socket(seq, NULL, pos);
2807}
2808
2809static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2810{
2811 ++*pos;
2812 return unix_next_socket(seq, v, pos);
2813}
2814
2815static void unix_seq_stop(struct seq_file *seq, void *v)
2816 __releases(unix_table_lock)
2817{
2818 spin_unlock(&unix_table_lock);
2819}
2820
2821static int unix_seq_show(struct seq_file *seq, void *v)
2822{
2823
2824 if (v == SEQ_START_TOKEN)
2825 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2826 "Inode Path\n");
2827 else {
2828 struct sock *s = v;
2829 struct unix_sock *u = unix_sk(s);
2830 unix_state_lock(s);
2831
2832 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2833 s,
2834 refcount_read(&s->sk_refcnt),
2835 0,
2836 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2837 s->sk_type,
2838 s->sk_socket ?
2839 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2840 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2841 sock_i_ino(s));
2842
		if (u->addr) {	/* under unix_table_lock here */
2844 int i, len;
2845 seq_putc(seq, ' ');
2846
2847 i = 0;
2848 len = u->addr->len - sizeof(short);
2849 if (!UNIX_ABSTRACT(s))
2850 len--;
2851 else {
2852 seq_putc(seq, '@');
2853 i++;
2854 }
2855 for ( ; i < len; i++)
2856 seq_putc(seq, u->addr->name->sun_path[i] ?:
2857 '@');
2858 }
2859 unix_state_unlock(s);
2860 seq_putc(seq, '\n');
2861 }
2862
2863 return 0;
2864}
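
/*
 * Illustration (values invented): a line emitted by unix_seq_show()
 * in /proc/net/unix looks roughly like
 *
 *	ffff8800b9f31000: 00000002 00000000 00010000 0001 01 17890 /run/demo.sock
 *
 * i.e. kernel address, refcount, protocol (always 0), flags
 * (__SO_ACCEPTCON for listeners), type, state, inode and, when bound,
 * the path - with '@' standing in for the NUL bytes of abstract names.
 */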
2865
2866static const struct seq_operations unix_seq_ops = {
2867 .start = unix_seq_start,
2868 .next = unix_seq_next,
2869 .stop = unix_seq_stop,
2870 .show = unix_seq_show,
2871};
2872
2873static int unix_seq_open(struct inode *inode, struct file *file)
2874{
2875 return seq_open_net(inode, file, &unix_seq_ops,
2876 sizeof(struct seq_net_private));
2877}
2878
2879static const struct file_operations unix_seq_fops = {
2880 .owner = THIS_MODULE,
2881 .open = unix_seq_open,
2882 .read = seq_read,
2883 .llseek = seq_lseek,
2884 .release = seq_release_net,
2885};
2886
2887#endif
2888
2889static const struct net_proto_family unix_family_ops = {
2890 .family = PF_UNIX,
2891 .create = unix_create,
2892 .owner = THIS_MODULE,
2893};
2894
2895
2896static int __net_init unix_net_init(struct net *net)
2897{
2898 int error = -ENOMEM;
2899
2900 net->unx.sysctl_max_dgram_qlen = 10;
2901 if (unix_sysctl_register(net))
2902 goto out;
2903
2904#ifdef CONFIG_PROC_FS
2905 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2906 unix_sysctl_unregister(net);
2907 goto out;
2908 }
2909#endif
2910 error = 0;
2911out:
2912 return error;
2913}
2914
2915static void __net_exit unix_net_exit(struct net *net)
2916{
2917 unix_sysctl_unregister(net);
2918 remove_proc_entry("unix", net->proc_net);
2919}
2920
2921static struct pernet_operations unix_net_ops = {
2922 .init = unix_net_init,
2923 .exit = unix_net_exit,
2924};
2925
2926static int __init af_unix_init(void)
2927{
2928 int rc = -1;
2929
2930 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2931
2932 rc = proto_register(&unix_proto, 1);
2933 if (rc != 0) {
2934 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2935 goto out;
2936 }
2937
2938 sock_register(&unix_family_ops);
2939 register_pernet_subsys(&unix_net_ops);
2940out:
2941 return rc;
2942}
2943
2944static void __exit af_unix_exit(void)
2945{
2946 sock_unregister(PF_UNIX);
2947 proto_unregister(&unix_proto);
2948 unregister_pernet_subsys(&unix_net_ops);
2949}
2950
2951/* Earlier than device_initcall() so that other drivers invoking
2952 request_module() don't end up in a loop when modprobe tries
2953 to use a UNIX socket. But later than subsys_initcall() because
2954 we depend on stuff initialised there */
2955fs_initcall(af_unix_init);
2956module_exit(af_unix_exit);
2957
2958MODULE_LICENSE("GPL");
2959MODULE_ALIAS_NETPROTO(PF_UNIX);