Linux 3.10.95
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net/unix/af_unix.c
1 /*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko Eißfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid a huge amount
38 * of socks hashed (this is for unix_gc()
39 * performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skbs queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lots of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, or give the blksize as high water mark
59 * and a fake inode identifier (nor does it have the BSD first-socket-fstat-twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for getsockname/getpeername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * starting with a zero byte, so that this name space does not
80 * intersect with BSD (filesystem) names.
81 */
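/*
 * Illustrative sketch (hypothetical userspace code, not part of this
 * file): binding an abstract name as described above.  The name
 * "example" and fd are made up; the length passed to bind() covers
 * only the bytes actually used, and no trailing NUL is implied.
 *
 *	struct sockaddr_un sun;
 *	memset(&sun, 0, sizeof(sun));
 *	sun.sun_family = AF_UNIX;
 *	sun.sun_path[0] = 0;			(leading zero byte => abstract)
 *	memcpy(sun.sun_path + 1, "example", 7);
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */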
82
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117
118 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
119 EXPORT_SYMBOL_GPL(unix_socket_table);
120 DEFINE_SPINLOCK(unix_table_lock);
121 EXPORT_SYMBOL_GPL(unix_table_lock);
122 static atomic_long_t unix_nr_socks;
123
124
125 static struct hlist_head *unix_sockets_unbound(void *addr)
126 {
127 unsigned long hash = (unsigned long)addr;
128
129 hash ^= hash >> 16;
130 hash ^= hash >> 8;
131 hash %= UNIX_HASH_SIZE;
132 return &unix_socket_table[UNIX_HASH_SIZE + hash];
133 }
134
135 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
136
137 #ifdef CONFIG_SECURITY_NETWORK
138 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
139 {
140 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
141 }
142
143 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
144 {
145 scm->secid = *UNIXSID(skb);
146 }
147 #else
148 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
149 { }
150
151 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
152 { }
153 #endif /* CONFIG_SECURITY_NETWORK */
154
155 /*
156 * SMP locking strategy:
157 * the hash table is protected with the spinlock unix_table_lock;
158 * each socket's state is protected by its own spin lock.
159 */
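/*
 * A minimal sketch of that discipline (hypothetical caller, for
 * illustration only):
 *
 *	spin_lock(&unix_table_lock);	(hash table lookup/update)
 *	... find sk, sock_hold(sk) ...
 *	spin_unlock(&unix_table_lock);
 *
 *	unix_state_lock(sk);		(then the per-socket state)
 *	... examine or modify sk's state ...
 *	unix_state_unlock(sk);
 *	sock_put(sk);
 */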
160
161 static inline unsigned int unix_hash_fold(__wsum n)
162 {
163 unsigned int hash = (__force unsigned int)csum_fold(n);
164
165 hash ^= hash>>8;
166 return hash&(UNIX_HASH_SIZE-1);
167 }
168
169 #define unix_peer(sk) (unix_sk(sk)->peer)
170
171 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
172 {
173 return unix_peer(osk) == sk;
174 }
175
176 static inline int unix_may_send(struct sock *sk, struct sock *osk)
177 {
178 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
179 }
180
181 static inline int unix_recvq_full(struct sock const *sk)
182 {
183 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
184 }
185
186 struct sock *unix_peer_get(struct sock *s)
187 {
188 struct sock *peer;
189
190 unix_state_lock(s);
191 peer = unix_peer(s);
192 if (peer)
193 sock_hold(peer);
194 unix_state_unlock(s);
195 return peer;
196 }
197 EXPORT_SYMBOL_GPL(unix_peer_get);
198
199 static inline void unix_release_addr(struct unix_address *addr)
200 {
201 if (atomic_dec_and_test(&addr->refcnt))
202 kfree(addr);
203 }
204
205 /*
206 * Check unix socket name:
207 * - should not be zero length.
208 * - if it does not start with a zero byte, it should be NUL terminated (FS object)
209 * - if it starts with a zero byte, it is an abstract name.
210 */
211
212 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
213 {
214 if (len <= sizeof(short) || len > sizeof(*sunaddr))
215 return -EINVAL;
216 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
217 return -EINVAL;
218 if (sunaddr->sun_path[0]) {
219 /*
220 * This may look like an off by one error but it is a bit more
221 * subtle. 108 is the longest valid AF_UNIX path for a binding.
222 * sun_path[108] doesn't as such exist. However in kernel space
223 * we are guaranteed that it is a valid memory location in our
224 * kernel address buffer.
225 */
226 ((char *)sunaddr)[len] = 0;
227 len = strlen(sunaddr->sun_path)+1+sizeof(short);
228 return len;
229 }
230
231 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
232 return len;
233 }
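/*
 * Sketch of the three name forms unix_mkname() and unix_bind() accept
 * (hypothetical userspace lengths; on Linux sizeof(sa_family_t) ==
 * sizeof(short)):
 *
 *	bind(fd, &sun, sizeof(short));	(no name at all: autobind,
 *					 see unix_autobind() below)
 *	sun_path = "/tmp/sock", NUL terminated: filesystem object
 *	sun_path = "\0name", not NUL terminated: abstract, hashed by content
 */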
234
235 static void __unix_remove_socket(struct sock *sk)
236 {
237 sk_del_node_init(sk);
238 }
239
240 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
241 {
242 WARN_ON(!sk_unhashed(sk));
243 sk_add_node(sk, list);
244 }
245
246 static inline void unix_remove_socket(struct sock *sk)
247 {
248 spin_lock(&unix_table_lock);
249 __unix_remove_socket(sk);
250 spin_unlock(&unix_table_lock);
251 }
252
253 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
254 {
255 spin_lock(&unix_table_lock);
256 __unix_insert_socket(list, sk);
257 spin_unlock(&unix_table_lock);
258 }
259
260 static struct sock *__unix_find_socket_byname(struct net *net,
261 struct sockaddr_un *sunname,
262 int len, int type, unsigned int hash)
263 {
264 struct sock *s;
265
266 sk_for_each(s, &unix_socket_table[hash ^ type]) {
267 struct unix_sock *u = unix_sk(s);
268
269 if (!net_eq(sock_net(s), net))
270 continue;
271
272 if (u->addr->len == len &&
273 !memcmp(u->addr->name, sunname, len))
274 goto found;
275 }
276 s = NULL;
277 found:
278 return s;
279 }
280
281 static inline struct sock *unix_find_socket_byname(struct net *net,
282 struct sockaddr_un *sunname,
283 int len, int type,
284 unsigned int hash)
285 {
286 struct sock *s;
287
288 spin_lock(&unix_table_lock);
289 s = __unix_find_socket_byname(net, sunname, len, type, hash);
290 if (s)
291 sock_hold(s);
292 spin_unlock(&unix_table_lock);
293 return s;
294 }
295
296 static struct sock *unix_find_socket_byinode(struct inode *i)
297 {
298 struct sock *s;
299
300 spin_lock(&unix_table_lock);
301 sk_for_each(s,
302 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
303 struct dentry *dentry = unix_sk(s)->path.dentry;
304
305 if (dentry && dentry->d_inode == i) {
306 sock_hold(s);
307 goto found;
308 }
309 }
310 s = NULL;
311 found:
312 spin_unlock(&unix_table_lock);
313 return s;
314 }
315
316 /* Support code for asymmetrically connected dgram sockets
317 *
318 * If a datagram socket is connected to a socket not itself connected
319 * to the first socket (e.g., /dev/log), clients may only enqueue more
320 * messages if the present receive queue of the server socket is not
321 * "too large". This means there's a second writeability condition
322 * poll and sendmsg need to test. The dgram recv code will do a wake
323 * up on the peer_wait wait queue of a socket upon reception of a
324 * datagram which needs to be propagated to sleeping would-be writers
325 * since these might not have sent anything so far. This can't be
326 * accomplished via poll_wait because the lifetime of the server
327 * socket might be less than that of its clients if these break their
328 * association with it or if the server socket is closed while clients
329 * are still connected to it, and there's no way to inform "a polling
330 * implementation" that it should let go of a certain wait queue.
331 *
332 * In order to propagate a wake up, a wait_queue_t of the client
333 * socket, whose wake function does a wake_up on the ordinary client
334 * socket wait queue, is enqueued on the peer_wait queue of the server
335 * socket. This connection is established whenever a write (or a
336 * poll for write) hits the flow control condition, and is broken when
337 * the association to the server socket is dissolved or after a wake
338 * up was relayed.
339 */
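/*
 * A minimal sketch of the situation handled here (hypothetical userspace,
 * in the style of a /dev/log client; names are made up):
 *
 *	int c = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	connect(c, (struct sockaddr *)&srv, srv_len);	(server never
 *							 connects back)
 *	poll/sendmsg on c must block while srv's receive queue is full,
 *	and must be woken when srv reads - hence the relay code below.
 */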
340
341 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
342 void *key)
343 {
344 struct unix_sock *u;
345 wait_queue_head_t *u_sleep;
346
347 u = container_of(q, struct unix_sock, peer_wake);
348
349 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
350 q);
351 u->peer_wake.private = NULL;
352
353 /* relaying can only happen while the wq still exists */
354 u_sleep = sk_sleep(&u->sk);
355 if (u_sleep)
356 wake_up_interruptible_poll(u_sleep, key);
357
358 return 0;
359 }
360
361 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
362 {
363 struct unix_sock *u, *u_other;
364 int rc;
365
366 u = unix_sk(sk);
367 u_other = unix_sk(other);
368 rc = 0;
369 spin_lock(&u_other->peer_wait.lock);
370
371 if (!u->peer_wake.private) {
372 u->peer_wake.private = other;
373 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
374
375 rc = 1;
376 }
377
378 spin_unlock(&u_other->peer_wait.lock);
379 return rc;
380 }
381
382 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
383 struct sock *other)
384 {
385 struct unix_sock *u, *u_other;
386
387 u = unix_sk(sk);
388 u_other = unix_sk(other);
389 spin_lock(&u_other->peer_wait.lock);
390
391 if (u->peer_wake.private == other) {
392 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
393 u->peer_wake.private = NULL;
394 }
395
396 spin_unlock(&u_other->peer_wait.lock);
397 }
398
399 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
400 struct sock *other)
401 {
402 unix_dgram_peer_wake_disconnect(sk, other);
403 wake_up_interruptible_poll(sk_sleep(sk),
404 POLLOUT |
405 POLLWRNORM |
406 POLLWRBAND);
407 }
408
409 /* preconditions:
410 * - unix_peer(sk) == other
411 * - association is stable
412 */
413 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
414 {
415 int connected;
416
417 connected = unix_dgram_peer_wake_connect(sk, other);
418
419 if (unix_recvq_full(other))
420 return 1;
421
422 if (connected)
423 unix_dgram_peer_wake_disconnect(sk, other);
424
425 return 0;
426 }
427
428 static inline int unix_writable(struct sock *sk)
429 {
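	/* Writable while queued data uses no more than 1/4 of sk_sndbuf. */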
430 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
431 }
432
433 static void unix_write_space(struct sock *sk)
434 {
435 struct socket_wq *wq;
436
437 rcu_read_lock();
438 if (unix_writable(sk)) {
439 wq = rcu_dereference(sk->sk_wq);
440 if (wq_has_sleeper(wq))
441 wake_up_interruptible_sync_poll(&wq->wait,
442 POLLOUT | POLLWRNORM | POLLWRBAND);
443 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
444 }
445 rcu_read_unlock();
446 }
447
448 /* When a dgram socket disconnects (or changes its peer), we clear its receive
449 * queue of packets that arrived from the previous peer. First, this allows us
450 * to do flow control based only on wmem_alloc; second, a sk connected to a
451 * peer may receive messages only from that peer. */
452 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
453 {
454 if (!skb_queue_empty(&sk->sk_receive_queue)) {
455 skb_queue_purge(&sk->sk_receive_queue);
456 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
457
458 /* If one link of a bidirectional dgram pipe is disconnected,
459 * we signal an error. Messages are lost. Do not do this
460 * when the peer was not connected to us.
461 */
462 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
463 other->sk_err = ECONNRESET;
464 other->sk_error_report(other);
465 }
466 }
467 }
468
469 static void unix_sock_destructor(struct sock *sk)
470 {
471 struct unix_sock *u = unix_sk(sk);
472
473 skb_queue_purge(&sk->sk_receive_queue);
474
475 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
476 WARN_ON(!sk_unhashed(sk));
477 WARN_ON(sk->sk_socket);
478 if (!sock_flag(sk, SOCK_DEAD)) {
479 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
480 return;
481 }
482
483 if (u->addr)
484 unix_release_addr(u->addr);
485
486 atomic_long_dec(&unix_nr_socks);
487 local_bh_disable();
488 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
489 local_bh_enable();
490 #ifdef UNIX_REFCNT_DEBUG
491 printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
492 atomic_long_read(&unix_nr_socks));
493 #endif
494 }
495
496 static void unix_release_sock(struct sock *sk, int embrion)
497 {
498 struct unix_sock *u = unix_sk(sk);
499 struct path path;
500 struct sock *skpair;
501 struct sk_buff *skb;
502 int state;
503
504 unix_remove_socket(sk);
505
506 /* Clear state */
507 unix_state_lock(sk);
508 sock_orphan(sk);
509 sk->sk_shutdown = SHUTDOWN_MASK;
510 path = u->path;
511 u->path.dentry = NULL;
512 u->path.mnt = NULL;
513 state = sk->sk_state;
514 sk->sk_state = TCP_CLOSE;
515 unix_state_unlock(sk);
516
517 wake_up_interruptible_all(&u->peer_wait);
518
519 skpair = unix_peer(sk);
520
521 if (skpair != NULL) {
522 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
523 unix_state_lock(skpair);
524 /* No more writes */
525 skpair->sk_shutdown = SHUTDOWN_MASK;
526 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
527 skpair->sk_err = ECONNRESET;
528 unix_state_unlock(skpair);
529 skpair->sk_state_change(skpair);
530 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
531 }
532
533 unix_dgram_peer_wake_disconnect(sk, skpair);
534 sock_put(skpair); /* It may now die */
535 unix_peer(sk) = NULL;
536 }
537
538 /* Try to flush out this socket. Throw out buffers at least */
539
540 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
541 if (state == TCP_LISTEN)
542 unix_release_sock(skb->sk, 1);
543 /* passed fds are erased in the kfree_skb hook */
544 kfree_skb(skb);
545 }
546
547 if (path.dentry)
548 path_put(&path);
549
550 sock_put(sk);
551
552 /* ---- Socket is dead now and most probably destroyed ---- */
553
554 /*
555 * Fixme: BSD difference: In BSD all sockets connected to us get
556 * ECONNRESET and we die on the spot. In Linux we behave
557 * like files and pipes do and wait for the last
558 * dereference.
559 *
560 * Can't we simply set sock->err?
561 *
562 * What does the above comment talk about? --ANK(980817)
563 */
564
565 if (unix_tot_inflight)
566 unix_gc(); /* Garbage collect fds */
567 }
568
569 static void init_peercred(struct sock *sk)
570 {
571 put_pid(sk->sk_peer_pid);
572 if (sk->sk_peer_cred)
573 put_cred(sk->sk_peer_cred);
574 sk->sk_peer_pid = get_pid(task_tgid(current));
575 sk->sk_peer_cred = get_current_cred();
576 }
577
578 static void copy_peercred(struct sock *sk, struct sock *peersk)
579 {
580 put_pid(sk->sk_peer_pid);
581 if (sk->sk_peer_cred)
582 put_cred(sk->sk_peer_cred);
583 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
584 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
585 }
586
587 static int unix_listen(struct socket *sock, int backlog)
588 {
589 int err;
590 struct sock *sk = sock->sk;
591 struct unix_sock *u = unix_sk(sk);
592 struct pid *old_pid = NULL;
593
594 err = -EOPNOTSUPP;
595 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
596 goto out; /* Only stream/seqpacket sockets accept */
597 err = -EINVAL;
598 if (!u->addr)
599 goto out; /* No listens on an unbound socket */
600 unix_state_lock(sk);
601 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
602 goto out_unlock;
603 if (backlog > sk->sk_max_ack_backlog)
604 wake_up_interruptible_all(&u->peer_wait);
605 sk->sk_max_ack_backlog = backlog;
606 sk->sk_state = TCP_LISTEN;
607 /* set credentials so connect can copy them */
608 init_peercred(sk);
609 err = 0;
610
611 out_unlock:
612 unix_state_unlock(sk);
613 put_pid(old_pid);
614 out:
615 return err;
616 }
617
618 static int unix_release(struct socket *);
619 static int unix_bind(struct socket *, struct sockaddr *, int);
620 static int unix_stream_connect(struct socket *, struct sockaddr *,
621 int addr_len, int flags);
622 static int unix_socketpair(struct socket *, struct socket *);
623 static int unix_accept(struct socket *, struct socket *, int);
624 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
625 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
626 static unsigned int unix_dgram_poll(struct file *, struct socket *,
627 poll_table *);
628 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
629 static int unix_shutdown(struct socket *, int);
630 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
631 struct msghdr *, size_t);
632 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
633 struct msghdr *, size_t, int);
634 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
635 struct msghdr *, size_t);
636 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
637 struct msghdr *, size_t, int);
638 static int unix_dgram_connect(struct socket *, struct sockaddr *,
639 int, int);
640 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
641 struct msghdr *, size_t);
642 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
643 struct msghdr *, size_t, int);
644
645 static int unix_set_peek_off(struct sock *sk, int val)
646 {
647 struct unix_sock *u = unix_sk(sk);
648
649 if (mutex_lock_interruptible(&u->readlock))
650 return -EINTR;
651
652 sk->sk_peek_off = val;
653 mutex_unlock(&u->readlock);
654
655 return 0;
656 }
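/*
 * Sketch of what this enables (hypothetical userspace): with SO_PEEK_OFF
 * set, repeated MSG_PEEK reads walk forward through the queued data
 * instead of re-reading from the start:
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, n, MSG_PEEK);	(peeks bytes 0..n-1, advances offset)
 *	recv(fd, buf, n, MSG_PEEK);	(peeks bytes n..2n-1)
 */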
657
658
659 static const struct proto_ops unix_stream_ops = {
660 .family = PF_UNIX,
661 .owner = THIS_MODULE,
662 .release = unix_release,
663 .bind = unix_bind,
664 .connect = unix_stream_connect,
665 .socketpair = unix_socketpair,
666 .accept = unix_accept,
667 .getname = unix_getname,
668 .poll = unix_poll,
669 .ioctl = unix_ioctl,
670 .listen = unix_listen,
671 .shutdown = unix_shutdown,
672 .setsockopt = sock_no_setsockopt,
673 .getsockopt = sock_no_getsockopt,
674 .sendmsg = unix_stream_sendmsg,
675 .recvmsg = unix_stream_recvmsg,
676 .mmap = sock_no_mmap,
677 .sendpage = sock_no_sendpage,
678 .set_peek_off = unix_set_peek_off,
679 };
680
681 static const struct proto_ops unix_dgram_ops = {
682 .family = PF_UNIX,
683 .owner = THIS_MODULE,
684 .release = unix_release,
685 .bind = unix_bind,
686 .connect = unix_dgram_connect,
687 .socketpair = unix_socketpair,
688 .accept = sock_no_accept,
689 .getname = unix_getname,
690 .poll = unix_dgram_poll,
691 .ioctl = unix_ioctl,
692 .listen = sock_no_listen,
693 .shutdown = unix_shutdown,
694 .setsockopt = sock_no_setsockopt,
695 .getsockopt = sock_no_getsockopt,
696 .sendmsg = unix_dgram_sendmsg,
697 .recvmsg = unix_dgram_recvmsg,
698 .mmap = sock_no_mmap,
699 .sendpage = sock_no_sendpage,
700 .set_peek_off = unix_set_peek_off,
701 };
702
703 static const struct proto_ops unix_seqpacket_ops = {
704 .family = PF_UNIX,
705 .owner = THIS_MODULE,
706 .release = unix_release,
707 .bind = unix_bind,
708 .connect = unix_stream_connect,
709 .socketpair = unix_socketpair,
710 .accept = unix_accept,
711 .getname = unix_getname,
712 .poll = unix_dgram_poll,
713 .ioctl = unix_ioctl,
714 .listen = unix_listen,
715 .shutdown = unix_shutdown,
716 .setsockopt = sock_no_setsockopt,
717 .getsockopt = sock_no_getsockopt,
718 .sendmsg = unix_seqpacket_sendmsg,
719 .recvmsg = unix_seqpacket_recvmsg,
720 .mmap = sock_no_mmap,
721 .sendpage = sock_no_sendpage,
722 .set_peek_off = unix_set_peek_off,
723 };
724
725 static struct proto unix_proto = {
726 .name = "UNIX",
727 .owner = THIS_MODULE,
728 .obj_size = sizeof(struct unix_sock),
729 };
730
731 /*
732 * AF_UNIX sockets do not interact with hardware, hence they
733 * don't trigger interrupts - so it's safe for them to have
734 * bh-unsafe locking for their sk_receive_queue.lock. Split off
735 * this special lock-class by reinitializing the spinlock key:
736 */
737 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
738
739 static struct sock *unix_create1(struct net *net, struct socket *sock)
740 {
741 struct sock *sk = NULL;
742 struct unix_sock *u;
743
744 atomic_long_inc(&unix_nr_socks);
745 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
746 goto out;
747
748 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
749 if (!sk)
750 goto out;
751
752 sock_init_data(sock, sk);
753 lockdep_set_class(&sk->sk_receive_queue.lock,
754 &af_unix_sk_receive_queue_lock_key);
755
756 sk->sk_write_space = unix_write_space;
757 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
758 sk->sk_destruct = unix_sock_destructor;
759 u = unix_sk(sk);
760 u->path.dentry = NULL;
761 u->path.mnt = NULL;
762 spin_lock_init(&u->lock);
763 atomic_long_set(&u->inflight, 0);
764 INIT_LIST_HEAD(&u->link);
765 mutex_init(&u->readlock); /* single task reading lock */
766 init_waitqueue_head(&u->peer_wait);
767 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
768 unix_insert_socket(unix_sockets_unbound(sk), sk);
769 out:
770 if (sk == NULL)
771 atomic_long_dec(&unix_nr_socks);
772 else {
773 local_bh_disable();
774 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
775 local_bh_enable();
776 }
777 return sk;
778 }
779
780 static int unix_create(struct net *net, struct socket *sock, int protocol,
781 int kern)
782 {
783 if (protocol && protocol != PF_UNIX)
784 return -EPROTONOSUPPORT;
785
786 sock->state = SS_UNCONNECTED;
787
788 switch (sock->type) {
789 case SOCK_STREAM:
790 sock->ops = &unix_stream_ops;
791 break;
792 /*
793 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
794 * nothing uses it.
795 */
796 case SOCK_RAW:
797 sock->type = SOCK_DGRAM;
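		/* fall through */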
798 case SOCK_DGRAM:
799 sock->ops = &unix_dgram_ops;
800 break;
801 case SOCK_SEQPACKET:
802 sock->ops = &unix_seqpacket_ops;
803 break;
804 default:
805 return -ESOCKTNOSUPPORT;
806 }
807
808 return unix_create1(net, sock) ? 0 : -ENOMEM;
809 }
810
811 static int unix_release(struct socket *sock)
812 {
813 struct sock *sk = sock->sk;
814
815 if (!sk)
816 return 0;
817
818 unix_release_sock(sk, 0);
819 sock->sk = NULL;
820
821 return 0;
822 }
823
824 static int unix_autobind(struct socket *sock)
825 {
826 struct sock *sk = sock->sk;
827 struct net *net = sock_net(sk);
828 struct unix_sock *u = unix_sk(sk);
829 static u32 ordernum = 1;
830 struct unix_address *addr;
831 int err;
832 unsigned int retries = 0;
833
834 err = mutex_lock_interruptible(&u->readlock);
835 if (err)
836 return err;
837
838 err = 0;
839 if (u->addr)
840 goto out;
841
842 err = -ENOMEM;
843 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
844 if (!addr)
845 goto out;
846
847 addr->name->sun_family = AF_UNIX;
848 atomic_set(&addr->refcnt, 1);
849
850 retry:
851 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
852 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
853
854 spin_lock(&unix_table_lock);
855 ordernum = (ordernum+1)&0xFFFFF;
856
857 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
858 addr->hash)) {
859 spin_unlock(&unix_table_lock);
860 /*
861 * __unix_find_socket_byname() may take a long time if many names
862 * are already in use.
863 */
864 cond_resched();
865 /* Give up if all names seem to be in use. */
866 if (retries++ == 0xFFFFF) {
867 err = -ENOSPC;
868 kfree(addr);
869 goto out;
870 }
871 goto retry;
872 }
873 addr->hash ^= sk->sk_type;
874
875 __unix_remove_socket(sk);
876 u->addr = addr;
877 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
878 spin_unlock(&unix_table_lock);
879 err = 0;
880
881 out: mutex_unlock(&u->readlock);
882 return err;
883 }
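/*
 * Sketch (hypothetical userspace): sending on an unbound socket with
 * SO_PASSCRED set triggers the autobind above, which picks an abstract
 * name of the form "\0XXXXX" (five hex digits from ordernum):
 *
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	sendto(fd, ...);	(kernel binds fd to some "\0XXXXX" first)
 */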
884
885 static struct sock *unix_find_other(struct net *net,
886 struct sockaddr_un *sunname, int len,
887 int type, unsigned int hash, int *error)
888 {
889 struct sock *u;
890 struct path path;
891 int err = 0;
892
893 if (sunname->sun_path[0]) {
894 struct inode *inode;
895 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
896 if (err)
897 goto fail;
898 inode = path.dentry->d_inode;
899 err = inode_permission(inode, MAY_WRITE);
900 if (err)
901 goto put_fail;
902
903 err = -ECONNREFUSED;
904 if (!S_ISSOCK(inode->i_mode))
905 goto put_fail;
906 u = unix_find_socket_byinode(inode);
907 if (!u)
908 goto put_fail;
909
910 if (u->sk_type == type)
911 touch_atime(&path);
912
913 path_put(&path);
914
915 err = -EPROTOTYPE;
916 if (u->sk_type != type) {
917 sock_put(u);
918 goto fail;
919 }
920 } else {
921 err = -ECONNREFUSED;
922 u = unix_find_socket_byname(net, sunname, len, type, hash);
923 if (u) {
924 struct dentry *dentry;
925 dentry = unix_sk(u)->path.dentry;
926 if (dentry)
927 touch_atime(&unix_sk(u)->path);
928 } else
929 goto fail;
930 }
931 return u;
932
933 put_fail:
934 path_put(&path);
935 fail:
936 *error = err;
937 return NULL;
938 }
939
940 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
941 {
942 struct dentry *dentry;
943 struct path path;
944 int err = 0;
945 /*
946 * Get the parent directory, calculate the hash for last
947 * component.
948 */
949 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
950 err = PTR_ERR(dentry);
951 if (IS_ERR(dentry))
952 return err;
953
954 /*
955 * All right, let's create it.
956 */
957 err = security_path_mknod(&path, dentry, mode, 0);
958 if (!err) {
959 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
960 if (!err) {
961 res->mnt = mntget(path.mnt);
962 res->dentry = dget(dentry);
963 }
964 }
965 done_path_create(&path, dentry);
966 return err;
967 }
968
969 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
970 {
971 struct sock *sk = sock->sk;
972 struct net *net = sock_net(sk);
973 struct unix_sock *u = unix_sk(sk);
974 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
975 char *sun_path = sunaddr->sun_path;
976 int err;
977 unsigned int hash;
978 struct unix_address *addr;
979 struct hlist_head *list;
980
981 err = -EINVAL;
982 if (sunaddr->sun_family != AF_UNIX)
983 goto out;
984
985 if (addr_len == sizeof(short)) {
986 err = unix_autobind(sock);
987 goto out;
988 }
989
990 err = unix_mkname(sunaddr, addr_len, &hash);
991 if (err < 0)
992 goto out;
993 addr_len = err;
994
995 err = mutex_lock_interruptible(&u->readlock);
996 if (err)
997 goto out;
998
999 err = -EINVAL;
1000 if (u->addr)
1001 goto out_up;
1002
1003 err = -ENOMEM;
1004 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1005 if (!addr)
1006 goto out_up;
1007
1008 memcpy(addr->name, sunaddr, addr_len);
1009 addr->len = addr_len;
1010 addr->hash = hash ^ sk->sk_type;
1011 atomic_set(&addr->refcnt, 1);
1012
1013 if (sun_path[0]) {
1014 struct path path;
1015 umode_t mode = S_IFSOCK |
1016 (SOCK_INODE(sock)->i_mode & ~current_umask());
1017 err = unix_mknod(sun_path, mode, &path);
1018 if (err) {
1019 if (err == -EEXIST)
1020 err = -EADDRINUSE;
1021 unix_release_addr(addr);
1022 goto out_up;
1023 }
1024 addr->hash = UNIX_HASH_SIZE;
1025 hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
1026 spin_lock(&unix_table_lock);
1027 u->path = path;
1028 list = &unix_socket_table[hash];
1029 } else {
1030 spin_lock(&unix_table_lock);
1031 err = -EADDRINUSE;
1032 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1033 sk->sk_type, hash)) {
1034 unix_release_addr(addr);
1035 goto out_unlock;
1036 }
1037
1038 list = &unix_socket_table[addr->hash];
1039 }
1040
1041 err = 0;
1042 __unix_remove_socket(sk);
1043 u->addr = addr;
1044 __unix_insert_socket(list, sk);
1045
1046 out_unlock:
1047 spin_unlock(&unix_table_lock);
1048 out_up:
1049 mutex_unlock(&u->readlock);
1050 out:
1051 return err;
1052 }
1053
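/* Take both sockets' state locks in a fixed (address) order so that two
 * tasks locking the same pair from opposite ends cannot deadlock; a NULL
 * or identical sk2 degrades to a single lock.
 */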
1054 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1055 {
1056 if (unlikely(sk1 == sk2) || !sk2) {
1057 unix_state_lock(sk1);
1058 return;
1059 }
1060 if (sk1 < sk2) {
1061 unix_state_lock(sk1);
1062 unix_state_lock_nested(sk2);
1063 } else {
1064 unix_state_lock(sk2);
1065 unix_state_lock_nested(sk1);
1066 }
1067 }
1068
1069 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1070 {
1071 if (unlikely(sk1 == sk2) || !sk2) {
1072 unix_state_unlock(sk1);
1073 return;
1074 }
1075 unix_state_unlock(sk1);
1076 unix_state_unlock(sk2);
1077 }
1078
1079 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1080 int alen, int flags)
1081 {
1082 struct sock *sk = sock->sk;
1083 struct net *net = sock_net(sk);
1084 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1085 struct sock *other;
1086 unsigned int hash;
1087 int err;
1088
1089 if (addr->sa_family != AF_UNSPEC) {
1090 err = unix_mkname(sunaddr, alen, &hash);
1091 if (err < 0)
1092 goto out;
1093 alen = err;
1094
1095 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1096 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1097 goto out;
1098
1099 restart:
1100 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1101 if (!other)
1102 goto out;
1103
1104 unix_state_double_lock(sk, other);
1105
1106 /* Apparently VFS overslept socket death. Retry. */
1107 if (sock_flag(other, SOCK_DEAD)) {
1108 unix_state_double_unlock(sk, other);
1109 sock_put(other);
1110 goto restart;
1111 }
1112
1113 err = -EPERM;
1114 if (!unix_may_send(sk, other))
1115 goto out_unlock;
1116
1117 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1118 if (err)
1119 goto out_unlock;
1120
1121 } else {
1122 /*
1123 * 1003.1g breaking connected state with AF_UNSPEC
1124 */
1125 other = NULL;
1126 unix_state_double_lock(sk, other);
1127 }
1128
1129 /*
1130 * If it was connected, reconnect.
1131 */
1132 if (unix_peer(sk)) {
1133 struct sock *old_peer = unix_peer(sk);
1134 unix_peer(sk) = other;
1135 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1136
1137 unix_state_double_unlock(sk, other);
1138
1139 if (other != old_peer)
1140 unix_dgram_disconnected(sk, old_peer);
1141 sock_put(old_peer);
1142 } else {
1143 unix_peer(sk) = other;
1144 unix_state_double_unlock(sk, other);
1145 }
1146 return 0;
1147
1148 out_unlock:
1149 unix_state_double_unlock(sk, other);
1150 sock_put(other);
1151 out:
1152 return err;
1153 }
1154
1155 static long unix_wait_for_peer(struct sock *other, long timeo)
1156 {
1157 struct unix_sock *u = unix_sk(other);
1158 int sched;
1159 DEFINE_WAIT(wait);
1160
1161 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1162
1163 sched = !sock_flag(other, SOCK_DEAD) &&
1164 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1165 unix_recvq_full(other);
1166
1167 unix_state_unlock(other);
1168
1169 if (sched)
1170 timeo = schedule_timeout(timeo);
1171
1172 finish_wait(&u->peer_wait, &wait);
1173 return timeo;
1174 }
1175
1176 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1177 int addr_len, int flags)
1178 {
1179 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1180 struct sock *sk = sock->sk;
1181 struct net *net = sock_net(sk);
1182 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1183 struct sock *newsk = NULL;
1184 struct sock *other = NULL;
1185 struct sk_buff *skb = NULL;
1186 unsigned int hash;
1187 int st;
1188 int err;
1189 long timeo;
1190
1191 err = unix_mkname(sunaddr, addr_len, &hash);
1192 if (err < 0)
1193 goto out;
1194 addr_len = err;
1195
1196 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1197 (err = unix_autobind(sock)) != 0)
1198 goto out;
1199
1200 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1201
1202 /* First of all allocate resources.
1203 If we do it after the state is locked,
1204 we will have to recheck everything again in any case.
1205 */
1206
1207 err = -ENOMEM;
1208
1209 /* create new sock for complete connection */
1210 newsk = unix_create1(sock_net(sk), NULL);
1211 if (newsk == NULL)
1212 goto out;
1213
1214 /* Allocate skb for sending to listening sock */
1215 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1216 if (skb == NULL)
1217 goto out;
1218
1219 restart:
1220 /* Find listening sock. */
1221 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1222 if (!other)
1223 goto out;
1224
1225 /* Latch state of peer */
1226 unix_state_lock(other);
1227
1228 /* Apparently VFS overslept socket death. Retry. */
1229 if (sock_flag(other, SOCK_DEAD)) {
1230 unix_state_unlock(other);
1231 sock_put(other);
1232 goto restart;
1233 }
1234
1235 err = -ECONNREFUSED;
1236 if (other->sk_state != TCP_LISTEN)
1237 goto out_unlock;
1238 if (other->sk_shutdown & RCV_SHUTDOWN)
1239 goto out_unlock;
1240
1241 if (unix_recvq_full(other)) {
1242 err = -EAGAIN;
1243 if (!timeo)
1244 goto out_unlock;
1245
1246 timeo = unix_wait_for_peer(other, timeo);
1247
1248 err = sock_intr_errno(timeo);
1249 if (signal_pending(current))
1250 goto out;
1251 sock_put(other);
1252 goto restart;
1253 }
1254
1255 /* Latch our state.
1256
1257 This is a tricky place. We need to grab our state lock and cannot
1258 drop the lock on the peer. It is dangerous because a deadlock is
1259 possible. The connect-to-self case and simultaneous
1260 connect attempts are eliminated by checking the socket
1261 state: other is TCP_LISTEN, and if sk were TCP_LISTEN too we
1262 would have caught it before attempting to grab the lock.
1263
1264 And we still have to recheck the state after the socket is locked.
1265 */
1266 st = sk->sk_state;
1267
1268 switch (st) {
1269 case TCP_CLOSE:
1270 /* This is ok... continue with connect */
1271 break;
1272 case TCP_ESTABLISHED:
1273 /* Socket is already connected */
1274 err = -EISCONN;
1275 goto out_unlock;
1276 default:
1277 err = -EINVAL;
1278 goto out_unlock;
1279 }
1280
1281 unix_state_lock_nested(sk);
1282
1283 if (sk->sk_state != st) {
1284 unix_state_unlock(sk);
1285 unix_state_unlock(other);
1286 sock_put(other);
1287 goto restart;
1288 }
1289
1290 err = security_unix_stream_connect(sk, other, newsk);
1291 if (err) {
1292 unix_state_unlock(sk);
1293 goto out_unlock;
1294 }
1295
1296 /* The way is open! Quickly set all the necessary fields... */
1297
1298 sock_hold(sk);
1299 unix_peer(newsk) = sk;
1300 newsk->sk_state = TCP_ESTABLISHED;
1301 newsk->sk_type = sk->sk_type;
1302 init_peercred(newsk);
1303 newu = unix_sk(newsk);
1304 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1305 otheru = unix_sk(other);
1306
1307 /* copy address information from listening to new sock*/
1308 if (otheru->addr) {
1309 atomic_inc(&otheru->addr->refcnt);
1310 newu->addr = otheru->addr;
1311 }
1312 if (otheru->path.dentry) {
1313 path_get(&otheru->path);
1314 newu->path = otheru->path;
1315 }
1316
1317 /* Set credentials */
1318 copy_peercred(sk, other);
1319
1320 sock->state = SS_CONNECTED;
1321 sk->sk_state = TCP_ESTABLISHED;
1322 sock_hold(newsk);
1323
1324 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1325 unix_peer(sk) = newsk;
1326
1327 unix_state_unlock(sk);
1328
1329 /* take ten and send info to the listening sock */
1330 spin_lock(&other->sk_receive_queue.lock);
1331 __skb_queue_tail(&other->sk_receive_queue, skb);
1332 spin_unlock(&other->sk_receive_queue.lock);
1333 unix_state_unlock(other);
1334 other->sk_data_ready(other, 0);
1335 sock_put(other);
1336 return 0;
1337
1338 out_unlock:
1339 if (other)
1340 unix_state_unlock(other);
1341
1342 out:
1343 kfree_skb(skb);
1344 if (newsk)
1345 unix_release_sock(newsk, 0);
1346 if (other)
1347 sock_put(other);
1348 return err;
1349 }
1350
1351 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1352 {
1353 struct sock *ska = socka->sk, *skb = sockb->sk;
1354
1355 /* Join our sockets back to back */
1356 sock_hold(ska);
1357 sock_hold(skb);
1358 unix_peer(ska) = skb;
1359 unix_peer(skb) = ska;
1360 init_peercred(ska);
1361 init_peercred(skb);
1362
1363 if (ska->sk_type != SOCK_DGRAM) {
1364 ska->sk_state = TCP_ESTABLISHED;
1365 skb->sk_state = TCP_ESTABLISHED;
1366 socka->state = SS_CONNECTED;
1367 sockb->state = SS_CONNECTED;
1368 }
1369 return 0;
1370 }
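/*
 * Sketch (hypothetical userspace): the back-to-back join above is what
 * implements socketpair(2) for AF_UNIX:
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);	(sv[0] <-> sv[1])
 */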
1371
1372 static void unix_sock_inherit_flags(const struct socket *old,
1373 struct socket *new)
1374 {
1375 if (test_bit(SOCK_PASSCRED, &old->flags))
1376 set_bit(SOCK_PASSCRED, &new->flags);
1377 if (test_bit(SOCK_PASSSEC, &old->flags))
1378 set_bit(SOCK_PASSSEC, &new->flags);
1379 }
1380
1381 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1382 {
1383 struct sock *sk = sock->sk;
1384 struct sock *tsk;
1385 struct sk_buff *skb;
1386 int err;
1387
1388 err = -EOPNOTSUPP;
1389 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1390 goto out;
1391
1392 err = -EINVAL;
1393 if (sk->sk_state != TCP_LISTEN)
1394 goto out;
1395
1396 /* If socket state is TCP_LISTEN it cannot change (for now...),
1397 * so no locks are necessary.
1398 */
1399
1400 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1401 if (!skb) {
1402 /* This means receive shutdown. */
1403 if (err == 0)
1404 err = -EINVAL;
1405 goto out;
1406 }
1407
1408 tsk = skb->sk;
1409 skb_free_datagram(sk, skb);
1410 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1411
1412 /* attach accepted sock to socket */
1413 unix_state_lock(tsk);
1414 newsock->state = SS_CONNECTED;
1415 unix_sock_inherit_flags(sock, newsock);
1416 sock_graft(tsk, newsock);
1417 unix_state_unlock(tsk);
1418 return 0;
1419
1420 out:
1421 return err;
1422 }
1423
1424
1425 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1426 {
1427 struct sock *sk = sock->sk;
1428 struct unix_sock *u;
1429 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1430 int err = 0;
1431
1432 if (peer) {
1433 sk = unix_peer_get(sk);
1434
1435 err = -ENOTCONN;
1436 if (!sk)
1437 goto out;
1438 err = 0;
1439 } else {
1440 sock_hold(sk);
1441 }
1442
1443 u = unix_sk(sk);
1444 unix_state_lock(sk);
1445 if (!u->addr) {
1446 sunaddr->sun_family = AF_UNIX;
1447 sunaddr->sun_path[0] = 0;
1448 *uaddr_len = sizeof(short);
1449 } else {
1450 struct unix_address *addr = u->addr;
1451
1452 *uaddr_len = addr->len;
1453 memcpy(sunaddr, addr->name, *uaddr_len);
1454 }
1455 unix_state_unlock(sk);
1456 sock_put(sk);
1457 out:
1458 return err;
1459 }
1460
1461 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1462 {
1463 int i;
1464
1465 scm->fp = UNIXCB(skb).fp;
1466 UNIXCB(skb).fp = NULL;
1467
1468 for (i = scm->fp->count-1; i >= 0; i--)
1469 unix_notinflight(scm->fp->fp[i]);
1470 }
1471
1472 static void unix_destruct_scm(struct sk_buff *skb)
1473 {
1474 struct scm_cookie scm;
1475 memset(&scm, 0, sizeof(scm));
1476 scm.pid = UNIXCB(skb).pid;
1477 if (UNIXCB(skb).fp)
1478 unix_detach_fds(&scm, skb);
1479
1480 /* Alas, it calls VFS */
1481 /* So fscking what? fput() had been SMP-safe since the last Summer */
1482 scm_destroy(&scm);
1483 sock_wfree(skb);
1484 }
1485
1486 #define MAX_RECURSION_LEVEL 4
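/* Passing an fd that is itself an AF_UNIX socket raises the recursion
 * level of the receiving socket; bounding it keeps chains of
 * sockets-sent-over-sockets finite for the garbage collector.
 */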
1487
1488 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1489 {
1490 int i;
1491 unsigned char max_level = 0;
1492 int unix_sock_count = 0;
1493
1494 for (i = scm->fp->count - 1; i >= 0; i--) {
1495 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1496
1497 if (sk) {
1498 unix_sock_count++;
1499 max_level = max(max_level,
1500 unix_sk(sk)->recursion_level);
1501 }
1502 }
1503 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1504 return -ETOOMANYREFS;
1505
1506 /*
1507 * Need to duplicate file references for the sake of garbage
1508 * collection. Otherwise a socket in the fps might become a
1509 * candidate for GC while the skb is not yet queued.
1510 */
1511 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1512 if (!UNIXCB(skb).fp)
1513 return -ENOMEM;
1514
1515 if (unix_sock_count) {
1516 for (i = scm->fp->count - 1; i >= 0; i--)
1517 unix_inflight(scm->fp->fp[i]);
1518 }
1519 return max_level;
1520 }
1521
1522 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1523 {
1524 int err = 0;
1525
1526 UNIXCB(skb).pid = get_pid(scm->pid);
1527 UNIXCB(skb).uid = scm->creds.uid;
1528 UNIXCB(skb).gid = scm->creds.gid;
1529 UNIXCB(skb).fp = NULL;
1530 if (scm->fp && send_fds)
1531 err = unix_attach_fds(scm, skb);
1532
1533 skb->destructor = unix_destruct_scm;
1534 return err;
1535 }
1536
1537 /*
1538 * Some apps rely on write() giving SCM_CREDENTIALS.
1539 * We include credentials if the source or destination socket
1540 * asserted SOCK_PASSCRED.
1541 */
1542 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1543 const struct sock *other)
1544 {
1545 if (UNIXCB(skb).pid)
1546 return;
1547 if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1548 !other->sk_socket ||
1549 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1550 UNIXCB(skb).pid = get_pid(task_tgid(current));
1551 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1552 }
1553 }
1554
1555 /*
1556 * Send AF_UNIX data.
1557 */
1558
1559 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1560 struct msghdr *msg, size_t len)
1561 {
1562 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1563 struct sock *sk = sock->sk;
1564 struct net *net = sock_net(sk);
1565 struct unix_sock *u = unix_sk(sk);
1566 struct sockaddr_un *sunaddr = msg->msg_name;
1567 struct sock *other = NULL;
1568 int namelen = 0; /* fake initializer to quiet GCC */
1569 int err;
1570 unsigned int hash;
1571 struct sk_buff *skb;
1572 long timeo;
1573 struct scm_cookie tmp_scm;
1574 int max_level;
1575 int data_len = 0;
1576 int sk_locked;
1577
1578 if (NULL == siocb->scm)
1579 siocb->scm = &tmp_scm;
1580 wait_for_unix_gc();
1581 err = scm_send(sock, msg, siocb->scm, false);
1582 if (err < 0)
1583 return err;
1584
1585 err = -EOPNOTSUPP;
1586 if (msg->msg_flags&MSG_OOB)
1587 goto out;
1588
1589 if (msg->msg_namelen) {
1590 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1591 if (err < 0)
1592 goto out;
1593 namelen = err;
1594 } else {
1595 sunaddr = NULL;
1596 err = -ENOTCONN;
1597 other = unix_peer_get(sk);
1598 if (!other)
1599 goto out;
1600 }
1601
1602 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1603 && (err = unix_autobind(sock)) != 0)
1604 goto out;
1605
1606 err = -EMSGSIZE;
1607 if (len > sk->sk_sndbuf - 32)
1608 goto out;
1609
1610 if (len > SKB_MAX_ALLOC)
1611 data_len = min_t(size_t,
1612 len - SKB_MAX_ALLOC,
1613 MAX_SKB_FRAGS * PAGE_SIZE);
1614
1615 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1616 msg->msg_flags & MSG_DONTWAIT, &err);
1617 if (skb == NULL)
1618 goto out;
1619
1620 err = unix_scm_to_skb(siocb->scm, skb, true);
1621 if (err < 0)
1622 goto out_free;
1623 max_level = err + 1;
1624 unix_get_secdata(siocb->scm, skb);
1625
1626 skb_put(skb, len - data_len);
1627 skb->data_len = data_len;
1628 skb->len = len;
1629 err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1630 if (err)
1631 goto out_free;
1632
1633 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1634
1635 restart:
1636 if (!other) {
1637 err = -ECONNRESET;
1638 if (sunaddr == NULL)
1639 goto out_free;
1640
1641 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1642 hash, &err);
1643 if (other == NULL)
1644 goto out_free;
1645 }
1646
1647 if (sk_filter(other, skb) < 0) {
1648 /* Toss the packet but do not return any error to the sender */
1649 err = len;
1650 goto out_free;
1651 }
1652
1653 sk_locked = 0;
1654 unix_state_lock(other);
1655 restart_locked:
1656 err = -EPERM;
1657 if (!unix_may_send(sk, other))
1658 goto out_unlock;
1659
1660 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1661 /*
1662 * Check with 1003.1g - what should a
1663 * datagram error return here?
1664 */
1665 unix_state_unlock(other);
1666 sock_put(other);
1667
1668 if (!sk_locked)
1669 unix_state_lock(sk);
1670
1671 err = 0;
1672 if (unix_peer(sk) == other) {
1673 unix_peer(sk) = NULL;
1674 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1675
1676 unix_state_unlock(sk);
1677
1678 unix_dgram_disconnected(sk, other);
1679 sock_put(other);
1680 err = -ECONNREFUSED;
1681 } else {
1682 unix_state_unlock(sk);
1683 }
1684
1685 other = NULL;
1686 if (err)
1687 goto out_free;
1688 goto restart;
1689 }
1690
1691 err = -EPIPE;
1692 if (other->sk_shutdown & RCV_SHUTDOWN)
1693 goto out_unlock;
1694
1695 if (sk->sk_type != SOCK_SEQPACKET) {
1696 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1697 if (err)
1698 goto out_unlock;
1699 }
1700
1701 if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1702 if (timeo) {
1703 timeo = unix_wait_for_peer(other, timeo);
1704
1705 err = sock_intr_errno(timeo);
1706 if (signal_pending(current))
1707 goto out_free;
1708
1709 goto restart;
1710 }
1711
1712 if (!sk_locked) {
1713 unix_state_unlock(other);
1714 unix_state_double_lock(sk, other);
1715 }
1716
1717 if (unix_peer(sk) != other ||
1718 unix_dgram_peer_wake_me(sk, other)) {
1719 err = -EAGAIN;
1720 sk_locked = 1;
1721 goto out_unlock;
1722 }
1723
1724 if (!sk_locked) {
1725 sk_locked = 1;
1726 goto restart_locked;
1727 }
1728 }
1729
1730 if (unlikely(sk_locked))
1731 unix_state_unlock(sk);
1732
1733 if (sock_flag(other, SOCK_RCVTSTAMP))
1734 __net_timestamp(skb);
1735 maybe_add_creds(skb, sock, other);
1736 skb_queue_tail(&other->sk_receive_queue, skb);
1737 if (max_level > unix_sk(other)->recursion_level)
1738 unix_sk(other)->recursion_level = max_level;
1739 unix_state_unlock(other);
1740 other->sk_data_ready(other, len);
1741 sock_put(other);
1742 scm_destroy(siocb->scm);
1743 return len;
1744
1745 out_unlock:
1746 if (sk_locked)
1747 unix_state_unlock(sk);
1748 unix_state_unlock(other);
1749 out_free:
1750 kfree_skb(skb);
1751 out:
1752 if (other)
1753 sock_put(other);
1754 scm_destroy(siocb->scm);
1755 return err;
1756 }
1757
1758
1759 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1760 struct msghdr *msg, size_t len)
1761 {
1762 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1763 struct sock *sk = sock->sk;
1764 struct sock *other = NULL;
1765 int err, size;
1766 struct sk_buff *skb;
1767 int sent = 0;
1768 struct scm_cookie tmp_scm;
1769 bool fds_sent = false;
1770 int max_level;
1771
1772 if (NULL == siocb->scm)
1773 siocb->scm = &tmp_scm;
1774 wait_for_unix_gc();
1775 err = scm_send(sock, msg, siocb->scm, false);
1776 if (err < 0)
1777 return err;
1778
1779 err = -EOPNOTSUPP;
1780 if (msg->msg_flags&MSG_OOB)
1781 goto out_err;
1782
1783 if (msg->msg_namelen) {
1784 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1785 goto out_err;
1786 } else {
1787 err = -ENOTCONN;
1788 other = unix_peer(sk);
1789 if (!other)
1790 goto out_err;
1791 }
1792
1793 if (sk->sk_shutdown & SEND_SHUTDOWN)
1794 goto pipe_err;
1795
1796 while (sent < len) {
1797 /*
1798 * Optimisation for the fact that under 0.01% of X
1799 * messages typically need breaking up.
1800 */
1801
1802 size = len-sent;
1803
1804 /* Keep two messages in the pipe so it schedules better */
1805 if (size > ((sk->sk_sndbuf >> 1) - 64))
1806 size = (sk->sk_sndbuf >> 1) - 64;
1807
1808 if (size > SKB_MAX_ALLOC)
1809 size = SKB_MAX_ALLOC;
1810
1811 /*
1812 * Grab a buffer
1813 */
1814
1815 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1816 &err);
1817
1818 if (skb == NULL)
1819 goto out_err;
1820
1821 /*
1822 * If you pass two values to sock_alloc_send_skb
1823 * it tries to grab the large buffer with GFP_NOFS
1824 * (which can fail easily), and if that fails it grabs the
1825 * fallback size buffer, which is under a page and will
1826 * succeed. [Alan]
1827 */
1828 size = min_t(int, size, skb_tailroom(skb));
1829
1830
1831 /* Only send the fds in the first buffer */
1832 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1833 if (err < 0) {
1834 kfree_skb(skb);
1835 goto out_err;
1836 }
1837 max_level = err + 1;
1838 fds_sent = true;
1839
1840 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1841 if (err) {
1842 kfree_skb(skb);
1843 goto out_err;
1844 }
1845
1846 unix_state_lock(other);
1847
1848 if (sock_flag(other, SOCK_DEAD) ||
1849 (other->sk_shutdown & RCV_SHUTDOWN))
1850 goto pipe_err_free;
1851
1852 maybe_add_creds(skb, sock, other);
1853 skb_queue_tail(&other->sk_receive_queue, skb);
1854 if (max_level > unix_sk(other)->recursion_level)
1855 unix_sk(other)->recursion_level = max_level;
1856 unix_state_unlock(other);
1857 other->sk_data_ready(other, size);
1858 sent += size;
1859 }
1860
1861 scm_destroy(siocb->scm);
1862 siocb->scm = NULL;
1863
1864 return sent;
1865
1866 pipe_err_free:
1867 unix_state_unlock(other);
1868 kfree_skb(skb);
1869 pipe_err:
1870 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1871 send_sig(SIGPIPE, current, 0);
1872 err = -EPIPE;
1873 out_err:
1874 scm_destroy(siocb->scm);
1875 siocb->scm = NULL;
1876 return sent ? : err;
1877 }
1878
1879 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1880 struct msghdr *msg, size_t len)
1881 {
1882 int err;
1883 struct sock *sk = sock->sk;
1884
1885 err = sock_error(sk);
1886 if (err)
1887 return err;
1888
1889 if (sk->sk_state != TCP_ESTABLISHED)
1890 return -ENOTCONN;
1891
1892 if (msg->msg_namelen)
1893 msg->msg_namelen = 0;
1894
1895 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1896 }
1897
1898 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1899 struct msghdr *msg, size_t size,
1900 int flags)
1901 {
1902 struct sock *sk = sock->sk;
1903
1904 if (sk->sk_state != TCP_ESTABLISHED)
1905 return -ENOTCONN;
1906
1907 return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1908 }
1909
1910 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1911 {
1912 struct unix_sock *u = unix_sk(sk);
1913
1914 if (u->addr) {
1915 msg->msg_namelen = u->addr->len;
1916 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1917 }
1918 }
1919
1920 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1921 struct msghdr *msg, size_t size,
1922 int flags)
1923 {
1924 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1925 struct scm_cookie tmp_scm;
1926 struct sock *sk = sock->sk;
1927 struct unix_sock *u = unix_sk(sk);
1928 int noblock = flags & MSG_DONTWAIT;
1929 struct sk_buff *skb;
1930 int err;
1931 int peeked, skip;
1932
1933 err = -EOPNOTSUPP;
1934 if (flags&MSG_OOB)
1935 goto out;
1936
1937 mutex_lock(&u->readlock);
1938
1939 skip = sk_peek_offset(sk, flags);
1940
1941 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1942 if (!skb) {
1943 unix_state_lock(sk);
1944 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1945 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1946 (sk->sk_shutdown & RCV_SHUTDOWN))
1947 err = 0;
1948 unix_state_unlock(sk);
1949 goto out_unlock;
1950 }
1951
1952 wake_up_interruptible_sync_poll(&u->peer_wait,
1953 POLLOUT | POLLWRNORM | POLLWRBAND);
1954
1955 if (msg->msg_name)
1956 unix_copy_addr(msg, skb->sk);
1957
1958 if (size > skb->len - skip)
1959 size = skb->len - skip;
1960 else if (size < skb->len - skip)
1961 msg->msg_flags |= MSG_TRUNC;
1962
1963 err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1964 if (err)
1965 goto out_free;
1966
1967 if (sock_flag(sk, SOCK_RCVTSTAMP))
1968 __sock_recv_timestamp(msg, sk, skb);
1969
1970 if (!siocb->scm) {
1971 siocb->scm = &tmp_scm;
1972 memset(&tmp_scm, 0, sizeof(tmp_scm));
1973 }
1974 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1975 unix_set_secdata(siocb->scm, skb);
1976
1977 if (!(flags & MSG_PEEK)) {
1978 if (UNIXCB(skb).fp)
1979 unix_detach_fds(siocb->scm, skb);
1980
1981 sk_peek_offset_bwd(sk, skb->len);
1982 } else {
1983 /* It is questionable: on PEEK we could:
1984 - not return fds - good, but too simple 8)
1985 - return fds, and do not return them on read (old strategy,
1986 apparently wrong)
1987 - clone fds (I chose it for now, it is the most universal
1988 solution)
1989
1990 POSIX 1003.1g does not actually define this clearly
1991 at all. POSIX 1003.1g doesn't define a lot of things
1992 clearly however!
1993
1994 */
1995
1996 sk_peek_offset_fwd(sk, size);
1997
1998 if (UNIXCB(skb).fp)
1999 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2000 }
2001 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2002
2003 scm_recv(sock, msg, siocb->scm, flags);
2004
2005 out_free:
2006 skb_free_datagram(sk, skb);
2007 out_unlock:
2008 mutex_unlock(&u->readlock);
2009 out:
2010 return err;
2011 }
2012
2013 /*
2014 * Sleep until more data has arrived. But check for races..
2015 */
2016 static long unix_stream_data_wait(struct sock *sk, long timeo,
2017 struct sk_buff *last)
2018 {
2019 DEFINE_WAIT(wait);
2020
2021 unix_state_lock(sk);
2022
2023 for (;;) {
2024 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2025
2026 if (skb_peek_tail(&sk->sk_receive_queue) != last ||
2027 sk->sk_err ||
2028 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2029 signal_pending(current) ||
2030 !timeo)
2031 break;
2032
2033 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2034 unix_state_unlock(sk);
2035 timeo = schedule_timeout(timeo);
2036 unix_state_lock(sk);
2037
2038 if (sock_flag(sk, SOCK_DEAD))
2039 break;
2040
2041 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2042 }
2043
2044 finish_wait(sk_sleep(sk), &wait);
2045 unix_state_unlock(sk);
2046 return timeo;
2047 }
2048
2049 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
2050 struct msghdr *msg, size_t size,
2051 int flags)
2052 {
2053 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
2054 struct scm_cookie tmp_scm;
2055 struct sock *sk = sock->sk;
2056 struct unix_sock *u = unix_sk(sk);
2057 struct sockaddr_un *sunaddr = msg->msg_name;
2058 int copied = 0;
2059 int noblock = flags & MSG_DONTWAIT;
2060 int check_creds = 0;
2061 int target;
2062 int err = 0;
2063 long timeo;
2064 int skip;
2065
2066 err = -EINVAL;
2067 if (sk->sk_state != TCP_ESTABLISHED)
2068 goto out;
2069
2070 err = -EOPNOTSUPP;
2071 if (flags&MSG_OOB)
2072 goto out;
2073
2074 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
2075 timeo = sock_rcvtimeo(sk, noblock);
2076
2077 /* Lock the socket to prevent queue disordering
2078 * while we sleep in memcpy_tomsg
2079 */
2080
2081 if (!siocb->scm) {
2082 siocb->scm = &tmp_scm;
2083 memset(&tmp_scm, 0, sizeof(tmp_scm));
2084 }
2085
2086 err = mutex_lock_interruptible(&u->readlock);
2087 if (unlikely(err)) {
2088 /* recvmsg() in non-blocking mode is supposed to return -EAGAIN
2089 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2090 */
2091 err = noblock ? -EAGAIN : -ERESTARTSYS;
2092 goto out;
2093 }
2094
2095 do {
2096 int chunk;
2097 struct sk_buff *skb, *last;
2098
2099 unix_state_lock(sk);
2100 if (sock_flag(sk, SOCK_DEAD)) {
2101 err = -ECONNRESET;
2102 goto unlock;
2103 }
2104 last = skb = skb_peek(&sk->sk_receive_queue);
2105 again:
2106 if (skb == NULL) {
2107 unix_sk(sk)->recursion_level = 0;
2108 if (copied >= target)
2109 goto unlock;
2110
2111 /*
2112 * POSIX 1003.1g mandates this order.
2113 */
2114
2115 err = sock_error(sk);
2116 if (err)
2117 goto unlock;
2118 if (sk->sk_shutdown & RCV_SHUTDOWN)
2119 goto unlock;
2120
2121 unix_state_unlock(sk);
2122 err = -EAGAIN;
2123 if (!timeo)
2124 break;
2125 mutex_unlock(&u->readlock);
2126
2127 timeo = unix_stream_data_wait(sk, timeo, last);
2128
2129 if (signal_pending(current)) {
2130 err = sock_intr_errno(timeo);
2131 goto out;
2132 }
2133
2134 mutex_lock(&u->readlock);
2135 continue;
2136 unlock:
2137 unix_state_unlock(sk);
2138 break;
2139 }
2140
2141 skip = sk_peek_offset(sk, flags);
2142 while (skip >= skb->len) {
2143 skip -= skb->len;
2144 last = skb;
2145 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2146 if (!skb)
2147 goto again;
2148 }
2149
2150 unix_state_unlock(sk);
2151
2152 if (check_creds) {
2153 /* Never glue messages from different writers */
2154 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
2155 !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
2156 !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
2157 break;
2158 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2159 /* Copy credentials */
2160 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2161 check_creds = 1;
2162 }
2163
2164 /* Copy address just once */
2165 if (sunaddr) {
2166 unix_copy_addr(msg, skb->sk);
2167 sunaddr = NULL;
2168 }
2169
2170 chunk = min_t(unsigned int, skb->len - skip, size);
2171 if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
2172 if (copied == 0)
2173 copied = -EFAULT;
2174 break;
2175 }
2176 copied += chunk;
2177 size -= chunk;
2178
2179 /* Mark read part of skb as used */
2180 if (!(flags & MSG_PEEK)) {
2181 skb_pull(skb, chunk);
2182
2183 sk_peek_offset_bwd(sk, chunk);
2184
2185 if (UNIXCB(skb).fp)
2186 unix_detach_fds(siocb->scm, skb);
2187
2188 if (skb->len)
2189 break;
2190
2191 skb_unlink(skb, &sk->sk_receive_queue);
2192 consume_skb(skb);
2193
2194 if (siocb->scm->fp)
2195 break;
2196 } else {
2197 /* It is questionable; see the note on MSG_PEEK in unix_dgram_recvmsg.
2198 */
2199 if (UNIXCB(skb).fp)
2200 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2201
2202 if (skip) {
2203 sk_peek_offset_fwd(sk, chunk);
2204 skip -= chunk;
2205 }
2206
2207 if (UNIXCB(skb).fp)
2208 break;
2209
2210 last = skb;
2211 unix_state_lock(sk);
2212 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2213 if (skb)
2214 goto again;
2215 unix_state_unlock(sk);
2216 break;
2217 }
2218 } while (size);
2219
2220 mutex_unlock(&u->readlock);
2221 scm_recv(sock, msg, siocb->scm, flags);
2222 out:
2223 return copied ? : err;
2224 }
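/* Illustration (not part of the kernel build): the sk_peek_offset()
 * handling above is what makes SO_PEEK_OFF work from userspace.  With
 * a peek offset armed, each MSG_PEEK resumes where the previous one
 * stopped instead of re-reading from the head of the queue:
 *
 *	#include <sys/socket.h>
 *
 *	int off = 0;   // arm peeking, starting at offset 0
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *
 *	char buf[16];
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);  // peeks bytes 0..15
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);  // peeks bytes 16..31
 *	recv(fd, buf, sizeof(buf), 0);         // consumes bytes 0..15 and
 *	                                       // rewinds the stored offset
 *
 * A normal read decrements the stored offset (sk_peek_offset_bwd()),
 * a peek advances it (sk_peek_offset_fwd()).
 */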
2225
2226 static int unix_shutdown(struct socket *sock, int mode)
2227 {
2228 struct sock *sk = sock->sk;
2229 struct sock *other;
2230
2231 if (mode < SHUT_RD || mode > SHUT_RDWR)
2232 return -EINVAL;
2233 /* This maps:
2234 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2235 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2236 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2237 */
2238 ++mode;
2239
2240 unix_state_lock(sk);
2241 sk->sk_shutdown |= mode;
2242 other = unix_peer(sk);
2243 if (other)
2244 sock_hold(other);
2245 unix_state_unlock(sk);
2246 sk->sk_state_change(sk);
2247
2248 if (other &&
2249 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2250
2251 int peer_mode = 0;
2252
2253 if (mode&RCV_SHUTDOWN)
2254 peer_mode |= SEND_SHUTDOWN;
2255 if (mode&SEND_SHUTDOWN)
2256 peer_mode |= RCV_SHUTDOWN;
2257 unix_state_lock(other);
2258 other->sk_shutdown |= peer_mode;
2259 unix_state_unlock(other);
2260 other->sk_state_change(other);
2261 if (peer_mode == SHUTDOWN_MASK)
2262 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2263 else if (peer_mode & RCV_SHUTDOWN)
2264 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2265 }
2266 if (other)
2267 sock_put(other);
2268
2269 return 0;
2270 }
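/* Illustration (not part of the kernel build): the peer_mode mirroring
 * above is what turns a local SHUT_WR into end-of-file at the other end
 * of a stream socketpair:
 *
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *
 *	shutdown(sv[0], SHUT_WR);   // sets SEND_SHUTDOWN on sv[0] and
 *	                            // RCV_SHUTDOWN on its peer sv[1]
 *
 *	char c;
 *	read(sv[1], &c, 1);         // returns 0 (EOF) immediately
 *	write(sv[1], &c, 1);        // still allowed: only sv[1]'s read
 *	                            // side was shut down
 *
 * Readers blocked in poll()/select() on the peer are woken via
 * sk_state_change() and sk_wake_async().
 */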
2271
2272 long unix_inq_len(struct sock *sk)
2273 {
2274 struct sk_buff *skb;
2275 long amount = 0;
2276
2277 if (sk->sk_state == TCP_LISTEN)
2278 return -EINVAL;
2279
2280 spin_lock(&sk->sk_receive_queue.lock);
2281 if (sk->sk_type == SOCK_STREAM ||
2282 sk->sk_type == SOCK_SEQPACKET) {
2283 skb_queue_walk(&sk->sk_receive_queue, skb)
2284 amount += skb->len;
2285 } else {
2286 skb = skb_peek(&sk->sk_receive_queue);
2287 if (skb)
2288 amount = skb->len;
2289 }
2290 spin_unlock(&sk->sk_receive_queue.lock);
2291
2292 return amount;
2293 }
2294 EXPORT_SYMBOL_GPL(unix_inq_len);
2295
2296 long unix_outq_len(struct sock *sk)
2297 {
2298 return sk_wmem_alloc_get(sk);
2299 }
2300 EXPORT_SYMBOL_GPL(unix_outq_len);
2301
2302 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2303 {
2304 struct sock *sk = sock->sk;
2305 long amount = 0;
2306 int err;
2307
2308 switch (cmd) {
2309 case SIOCOUTQ:
2310 amount = unix_outq_len(sk);
2311 err = put_user(amount, (int __user *)arg);
2312 break;
2313 case SIOCINQ:
2314 amount = unix_inq_len(sk);
2315 if (amount < 0)
2316 err = amount;
2317 else
2318 err = put_user(amount, (int __user *)arg);
2319 break;
2320 default:
2321 err = -ENOIOCTLCMD;
2322 break;
2323 }
2324 return err;
2325 }
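/* Illustration (not part of the kernel build): querying the queue
 * lengths computed by unix_inq_len()/unix_outq_len() from userspace:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int pending, unsent;
 *	ioctl(fd, SIOCINQ, &pending);  // bytes readable: whole receive
 *	                               // queue for stream/seqpacket,
 *	                               // next datagram only for dgram
 *	ioctl(fd, SIOCOUTQ, &unsent);  // bytes still held in the send
 *	                               // buffer (sk_wmem_alloc)
 */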
2326
2327 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2328 {
2329 struct sock *sk = sock->sk;
2330 unsigned int mask;
2331
2332 sock_poll_wait(file, sk_sleep(sk), wait);
2333 mask = 0;
2334
2335 /* exceptional events? */
2336 if (sk->sk_err)
2337 mask |= POLLERR;
2338 if (sk->sk_shutdown == SHUTDOWN_MASK)
2339 mask |= POLLHUP;
2340 if (sk->sk_shutdown & RCV_SHUTDOWN)
2341 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2342
2343 /* readable? */
2344 if (!skb_queue_empty(&sk->sk_receive_queue))
2345 mask |= POLLIN | POLLRDNORM;
2346
2347 /* Connection-based sockets need to check for termination and startup */
2348 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2349 sk->sk_state == TCP_CLOSE)
2350 mask |= POLLHUP;
2351
2352 /*
2353 * We also report the socket as writable when the other side has
2354 * shut down the connection; this prevents sockets from getting stuck.
2355 */
2356 if (unix_writable(sk))
2357 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2358
2359 return mask;
2360 }
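/* Illustration (not part of the kernel build): distinguishing a
 * half-closed peer from a fully closed one using the bits set above:
 *
 *	#define _GNU_SOURCE             // for POLLRDHUP
 *	#include <poll.h>
 *	#include <stdio.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDHUP };
 *	poll(&pfd, 1, -1);
 *	if (pfd.revents & POLLRDHUP)
 *		puts("peer shut down its write side");
 *	if (pfd.revents & POLLHUP)
 *		puts("both directions are shut down");
 *
 * Note that RCV_SHUTDOWN also raises POLLIN, so a reader waiting for
 * data wakes up and sees EOF instead of blocking forever.
 */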
2361
2362 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2363 poll_table *wait)
2364 {
2365 struct sock *sk = sock->sk, *other;
2366 unsigned int mask, writable;
2367
2368 sock_poll_wait(file, sk_sleep(sk), wait);
2369 mask = 0;
2370
2371 /* exceptional events? */
2372 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2373 mask |= POLLERR |
2374 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2375
2376 if (sk->sk_shutdown & RCV_SHUTDOWN)
2377 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2378 if (sk->sk_shutdown == SHUTDOWN_MASK)
2379 mask |= POLLHUP;
2380
2381 /* readable? */
2382 if (!skb_queue_empty(&sk->sk_receive_queue))
2383 mask |= POLLIN | POLLRDNORM;
2384
2385 /* Connection-based sockets need to check for termination and startup */
2386 if (sk->sk_type == SOCK_SEQPACKET) {
2387 if (sk->sk_state == TCP_CLOSE)
2388 mask |= POLLHUP;
2389 /* connection hasn't started yet? */
2390 if (sk->sk_state == TCP_SYN_SENT)
2391 return mask;
2392 }
2393
2394 /* No write status requested, avoid expensive OUT tests. */
2395 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2396 return mask;
2397
2398 writable = unix_writable(sk);
2399 if (writable) {
2400 unix_state_lock(sk);
2401
2402 other = unix_peer(sk);
2403 if (other && unix_peer(other) != sk &&
2404 unix_recvq_full(other) &&
2405 unix_dgram_peer_wake_me(sk, other))
2406 writable = 0;
2407
2408 unix_state_unlock(sk);
2409 }
2410
2411 if (writable)
2412 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2413 else
2414 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2415
2416 return mask;
2417 }
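/* The unix_dgram_peer_wake_me() check above gives connected datagram
 * senders real POLLOUT back-pressure: while the peer's receive queue
 * is full, the sender is parked on the peer's wakeup list and POLLOUT
 * is withheld until the receiver drains the queue.  A sketch (not part
 * of the kernel build):
 *
 *	struct pollfd pfd = { .fd = dgram_fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);             // sleeps while the connected
 *	                               // peer's queue is full
 *	send(dgram_fd, buf, len, 0);   // now unlikely to hit -EAGAIN,
 *	                               // though a race is still possible
 */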
2418
2419 #ifdef CONFIG_PROC_FS
2420
2421 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2422
2423 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2424 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2425 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
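/* The iterator position *pos packs a hash bucket and an offset within
 * that bucket into one loff_t.  Worked example, assuming
 * UNIX_HASH_BITS == 8 on a 64-bit build, so BUCKET_SPACE is
 * 64 - 9 - 1 = 54:
 *
 *	pos = set_bucket_offset(3, 2);   // (3 << 54) | 2
 *	get_bucket(pos);                 // == 3
 *	get_offset(pos);                 // == 2
 *
 * Offset 0 is reserved for SEQ_START_TOKEN; sockets within a bucket
 * are counted from 1.
 */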
2426
2427 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2428 {
2429 unsigned long offset = get_offset(*pos);
2430 unsigned long bucket = get_bucket(*pos);
2431 struct sock *sk;
2432 unsigned long count = 0;
2433
2434 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2435 if (sock_net(sk) != seq_file_net(seq))
2436 continue;
2437 if (++count == offset)
2438 break;
2439 }
2440
2441 return sk;
2442 }
2443
2444 static struct sock *unix_next_socket(struct seq_file *seq,
2445 struct sock *sk,
2446 loff_t *pos)
2447 {
2448 unsigned long bucket;
2449
2450 while (sk > (struct sock *)SEQ_START_TOKEN) {
2451 sk = sk_next(sk);
2452 if (!sk)
2453 goto next_bucket;
2454 if (sock_net(sk) == seq_file_net(seq))
2455 return sk;
2456 }
2457
2458 do {
2459 sk = unix_from_bucket(seq, pos);
2460 if (sk)
2461 return sk;
2462
2463 next_bucket:
2464 bucket = get_bucket(*pos) + 1;
2465 *pos = set_bucket_offset(bucket, 1);
2466 } while (bucket < ARRAY_SIZE(unix_socket_table));
2467
2468 return NULL;
2469 }
2470
2471 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2472 __acquires(unix_table_lock)
2473 {
2474 spin_lock(&unix_table_lock);
2475
2476 if (!*pos)
2477 return SEQ_START_TOKEN;
2478
2479 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2480 return NULL;
2481
2482 return unix_next_socket(seq, NULL, pos);
2483 }
2484
2485 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2486 {
2487 ++*pos;
2488 return unix_next_socket(seq, v, pos);
2489 }
2490
2491 static void unix_seq_stop(struct seq_file *seq, void *v)
2492 __releases(unix_table_lock)
2493 {
2494 spin_unlock(&unix_table_lock);
2495 }
2496
2497 static int unix_seq_show(struct seq_file *seq, void *v)
2498 {
2499
2500 if (v == SEQ_START_TOKEN)
2501 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2502 "Inode Path\n");
2503 else {
2504 struct sock *s = v;
2505 struct unix_sock *u = unix_sk(s);
2506 unix_state_lock(s);
2507
2508 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2509 s,
2510 atomic_read(&s->sk_refcnt),
2511 0,
2512 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2513 s->sk_type,
2514 s->sk_socket ?
2515 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2516 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2517 sock_i_ino(s));
2518
2519 if (u->addr) {
2520 int i, len;
2521 seq_putc(seq, ' ');
2522
2523 i = 0;
2524 len = u->addr->len - sizeof(short);
2525 if (!UNIX_ABSTRACT(s))
2526 len--;
2527 else {
2528 seq_putc(seq, '@');
2529 i++;
2530 }
2531 for ( ; i < len; i++)
2532 seq_putc(seq, u->addr->name->sun_path[i]);
2533 }
2534 unix_state_unlock(s);
2535 seq_putc(seq, '\n');
2536 }
2537
2538 return 0;
2539 }
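/* Illustration: one line of /proc/net/unix as produced by the format
 * string above (the address is hashed by %pK; the values here are
 * made up, but self-consistent for a listening SOCK_STREAM socket):
 *
 *	Num               RefCount Protocol Flags    Type St Inode Path
 *	ffff88003688b000: 00000002 00000000 00010000 0001 01 10843 /tmp/.X11-unix/X0
 *
 * Flags shows __SO_ACCEPTCON (0x00010000) for listeners, Type is the
 * socket type (0001 == SOCK_STREAM), and St is the SS_* state (01 ==
 * SS_UNCONNECTED, since a listener is not TCP_ESTABLISHED).  Abstract
 * names are printed with a leading '@' in place of the initial NUL.
 */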
2540
2541 static const struct seq_operations unix_seq_ops = {
2542 .start = unix_seq_start,
2543 .next = unix_seq_next,
2544 .stop = unix_seq_stop,
2545 .show = unix_seq_show,
2546 };
2547
2548 static int unix_seq_open(struct inode *inode, struct file *file)
2549 {
2550 return seq_open_net(inode, file, &unix_seq_ops,
2551 sizeof(struct seq_net_private));
2552 }
2553
2554 static const struct file_operations unix_seq_fops = {
2555 .owner = THIS_MODULE,
2556 .open = unix_seq_open,
2557 .read = seq_read,
2558 .llseek = seq_lseek,
2559 .release = seq_release_net,
2560 };
2561
2562 #endif
2563
2564 static const struct net_proto_family unix_family_ops = {
2565 .family = PF_UNIX,
2566 .create = unix_create,
2567 .owner = THIS_MODULE,
2568 };
2569
2570
2571 static int __net_init unix_net_init(struct net *net)
2572 {
2573 int error = -ENOMEM;
2574
2575 net->unx.sysctl_max_dgram_qlen = 10;
2576 if (unix_sysctl_register(net))
2577 goto out;
2578
2579 #ifdef CONFIG_PROC_FS
2580 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2581 unix_sysctl_unregister(net);
2582 goto out;
2583 }
2584 #endif
2585 error = 0;
2586 out:
2587 return error;
2588 }
2589
2590 static void __net_exit unix_net_exit(struct net *net)
2591 {
2592 unix_sysctl_unregister(net);
2593 remove_proc_entry("unix", net->proc_net);
2594 }
2595
2596 static struct pernet_operations unix_net_ops = {
2597 .init = unix_net_init,
2598 .exit = unix_net_exit,
2599 };
2600
2601 static int __init af_unix_init(void)
2602 {
2603 int rc = -1;
2604
2605 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2606
2607 rc = proto_register(&unix_proto, 1);
2608 if (rc != 0) {
2609 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2610 __func__);
2611 goto out;
2612 }
2613
2614 sock_register(&unix_family_ops);
2615 register_pernet_subsys(&unix_net_ops);
2616 out:
2617 return rc;
2618 }
2619
2620 static void __exit af_unix_exit(void)
2621 {
2622 sock_unregister(PF_UNIX);
2623 proto_unregister(&unix_proto);
2624 unregister_pernet_subsys(&unix_net_ops);
2625 }
2626
2627 /* Earlier than device_initcall() so that other drivers invoking
2628 request_module() don't end up in a loop when modprobe tries
2629 to use a UNIX socket. But later than subsys_initcall() because
2630 we depend on stuff initialised there. */
2631 fs_initcall(af_unix_init);
2632 module_exit(af_unix_exit);
2633
2634 MODULE_LICENSE("GPL");
2635 MODULE_ALIAS_NETPROTO(PF_UNIX);