Linux 3.10.81 — net/unix/af_unix.c [GitHub/mt8127/android_kernel_alcatel_ttab.git]
1 /*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by the above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid a huge amount
38 * of hashed socks (this is for unix_gc()
39 * performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skbs queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and gives the blksize as the high water
59 * mark and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * starting with 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
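/*
 * Illustrative userspace sketch (not part of this kernel file): the two
 * binding styles described above. An abstract name starts with a NUL
 * byte, so its length must be passed explicitly; a filesystem name is a
 * normal NUL-terminated path. The names used here are placeholders.
 */
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static int bind_example(int fd, int use_abstract)
{
	struct sockaddr_un sun;

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	if (use_abstract) {
		/* abstract: sun_path[0] stays 0, name is the bytes after it */
		memcpy(sun.sun_path + 1, "demo", 4);
		return bind(fd, (struct sockaddr *)&sun,
			    offsetof(struct sockaddr_un, sun_path) + 1 + 4);
	}
	/* filesystem: visible as a socket inode under the given path */
	strcpy(sun.sun_path, "/tmp/demo.sock");
	return bind(fd, (struct sockaddr *)&sun, sizeof(sun));
}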
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117
118 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
119 EXPORT_SYMBOL_GPL(unix_socket_table);
120 DEFINE_SPINLOCK(unix_table_lock);
121 EXPORT_SYMBOL_GPL(unix_table_lock);
122 static atomic_long_t unix_nr_socks;
123
124
125 static struct hlist_head *unix_sockets_unbound(void *addr)
126 {
127 unsigned long hash = (unsigned long)addr;
128
129 hash ^= hash >> 16;
130 hash ^= hash >> 8;
131 hash %= UNIX_HASH_SIZE;
132 return &unix_socket_table[UNIX_HASH_SIZE + hash];
133 }
134
135 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
136
137 #ifdef CONFIG_SECURITY_NETWORK
138 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
139 {
140 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
141 }
142
143 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
144 {
145 scm->secid = *UNIXSID(skb);
146 }
147 #else
148 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
149 { }
150
151 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
152 { }
153 #endif /* CONFIG_SECURITY_NETWORK */
154
155 /*
156 * SMP locking strategy:
157 * hash table is protected with spinlock unix_table_lock
158 * each socket state is protected by separate spin lock.
159 */
160
161 static inline unsigned int unix_hash_fold(__wsum n)
162 {
163 unsigned int hash = (__force unsigned int)csum_fold(n);
164
165 hash ^= hash>>8;
166 return hash&(UNIX_HASH_SIZE-1);
167 }
168
169 #define unix_peer(sk) (unix_sk(sk)->peer)
170
171 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
172 {
173 return unix_peer(osk) == sk;
174 }
175
176 static inline int unix_may_send(struct sock *sk, struct sock *osk)
177 {
178 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
179 }
180
181 static inline int unix_recvq_full(struct sock const *sk)
182 {
183 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
184 }
185
186 struct sock *unix_peer_get(struct sock *s)
187 {
188 struct sock *peer;
189
190 unix_state_lock(s);
191 peer = unix_peer(s);
192 if (peer)
193 sock_hold(peer);
194 unix_state_unlock(s);
195 return peer;
196 }
197 EXPORT_SYMBOL_GPL(unix_peer_get);
198
199 static inline void unix_release_addr(struct unix_address *addr)
200 {
201 if (atomic_dec_and_test(&addr->refcnt))
202 kfree(addr);
203 }
204
205 /*
206 * Check unix socket name:
207 * - it should not be zero length.
208 * - if it starts with a non-zero byte, it should be NUL terminated (FS object)
209 * - if it starts with a zero byte, it is an abstract name.
210 */
211
212 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
213 {
214 if (len <= sizeof(short) || len > sizeof(*sunaddr))
215 return -EINVAL;
216 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
217 return -EINVAL;
218 if (sunaddr->sun_path[0]) {
219 /*
220 * This may look like an off by one error but it is a bit more
221 * subtle. 108 is the longest valid AF_UNIX path for a binding.
222 * sun_path[108] doesn't as such exist. However in kernel space
223 * we are guaranteed that it is a valid memory location in our
224 * kernel address buffer.
225 */
226 ((char *)sunaddr)[len] = 0;
227 len = strlen(sunaddr->sun_path)+1+sizeof(short);
228 return len;
229 }
230
231 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
232 return len;
233 }
234
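/*
 * Illustrative userspace sketch (not part of this kernel file): the
 * length rules unix_mkname() enforces. For a filesystem name the kernel
 * recomputes the length with strlen(), so only the bytes up to the NUL
 * matter; for an abstract name every byte up to addrlen is significant,
 * so the same bytes with different lengths are two distinct sockets.
 */
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static void two_distinct_abstract_names(int fd1, int fd2)
{
	struct sockaddr_un sun;
	socklen_t base = offsetof(struct sockaddr_un, sun_path);

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	sun.sun_path[1] = 'x';

	bind(fd1, (struct sockaddr *)&sun, base + 2);	/* name "\0x"   */
	bind(fd2, (struct sockaddr *)&sun, base + 3);	/* name "\0x\0" */
	/* both binds succeed: the trailing NUL is part of the second name */
}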
235 static void __unix_remove_socket(struct sock *sk)
236 {
237 sk_del_node_init(sk);
238 }
239
240 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
241 {
242 WARN_ON(!sk_unhashed(sk));
243 sk_add_node(sk, list);
244 }
245
246 static inline void unix_remove_socket(struct sock *sk)
247 {
248 spin_lock(&unix_table_lock);
249 __unix_remove_socket(sk);
250 spin_unlock(&unix_table_lock);
251 }
252
253 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
254 {
255 spin_lock(&unix_table_lock);
256 __unix_insert_socket(list, sk);
257 spin_unlock(&unix_table_lock);
258 }
259
260 static struct sock *__unix_find_socket_byname(struct net *net,
261 struct sockaddr_un *sunname,
262 int len, int type, unsigned int hash)
263 {
264 struct sock *s;
265
266 sk_for_each(s, &unix_socket_table[hash ^ type]) {
267 struct unix_sock *u = unix_sk(s);
268
269 if (!net_eq(sock_net(s), net))
270 continue;
271
272 if (u->addr->len == len &&
273 !memcmp(u->addr->name, sunname, len))
274 goto found;
275 }
276 s = NULL;
277 found:
278 return s;
279 }
280
281 static inline struct sock *unix_find_socket_byname(struct net *net,
282 struct sockaddr_un *sunname,
283 int len, int type,
284 unsigned int hash)
285 {
286 struct sock *s;
287
288 spin_lock(&unix_table_lock);
289 s = __unix_find_socket_byname(net, sunname, len, type, hash);
290 if (s)
291 sock_hold(s);
292 spin_unlock(&unix_table_lock);
293 return s;
294 }
295
296 static struct sock *unix_find_socket_byinode(struct inode *i)
297 {
298 struct sock *s;
299
300 spin_lock(&unix_table_lock);
301 sk_for_each(s,
302 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
303 struct dentry *dentry = unix_sk(s)->path.dentry;
304
305 if (dentry && dentry->d_inode == i) {
306 sock_hold(s);
307 goto found;
308 }
309 }
310 s = NULL;
311 found:
312 spin_unlock(&unix_table_lock);
313 return s;
314 }
315
316 static inline int unix_writable(struct sock *sk)
317 {
318 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
319 }
320
321 static void unix_write_space(struct sock *sk)
322 {
323 struct socket_wq *wq;
324
325 rcu_read_lock();
326 if (unix_writable(sk)) {
327 wq = rcu_dereference(sk->sk_wq);
328 if (wq_has_sleeper(wq))
329 wake_up_interruptible_sync_poll(&wq->wait,
330 POLLOUT | POLLWRNORM | POLLWRBAND);
331 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
332 }
333 rcu_read_unlock();
334 }
335
336 /* When a dgram socket disconnects (or changes its peer), we clear its
337 * receive queue of packets that arrived from the previous peer. First, this
338 * allows us to do flow control based only on wmem_alloc; second, an sk
339 * connected to a peer may receive messages only from that peer. */
340 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
341 {
342 if (!skb_queue_empty(&sk->sk_receive_queue)) {
343 skb_queue_purge(&sk->sk_receive_queue);
344 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
345
346 /* If one link of a bidirectional dgram pipe is disconnected,
347 * we signal an error. Messages are lost. Do not do this
348 * when the peer was not connected to us.
349 */
350 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
351 other->sk_err = ECONNRESET;
352 other->sk_error_report(other);
353 }
354 }
355 }
356
357 static void unix_sock_destructor(struct sock *sk)
358 {
359 struct unix_sock *u = unix_sk(sk);
360
361 skb_queue_purge(&sk->sk_receive_queue);
362
363 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
364 WARN_ON(!sk_unhashed(sk));
365 WARN_ON(sk->sk_socket);
366 if (!sock_flag(sk, SOCK_DEAD)) {
367 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
368 return;
369 }
370
371 if (u->addr)
372 unix_release_addr(u->addr);
373
374 atomic_long_dec(&unix_nr_socks);
375 local_bh_disable();
376 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
377 local_bh_enable();
378 #ifdef UNIX_REFCNT_DEBUG
379 printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
380 atomic_long_read(&unix_nr_socks));
381 #endif
382 }
383
384 static void unix_release_sock(struct sock *sk, int embrion)
385 {
386 struct unix_sock *u = unix_sk(sk);
387 struct path path;
388 struct sock *skpair;
389 struct sk_buff *skb;
390 int state;
391
392 unix_remove_socket(sk);
393
394 /* Clear state */
395 unix_state_lock(sk);
396 sock_orphan(sk);
397 sk->sk_shutdown = SHUTDOWN_MASK;
398 path = u->path;
399 u->path.dentry = NULL;
400 u->path.mnt = NULL;
401 state = sk->sk_state;
402 sk->sk_state = TCP_CLOSE;
403 unix_state_unlock(sk);
404
405 wake_up_interruptible_all(&u->peer_wait);
406
407 skpair = unix_peer(sk);
408
409 if (skpair != NULL) {
410 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
411 unix_state_lock(skpair);
412 /* No more writes */
413 skpair->sk_shutdown = SHUTDOWN_MASK;
414 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
415 skpair->sk_err = ECONNRESET;
416 unix_state_unlock(skpair);
417 skpair->sk_state_change(skpair);
418 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
419 }
420 sock_put(skpair); /* It may now die */
421 unix_peer(sk) = NULL;
422 }
423
424 /* Try to flush out this socket. Throw out buffers at least */
425
426 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
427 if (state == TCP_LISTEN)
428 unix_release_sock(skb->sk, 1);
429 /* passed fds are erased in the kfree_skb hook */
430 kfree_skb(skb);
431 }
432
433 if (path.dentry)
434 path_put(&path);
435
436 sock_put(sk);
437
438 /* ---- Socket is dead now and most probably destroyed ---- */
439
440 /*
441 * Fixme: BSD difference: In BSD all sockets connected to us get
442 * ECONNRESET and we die on the spot. In Linux we behave
443 * like files and pipes do and wait for the last
444 * dereference.
445 *
446 * Can't we simply set sock->err?
447 *
448 * What does the above comment talk about? --ANK(980817)
449 */
450
451 if (unix_tot_inflight)
452 unix_gc(); /* Garbage collect fds */
453 }
454
455 static void init_peercred(struct sock *sk)
456 {
457 put_pid(sk->sk_peer_pid);
458 if (sk->sk_peer_cred)
459 put_cred(sk->sk_peer_cred);
460 sk->sk_peer_pid = get_pid(task_tgid(current));
461 sk->sk_peer_cred = get_current_cred();
462 }
463
464 static void copy_peercred(struct sock *sk, struct sock *peersk)
465 {
466 put_pid(sk->sk_peer_pid);
467 if (sk->sk_peer_cred)
468 put_cred(sk->sk_peer_cred);
469 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
470 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
471 }
472
473 static int unix_listen(struct socket *sock, int backlog)
474 {
475 int err;
476 struct sock *sk = sock->sk;
477 struct unix_sock *u = unix_sk(sk);
478 struct pid *old_pid = NULL;
479
480 err = -EOPNOTSUPP;
481 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
482 goto out; /* Only stream/seqpacket sockets accept */
483 err = -EINVAL;
484 if (!u->addr)
485 goto out; /* No listens on an unbound socket */
486 unix_state_lock(sk);
487 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
488 goto out_unlock;
489 if (backlog > sk->sk_max_ack_backlog)
490 wake_up_interruptible_all(&u->peer_wait);
491 sk->sk_max_ack_backlog = backlog;
492 sk->sk_state = TCP_LISTEN;
493 /* set credentials so connect can copy them */
494 init_peercred(sk);
495 err = 0;
496
497 out_unlock:
498 unix_state_unlock(sk);
499 put_pid(old_pid);
500 out:
501 return err;
502 }
503
504 static int unix_release(struct socket *);
505 static int unix_bind(struct socket *, struct sockaddr *, int);
506 static int unix_stream_connect(struct socket *, struct sockaddr *,
507 int addr_len, int flags);
508 static int unix_socketpair(struct socket *, struct socket *);
509 static int unix_accept(struct socket *, struct socket *, int);
510 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
511 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
512 static unsigned int unix_dgram_poll(struct file *, struct socket *,
513 poll_table *);
514 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
515 static int unix_shutdown(struct socket *, int);
516 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
517 struct msghdr *, size_t);
518 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
519 struct msghdr *, size_t, int);
520 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
521 struct msghdr *, size_t);
522 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
523 struct msghdr *, size_t, int);
524 static int unix_dgram_connect(struct socket *, struct sockaddr *,
525 int, int);
526 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
527 struct msghdr *, size_t);
528 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
529 struct msghdr *, size_t, int);
530
531 static int unix_set_peek_off(struct sock *sk, int val)
532 {
533 struct unix_sock *u = unix_sk(sk);
534
535 if (mutex_lock_interruptible(&u->readlock))
536 return -EINTR;
537
538 sk->sk_peek_off = val;
539 mutex_unlock(&u->readlock);
540
541 return 0;
542 }
543
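/*
 * Illustrative userspace sketch (not part of this kernel file), assuming
 * a libc that defines SO_PEEK_OFF: once the option is set on the socket,
 * MSG_PEEK reads advance the peek offset maintained above instead of
 * rereading from the head of the queue.
 */
#include <stddef.h>
#include <sys/socket.h>

static void peek_in_order(int fd, char *buf, size_t len)
{
	int off = 0;

	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
	recv(fd, buf, len, MSG_PEEK);	/* peeks bytes 0..len-1     */
	recv(fd, buf, len, MSG_PEEK);	/* peeks bytes len..2*len-1 */
}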
544
545 static const struct proto_ops unix_stream_ops = {
546 .family = PF_UNIX,
547 .owner = THIS_MODULE,
548 .release = unix_release,
549 .bind = unix_bind,
550 .connect = unix_stream_connect,
551 .socketpair = unix_socketpair,
552 .accept = unix_accept,
553 .getname = unix_getname,
554 .poll = unix_poll,
555 .ioctl = unix_ioctl,
556 .listen = unix_listen,
557 .shutdown = unix_shutdown,
558 .setsockopt = sock_no_setsockopt,
559 .getsockopt = sock_no_getsockopt,
560 .sendmsg = unix_stream_sendmsg,
561 .recvmsg = unix_stream_recvmsg,
562 .mmap = sock_no_mmap,
563 .sendpage = sock_no_sendpage,
564 .set_peek_off = unix_set_peek_off,
565 };
566
567 static const struct proto_ops unix_dgram_ops = {
568 .family = PF_UNIX,
569 .owner = THIS_MODULE,
570 .release = unix_release,
571 .bind = unix_bind,
572 .connect = unix_dgram_connect,
573 .socketpair = unix_socketpair,
574 .accept = sock_no_accept,
575 .getname = unix_getname,
576 .poll = unix_dgram_poll,
577 .ioctl = unix_ioctl,
578 .listen = sock_no_listen,
579 .shutdown = unix_shutdown,
580 .setsockopt = sock_no_setsockopt,
581 .getsockopt = sock_no_getsockopt,
582 .sendmsg = unix_dgram_sendmsg,
583 .recvmsg = unix_dgram_recvmsg,
584 .mmap = sock_no_mmap,
585 .sendpage = sock_no_sendpage,
586 .set_peek_off = unix_set_peek_off,
587 };
588
589 static const struct proto_ops unix_seqpacket_ops = {
590 .family = PF_UNIX,
591 .owner = THIS_MODULE,
592 .release = unix_release,
593 .bind = unix_bind,
594 .connect = unix_stream_connect,
595 .socketpair = unix_socketpair,
596 .accept = unix_accept,
597 .getname = unix_getname,
598 .poll = unix_dgram_poll,
599 .ioctl = unix_ioctl,
600 .listen = unix_listen,
601 .shutdown = unix_shutdown,
602 .setsockopt = sock_no_setsockopt,
603 .getsockopt = sock_no_getsockopt,
604 .sendmsg = unix_seqpacket_sendmsg,
605 .recvmsg = unix_seqpacket_recvmsg,
606 .mmap = sock_no_mmap,
607 .sendpage = sock_no_sendpage,
608 .set_peek_off = unix_set_peek_off,
609 };
610
611 static struct proto unix_proto = {
612 .name = "UNIX",
613 .owner = THIS_MODULE,
614 .obj_size = sizeof(struct unix_sock),
615 };
616
617 /*
618 * AF_UNIX sockets do not interact with hardware, hence they
619 * don't trigger interrupts - so it's safe for them to have
620 * bh-unsafe locking for their sk_receive_queue.lock. Split off
621 * this special lock-class by reinitializing the spinlock key:
622 */
623 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
624
625 static struct sock *unix_create1(struct net *net, struct socket *sock)
626 {
627 struct sock *sk = NULL;
628 struct unix_sock *u;
629
630 atomic_long_inc(&unix_nr_socks);
631 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
632 goto out;
633
634 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
635 if (!sk)
636 goto out;
637
638 sock_init_data(sock, sk);
639 lockdep_set_class(&sk->sk_receive_queue.lock,
640 &af_unix_sk_receive_queue_lock_key);
641
642 sk->sk_write_space = unix_write_space;
643 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
644 sk->sk_destruct = unix_sock_destructor;
645 u = unix_sk(sk);
646 u->path.dentry = NULL;
647 u->path.mnt = NULL;
648 spin_lock_init(&u->lock);
649 atomic_long_set(&u->inflight, 0);
650 INIT_LIST_HEAD(&u->link);
651 mutex_init(&u->readlock); /* single task reading lock */
652 init_waitqueue_head(&u->peer_wait);
653 unix_insert_socket(unix_sockets_unbound(sk), sk);
654 out:
655 if (sk == NULL)
656 atomic_long_dec(&unix_nr_socks);
657 else {
658 local_bh_disable();
659 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
660 local_bh_enable();
661 }
662 return sk;
663 }
664
665 static int unix_create(struct net *net, struct socket *sock, int protocol,
666 int kern)
667 {
668 if (protocol && protocol != PF_UNIX)
669 return -EPROTONOSUPPORT;
670
671 sock->state = SS_UNCONNECTED;
672
673 switch (sock->type) {
674 case SOCK_STREAM:
675 sock->ops = &unix_stream_ops;
676 break;
677 /*
678 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
679 * nothing uses it.
680 */
681 case SOCK_RAW:
682 sock->type = SOCK_DGRAM;	/* fall through */
683 case SOCK_DGRAM:
684 sock->ops = &unix_dgram_ops;
685 break;
686 case SOCK_SEQPACKET:
687 sock->ops = &unix_seqpacket_ops;
688 break;
689 default:
690 return -ESOCKTNOSUPPORT;
691 }
692
693 return unix_create1(net, sock) ? 0 : -ENOMEM;
694 }
695
696 static int unix_release(struct socket *sock)
697 {
698 struct sock *sk = sock->sk;
699
700 if (!sk)
701 return 0;
702
703 unix_release_sock(sk, 0);
704 sock->sk = NULL;
705
706 return 0;
707 }
708
709 static int unix_autobind(struct socket *sock)
710 {
711 struct sock *sk = sock->sk;
712 struct net *net = sock_net(sk);
713 struct unix_sock *u = unix_sk(sk);
714 static u32 ordernum = 1;
715 struct unix_address *addr;
716 int err;
717 unsigned int retries = 0;
718
719 err = mutex_lock_interruptible(&u->readlock);
720 if (err)
721 return err;
722
723 err = 0;
724 if (u->addr)
725 goto out;
726
727 err = -ENOMEM;
728 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
729 if (!addr)
730 goto out;
731
732 addr->name->sun_family = AF_UNIX;
733 atomic_set(&addr->refcnt, 1);
734
735 retry:
736 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
737 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
738
739 spin_lock(&unix_table_lock);
740 ordernum = (ordernum+1)&0xFFFFF;
741
742 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
743 addr->hash)) {
744 spin_unlock(&unix_table_lock);
745 /*
746 * __unix_find_socket_byname() may take a long time if many names
747 * are already in use.
748 */
749 cond_resched();
750 /* Give up if all names seem to be in use. */
751 if (retries++ == 0xFFFFF) {
752 err = -ENOSPC;
753 kfree(addr);
754 goto out;
755 }
756 goto retry;
757 }
758 addr->hash ^= sk->sk_type;
759
760 __unix_remove_socket(sk);
761 u->addr = addr;
762 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
763 spin_unlock(&unix_table_lock);
764 err = 0;
765
766 out: mutex_unlock(&u->readlock);
767 return err;
768 }
769
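/*
 * Illustrative userspace sketch (not part of this kernel file). Autobind
 * runs when an unbound socket needs a name - bind() with addrlen equal to
 * sizeof(sa_family_t), or the SOCK_PASSCRED connect/send paths below - and
 * assigns a 5-hex-digit abstract name that getsockname() can retrieve.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <sys/un.h>

static void show_autobound_name(int fd)
{
	struct sockaddr_un sun;
	socklen_t len = sizeof(sun);

	/* assumes fd has already been autobound as described above */
	if (getsockname(fd, (struct sockaddr *)&sun, &len) == 0 &&
	    len > (socklen_t)sizeof(sa_family_t))
		printf("abstract name \"\\0%.*s\"\n",
		       (int)(len - sizeof(sa_family_t) - 1),
		       sun.sun_path + 1);
}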
770 static struct sock *unix_find_other(struct net *net,
771 struct sockaddr_un *sunname, int len,
772 int type, unsigned int hash, int *error)
773 {
774 struct sock *u;
775 struct path path;
776 int err = 0;
777
778 if (sunname->sun_path[0]) {
779 struct inode *inode;
780 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
781 if (err)
782 goto fail;
783 inode = path.dentry->d_inode;
784 err = inode_permission(inode, MAY_WRITE);
785 if (err)
786 goto put_fail;
787
788 err = -ECONNREFUSED;
789 if (!S_ISSOCK(inode->i_mode))
790 goto put_fail;
791 u = unix_find_socket_byinode(inode);
792 if (!u)
793 goto put_fail;
794
795 if (u->sk_type == type)
796 touch_atime(&path);
797
798 path_put(&path);
799
800 err = -EPROTOTYPE;
801 if (u->sk_type != type) {
802 sock_put(u);
803 goto fail;
804 }
805 } else {
806 err = -ECONNREFUSED;
807 u = unix_find_socket_byname(net, sunname, len, type, hash);
808 if (u) {
809 struct dentry *dentry;
810 dentry = unix_sk(u)->path.dentry;
811 if (dentry)
812 touch_atime(&unix_sk(u)->path);
813 } else
814 goto fail;
815 }
816 return u;
817
818 put_fail:
819 path_put(&path);
820 fail:
821 *error = err;
822 return NULL;
823 }
824
825 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
826 {
827 struct dentry *dentry;
828 struct path path;
829 int err = 0;
830 /*
831 * Get the parent directory, calculate the hash for the last
832 * component.
833 */
834 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
835 err = PTR_ERR(dentry);
836 if (IS_ERR(dentry))
837 return err;
838
839 /*
840 * All right, let's create it.
841 */
842 err = security_path_mknod(&path, dentry, mode, 0);
843 if (!err) {
844 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
845 if (!err) {
846 res->mnt = mntget(path.mnt);
847 res->dentry = dget(dentry);
848 }
849 }
850 done_path_create(&path, dentry);
851 return err;
852 }
853
854 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
855 {
856 struct sock *sk = sock->sk;
857 struct net *net = sock_net(sk);
858 struct unix_sock *u = unix_sk(sk);
859 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
860 char *sun_path = sunaddr->sun_path;
861 int err;
862 unsigned int hash;
863 struct unix_address *addr;
864 struct hlist_head *list;
865
866 err = -EINVAL;
867 if (sunaddr->sun_family != AF_UNIX)
868 goto out;
869
870 if (addr_len == sizeof(short)) {
871 err = unix_autobind(sock);
872 goto out;
873 }
874
875 err = unix_mkname(sunaddr, addr_len, &hash);
876 if (err < 0)
877 goto out;
878 addr_len = err;
879
880 err = mutex_lock_interruptible(&u->readlock);
881 if (err)
882 goto out;
883
884 err = -EINVAL;
885 if (u->addr)
886 goto out_up;
887
888 err = -ENOMEM;
889 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
890 if (!addr)
891 goto out_up;
892
893 memcpy(addr->name, sunaddr, addr_len);
894 addr->len = addr_len;
895 addr->hash = hash ^ sk->sk_type;
896 atomic_set(&addr->refcnt, 1);
897
898 if (sun_path[0]) {
899 struct path path;
900 umode_t mode = S_IFSOCK |
901 (SOCK_INODE(sock)->i_mode & ~current_umask());
902 err = unix_mknod(sun_path, mode, &path);
903 if (err) {
904 if (err == -EEXIST)
905 err = -EADDRINUSE;
906 unix_release_addr(addr);
907 goto out_up;
908 }
909 addr->hash = UNIX_HASH_SIZE;
910 hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
911 spin_lock(&unix_table_lock);
912 u->path = path;
913 list = &unix_socket_table[hash];
914 } else {
915 spin_lock(&unix_table_lock);
916 err = -EADDRINUSE;
917 if (__unix_find_socket_byname(net, sunaddr, addr_len,
918 sk->sk_type, hash)) {
919 unix_release_addr(addr);
920 goto out_unlock;
921 }
922
923 list = &unix_socket_table[addr->hash];
924 }
925
926 err = 0;
927 __unix_remove_socket(sk);
928 u->addr = addr;
929 __unix_insert_socket(list, sk);
930
931 out_unlock:
932 spin_unlock(&unix_table_lock);
933 out_up:
934 mutex_unlock(&u->readlock);
935 out:
936 return err;
937 }
938
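/*
 * Illustrative userspace sketch (not part of this kernel file). Since
 * unix_bind() turns an existing filesystem entry into -EADDRINUSE and
 * never unlinks it, servers conventionally remove a possibly stale
 * socket file before binding. The path here is a placeholder.
 */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>

static int bind_fresh(int fd, const char *path)
{
	struct sockaddr_un sun;

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	strncpy(sun.sun_path, path, sizeof(sun.sun_path) - 1);

	unlink(path);	/* ENOENT is fine; this clears a stale socket */
	return bind(fd, (struct sockaddr *)&sun, sizeof(sun));
}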
939 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
940 {
941 if (unlikely(sk1 == sk2) || !sk2) {
942 unix_state_lock(sk1);
943 return;
944 }
945 if (sk1 < sk2) {
946 unix_state_lock(sk1);
947 unix_state_lock_nested(sk2);
948 } else {
949 unix_state_lock(sk2);
950 unix_state_lock_nested(sk1);
951 }
952 }
953
954 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
955 {
956 if (unlikely(sk1 == sk2) || !sk2) {
957 unix_state_unlock(sk1);
958 return;
959 }
960 unix_state_unlock(sk1);
961 unix_state_unlock(sk2);
962 }
963
964 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
965 int alen, int flags)
966 {
967 struct sock *sk = sock->sk;
968 struct net *net = sock_net(sk);
969 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
970 struct sock *other;
971 unsigned int hash;
972 int err;
973
974 if (addr->sa_family != AF_UNSPEC) {
975 err = unix_mkname(sunaddr, alen, &hash);
976 if (err < 0)
977 goto out;
978 alen = err;
979
980 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
981 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
982 goto out;
983
984 restart:
985 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
986 if (!other)
987 goto out;
988
989 unix_state_double_lock(sk, other);
990
991 /* Apparently VFS overslept socket death. Retry. */
992 if (sock_flag(other, SOCK_DEAD)) {
993 unix_state_double_unlock(sk, other);
994 sock_put(other);
995 goto restart;
996 }
997
998 err = -EPERM;
999 if (!unix_may_send(sk, other))
1000 goto out_unlock;
1001
1002 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1003 if (err)
1004 goto out_unlock;
1005
1006 } else {
1007 /*
1008 * 1003.1g breaking connected state with AF_UNSPEC
1009 */
1010 other = NULL;
1011 unix_state_double_lock(sk, other);
1012 }
1013
1014 /*
1015 * If it was connected, reconnect.
1016 */
1017 if (unix_peer(sk)) {
1018 struct sock *old_peer = unix_peer(sk);
1019 unix_peer(sk) = other;
1020 unix_state_double_unlock(sk, other);
1021
1022 if (other != old_peer)
1023 unix_dgram_disconnected(sk, old_peer);
1024 sock_put(old_peer);
1025 } else {
1026 unix_peer(sk) = other;
1027 unix_state_double_unlock(sk, other);
1028 }
1029 return 0;
1030
1031 out_unlock:
1032 unix_state_double_unlock(sk, other);
1033 sock_put(other);
1034 out:
1035 return err;
1036 }
1037
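/*
 * Illustrative userspace sketch (not part of this kernel file) of the
 * AF_UNSPEC branch above: connecting a datagram socket pins a single
 * peer, and a later connect() with sa_family == AF_UNSPEC dissolves the
 * association again (the 1003.1g semantics noted in the code).
 */
#include <string.h>
#include <sys/socket.h>

static void dgram_disconnect(int fd)
{
	struct sockaddr sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_family = AF_UNSPEC;
	connect(fd, &sa, sizeof(sa));	/* unix_peer(sk) becomes NULL */
}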
1038 static long unix_wait_for_peer(struct sock *other, long timeo)
1039 {
1040 struct unix_sock *u = unix_sk(other);
1041 int sched;
1042 DEFINE_WAIT(wait);
1043
1044 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1045
1046 sched = !sock_flag(other, SOCK_DEAD) &&
1047 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1048 unix_recvq_full(other);
1049
1050 unix_state_unlock(other);
1051
1052 if (sched)
1053 timeo = schedule_timeout(timeo);
1054
1055 finish_wait(&u->peer_wait, &wait);
1056 return timeo;
1057 }
1058
1059 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1060 int addr_len, int flags)
1061 {
1062 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1063 struct sock *sk = sock->sk;
1064 struct net *net = sock_net(sk);
1065 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1066 struct sock *newsk = NULL;
1067 struct sock *other = NULL;
1068 struct sk_buff *skb = NULL;
1069 unsigned int hash;
1070 int st;
1071 int err;
1072 long timeo;
1073
1074 err = unix_mkname(sunaddr, addr_len, &hash);
1075 if (err < 0)
1076 goto out;
1077 addr_len = err;
1078
1079 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1080 (err = unix_autobind(sock)) != 0)
1081 goto out;
1082
1083 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1084
1085 /* First of all allocate resources.
1086 If we will make it after state is locked,
1087 we will have to recheck all again in any case.
1088 */
1089
1090 err = -ENOMEM;
1091
1092 /* create new sock for complete connection */
1093 newsk = unix_create1(sock_net(sk), NULL);
1094 if (newsk == NULL)
1095 goto out;
1096
1097 /* Allocate skb for sending to listening sock */
1098 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1099 if (skb == NULL)
1100 goto out;
1101
1102 restart:
1103 /* Find listening sock. */
1104 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1105 if (!other)
1106 goto out;
1107
1108 /* Latch state of peer */
1109 unix_state_lock(other);
1110
1111 /* Apparently VFS overslept socket death. Retry. */
1112 if (sock_flag(other, SOCK_DEAD)) {
1113 unix_state_unlock(other);
1114 sock_put(other);
1115 goto restart;
1116 }
1117
1118 err = -ECONNREFUSED;
1119 if (other->sk_state != TCP_LISTEN)
1120 goto out_unlock;
1121 if (other->sk_shutdown & RCV_SHUTDOWN)
1122 goto out_unlock;
1123
1124 if (unix_recvq_full(other)) {
1125 err = -EAGAIN;
1126 if (!timeo)
1127 goto out_unlock;
1128
1129 timeo = unix_wait_for_peer(other, timeo);
1130
1131 err = sock_intr_errno(timeo);
1132 if (signal_pending(current))
1133 goto out;
1134 sock_put(other);
1135 goto restart;
1136 }
1137
1138 /* Latch our state.
1139
1140 This is a tricky place. We need to grab our state lock and cannot
1141 drop the lock on the peer. It is dangerous because deadlock is
1142 possible. The connect-to-self case and simultaneous
1143 attempts to connect are eliminated by checking the socket
1144 state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1145 check this before attempting to grab the lock.
1146
1147 Well, and we have to recheck the state after the socket is locked.
1148 */
1149 st = sk->sk_state;
1150
1151 switch (st) {
1152 case TCP_CLOSE:
1153 /* This is ok... continue with connect */
1154 break;
1155 case TCP_ESTABLISHED:
1156 /* Socket is already connected */
1157 err = -EISCONN;
1158 goto out_unlock;
1159 default:
1160 err = -EINVAL;
1161 goto out_unlock;
1162 }
1163
1164 unix_state_lock_nested(sk);
1165
1166 if (sk->sk_state != st) {
1167 unix_state_unlock(sk);
1168 unix_state_unlock(other);
1169 sock_put(other);
1170 goto restart;
1171 }
1172
1173 err = security_unix_stream_connect(sk, other, newsk);
1174 if (err) {
1175 unix_state_unlock(sk);
1176 goto out_unlock;
1177 }
1178
1179 /* The way is open! Quickly set all the necessary fields... */
1180
1181 sock_hold(sk);
1182 unix_peer(newsk) = sk;
1183 newsk->sk_state = TCP_ESTABLISHED;
1184 newsk->sk_type = sk->sk_type;
1185 init_peercred(newsk);
1186 newu = unix_sk(newsk);
1187 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1188 otheru = unix_sk(other);
1189
1190 /* copy address information from listening to new sock */
1191 if (otheru->addr) {
1192 atomic_inc(&otheru->addr->refcnt);
1193 newu->addr = otheru->addr;
1194 }
1195 if (otheru->path.dentry) {
1196 path_get(&otheru->path);
1197 newu->path = otheru->path;
1198 }
1199
1200 /* Set credentials */
1201 copy_peercred(sk, other);
1202
1203 sock->state = SS_CONNECTED;
1204 sk->sk_state = TCP_ESTABLISHED;
1205 sock_hold(newsk);
1206
1207 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1208 unix_peer(sk) = newsk;
1209
1210 unix_state_unlock(sk);
1211
1212 /* take ten and send info to the listening sock */
1213 spin_lock(&other->sk_receive_queue.lock);
1214 __skb_queue_tail(&other->sk_receive_queue, skb);
1215 spin_unlock(&other->sk_receive_queue.lock);
1216 unix_state_unlock(other);
1217 other->sk_data_ready(other, 0);
1218 sock_put(other);
1219 return 0;
1220
1221 out_unlock:
1222 if (other)
1223 unix_state_unlock(other);
1224
1225 out:
1226 kfree_skb(skb);
1227 if (newsk)
1228 unix_release_sock(newsk, 0);
1229 if (other)
1230 sock_put(other);
1231 return err;
1232 }
1233
1234 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1235 {
1236 struct sock *ska = socka->sk, *skb = sockb->sk;
1237
1238 /* Join our sockets back to back */
1239 sock_hold(ska);
1240 sock_hold(skb);
1241 unix_peer(ska) = skb;
1242 unix_peer(skb) = ska;
1243 init_peercred(ska);
1244 init_peercred(skb);
1245
1246 if (ska->sk_type != SOCK_DGRAM) {
1247 ska->sk_state = TCP_ESTABLISHED;
1248 skb->sk_state = TCP_ESTABLISHED;
1249 socka->state = SS_CONNECTED;
1250 sockb->state = SS_CONNECTED;
1251 }
1252 return 0;
1253 }
1254
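/*
 * Illustrative userspace sketch (not part of this kernel file).
 * socketpair() returns two peers that are already joined back to back,
 * so no bind/listen/connect round trip is needed - the classic
 * bidirectional replacement for pipe().
 */
#include <unistd.h>
#include <sys/socket.h>

static int connected_pair(int sv[2])
{
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return -1;
	/* sv[0] and sv[1] can exchange data immediately */
	return write(sv[0], "hi", 2) == 2 ? 0 : -1;
}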
1255 static void unix_sock_inherit_flags(const struct socket *old,
1256 struct socket *new)
1257 {
1258 if (test_bit(SOCK_PASSCRED, &old->flags))
1259 set_bit(SOCK_PASSCRED, &new->flags);
1260 if (test_bit(SOCK_PASSSEC, &old->flags))
1261 set_bit(SOCK_PASSSEC, &new->flags);
1262 }
1263
1264 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1265 {
1266 struct sock *sk = sock->sk;
1267 struct sock *tsk;
1268 struct sk_buff *skb;
1269 int err;
1270
1271 err = -EOPNOTSUPP;
1272 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1273 goto out;
1274
1275 err = -EINVAL;
1276 if (sk->sk_state != TCP_LISTEN)
1277 goto out;
1278
1279 /* If socket state is TCP_LISTEN it cannot change (for now...),
1280 * so that no locks are necessary.
1281 */
1282
1283 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1284 if (!skb) {
1285 /* This means receive shutdown. */
1286 if (err == 0)
1287 err = -EINVAL;
1288 goto out;
1289 }
1290
1291 tsk = skb->sk;
1292 skb_free_datagram(sk, skb);
1293 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1294
1295 /* attach accepted sock to socket */
1296 unix_state_lock(tsk);
1297 newsock->state = SS_CONNECTED;
1298 unix_sock_inherit_flags(sock, newsock);
1299 sock_graft(tsk, newsock);
1300 unix_state_unlock(tsk);
1301 return 0;
1302
1303 out:
1304 return err;
1305 }
1306
1307
1308 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1309 {
1310 struct sock *sk = sock->sk;
1311 struct unix_sock *u;
1312 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1313 int err = 0;
1314
1315 if (peer) {
1316 sk = unix_peer_get(sk);
1317
1318 err = -ENOTCONN;
1319 if (!sk)
1320 goto out;
1321 err = 0;
1322 } else {
1323 sock_hold(sk);
1324 }
1325
1326 u = unix_sk(sk);
1327 unix_state_lock(sk);
1328 if (!u->addr) {
1329 sunaddr->sun_family = AF_UNIX;
1330 sunaddr->sun_path[0] = 0;
1331 *uaddr_len = sizeof(short);
1332 } else {
1333 struct unix_address *addr = u->addr;
1334
1335 *uaddr_len = addr->len;
1336 memcpy(sunaddr, addr->name, *uaddr_len);
1337 }
1338 unix_state_unlock(sk);
1339 sock_put(sk);
1340 out:
1341 return err;
1342 }
1343
1344 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1345 {
1346 int i;
1347
1348 scm->fp = UNIXCB(skb).fp;
1349 UNIXCB(skb).fp = NULL;
1350
1351 for (i = scm->fp->count-1; i >= 0; i--)
1352 unix_notinflight(scm->fp->fp[i]);
1353 }
1354
1355 static void unix_destruct_scm(struct sk_buff *skb)
1356 {
1357 struct scm_cookie scm;
1358 memset(&scm, 0, sizeof(scm));
1359 scm.pid = UNIXCB(skb).pid;
1360 if (UNIXCB(skb).fp)
1361 unix_detach_fds(&scm, skb);
1362
1363 /* Alas, it calls VFS */
1364 /* So fscking what? fput() had been SMP-safe since the last Summer */
1365 scm_destroy(&scm);
1366 sock_wfree(skb);
1367 }
1368
1369 #define MAX_RECURSION_LEVEL 4
1370
1371 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1372 {
1373 int i;
1374 unsigned char max_level = 0;
1375 int unix_sock_count = 0;
1376
1377 for (i = scm->fp->count - 1; i >= 0; i--) {
1378 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1379
1380 if (sk) {
1381 unix_sock_count++;
1382 max_level = max(max_level,
1383 unix_sk(sk)->recursion_level);
1384 }
1385 }
1386 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1387 return -ETOOMANYREFS;
1388
1389 /*
1390 * Need to duplicate file references for the sake of garbage
1391 * collection. Otherwise a socket in the fps might become a
1392 * candidate for GC while the skb is not yet queued.
1393 */
1394 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1395 if (!UNIXCB(skb).fp)
1396 return -ENOMEM;
1397
1398 if (unix_sock_count) {
1399 for (i = scm->fp->count - 1; i >= 0; i--)
1400 unix_inflight(scm->fp->fp[i]);
1401 }
1402 return max_level;
1403 }
1404
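/*
 * Illustrative userspace sketch (not part of this kernel file) of the
 * sender side that unix_attach_fds() services: a single descriptor
 * carried in one SCM_RIGHTS control message. Error handling is elided.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int send_fd(int sock, int fd_to_pass)
{
	char data = 'x';
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {	/* ensures the control buffer is correctly aligned */
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg;
	struct cmsghdr *cmsg;

	memset(&msg, 0, sizeof(msg));
	memset(&u, 0, sizeof(u));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

	return (int)sendmsg(sock, &msg, 0);
}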
1405 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1406 {
1407 int err = 0;
1408
1409 UNIXCB(skb).pid = get_pid(scm->pid);
1410 UNIXCB(skb).uid = scm->creds.uid;
1411 UNIXCB(skb).gid = scm->creds.gid;
1412 UNIXCB(skb).fp = NULL;
1413 if (scm->fp && send_fds)
1414 err = unix_attach_fds(scm, skb);
1415
1416 skb->destructor = unix_destruct_scm;
1417 return err;
1418 }
1419
1420 /*
1421 * Some apps rely on write() giving SCM_CREDENTIALS
1422 * We include credentials if source or destination socket
1423 * asserted SOCK_PASSCRED.
1424 */
1425 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1426 const struct sock *other)
1427 {
1428 if (UNIXCB(skb).pid)
1429 return;
1430 if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1431 !other->sk_socket ||
1432 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1433 UNIXCB(skb).pid = get_pid(task_tgid(current));
1434 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1435 }
1436 }
1437
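/*
 * Illustrative userspace sketch (not part of this kernel file) of the
 * receiving side of the credentials added above: with SO_PASSCRED set,
 * recvmsg() delivers an SCM_CREDENTIALS control message holding the
 * sender's pid/uid/gid. _GNU_SOURCE is assumed for struct ucred.
 */
#define _GNU_SOURCE
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void recv_creds(int sock, struct ucred *out)
{
	char data, cbuf[CMSG_SPACE(sizeof(struct ucred))];
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	struct msghdr msg;
	struct cmsghdr *cmsg;
	int on = 1;

	setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	if (recvmsg(sock, &msg, 0) < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_CREDENTIALS)
			memcpy(out, CMSG_DATA(cmsg), sizeof(*out));
}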
1438 /*
1439 * Send AF_UNIX data.
1440 */
1441
1442 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1443 struct msghdr *msg, size_t len)
1444 {
1445 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1446 struct sock *sk = sock->sk;
1447 struct net *net = sock_net(sk);
1448 struct unix_sock *u = unix_sk(sk);
1449 struct sockaddr_un *sunaddr = msg->msg_name;
1450 struct sock *other = NULL;
1451 int namelen = 0; /* fake GCC */
1452 int err;
1453 unsigned int hash;
1454 struct sk_buff *skb;
1455 long timeo;
1456 struct scm_cookie tmp_scm;
1457 int max_level;
1458 int data_len = 0;
1459
1460 if (NULL == siocb->scm)
1461 siocb->scm = &tmp_scm;
1462 wait_for_unix_gc();
1463 err = scm_send(sock, msg, siocb->scm, false);
1464 if (err < 0)
1465 return err;
1466
1467 err = -EOPNOTSUPP;
1468 if (msg->msg_flags&MSG_OOB)
1469 goto out;
1470
1471 if (msg->msg_namelen) {
1472 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1473 if (err < 0)
1474 goto out;
1475 namelen = err;
1476 } else {
1477 sunaddr = NULL;
1478 err = -ENOTCONN;
1479 other = unix_peer_get(sk);
1480 if (!other)
1481 goto out;
1482 }
1483
1484 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1485 && (err = unix_autobind(sock)) != 0)
1486 goto out;
1487
1488 err = -EMSGSIZE;
1489 if (len > sk->sk_sndbuf - 32)
1490 goto out;
1491
1492 if (len > SKB_MAX_ALLOC)
1493 data_len = min_t(size_t,
1494 len - SKB_MAX_ALLOC,
1495 MAX_SKB_FRAGS * PAGE_SIZE);
1496
1497 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1498 msg->msg_flags & MSG_DONTWAIT, &err);
1499 if (skb == NULL)
1500 goto out;
1501
1502 err = unix_scm_to_skb(siocb->scm, skb, true);
1503 if (err < 0)
1504 goto out_free;
1505 max_level = err + 1;
1506 unix_get_secdata(siocb->scm, skb);
1507
1508 skb_put(skb, len - data_len);
1509 skb->data_len = data_len;
1510 skb->len = len;
1511 err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1512 if (err)
1513 goto out_free;
1514
1515 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1516
1517 restart:
1518 if (!other) {
1519 err = -ECONNRESET;
1520 if (sunaddr == NULL)
1521 goto out_free;
1522
1523 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1524 hash, &err);
1525 if (other == NULL)
1526 goto out_free;
1527 }
1528
1529 if (sk_filter(other, skb) < 0) {
1530 /* Toss the packet but do not return any error to the sender */
1531 err = len;
1532 goto out_free;
1533 }
1534
1535 unix_state_lock(other);
1536 err = -EPERM;
1537 if (!unix_may_send(sk, other))
1538 goto out_unlock;
1539
1540 if (sock_flag(other, SOCK_DEAD)) {
1541 /*
1542 * Check with 1003.1g - what should
1543 * a datagram error do?
1544 */
1545 unix_state_unlock(other);
1546 sock_put(other);
1547
1548 err = 0;
1549 unix_state_lock(sk);
1550 if (unix_peer(sk) == other) {
1551 unix_peer(sk) = NULL;
1552 unix_state_unlock(sk);
1553
1554 unix_dgram_disconnected(sk, other);
1555 sock_put(other);
1556 err = -ECONNREFUSED;
1557 } else {
1558 unix_state_unlock(sk);
1559 }
1560
1561 other = NULL;
1562 if (err)
1563 goto out_free;
1564 goto restart;
1565 }
1566
1567 err = -EPIPE;
1568 if (other->sk_shutdown & RCV_SHUTDOWN)
1569 goto out_unlock;
1570
1571 if (sk->sk_type != SOCK_SEQPACKET) {
1572 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1573 if (err)
1574 goto out_unlock;
1575 }
1576
1577 if (unix_peer(other) != sk && unix_recvq_full(other)) {
1578 if (!timeo) {
1579 err = -EAGAIN;
1580 goto out_unlock;
1581 }
1582
1583 timeo = unix_wait_for_peer(other, timeo);
1584
1585 err = sock_intr_errno(timeo);
1586 if (signal_pending(current))
1587 goto out_free;
1588
1589 goto restart;
1590 }
1591
1592 if (sock_flag(other, SOCK_RCVTSTAMP))
1593 __net_timestamp(skb);
1594 maybe_add_creds(skb, sock, other);
1595 skb_queue_tail(&other->sk_receive_queue, skb);
1596 if (max_level > unix_sk(other)->recursion_level)
1597 unix_sk(other)->recursion_level = max_level;
1598 unix_state_unlock(other);
1599 other->sk_data_ready(other, len);
1600 sock_put(other);
1601 scm_destroy(siocb->scm);
1602 return len;
1603
1604 out_unlock:
1605 unix_state_unlock(other);
1606 out_free:
1607 kfree_skb(skb);
1608 out:
1609 if (other)
1610 sock_put(other);
1611 scm_destroy(siocb->scm);
1612 return err;
1613 }
1614
1615
1616 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1617 struct msghdr *msg, size_t len)
1618 {
1619 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1620 struct sock *sk = sock->sk;
1621 struct sock *other = NULL;
1622 int err, size;
1623 struct sk_buff *skb;
1624 int sent = 0;
1625 struct scm_cookie tmp_scm;
1626 bool fds_sent = false;
1627 int max_level;
1628
1629 if (NULL == siocb->scm)
1630 siocb->scm = &tmp_scm;
1631 wait_for_unix_gc();
1632 err = scm_send(sock, msg, siocb->scm, false);
1633 if (err < 0)
1634 return err;
1635
1636 err = -EOPNOTSUPP;
1637 if (msg->msg_flags&MSG_OOB)
1638 goto out_err;
1639
1640 if (msg->msg_namelen) {
1641 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1642 goto out_err;
1643 } else {
1644 err = -ENOTCONN;
1645 other = unix_peer(sk);
1646 if (!other)
1647 goto out_err;
1648 }
1649
1650 if (sk->sk_shutdown & SEND_SHUTDOWN)
1651 goto pipe_err;
1652
1653 while (sent < len) {
1654 /*
1655 * Optimisation for the fact that under 0.01% of X
1656 * messages typically need breaking up.
1657 */
1658
1659 size = len-sent;
1660
1661 /* Keep two messages in the pipe so it schedules better */
1662 if (size > ((sk->sk_sndbuf >> 1) - 64))
1663 size = (sk->sk_sndbuf >> 1) - 64;
1664
1665 if (size > SKB_MAX_ALLOC)
1666 size = SKB_MAX_ALLOC;
1667
1668 /*
1669 * Grab a buffer
1670 */
1671
1672 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1673 &err);
1674
1675 if (skb == NULL)
1676 goto out_err;
1677
1678 /*
1679 * If you pass two values to sock_alloc_send_skb()
1680 * it tries to grab the large buffer with GFP_NOFS
1681 * (which can fail easily), and if that fails it grabs the
1682 * fallback-size buffer, which is under a page and will
1683 * succeed. [Alan]
1684 */
1685 size = min_t(int, size, skb_tailroom(skb));
1686
1687
1688 /* Only send the fds in the first buffer */
1689 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1690 if (err < 0) {
1691 kfree_skb(skb);
1692 goto out_err;
1693 }
1694 max_level = err + 1;
1695 fds_sent = true;
1696
1697 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1698 if (err) {
1699 kfree_skb(skb);
1700 goto out_err;
1701 }
1702
1703 unix_state_lock(other);
1704
1705 if (sock_flag(other, SOCK_DEAD) ||
1706 (other->sk_shutdown & RCV_SHUTDOWN))
1707 goto pipe_err_free;
1708
1709 maybe_add_creds(skb, sock, other);
1710 skb_queue_tail(&other->sk_receive_queue, skb);
1711 if (max_level > unix_sk(other)->recursion_level)
1712 unix_sk(other)->recursion_level = max_level;
1713 unix_state_unlock(other);
1714 other->sk_data_ready(other, size);
1715 sent += size;
1716 }
1717
1718 scm_destroy(siocb->scm);
1719 siocb->scm = NULL;
1720
1721 return sent;
1722
1723 pipe_err_free:
1724 unix_state_unlock(other);
1725 kfree_skb(skb);
1726 pipe_err:
1727 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1728 send_sig(SIGPIPE, current, 0);
1729 err = -EPIPE;
1730 out_err:
1731 scm_destroy(siocb->scm);
1732 siocb->scm = NULL;
1733 return sent ? : err;
1734 }
1735
1736 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1737 struct msghdr *msg, size_t len)
1738 {
1739 int err;
1740 struct sock *sk = sock->sk;
1741
1742 err = sock_error(sk);
1743 if (err)
1744 return err;
1745
1746 if (sk->sk_state != TCP_ESTABLISHED)
1747 return -ENOTCONN;
1748
1749 if (msg->msg_namelen)
1750 msg->msg_namelen = 0;
1751
1752 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1753 }
1754
1755 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1756 struct msghdr *msg, size_t size,
1757 int flags)
1758 {
1759 struct sock *sk = sock->sk;
1760
1761 if (sk->sk_state != TCP_ESTABLISHED)
1762 return -ENOTCONN;
1763
1764 return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1765 }
1766
1767 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1768 {
1769 struct unix_sock *u = unix_sk(sk);
1770
1771 if (u->addr) {
1772 msg->msg_namelen = u->addr->len;
1773 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1774 }
1775 }
1776
1777 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1778 struct msghdr *msg, size_t size,
1779 int flags)
1780 {
1781 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1782 struct scm_cookie tmp_scm;
1783 struct sock *sk = sock->sk;
1784 struct unix_sock *u = unix_sk(sk);
1785 int noblock = flags & MSG_DONTWAIT;
1786 struct sk_buff *skb;
1787 int err;
1788 int peeked, skip;
1789
1790 err = -EOPNOTSUPP;
1791 if (flags&MSG_OOB)
1792 goto out;
1793
1794 err = mutex_lock_interruptible(&u->readlock);
1795 if (unlikely(err)) {
1796 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1797 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1798 */
1799 err = noblock ? -EAGAIN : -ERESTARTSYS;
1800 goto out;
1801 }
1802
1803 skip = sk_peek_offset(sk, flags);
1804
1805 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1806 if (!skb) {
1807 unix_state_lock(sk);
1808 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1809 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1810 (sk->sk_shutdown & RCV_SHUTDOWN))
1811 err = 0;
1812 unix_state_unlock(sk);
1813 goto out_unlock;
1814 }
1815
1816 wake_up_interruptible_sync_poll(&u->peer_wait,
1817 POLLOUT | POLLWRNORM | POLLWRBAND);
1818
1819 if (msg->msg_name)
1820 unix_copy_addr(msg, skb->sk);
1821
1822 if (size > skb->len - skip)
1823 size = skb->len - skip;
1824 else if (size < skb->len - skip)
1825 msg->msg_flags |= MSG_TRUNC;
1826
1827 err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1828 if (err)
1829 goto out_free;
1830
1831 if (sock_flag(sk, SOCK_RCVTSTAMP))
1832 __sock_recv_timestamp(msg, sk, skb);
1833
1834 if (!siocb->scm) {
1835 siocb->scm = &tmp_scm;
1836 memset(&tmp_scm, 0, sizeof(tmp_scm));
1837 }
1838 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1839 unix_set_secdata(siocb->scm, skb);
1840
1841 if (!(flags & MSG_PEEK)) {
1842 if (UNIXCB(skb).fp)
1843 unix_detach_fds(siocb->scm, skb);
1844
1845 sk_peek_offset_bwd(sk, skb->len);
1846 } else {
1847 /* It is questionable: on PEEK we could:
1848 - not return fds - good, but too simple 8)
1849 - return fds, and not return them on read (old strategy,
1850 apparently wrong)
1851 - clone fds (I chose this for now; it is the most universal
1852 solution)
1853
1854 POSIX 1003.1g does not actually define this clearly
1855 at all. POSIX 1003.1g doesn't define a lot of things
1856 clearly however!
1857
1858 */
1859
1860 sk_peek_offset_fwd(sk, size);
1861
1862 if (UNIXCB(skb).fp)
1863 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1864 }
1865 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1866
1867 scm_recv(sock, msg, siocb->scm, flags);
1868
1869 out_free:
1870 skb_free_datagram(sk, skb);
1871 out_unlock:
1872 mutex_unlock(&u->readlock);
1873 out:
1874 return err;
1875 }
1876
1877 /*
1878 * Sleep until more data has arrived. But check for races..
1879 */
1880 static long unix_stream_data_wait(struct sock *sk, long timeo,
1881 struct sk_buff *last)
1882 {
1883 DEFINE_WAIT(wait);
1884
1885 unix_state_lock(sk);
1886
1887 for (;;) {
1888 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1889
1890 if (skb_peek_tail(&sk->sk_receive_queue) != last ||
1891 sk->sk_err ||
1892 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1893 signal_pending(current) ||
1894 !timeo)
1895 break;
1896
1897 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1898 unix_state_unlock(sk);
1899 timeo = schedule_timeout(timeo);
1900 unix_state_lock(sk);
1901
1902 if (sock_flag(sk, SOCK_DEAD))
1903 break;
1904
1905 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1906 }
1907
1908 finish_wait(sk_sleep(sk), &wait);
1909 unix_state_unlock(sk);
1910 return timeo;
1911 }
1912
1913 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1914 struct msghdr *msg, size_t size,
1915 int flags)
1916 {
1917 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1918 struct scm_cookie tmp_scm;
1919 struct sock *sk = sock->sk;
1920 struct unix_sock *u = unix_sk(sk);
1921 struct sockaddr_un *sunaddr = msg->msg_name;
1922 int copied = 0;
1923 int noblock = flags & MSG_DONTWAIT;
1924 int check_creds = 0;
1925 int target;
1926 int err = 0;
1927 long timeo;
1928 int skip;
1929
1930 err = -EINVAL;
1931 if (sk->sk_state != TCP_ESTABLISHED)
1932 goto out;
1933
1934 err = -EOPNOTSUPP;
1935 if (flags&MSG_OOB)
1936 goto out;
1937
1938 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1939 timeo = sock_rcvtimeo(sk, noblock);
1940
1941 /* Lock the socket to prevent queue disordering
1942 * while we sleep copying data out (memcpy_toiovec)
1943 */
1944
1945 if (!siocb->scm) {
1946 siocb->scm = &tmp_scm;
1947 memset(&tmp_scm, 0, sizeof(tmp_scm));
1948 }
1949
1950 err = mutex_lock_interruptible(&u->readlock);
1951 if (unlikely(err)) {
1952 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1953 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1954 */
1955 err = noblock ? -EAGAIN : -ERESTARTSYS;
1956 goto out;
1957 }
1958
1959 do {
1960 int chunk;
1961 struct sk_buff *skb, *last;
1962
1963 unix_state_lock(sk);
1964 if (sock_flag(sk, SOCK_DEAD)) {
1965 err = -ECONNRESET;
1966 goto unlock;
1967 }
1968 last = skb = skb_peek(&sk->sk_receive_queue);
1969 again:
1970 if (skb == NULL) {
1971 unix_sk(sk)->recursion_level = 0;
1972 if (copied >= target)
1973 goto unlock;
1974
1975 /*
1976 * POSIX 1003.1g mandates this order.
1977 */
1978
1979 err = sock_error(sk);
1980 if (err)
1981 goto unlock;
1982 if (sk->sk_shutdown & RCV_SHUTDOWN)
1983 goto unlock;
1984
1985 unix_state_unlock(sk);
1986 err = -EAGAIN;
1987 if (!timeo)
1988 break;
1989 mutex_unlock(&u->readlock);
1990
1991 timeo = unix_stream_data_wait(sk, timeo, last);
1992
1993 if (signal_pending(current)
1994 || mutex_lock_interruptible(&u->readlock)) {
1995 err = sock_intr_errno(timeo);
1996 goto out;
1997 }
1998
1999 continue;
2000 unlock:
2001 unix_state_unlock(sk);
2002 break;
2003 }
2004
2005 skip = sk_peek_offset(sk, flags);
2006 while (skip >= skb->len) {
2007 skip -= skb->len;
2008 last = skb;
2009 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2010 if (!skb)
2011 goto again;
2012 }
2013
2014 unix_state_unlock(sk);
2015
2016 if (check_creds) {
2017 /* Never glue messages from different writers */
2018 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
2019 !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
2020 !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
2021 break;
2022 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2023 /* Copy credentials */
2024 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2025 check_creds = 1;
2026 }
2027
2028 /* Copy address just once */
2029 if (sunaddr) {
2030 unix_copy_addr(msg, skb->sk);
2031 sunaddr = NULL;
2032 }
2033
2034 chunk = min_t(unsigned int, skb->len - skip, size);
2035 if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
2036 if (copied == 0)
2037 copied = -EFAULT;
2038 break;
2039 }
2040 copied += chunk;
2041 size -= chunk;
2042
2043 /* Mark read part of skb as used */
2044 if (!(flags & MSG_PEEK)) {
2045 skb_pull(skb, chunk);
2046
2047 sk_peek_offset_bwd(sk, chunk);
2048
2049 if (UNIXCB(skb).fp)
2050 unix_detach_fds(siocb->scm, skb);
2051
2052 if (skb->len)
2053 break;
2054
2055 skb_unlink(skb, &sk->sk_receive_queue);
2056 consume_skb(skb);
2057
2058 if (siocb->scm->fp)
2059 break;
2060 } else {
2061 /* It is questionable, see note in unix_dgram_recvmsg.
2062 */
2063 if (UNIXCB(skb).fp)
2064 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2065
2066 sk_peek_offset_fwd(sk, chunk);
2067
2068 break;
2069 }
2070 } while (size);
2071
2072 mutex_unlock(&u->readlock);
2073 scm_recv(sock, msg, siocb->scm, flags);
2074 out:
2075 return copied ? : err;
2076 }
2077
2078 static int unix_shutdown(struct socket *sock, int mode)
2079 {
2080 struct sock *sk = sock->sk;
2081 struct sock *other;
2082
2083 if (mode < SHUT_RD || mode > SHUT_RDWR)
2084 return -EINVAL;
2085 /* This maps:
2086 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2087 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2088 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2089 */
2090 ++mode;
2091
2092 unix_state_lock(sk);
2093 sk->sk_shutdown |= mode;
2094 other = unix_peer(sk);
2095 if (other)
2096 sock_hold(other);
2097 unix_state_unlock(sk);
2098 sk->sk_state_change(sk);
2099
2100 if (other &&
2101 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2102
2103 int peer_mode = 0;
2104
2105 if (mode&RCV_SHUTDOWN)
2106 peer_mode |= SEND_SHUTDOWN;
2107 if (mode&SEND_SHUTDOWN)
2108 peer_mode |= RCV_SHUTDOWN;
2109 unix_state_lock(other);
2110 other->sk_shutdown |= peer_mode;
2111 unix_state_unlock(other);
2112 other->sk_state_change(other);
2113 if (peer_mode == SHUTDOWN_MASK)
2114 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2115 else if (peer_mode & RCV_SHUTDOWN)
2116 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2117 }
2118 if (other)
2119 sock_put(other);
2120
2121 return 0;
2122 }
2123
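/*
 * Illustrative userspace sketch (not part of this kernel file). With the
 * mode mapping above, shutdown(fd, SHUT_WR) sets SEND_SHUTDOWN locally
 * and RCV_SHUTDOWN on the peer: the peer reads EOF once it drains its
 * queue, while this end can still receive whatever the peer sends back.
 */
#include <sys/socket.h>

static void half_close(int fd)
{
	shutdown(fd, SHUT_WR);	/* peer's read() returns 0 after draining */
	/* recv(fd, ...) remains usable until the peer shuts down too */
}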
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += skb->len;
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

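/* Amount of send buffer memory charged to @sk, i.e. data written by
 * this socket but not yet consumed by the receiver.
 */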
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

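/* SIOCOUTQ and SIOCINQ report the send and receive queue sizes via
 * unix_outq_len()/unix_inq_len(); anything else is handed back to the
 * generic socket layer with -ENOIOCTLCMD.
 */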
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

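/* poll() handler for stream sockets: reports error, hangup and
 * read/write readiness.  Writability is kept even after the peer has
 * shut down; see the comment preceding the unix_writable() check.
 */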
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/* We set writable also when the other side has shut down the
	 * connection.  This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

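/* poll() handler for datagram and seqpacket sockets.  Unlike
 * unix_poll(), a connected sender is not writable while the peer's
 * receive queue is full, so this also waits on the peer's peer_wait
 * queue, and it skips the writability checks entirely when the caller
 * did not ask for them.
 */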
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	other = unix_peer_get(sk);
	if (other) {
		if (unix_peer(other) != sk) {
			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
			if (unix_recvq_full(other))
				writable = 0;
		}
		sock_put(other);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))

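/* The /proc/net/unix iterator encodes its position in a single loff_t:
 * the hash bucket index in the high bits and a 1-based offset within
 * that bucket in the low BUCKET_SPACE bits.
 */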
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

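/* Advance to the next socket belonging to this namespace, moving on
 * through the following hash buckets once the current chain runs out.
 */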
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

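/* seq_file walk over the global socket table; unix_table_lock is held
 * from ->start to ->stop.
 */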
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

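/* Emit one line of /proc/net/unix: kernel address, refcount, protocol
 * (always 0 here), flags, type, state, inode number and, if the socket
 * is bound, its path ('@' denotes an abstract name).
 */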
static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};

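/* Per namespace setup: default datagram backlog, the unix sysctl table
 * and the /proc/net/unix file.
 */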
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

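/* Register the unix_sock proto, the PF_UNIX family and the per
 * namespace operations.  The BUILD_BUG_ON guards against struct
 * unix_skb_parms outgrowing skb->cb.
 */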
static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);