flag parameters: NONBLOCK in anon_inode_getfd
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
c019bbc6 72#include <linux/thread_info.h>
1da177e4
LT
73#include <linux/wanrouter.h>
74#include <linux/if_bridge.h>
20380731
ACM
75#include <linux/if_frad.h>
76#include <linux/if_vlan.h>
1da177e4
LT
77#include <linux/init.h>
78#include <linux/poll.h>
79#include <linux/cache.h>
80#include <linux/module.h>
81#include <linux/highmem.h>
1da177e4
LT
82#include <linux/mount.h>
83#include <linux/security.h>
84#include <linux/syscalls.h>
85#include <linux/compat.h>
86#include <linux/kmod.h>
3ec3b2fb 87#include <linux/audit.h>
d86b5e0e 88#include <linux/wireless.h>
1b8d7ae4 89#include <linux/nsproxy.h>
1da177e4
LT
90
91#include <asm/uaccess.h>
92#include <asm/unistd.h>
93
94#include <net/compat.h>
87de87d5 95#include <net/wext.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
100static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
101static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
102 unsigned long nr_segs, loff_t pos);
103static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
104 unsigned long nr_segs, loff_t pos);
89bddce5 105static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
106
107static int sock_close(struct inode *inode, struct file *file);
108static unsigned int sock_poll(struct file *file,
109 struct poll_table_struct *wait);
89bddce5 110static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
111#ifdef CONFIG_COMPAT
112static long compat_sock_ioctl(struct file *file,
89bddce5 113 unsigned int cmd, unsigned long arg);
89bbfc95 114#endif
1da177e4 115static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
116static ssize_t sock_sendpage(struct file *file, struct page *page,
117 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
118static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
119 struct pipe_inode_info *pipe, size_t len,
120 unsigned int flags);
1da177e4 121
1da177e4
LT
122/*
123 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
124 * in the operation structures but are done directly via the socketcall() multiplexor.
125 */
126
da7071d7 127static const struct file_operations socket_file_ops = {
1da177e4
LT
128 .owner = THIS_MODULE,
129 .llseek = no_llseek,
130 .aio_read = sock_aio_read,
131 .aio_write = sock_aio_write,
132 .poll = sock_poll,
133 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
134#ifdef CONFIG_COMPAT
135 .compat_ioctl = compat_sock_ioctl,
136#endif
1da177e4
LT
137 .mmap = sock_mmap,
138 .open = sock_no_open, /* special open code to disallow open via /proc */
139 .release = sock_close,
140 .fasync = sock_fasync,
5274f052
JA
141 .sendpage = sock_sendpage,
142 .splice_write = generic_splice_sendpage,
9c55e01c 143 .splice_read = sock_splice_read,
1da177e4
LT
144};
145
146/*
147 * The protocol list. Each protocol is registered in here.
148 */
149
1da177e4 150static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 151static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 152
1da177e4
LT
153/*
154 * Statistics counters of the socket lists
155 */
156
157static DEFINE_PER_CPU(int, sockets_in_use) = 0;
158
159/*
89bddce5
SH
160 * Support routines.
161 * Move socket addresses back and forth across the kernel/user
162 * divide and look after the messy bits.
1da177e4
LT
163 */
164
/*
 * Size budget for a socket address, in bytes:
 * 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6,
 * about 80 for AX.25.  It must be at least one bigger than the
 * AF_UNIX size (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 173
1da177e4
LT
174/**
175 * move_addr_to_kernel - copy a socket address into kernel space
176 * @uaddr: Address in user space
177 * @kaddr: Address in kernel space
178 * @ulen: Length in user space
179 *
180 * The address is copied into kernel space. If the provided address is
181 * too long an error code of -EINVAL is returned. If the copy gives
182 * invalid addresses -EFAULT is returned. On a success 0 is returned.
183 */
184
230b1839 185int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 186{
230b1839 187 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 188 return -EINVAL;
89bddce5 189 if (ulen == 0)
1da177e4 190 return 0;
89bddce5 191 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 192 return -EFAULT;
3ec3b2fb 193 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
194}
195
196/**
197 * move_addr_to_user - copy an address to user space
198 * @kaddr: kernel space address
199 * @klen: length of address in kernel
200 * @uaddr: user space address
201 * @ulen: pointer to user length field
202 *
203 * The value pointed to by ulen on entry is the buffer length available.
204 * This is overwritten with the buffer space used. -EINVAL is returned
205 * if an overlong buffer is specified or a negative buffer size. -EFAULT
206 * is returned if either the buffer or the length field are not
207 * accessible.
208 * After copying the data up to the limit the user specifies, the true
209 * length of the data is written over the length limit the user
210 * specified. Zero is returned for a success.
211 */
89bddce5 212
230b1839 213int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 214 int __user *ulen)
1da177e4
LT
215{
216 int err;
217 int len;
218
89bddce5
SH
219 err = get_user(len, ulen);
220 if (err)
1da177e4 221 return err;
89bddce5
SH
222 if (len > klen)
223 len = klen;
230b1839 224 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 225 return -EINVAL;
89bddce5 226 if (len) {
d6fe3945
SG
227 if (audit_sockaddr(klen, kaddr))
228 return -ENOMEM;
89bddce5 229 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
230 return -EFAULT;
231 }
232 /*
89bddce5
SH
233 * "fromlen shall refer to the value before truncation.."
234 * 1003.1g
1da177e4
LT
235 */
236 return __put_user(klen, ulen);
237}
238
239#define SOCKFS_MAGIC 0x534F434B
240
e18b890b 241static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
242
243static struct inode *sock_alloc_inode(struct super_block *sb)
244{
245 struct socket_alloc *ei;
89bddce5 246
e94b1766 247 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
248 if (!ei)
249 return NULL;
250 init_waitqueue_head(&ei->socket.wait);
89bddce5 251
1da177e4
LT
252 ei->socket.fasync_list = NULL;
253 ei->socket.state = SS_UNCONNECTED;
254 ei->socket.flags = 0;
255 ei->socket.ops = NULL;
256 ei->socket.sk = NULL;
257 ei->socket.file = NULL;
1da177e4
LT
258
259 return &ei->vfs_inode;
260}
261
262static void sock_destroy_inode(struct inode *inode)
263{
264 kmem_cache_free(sock_inode_cachep,
265 container_of(inode, struct socket_alloc, vfs_inode));
266}
267
4ba9b9d0 268static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 269{
89bddce5 270 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 271
a35afb83 272 inode_init_once(&ei->vfs_inode);
1da177e4 273}
89bddce5 274
1da177e4
LT
275static int init_inodecache(void)
276{
277 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
278 sizeof(struct socket_alloc),
279 0,
280 (SLAB_HWCACHE_ALIGN |
281 SLAB_RECLAIM_ACCOUNT |
282 SLAB_MEM_SPREAD),
20c2df83 283 init_once);
1da177e4
LT
284 if (sock_inode_cachep == NULL)
285 return -ENOMEM;
286 return 0;
287}
288
289static struct super_operations sockfs_ops = {
290 .alloc_inode = sock_alloc_inode,
291 .destroy_inode =sock_destroy_inode,
292 .statfs = simple_statfs,
293};
294
454e2398 295static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
296 int flags, const char *dev_name, void *data,
297 struct vfsmount *mnt)
1da177e4 298{
454e2398
DH
299 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
300 mnt);
1da177e4
LT
301}
302
ba89966c 303static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
304
305static struct file_system_type sock_fs_type = {
306 .name = "sockfs",
307 .get_sb = sockfs_get_sb,
308 .kill_sb = kill_anon_super,
309};
89bddce5 310
1da177e4
LT
311static int sockfs_delete_dentry(struct dentry *dentry)
312{
304e61e6
ED
313 /*
314 * At creation time, we pretended this dentry was hashed
315 * (by clearing DCACHE_UNHASHED bit in d_flags)
316 * At delete time, we restore the truth : not hashed.
317 * (so that dput() can proceed correctly)
318 */
319 dentry->d_flags |= DCACHE_UNHASHED;
320 return 0;
1da177e4 321}
c23fbb6b
ED
322
323/*
324 * sockfs_dname() is called from d_path().
325 */
326static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
327{
328 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
329 dentry->d_inode->i_ino);
330}
331
1da177e4 332static struct dentry_operations sockfs_dentry_operations = {
89bddce5 333 .d_delete = sockfs_delete_dentry,
c23fbb6b 334 .d_dname = sockfs_dname,
1da177e4
LT
335};
336
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this function
 *	will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE not valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
353
a677a039 354static int sock_alloc_fd(struct file **filep, int flags)
1da177e4
LT
355{
356 int fd;
1da177e4 357
a677a039 358 fd = get_unused_fd_flags(flags);
39d8c1b6 359 if (likely(fd >= 0)) {
1da177e4
LT
360 struct file *file = get_empty_filp();
361
39d8c1b6
DM
362 *filep = file;
363 if (unlikely(!file)) {
1da177e4 364 put_unused_fd(fd);
39d8c1b6 365 return -ENFILE;
1da177e4 366 }
39d8c1b6
DM
367 } else
368 *filep = NULL;
369 return fd;
370}
1da177e4 371
39d8c1b6
DM
372static int sock_attach_fd(struct socket *sock, struct file *file)
373{
ce8d2cdf 374 struct dentry *dentry;
c23fbb6b 375 struct qstr name = { .name = "" };
39d8c1b6 376
ce8d2cdf
DH
377 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
378 if (unlikely(!dentry))
39d8c1b6
DM
379 return -ENOMEM;
380
ce8d2cdf 381 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
382 /*
383 * We dont want to push this dentry into global dentry hash table.
384 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
385 * This permits a working /proc/$pid/fd/XXX on sockets
386 */
ce8d2cdf
DH
387 dentry->d_flags &= ~DCACHE_UNHASHED;
388 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
389
390 sock->file = file;
ce8d2cdf
DH
391 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
392 &socket_file_ops);
393 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
394 file->f_flags = O_RDWR;
395 file->f_pos = 0;
396 file->private_data = sock;
1da177e4 397
39d8c1b6
DM
398 return 0;
399}
400
/*
 * Allocate an fd plus file for @sock, attach them and install the file
 * in the fd table.  Returns the new fd, or a negative error from
 * sock_alloc_fd() / sock_attach_fd(); on an attach failure both the
 * file and the fd are released before returning.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int err;
	int fd = sock_alloc_fd(&newfile, flags);

	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
418
6cb153ca
BL
419static struct socket *sock_from_file(struct file *file, int *err)
420{
6cb153ca
BL
421 if (file->f_op == &socket_file_ops)
422 return file->private_data; /* set in sock_map_fd */
423
23bb80d2
ED
424 *err = -ENOTSOCK;
425 return NULL;
6cb153ca
BL
426}
427
1da177e4
LT
428/**
429 * sockfd_lookup - Go from a file number to its socket slot
430 * @fd: file handle
431 * @err: pointer to an error code return
432 *
433 * The file handle passed in is locked and the socket it is bound
434 * too is returned. If an error occurs the err pointer is overwritten
435 * with a negative errno code and NULL is returned. The function checks
436 * for both invalid handles and passing a handle which is not a socket.
437 *
438 * On a success the socket object pointer is returned.
439 */
440
441struct socket *sockfd_lookup(int fd, int *err)
442{
443 struct file *file;
1da177e4
LT
444 struct socket *sock;
445
89bddce5
SH
446 file = fget(fd);
447 if (!file) {
1da177e4
LT
448 *err = -EBADF;
449 return NULL;
450 }
89bddce5 451
6cb153ca
BL
452 sock = sock_from_file(file, err);
453 if (!sock)
1da177e4 454 fput(file);
6cb153ca
BL
455 return sock;
456}
1da177e4 457
6cb153ca
BL
458static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
459{
460 struct file *file;
461 struct socket *sock;
462
3672558c 463 *err = -EBADF;
6cb153ca
BL
464 file = fget_light(fd, fput_needed);
465 if (file) {
466 sock = sock_from_file(file, err);
467 if (sock)
468 return sock;
469 fput_light(file, *fput_needed);
1da177e4 470 }
6cb153ca 471 return NULL;
1da177e4
LT
472}
473
474/**
475 * sock_alloc - allocate a socket
89bddce5 476 *
1da177e4
LT
477 * Allocate a new inode and socket object. The two are bound together
478 * and initialised. The socket is then returned. If we are out of inodes
479 * NULL is returned.
480 */
481
482static struct socket *sock_alloc(void)
483{
89bddce5
SH
484 struct inode *inode;
485 struct socket *sock;
1da177e4
LT
486
487 inode = new_inode(sock_mnt->mnt_sb);
488 if (!inode)
489 return NULL;
490
491 sock = SOCKET_I(inode);
492
89bddce5 493 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
494 inode->i_uid = current->fsuid;
495 inode->i_gid = current->fsgid;
496
497 get_cpu_var(sockets_in_use)++;
498 put_cpu_var(sockets_in_use);
499 return sock;
500}
501
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; the open is always refused with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
512
4b6f5d20 513const struct file_operations bad_sock_fops = {
1da177e4
LT
514 .owner = THIS_MODULE,
515 .open = sock_no_open,
516};
517
518/**
519 * sock_release - close a socket
520 * @sock: socket to close
521 *
522 * The socket is released from the protocol stack if it has a release
523 * callback, and the inode is then released if the socket is bound to
89bddce5 524 * an inode not a file.
1da177e4 525 */
89bddce5 526
1da177e4
LT
527void sock_release(struct socket *sock)
528{
529 if (sock->ops) {
530 struct module *owner = sock->ops->owner;
531
532 sock->ops->release(sock);
533 sock->ops = NULL;
534 module_put(owner);
535 }
536
537 if (sock->fasync_list)
538 printk(KERN_ERR "sock_release: fasync list not empty!\n");
539
540 get_cpu_var(sockets_in_use)--;
541 put_cpu_var(sockets_in_use);
542 if (!sock->file) {
543 iput(SOCK_INODE(sock));
544 return;
545 }
89bddce5 546 sock->file = NULL;
1da177e4
LT
547}
548
89bddce5 549static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
550 struct msghdr *msg, size_t size)
551{
552 struct sock_iocb *si = kiocb_to_siocb(iocb);
553 int err;
554
555 si->sock = sock;
556 si->scm = NULL;
557 si->msg = msg;
558 si->size = size;
559
560 err = security_socket_sendmsg(sock, msg, size);
561 if (err)
562 return err;
563
564 return sock->ops->sendmsg(iocb, sock, msg, size);
565}
566
567int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
568{
569 struct kiocb iocb;
570 struct sock_iocb siocb;
571 int ret;
572
573 init_sync_kiocb(&iocb, NULL);
574 iocb.private = &siocb;
575 ret = __sock_sendmsg(&iocb, sock, msg, size);
576 if (-EIOCBQUEUED == ret)
577 ret = wait_on_sync_kiocb(&iocb);
578 return ret;
579}
580
581int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
582 struct kvec *vec, size_t num, size_t size)
583{
584 mm_segment_t oldfs = get_fs();
585 int result;
586
587 set_fs(KERNEL_DS);
588 /*
589 * the following is safe, since for compiler definitions of kvec and
590 * iovec are identical, yielding the same in-core layout and alignment
591 */
89bddce5 592 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
593 msg->msg_iovlen = num;
594 result = sock_sendmsg(sock, msg, size);
595 set_fs(oldfs);
596 return result;
597}
598
92f37fd2
ED
599/*
600 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
601 */
602void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
603 struct sk_buff *skb)
604{
605 ktime_t kt = skb->tstamp;
606
607 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
608 struct timeval tv;
609 /* Race occurred between timestamp enabling and packet
610 receiving. Fill in the current time for now. */
611 if (kt.tv64 == 0)
612 kt = ktime_get_real();
613 skb->tstamp = kt;
614 tv = ktime_to_timeval(kt);
615 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
616 } else {
617 struct timespec ts;
618 /* Race occurred between timestamp enabling and packet
619 receiving. Fill in the current time for now. */
620 if (kt.tv64 == 0)
621 kt = ktime_get_real();
622 skb->tstamp = kt;
623 ts = ktime_to_timespec(kt);
624 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
625 }
626}
627
7c81fd8b
ACM
628EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
629
89bddce5 630static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
631 struct msghdr *msg, size_t size, int flags)
632{
633 int err;
634 struct sock_iocb *si = kiocb_to_siocb(iocb);
635
636 si->sock = sock;
637 si->scm = NULL;
638 si->msg = msg;
639 si->size = size;
640 si->flags = flags;
641
642 err = security_socket_recvmsg(sock, msg, size, flags);
643 if (err)
644 return err;
645
646 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
647}
648
89bddce5 649int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
650 size_t size, int flags)
651{
652 struct kiocb iocb;
653 struct sock_iocb siocb;
654 int ret;
655
89bddce5 656 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
657 iocb.private = &siocb;
658 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
659 if (-EIOCBQUEUED == ret)
660 ret = wait_on_sync_kiocb(&iocb);
661 return ret;
662}
663
89bddce5
SH
664int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
665 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
666{
667 mm_segment_t oldfs = get_fs();
668 int result;
669
670 set_fs(KERNEL_DS);
671 /*
672 * the following is safe, since for compiler definitions of kvec and
673 * iovec are identical, yielding the same in-core layout and alignment
674 */
89bddce5 675 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
676 result = sock_recvmsg(sock, msg, size, flags);
677 set_fs(oldfs);
678 return result;
679}
680
681static void sock_aio_dtor(struct kiocb *iocb)
682{
683 kfree(iocb->private);
684}
685
ce1d4d3e
CH
686static ssize_t sock_sendpage(struct file *file, struct page *page,
687 int offset, size_t size, loff_t *ppos, int more)
1da177e4 688{
1da177e4
LT
689 struct socket *sock;
690 int flags;
691
ce1d4d3e
CH
692 sock = file->private_data;
693
694 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
695 if (more)
696 flags |= MSG_MORE;
697
698 return sock->ops->sendpage(sock, page, offset, size, flags);
699}
1da177e4 700
9c55e01c
JA
701static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
702 struct pipe_inode_info *pipe, size_t len,
703 unsigned int flags)
704{
705 struct socket *sock = file->private_data;
706
997b37da
RDC
707 if (unlikely(!sock->ops->splice_read))
708 return -EINVAL;
709
9c55e01c
JA
710 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
711}
712
ce1d4d3e 713static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 714 struct sock_iocb *siocb)
ce1d4d3e
CH
715{
716 if (!is_sync_kiocb(iocb)) {
717 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
718 if (!siocb)
719 return NULL;
1da177e4
LT
720 iocb->ki_dtor = sock_aio_dtor;
721 }
1da177e4 722
ce1d4d3e 723 siocb->kiocb = iocb;
ce1d4d3e
CH
724 iocb->private = siocb;
725 return siocb;
1da177e4
LT
726}
727
ce1d4d3e 728static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
729 struct file *file, const struct iovec *iov,
730 unsigned long nr_segs)
ce1d4d3e
CH
731{
732 struct socket *sock = file->private_data;
733 size_t size = 0;
734 int i;
1da177e4 735
89bddce5
SH
736 for (i = 0; i < nr_segs; i++)
737 size += iov[i].iov_len;
1da177e4 738
ce1d4d3e
CH
739 msg->msg_name = NULL;
740 msg->msg_namelen = 0;
741 msg->msg_control = NULL;
742 msg->msg_controllen = 0;
89bddce5 743 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
744 msg->msg_iovlen = nr_segs;
745 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
746
747 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
748}
749
027445c3
BP
750static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
751 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
752{
753 struct sock_iocb siocb, *x;
754
1da177e4
LT
755 if (pos != 0)
756 return -ESPIPE;
027445c3
BP
757
758 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
759 return 0;
760
027445c3
BP
761
762 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
763 if (!x)
764 return -ENOMEM;
027445c3 765 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
766}
767
ce1d4d3e 768static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
769 struct file *file, const struct iovec *iov,
770 unsigned long nr_segs)
1da177e4 771{
ce1d4d3e
CH
772 struct socket *sock = file->private_data;
773 size_t size = 0;
774 int i;
1da177e4 775
89bddce5
SH
776 for (i = 0; i < nr_segs; i++)
777 size += iov[i].iov_len;
1da177e4 778
ce1d4d3e
CH
779 msg->msg_name = NULL;
780 msg->msg_namelen = 0;
781 msg->msg_control = NULL;
782 msg->msg_controllen = 0;
89bddce5 783 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
784 msg->msg_iovlen = nr_segs;
785 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
786 if (sock->type == SOCK_SEQPACKET)
787 msg->msg_flags |= MSG_EOR;
1da177e4 788
ce1d4d3e 789 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
790}
791
027445c3
BP
792static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
793 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
794{
795 struct sock_iocb siocb, *x;
1da177e4 796
ce1d4d3e
CH
797 if (pos != 0)
798 return -ESPIPE;
027445c3 799
027445c3 800 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
801 if (!x)
802 return -ENOMEM;
1da177e4 803
027445c3 804 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
805}
806
1da177e4
LT
807/*
808 * Atomic setting of ioctl hooks to avoid race
809 * with module unload.
810 */
811
4a3e2f71 812static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 813static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 814
881d966b 815void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 816{
4a3e2f71 817 mutex_lock(&br_ioctl_mutex);
1da177e4 818 br_ioctl_hook = hook;
4a3e2f71 819 mutex_unlock(&br_ioctl_mutex);
1da177e4 820}
89bddce5 821
1da177e4
LT
822EXPORT_SYMBOL(brioctl_set);
823
4a3e2f71 824static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 825static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 826
881d966b 827void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 828{
4a3e2f71 829 mutex_lock(&vlan_ioctl_mutex);
1da177e4 830 vlan_ioctl_hook = hook;
4a3e2f71 831 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 832}
89bddce5 833
1da177e4
LT
834EXPORT_SYMBOL(vlan_ioctl_set);
835
4a3e2f71 836static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 837static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 838
89bddce5 839void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 840{
4a3e2f71 841 mutex_lock(&dlci_ioctl_mutex);
1da177e4 842 dlci_ioctl_hook = hook;
4a3e2f71 843 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 844}
89bddce5 845
1da177e4
LT
846EXPORT_SYMBOL(dlci_ioctl_set);
847
848/*
849 * With an ioctl, arg may well be a user mode pointer, but we don't know
850 * what to do with it - that's up to the protocol still.
851 */
852
853static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
854{
855 struct socket *sock;
881d966b 856 struct sock *sk;
1da177e4
LT
857 void __user *argp = (void __user *)arg;
858 int pid, err;
881d966b 859 struct net *net;
1da177e4 860
b69aee04 861 sock = file->private_data;
881d966b 862 sk = sock->sk;
3b1e0a65 863 net = sock_net(sk);
1da177e4 864 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 865 err = dev_ioctl(net, cmd, argp);
1da177e4 866 } else
d86b5e0e 867#ifdef CONFIG_WIRELESS_EXT
1da177e4 868 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 869 err = dev_ioctl(net, cmd, argp);
1da177e4 870 } else
89bddce5
SH
871#endif /* CONFIG_WIRELESS_EXT */
872 switch (cmd) {
1da177e4
LT
873 case FIOSETOWN:
874 case SIOCSPGRP:
875 err = -EFAULT;
876 if (get_user(pid, (int __user *)argp))
877 break;
878 err = f_setown(sock->file, pid, 1);
879 break;
880 case FIOGETOWN:
881 case SIOCGPGRP:
609d7fa9 882 err = put_user(f_getown(sock->file),
89bddce5 883 (int __user *)argp);
1da177e4
LT
884 break;
885 case SIOCGIFBR:
886 case SIOCSIFBR:
887 case SIOCBRADDBR:
888 case SIOCBRDELBR:
889 err = -ENOPKG;
890 if (!br_ioctl_hook)
891 request_module("bridge");
892
4a3e2f71 893 mutex_lock(&br_ioctl_mutex);
89bddce5 894 if (br_ioctl_hook)
881d966b 895 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 896 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
897 break;
898 case SIOCGIFVLAN:
899 case SIOCSIFVLAN:
900 err = -ENOPKG;
901 if (!vlan_ioctl_hook)
902 request_module("8021q");
903
4a3e2f71 904 mutex_lock(&vlan_ioctl_mutex);
1da177e4 905 if (vlan_ioctl_hook)
881d966b 906 err = vlan_ioctl_hook(net, argp);
4a3e2f71 907 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 908 break;
1da177e4
LT
909 case SIOCADDDLCI:
910 case SIOCDELDLCI:
911 err = -ENOPKG;
912 if (!dlci_ioctl_hook)
913 request_module("dlci");
914
7512cbf6
PE
915 mutex_lock(&dlci_ioctl_mutex);
916 if (dlci_ioctl_hook)
1da177e4 917 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 918 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
919 break;
920 default:
921 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
922
923 /*
924 * If this ioctl is unknown try to hand it down
925 * to the NIC driver.
926 */
927 if (err == -ENOIOCTLCMD)
881d966b 928 err = dev_ioctl(net, cmd, argp);
1da177e4 929 break;
89bddce5 930 }
1da177e4
LT
931 return err;
932}
933
934int sock_create_lite(int family, int type, int protocol, struct socket **res)
935{
936 int err;
937 struct socket *sock = NULL;
89bddce5 938
1da177e4
LT
939 err = security_socket_create(family, type, protocol, 1);
940 if (err)
941 goto out;
942
943 sock = sock_alloc();
944 if (!sock) {
945 err = -ENOMEM;
946 goto out;
947 }
948
1da177e4 949 sock->type = type;
7420ed23
VY
950 err = security_socket_post_create(sock, family, type, protocol, 1);
951 if (err)
952 goto out_release;
953
1da177e4
LT
954out:
955 *res = sock;
956 return err;
7420ed23
VY
957out_release:
958 sock_release(sock);
959 sock = NULL;
960 goto out;
1da177e4
LT
961}
962
963/* No kernel lock held - perfect */
89bddce5 964static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
965{
966 struct socket *sock;
967
968 /*
89bddce5 969 * We can't return errors to poll, so it's either yes or no.
1da177e4 970 */
b69aee04 971 sock = file->private_data;
1da177e4
LT
972 return sock->ops->poll(file, sock, wait);
973}
974
89bddce5 975static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 976{
b69aee04 977 struct socket *sock = file->private_data;
1da177e4
LT
978
979 return sock->ops->mmap(file, sock, vma);
980}
981
20380731 982static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
983{
984 /*
89bddce5
SH
985 * It was possible the inode is NULL we were
986 * closing an unfinished socket.
1da177e4
LT
987 */
988
89bddce5 989 if (!inode) {
1da177e4
LT
990 printk(KERN_DEBUG "sock_close: NULL inode\n");
991 return 0;
992 }
993 sock_fasync(-1, filp, 0);
994 sock_release(SOCKET_I(inode));
995 return 0;
996}
997
998/*
999 * Update the socket async list
1000 *
1001 * Fasync_list locking strategy.
1002 *
1003 * 1. fasync_list is modified only under process context socket lock
1004 * i.e. under semaphore.
1005 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1006 * or under socket lock.
1007 * 3. fasync_list can be used from softirq context, so that
1008 * modification under socket lock have to be enhanced with
1009 * write_lock_bh(&sk->sk_callback_lock).
1010 * --ANK (990710)
1011 */
1012
1013static int sock_fasync(int fd, struct file *filp, int on)
1014{
89bddce5 1015 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1016 struct socket *sock;
1017 struct sock *sk;
1018
89bddce5 1019 if (on) {
8b3a7005 1020 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1021 if (fna == NULL)
1da177e4
LT
1022 return -ENOMEM;
1023 }
1024
b69aee04 1025 sock = filp->private_data;
1da177e4 1026
89bddce5
SH
1027 sk = sock->sk;
1028 if (sk == NULL) {
1da177e4
LT
1029 kfree(fna);
1030 return -EINVAL;
1031 }
1032
1033 lock_sock(sk);
1034
89bddce5 1035 prev = &(sock->fasync_list);
1da177e4 1036
89bddce5
SH
1037 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1038 if (fa->fa_file == filp)
1da177e4
LT
1039 break;
1040
89bddce5
SH
1041 if (on) {
1042 if (fa != NULL) {
1da177e4 1043 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1044 fa->fa_fd = fd;
1da177e4
LT
1045 write_unlock_bh(&sk->sk_callback_lock);
1046
1047 kfree(fna);
1048 goto out;
1049 }
89bddce5
SH
1050 fna->fa_file = filp;
1051 fna->fa_fd = fd;
1052 fna->magic = FASYNC_MAGIC;
1053 fna->fa_next = sock->fasync_list;
1da177e4 1054 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1055 sock->fasync_list = fna;
1da177e4 1056 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1057 } else {
1058 if (fa != NULL) {
1da177e4 1059 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1060 *prev = fa->fa_next;
1da177e4
LT
1061 write_unlock_bh(&sk->sk_callback_lock);
1062 kfree(fa);
1063 }
1064 }
1065
1066out:
1067 release_sock(sock->sk);
1068 return 0;
1069}
1070
1071/* This function may be called only under socket lock or callback_lock */
1072
1073int sock_wake_async(struct socket *sock, int how, int band)
1074{
1075 if (!sock || !sock->fasync_list)
1076 return -1;
89bddce5 1077 switch (how) {
8d8ad9d7 1078 case SOCK_WAKE_WAITD:
1da177e4
LT
1079 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1080 break;
1081 goto call_kill;
8d8ad9d7 1082 case SOCK_WAKE_SPACE:
1da177e4
LT
1083 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1084 break;
1085 /* fall through */
8d8ad9d7 1086 case SOCK_WAKE_IO:
89bddce5 1087call_kill:
1da177e4
LT
1088 __kill_fasync(sock->fasync_list, SIGIO, band);
1089 break;
8d8ad9d7 1090 case SOCK_WAKE_URG:
1da177e4
LT
1091 __kill_fasync(sock->fasync_list, SIGURG, band);
1092 }
1093 return 0;
1094}
1095
1b8d7ae4 1096static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1097 struct socket **res, int kern)
1da177e4
LT
1098{
1099 int err;
1100 struct socket *sock;
55737fda 1101 const struct net_proto_family *pf;
1da177e4
LT
1102
1103 /*
89bddce5 1104 * Check protocol is in range
1da177e4
LT
1105 */
1106 if (family < 0 || family >= NPROTO)
1107 return -EAFNOSUPPORT;
1108 if (type < 0 || type >= SOCK_MAX)
1109 return -EINVAL;
1110
1111 /* Compatibility.
1112
1113 This uglymoron is moved from INET layer to here to avoid
1114 deadlock in module load.
1115 */
1116 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1117 static int warned;
1da177e4
LT
1118 if (!warned) {
1119 warned = 1;
89bddce5
SH
1120 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1121 current->comm);
1da177e4
LT
1122 }
1123 family = PF_PACKET;
1124 }
1125
1126 err = security_socket_create(family, type, protocol, kern);
1127 if (err)
1128 return err;
89bddce5 1129
55737fda
SH
1130 /*
1131 * Allocate the socket and allow the family to set things up. if
1132 * the protocol is 0, the family is instructed to select an appropriate
1133 * default.
1134 */
1135 sock = sock_alloc();
1136 if (!sock) {
1137 if (net_ratelimit())
1138 printk(KERN_WARNING "socket: no more sockets\n");
1139 return -ENFILE; /* Not exactly a match, but its the
1140 closest posix thing */
1141 }
1142
1143 sock->type = type;
1144
1da177e4 1145#if defined(CONFIG_KMOD)
89bddce5
SH
1146 /* Attempt to load a protocol module if the find failed.
1147 *
1148 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1149 * requested real, full-featured networking support upon configuration.
1150 * Otherwise module support will break!
1151 */
55737fda 1152 if (net_families[family] == NULL)
89bddce5 1153 request_module("net-pf-%d", family);
1da177e4
LT
1154#endif
1155
55737fda
SH
1156 rcu_read_lock();
1157 pf = rcu_dereference(net_families[family]);
1158 err = -EAFNOSUPPORT;
1159 if (!pf)
1160 goto out_release;
1da177e4
LT
1161
1162 /*
1163 * We will call the ->create function, that possibly is in a loadable
1164 * module, so we have to bump that loadable module refcnt first.
1165 */
55737fda 1166 if (!try_module_get(pf->owner))
1da177e4
LT
1167 goto out_release;
1168
55737fda
SH
1169 /* Now protected by module ref count */
1170 rcu_read_unlock();
1171
1b8d7ae4 1172 err = pf->create(net, sock, protocol);
55737fda 1173 if (err < 0)
1da177e4 1174 goto out_module_put;
a79af59e 1175
1da177e4
LT
1176 /*
1177 * Now to bump the refcnt of the [loadable] module that owns this
1178 * socket at sock_release time we decrement its refcnt.
1179 */
55737fda
SH
1180 if (!try_module_get(sock->ops->owner))
1181 goto out_module_busy;
1182
1da177e4
LT
1183 /*
1184 * Now that we're done with the ->create function, the [loadable]
1185 * module can have its refcnt decremented
1186 */
55737fda 1187 module_put(pf->owner);
7420ed23
VY
1188 err = security_socket_post_create(sock, family, type, protocol, kern);
1189 if (err)
3b185525 1190 goto out_sock_release;
55737fda 1191 *res = sock;
1da177e4 1192
55737fda
SH
1193 return 0;
1194
1195out_module_busy:
1196 err = -EAFNOSUPPORT;
1da177e4 1197out_module_put:
55737fda
SH
1198 sock->ops = NULL;
1199 module_put(pf->owner);
1200out_sock_release:
1da177e4 1201 sock_release(sock);
55737fda
SH
1202 return err;
1203
1204out_release:
1205 rcu_read_unlock();
1206 goto out_sock_release;
1da177e4
LT
1207}
1208
1209int sock_create(int family, int type, int protocol, struct socket **res)
1210{
1b8d7ae4 1211 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1212}
1213
1214int sock_create_kern(int family, int type, int protocol, struct socket **res)
1215{
1b8d7ae4 1216 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1217}
1218
1219asmlinkage long sys_socket(int family, int type, int protocol)
1220{
1221 int retval;
1222 struct socket *sock;
a677a039
UD
1223 int flags;
1224
1225 flags = type & ~SOCK_TYPE_MASK;
1226 if (flags & ~SOCK_CLOEXEC)
1227 return -EINVAL;
1228 type &= SOCK_TYPE_MASK;
1da177e4 1229
aaca0bdc
UD
1230 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1231 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1232
1da177e4
LT
1233 retval = sock_create(family, type, protocol, &sock);
1234 if (retval < 0)
1235 goto out;
1236
a677a039 1237 retval = sock_map_fd(sock, flags & O_CLOEXEC);
1da177e4
LT
1238 if (retval < 0)
1239 goto out_release;
1240
1241out:
1242 /* It may be already another descriptor 8) Not kernel problem. */
1243 return retval;
1244
1245out_release:
1246 sock_release(sock);
1247 return retval;
1248}
1249
1250/*
1251 * Create a pair of connected sockets.
1252 */
1253
89bddce5
SH
1254asmlinkage long sys_socketpair(int family, int type, int protocol,
1255 int __user *usockvec)
1da177e4
LT
1256{
1257 struct socket *sock1, *sock2;
1258 int fd1, fd2, err;
db349509 1259 struct file *newfile1, *newfile2;
a677a039
UD
1260 int flags;
1261
1262 flags = type & ~SOCK_TYPE_MASK;
1263 if (flags & ~SOCK_CLOEXEC)
1264 return -EINVAL;
1265 type &= SOCK_TYPE_MASK;
1da177e4 1266
aaca0bdc
UD
1267 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1268 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1269
1da177e4
LT
1270 /*
1271 * Obtain the first socket and check if the underlying protocol
1272 * supports the socketpair call.
1273 */
1274
1275 err = sock_create(family, type, protocol, &sock1);
1276 if (err < 0)
1277 goto out;
1278
1279 err = sock_create(family, type, protocol, &sock2);
1280 if (err < 0)
1281 goto out_release_1;
1282
1283 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1284 if (err < 0)
1da177e4
LT
1285 goto out_release_both;
1286
a677a039 1287 fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
bf3c23d1
DM
1288 if (unlikely(fd1 < 0)) {
1289 err = fd1;
db349509 1290 goto out_release_both;
bf3c23d1 1291 }
1da177e4 1292
a677a039 1293 fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
db349509 1294 if (unlikely(fd2 < 0)) {
bf3c23d1 1295 err = fd2;
db349509
AV
1296 put_filp(newfile1);
1297 put_unused_fd(fd1);
1da177e4 1298 goto out_release_both;
db349509 1299 }
1da177e4 1300
db349509
AV
1301 err = sock_attach_fd(sock1, newfile1);
1302 if (unlikely(err < 0)) {
1303 goto out_fd2;
1304 }
1305
1306 err = sock_attach_fd(sock2, newfile2);
1307 if (unlikely(err < 0)) {
1308 fput(newfile1);
1309 goto out_fd1;
1310 }
1311
1312 err = audit_fd_pair(fd1, fd2);
1313 if (err < 0) {
1314 fput(newfile1);
1315 fput(newfile2);
1316 goto out_fd;
1317 }
1da177e4 1318
db349509
AV
1319 fd_install(fd1, newfile1);
1320 fd_install(fd2, newfile2);
1da177e4
LT
1321 /* fd1 and fd2 may be already another descriptors.
1322 * Not kernel problem.
1323 */
1324
89bddce5 1325 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1326 if (!err)
1327 err = put_user(fd2, &usockvec[1]);
1328 if (!err)
1329 return 0;
1330
1331 sys_close(fd2);
1332 sys_close(fd1);
1333 return err;
1334
1da177e4 1335out_release_both:
89bddce5 1336 sock_release(sock2);
1da177e4 1337out_release_1:
89bddce5 1338 sock_release(sock1);
1da177e4
LT
1339out:
1340 return err;
db349509
AV
1341
1342out_fd2:
1343 put_filp(newfile1);
1344 sock_release(sock1);
1345out_fd1:
1346 put_filp(newfile2);
1347 sock_release(sock2);
1348out_fd:
1349 put_unused_fd(fd1);
1350 put_unused_fd(fd2);
1351 goto out;
1da177e4
LT
1352}
1353
1da177e4
LT
1354/*
1355 * Bind a name to a socket. Nothing much to do here since it's
1356 * the protocol's responsibility to handle the local address.
1357 *
1358 * We move the socket address to kernel space before we call
1359 * the protocol layer (having also checked the address is ok).
1360 */
1361
1362asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1363{
1364 struct socket *sock;
230b1839 1365 struct sockaddr_storage address;
6cb153ca 1366 int err, fput_needed;
1da177e4 1367
89bddce5 1368 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1369 if (sock) {
230b1839 1370 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1371 if (err >= 0) {
1372 err = security_socket_bind(sock,
230b1839 1373 (struct sockaddr *)&address,
89bddce5 1374 addrlen);
6cb153ca
BL
1375 if (!err)
1376 err = sock->ops->bind(sock,
89bddce5 1377 (struct sockaddr *)
230b1839 1378 &address, addrlen);
1da177e4 1379 }
6cb153ca 1380 fput_light(sock->file, fput_needed);
89bddce5 1381 }
1da177e4
LT
1382 return err;
1383}
1384
1da177e4
LT
1385/*
1386 * Perform a listen. Basically, we allow the protocol to do anything
1387 * necessary for a listen, and if that works, we mark the socket as
1388 * ready for listening.
1389 */
1390
1da177e4
LT
1391asmlinkage long sys_listen(int fd, int backlog)
1392{
1393 struct socket *sock;
6cb153ca 1394 int err, fput_needed;
b8e1f9b5 1395 int somaxconn;
89bddce5
SH
1396
1397 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1398 if (sock) {
8efa6e93 1399 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1400 if ((unsigned)backlog > somaxconn)
1401 backlog = somaxconn;
1da177e4
LT
1402
1403 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1404 if (!err)
1405 err = sock->ops->listen(sock, backlog);
1da177e4 1406
6cb153ca 1407 fput_light(sock->file, fput_needed);
1da177e4
LT
1408 }
1409 return err;
1410}
1411
1da177e4
LT
1412/*
1413 * For accept, we attempt to create a new socket, set up the link
1414 * with the client, wake up the client, then return the new
1415 * connected fd. We collect the address of the connector in kernel
1416 * space and move it to user at the very end. This is unclean because
1417 * we open the socket then return an error.
1418 *
1419 * 1003.1g adds the ability to recvmsg() to query connection pending
1420 * status to recvmsg. We need to add that support in a way that's
1421 * clean when we restructure accept also.
1422 */
1423
aaca0bdc
UD
1424long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1425 int __user *upeer_addrlen, int flags)
1da177e4
LT
1426{
1427 struct socket *sock, *newsock;
39d8c1b6 1428 struct file *newfile;
6cb153ca 1429 int err, len, newfd, fput_needed;
230b1839 1430 struct sockaddr_storage address;
1da177e4 1431
aaca0bdc
UD
1432 if (flags & ~SOCK_CLOEXEC)
1433 return -EINVAL;
1434
1435 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1436 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1437
6cb153ca 1438 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1439 if (!sock)
1440 goto out;
1441
1442 err = -ENFILE;
89bddce5 1443 if (!(newsock = sock_alloc()))
1da177e4
LT
1444 goto out_put;
1445
1446 newsock->type = sock->type;
1447 newsock->ops = sock->ops;
1448
1da177e4
LT
1449 /*
1450 * We don't need try_module_get here, as the listening socket (sock)
1451 * has the protocol module (sock->ops->owner) held.
1452 */
1453 __module_get(newsock->ops->owner);
1454
aaca0bdc 1455 newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
39d8c1b6
DM
1456 if (unlikely(newfd < 0)) {
1457 err = newfd;
9a1875e6
DM
1458 sock_release(newsock);
1459 goto out_put;
39d8c1b6
DM
1460 }
1461
1462 err = sock_attach_fd(newsock, newfile);
1463 if (err < 0)
79f4f642 1464 goto out_fd_simple;
39d8c1b6 1465
a79af59e
FF
1466 err = security_socket_accept(sock, newsock);
1467 if (err)
39d8c1b6 1468 goto out_fd;
a79af59e 1469
1da177e4
LT
1470 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1471 if (err < 0)
39d8c1b6 1472 goto out_fd;
1da177e4
LT
1473
1474 if (upeer_sockaddr) {
230b1839 1475 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1476 &len, 2) < 0) {
1da177e4 1477 err = -ECONNABORTED;
39d8c1b6 1478 goto out_fd;
1da177e4 1479 }
230b1839
YH
1480 err = move_addr_to_user((struct sockaddr *)&address,
1481 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1482 if (err < 0)
39d8c1b6 1483 goto out_fd;
1da177e4
LT
1484 }
1485
1486 /* File flags are not inherited via accept() unlike another OSes. */
1487
39d8c1b6
DM
1488 fd_install(newfd, newfile);
1489 err = newfd;
1da177e4
LT
1490
1491 security_socket_post_accept(sock, newsock);
1492
1493out_put:
6cb153ca 1494 fput_light(sock->file, fput_needed);
1da177e4
LT
1495out:
1496 return err;
79f4f642
AD
1497out_fd_simple:
1498 sock_release(newsock);
1499 put_filp(newfile);
1500 put_unused_fd(newfd);
1501 goto out_put;
39d8c1b6 1502out_fd:
9606a216 1503 fput(newfile);
39d8c1b6 1504 put_unused_fd(newfd);
1da177e4
LT
1505 goto out_put;
1506}
1507
c019bbc6 1508#ifdef HAVE_SET_RESTORE_SIGMASK
aaca0bdc
UD
1509asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
1510 int __user *upeer_addrlen,
1511 const sigset_t __user *sigmask,
1512 size_t sigsetsize, int flags)
1513{
1514 sigset_t ksigmask, sigsaved;
1515 int ret;
1516
1517 if (sigmask) {
1518 /* XXX: Don't preclude handling different sized sigset_t's. */
1519 if (sigsetsize != sizeof(sigset_t))
1520 return -EINVAL;
1521 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
1522 return -EFAULT;
1523
1524 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1525 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1526 }
1527
1528 ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
1529
1530 if (ret < 0 && signal_pending(current)) {
1531 /*
1532 * Don't restore the signal mask yet. Let do_signal() deliver
1533 * the signal on the way back to userspace, before the signal
1534 * mask is restored.
1535 */
1536 if (sigmask) {
1537 memcpy(&current->saved_sigmask, &sigsaved,
1538 sizeof(sigsaved));
1539 set_restore_sigmask();
1540 }
1541 } else if (sigmask)
1542 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1543
1544 return ret;
1545}
c019bbc6
UD
1546#else
1547asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
1548 int __user *upeer_addrlen,
1549 const sigset_t __user *sigmask,
1550 size_t sigsetsize, int flags)
1551{
1552 /* The platform does not support restoring the signal mask in the
1553 * return path. So we do not allow using paccept() with a signal
1554 * mask. */
1555 if (sigmask)
1556 return -EINVAL;
1557
1558 return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
1559}
1560#endif
aaca0bdc
UD
1561
1562asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1563 int __user *upeer_addrlen)
1564{
1565 return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
1566}
1567
1da177e4
LT
1568/*
1569 * Attempt to connect to a socket with the server address. The address
1570 * is in user space so we verify it is OK and move it to kernel space.
1571 *
1572 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1573 * break bindings
1574 *
1575 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1576 * other SEQPACKET protocols that take time to connect() as it doesn't
1577 * include the -EINPROGRESS status for such sockets.
1578 */
1579
89bddce5
SH
1580asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1581 int addrlen)
1da177e4
LT
1582{
1583 struct socket *sock;
230b1839 1584 struct sockaddr_storage address;
6cb153ca 1585 int err, fput_needed;
1da177e4 1586
6cb153ca 1587 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1588 if (!sock)
1589 goto out;
230b1839 1590 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1591 if (err < 0)
1592 goto out_put;
1593
89bddce5 1594 err =
230b1839 1595 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1596 if (err)
1597 goto out_put;
1598
230b1839 1599 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1600 sock->file->f_flags);
1601out_put:
6cb153ca 1602 fput_light(sock->file, fput_needed);
1da177e4
LT
1603out:
1604 return err;
1605}
1606
1607/*
1608 * Get the local address ('name') of a socket object. Move the obtained
1609 * name to user space.
1610 */
1611
89bddce5
SH
1612asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1613 int __user *usockaddr_len)
1da177e4
LT
1614{
1615 struct socket *sock;
230b1839 1616 struct sockaddr_storage address;
6cb153ca 1617 int len, err, fput_needed;
89bddce5 1618
6cb153ca 1619 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1620 if (!sock)
1621 goto out;
1622
1623 err = security_socket_getsockname(sock);
1624 if (err)
1625 goto out_put;
1626
230b1839 1627 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1628 if (err)
1629 goto out_put;
230b1839 1630 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1631
1632out_put:
6cb153ca 1633 fput_light(sock->file, fput_needed);
1da177e4
LT
1634out:
1635 return err;
1636}
1637
1638/*
1639 * Get the remote address ('name') of a socket object. Move the obtained
1640 * name to user space.
1641 */
1642
89bddce5
SH
1643asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1644 int __user *usockaddr_len)
1da177e4
LT
1645{
1646 struct socket *sock;
230b1839 1647 struct sockaddr_storage address;
6cb153ca 1648 int len, err, fput_needed;
1da177e4 1649
89bddce5
SH
1650 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1651 if (sock != NULL) {
1da177e4
LT
1652 err = security_socket_getpeername(sock);
1653 if (err) {
6cb153ca 1654 fput_light(sock->file, fput_needed);
1da177e4
LT
1655 return err;
1656 }
1657
89bddce5 1658 err =
230b1839 1659 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1660 1);
1da177e4 1661 if (!err)
230b1839 1662 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1663 usockaddr_len);
6cb153ca 1664 fput_light(sock->file, fput_needed);
1da177e4
LT
1665 }
1666 return err;
1667}
1668
1669/*
1670 * Send a datagram to a given address. We move the address into kernel
1671 * space and check the user space data area is readable before invoking
1672 * the protocol.
1673 */
1674
89bddce5
SH
1675asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1676 unsigned flags, struct sockaddr __user *addr,
1677 int addr_len)
1da177e4
LT
1678{
1679 struct socket *sock;
230b1839 1680 struct sockaddr_storage address;
1da177e4
LT
1681 int err;
1682 struct msghdr msg;
1683 struct iovec iov;
6cb153ca 1684 int fput_needed;
6cb153ca 1685
de0fa95c
PE
1686 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1687 if (!sock)
4387ff75 1688 goto out;
6cb153ca 1689
89bddce5
SH
1690 iov.iov_base = buff;
1691 iov.iov_len = len;
1692 msg.msg_name = NULL;
1693 msg.msg_iov = &iov;
1694 msg.msg_iovlen = 1;
1695 msg.msg_control = NULL;
1696 msg.msg_controllen = 0;
1697 msg.msg_namelen = 0;
6cb153ca 1698 if (addr) {
230b1839 1699 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1700 if (err < 0)
1701 goto out_put;
230b1839 1702 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1703 msg.msg_namelen = addr_len;
1da177e4
LT
1704 }
1705 if (sock->file->f_flags & O_NONBLOCK)
1706 flags |= MSG_DONTWAIT;
1707 msg.msg_flags = flags;
1708 err = sock_sendmsg(sock, &msg, len);
1709
89bddce5 1710out_put:
de0fa95c 1711 fput_light(sock->file, fput_needed);
4387ff75 1712out:
1da177e4
LT
1713 return err;
1714}
1715
1716/*
89bddce5 1717 * Send a datagram down a socket.
1da177e4
LT
1718 */
1719
89bddce5 1720asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1721{
1722 return sys_sendto(fd, buff, len, flags, NULL, 0);
1723}
1724
1725/*
89bddce5 1726 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1727 * sender. We verify the buffers are writable and if needed move the
1728 * sender address from kernel to user space.
1729 */
1730
89bddce5
SH
1731asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1732 unsigned flags, struct sockaddr __user *addr,
1733 int __user *addr_len)
1da177e4
LT
1734{
1735 struct socket *sock;
1736 struct iovec iov;
1737 struct msghdr msg;
230b1839 1738 struct sockaddr_storage address;
89bddce5 1739 int err, err2;
6cb153ca
BL
1740 int fput_needed;
1741
de0fa95c 1742 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1743 if (!sock)
de0fa95c 1744 goto out;
1da177e4 1745
89bddce5
SH
1746 msg.msg_control = NULL;
1747 msg.msg_controllen = 0;
1748 msg.msg_iovlen = 1;
1749 msg.msg_iov = &iov;
1750 iov.iov_len = size;
1751 iov.iov_base = ubuf;
230b1839
YH
1752 msg.msg_name = (struct sockaddr *)&address;
1753 msg.msg_namelen = sizeof(address);
1da177e4
LT
1754 if (sock->file->f_flags & O_NONBLOCK)
1755 flags |= MSG_DONTWAIT;
89bddce5 1756 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1757
89bddce5 1758 if (err >= 0 && addr != NULL) {
230b1839
YH
1759 err2 = move_addr_to_user((struct sockaddr *)&address,
1760 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1761 if (err2 < 0)
1762 err = err2;
1da177e4 1763 }
de0fa95c
PE
1764
1765 fput_light(sock->file, fput_needed);
4387ff75 1766out:
1da177e4
LT
1767 return err;
1768}
1769
1770/*
89bddce5 1771 * Receive a datagram from a socket.
1da177e4
LT
1772 */
1773
89bddce5
SH
1774asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1775 unsigned flags)
1da177e4
LT
1776{
1777 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1778}
1779
1780/*
1781 * Set a socket option. Because we don't know the option lengths we have
1782 * to pass the user mode parameter for the protocols to sort out.
1783 */
1784
89bddce5
SH
1785asmlinkage long sys_setsockopt(int fd, int level, int optname,
1786 char __user *optval, int optlen)
1da177e4 1787{
6cb153ca 1788 int err, fput_needed;
1da177e4
LT
1789 struct socket *sock;
1790
1791 if (optlen < 0)
1792 return -EINVAL;
89bddce5
SH
1793
1794 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1795 if (sock != NULL) {
1796 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1797 if (err)
1798 goto out_put;
1da177e4
LT
1799
1800 if (level == SOL_SOCKET)
89bddce5
SH
1801 err =
1802 sock_setsockopt(sock, level, optname, optval,
1803 optlen);
1da177e4 1804 else
89bddce5
SH
1805 err =
1806 sock->ops->setsockopt(sock, level, optname, optval,
1807 optlen);
6cb153ca
BL
1808out_put:
1809 fput_light(sock->file, fput_needed);
1da177e4
LT
1810 }
1811 return err;
1812}
1813
1814/*
1815 * Get a socket option. Because we don't know the option lengths we have
1816 * to pass a user mode parameter for the protocols to sort out.
1817 */
1818
89bddce5
SH
1819asmlinkage long sys_getsockopt(int fd, int level, int optname,
1820 char __user *optval, int __user *optlen)
1da177e4 1821{
6cb153ca 1822 int err, fput_needed;
1da177e4
LT
1823 struct socket *sock;
1824
89bddce5
SH
1825 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1826 if (sock != NULL) {
6cb153ca
BL
1827 err = security_socket_getsockopt(sock, level, optname);
1828 if (err)
1829 goto out_put;
1da177e4
LT
1830
1831 if (level == SOL_SOCKET)
89bddce5
SH
1832 err =
1833 sock_getsockopt(sock, level, optname, optval,
1834 optlen);
1da177e4 1835 else
89bddce5
SH
1836 err =
1837 sock->ops->getsockopt(sock, level, optname, optval,
1838 optlen);
6cb153ca
BL
1839out_put:
1840 fput_light(sock->file, fput_needed);
1da177e4
LT
1841 }
1842 return err;
1843}
1844
1da177e4
LT
1845/*
1846 * Shutdown a socket.
1847 */
1848
1849asmlinkage long sys_shutdown(int fd, int how)
1850{
6cb153ca 1851 int err, fput_needed;
1da177e4
LT
1852 struct socket *sock;
1853
89bddce5
SH
1854 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1855 if (sock != NULL) {
1da177e4 1856 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1857 if (!err)
1858 err = sock->ops->shutdown(sock, how);
1859 fput_light(sock->file, fput_needed);
1da177e4
LT
1860 }
1861 return err;
1862}
1863
89bddce5 1864/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1865 * fields which are the same type (int / unsigned) on our platforms.
1866 */
/*
 * These expand in the caller's scope and rely on two locals being there:
 * `flags`, and a `<msg>_compat` pointer next to `<msg>`.  The compat
 * layout is chosen when MSG_CMSG_COMPAT is set in `flags`.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1870
1da177e4
LT
1871/*
1872 * BSD sendmsg interface
1873 */
1874
1875asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1876{
89bddce5
SH
1877 struct compat_msghdr __user *msg_compat =
1878 (struct compat_msghdr __user *)msg;
1da177e4 1879 struct socket *sock;
230b1839 1880 struct sockaddr_storage address;
1da177e4 1881 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1882 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1883 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1884 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1885 unsigned char *ctl_buf = ctl;
1886 struct msghdr msg_sys;
1887 int err, ctl_len, iov_size, total_len;
6cb153ca 1888 int fput_needed;
89bddce5 1889
1da177e4
LT
1890 err = -EFAULT;
1891 if (MSG_CMSG_COMPAT & flags) {
1892 if (get_compat_msghdr(&msg_sys, msg_compat))
1893 return -EFAULT;
89bddce5
SH
1894 }
1895 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1896 return -EFAULT;
1897
6cb153ca 1898 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1899 if (!sock)
1da177e4
LT
1900 goto out;
1901
1902 /* do not move before msg_sys is valid */
1903 err = -EMSGSIZE;
1904 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1905 goto out_put;
1906
89bddce5 1907 /* Check whether to allocate the iovec area */
1da177e4
LT
1908 err = -ENOMEM;
1909 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1910 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1911 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1912 if (!iov)
1913 goto out_put;
1914 }
1915
1916 /* This will also move the address data into kernel space */
1917 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1918 err = verify_compat_iovec(&msg_sys, iov,
1919 (struct sockaddr *)&address,
1920 VERIFY_READ);
1da177e4 1921 } else
230b1839
YH
1922 err = verify_iovec(&msg_sys, iov,
1923 (struct sockaddr *)&address,
1924 VERIFY_READ);
89bddce5 1925 if (err < 0)
1da177e4
LT
1926 goto out_freeiov;
1927 total_len = err;
1928
1929 err = -ENOBUFS;
1930
1931 if (msg_sys.msg_controllen > INT_MAX)
1932 goto out_freeiov;
89bddce5 1933 ctl_len = msg_sys.msg_controllen;
1da177e4 1934 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1935 err =
1936 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1937 sizeof(ctl));
1da177e4
LT
1938 if (err)
1939 goto out_freeiov;
1940 ctl_buf = msg_sys.msg_control;
8920e8f9 1941 ctl_len = msg_sys.msg_controllen;
1da177e4 1942 } else if (ctl_len) {
89bddce5 1943 if (ctl_len > sizeof(ctl)) {
1da177e4 1944 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1945 if (ctl_buf == NULL)
1da177e4
LT
1946 goto out_freeiov;
1947 }
1948 err = -EFAULT;
1949 /*
1950 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1951 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1952 * checking falls down on this.
1953 */
89bddce5
SH
1954 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1955 ctl_len))
1da177e4
LT
1956 goto out_freectl;
1957 msg_sys.msg_control = ctl_buf;
1958 }
1959 msg_sys.msg_flags = flags;
1960
1961 if (sock->file->f_flags & O_NONBLOCK)
1962 msg_sys.msg_flags |= MSG_DONTWAIT;
1963 err = sock_sendmsg(sock, &msg_sys, total_len);
1964
1965out_freectl:
89bddce5 1966 if (ctl_buf != ctl)
1da177e4
LT
1967 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1968out_freeiov:
1969 if (iov != iovstack)
1970 sock_kfree_s(sock->sk, iov, iov_size);
1971out_put:
6cb153ca 1972 fput_light(sock->file, fput_needed);
89bddce5 1973out:
1da177e4
LT
1974 return err;
1975}
1976
1977/*
1978 * BSD recvmsg interface
1979 */
1980
89bddce5
SH
1981asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1982 unsigned int flags)
1da177e4 1983{
89bddce5
SH
1984 struct compat_msghdr __user *msg_compat =
1985 (struct compat_msghdr __user *)msg;
1da177e4
LT
1986 struct socket *sock;
1987 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1988 struct iovec *iov = iovstack;
1da177e4
LT
1989 struct msghdr msg_sys;
1990 unsigned long cmsg_ptr;
1991 int err, iov_size, total_len, len;
6cb153ca 1992 int fput_needed;
1da177e4
LT
1993
1994 /* kernel mode address */
230b1839 1995 struct sockaddr_storage addr;
1da177e4
LT
1996
1997 /* user mode address pointers */
1998 struct sockaddr __user *uaddr;
1999 int __user *uaddr_len;
89bddce5 2000
1da177e4
LT
2001 if (MSG_CMSG_COMPAT & flags) {
2002 if (get_compat_msghdr(&msg_sys, msg_compat))
2003 return -EFAULT;
89bddce5
SH
2004 }
2005 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
2006 return -EFAULT;
1da177e4 2007
6cb153ca 2008 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
2009 if (!sock)
2010 goto out;
2011
2012 err = -EMSGSIZE;
2013 if (msg_sys.msg_iovlen > UIO_MAXIOV)
2014 goto out_put;
89bddce5
SH
2015
2016 /* Check whether to allocate the iovec area */
1da177e4
LT
2017 err = -ENOMEM;
2018 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
2019 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
2020 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2021 if (!iov)
2022 goto out_put;
2023 }
2024
2025 /*
89bddce5
SH
2026 * Save the user-mode address (verify_iovec will change the
2027 * kernel msghdr to use the kernel address space)
1da177e4 2028 */
89bddce5 2029
cfcabdcc 2030 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
2031 uaddr_len = COMPAT_NAMELEN(msg);
2032 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
2033 err = verify_compat_iovec(&msg_sys, iov,
2034 (struct sockaddr *)&addr,
2035 VERIFY_WRITE);
1da177e4 2036 } else
230b1839
YH
2037 err = verify_iovec(&msg_sys, iov,
2038 (struct sockaddr *)&addr,
2039 VERIFY_WRITE);
1da177e4
LT
2040 if (err < 0)
2041 goto out_freeiov;
89bddce5 2042 total_len = err;
1da177e4
LT
2043
2044 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 2045 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2046
1da177e4
LT
2047 if (sock->file->f_flags & O_NONBLOCK)
2048 flags |= MSG_DONTWAIT;
2049 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
2050 if (err < 0)
2051 goto out_freeiov;
2052 len = err;
2053
2054 if (uaddr != NULL) {
230b1839
YH
2055 err = move_addr_to_user((struct sockaddr *)&addr,
2056 msg_sys.msg_namelen, uaddr,
89bddce5 2057 uaddr_len);
1da177e4
LT
2058 if (err < 0)
2059 goto out_freeiov;
2060 }
37f7f421
DM
2061 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
2062 COMPAT_FLAGS(msg));
1da177e4
LT
2063 if (err)
2064 goto out_freeiov;
2065 if (MSG_CMSG_COMPAT & flags)
89bddce5 2066 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
2067 &msg_compat->msg_controllen);
2068 else
89bddce5 2069 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
2070 &msg->msg_controllen);
2071 if (err)
2072 goto out_freeiov;
2073 err = len;
2074
2075out_freeiov:
2076 if (iov != iovstack)
2077 sock_kfree_s(sock->sk, iov, iov_size);
2078out_put:
6cb153ca 2079 fput_light(sock->file, fput_needed);
1da177e4
LT
2080out:
2081 return err;
2082}
2083
2084#ifdef __ARCH_WANT_SYS_SOCKETCALL
2085
/*
 * Size in bytes of the user-space argument vector for each
 * sys_socketcall() sub-call.  sys_socketcall() indexes this table with
 * the call number to decide how many bytes to copy in; slot 0 is never
 * consulted because call numbers below 1 are rejected there.
 *
 * NOTE(review): the per-slot names below assume SYS_SOCKET == 1 through
 * SYS_PACCEPT == 18 -- confirm against <linux/net.h>.
 */
#define NARG_BYTES(count) ((count) * sizeof(unsigned long))
static const unsigned char nargs[19] = {
	NARG_BYTES(0),		/*  0: unused */
	NARG_BYTES(3),		/*  1: socket */
	NARG_BYTES(3),		/*  2: bind */
	NARG_BYTES(3),		/*  3: connect */
	NARG_BYTES(2),		/*  4: listen */
	NARG_BYTES(3),		/*  5: accept */
	NARG_BYTES(3),		/*  6: getsockname */
	NARG_BYTES(3),		/*  7: getpeername */
	NARG_BYTES(4),		/*  8: socketpair */
	NARG_BYTES(4),		/*  9: send */
	NARG_BYTES(4),		/* 10: recv */
	NARG_BYTES(6),		/* 11: sendto */
	NARG_BYTES(6),		/* 12: recvfrom */
	NARG_BYTES(2),		/* 13: shutdown */
	NARG_BYTES(5),		/* 14: setsockopt */
	NARG_BYTES(5),		/* 15: getsockopt */
	NARG_BYTES(3),		/* 16: sendmsg */
	NARG_BYTES(3),		/* 17: recvmsg */
	NARG_BYTES(6),		/* 18: paccept */
};

#undef NARG_BYTES
2096
2097/*
89bddce5 2098 * System call vectors.
1da177e4
LT
2099 *
2100 * Argument checking cleaned up. Saved 20% in size.
2101 * This function doesn't need to set the kernel lock because
89bddce5 2102 * it is set by the callees.
1da177e4
LT
2103 */
2104
2105asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2106{
2107 unsigned long a[6];
89bddce5 2108 unsigned long a0, a1;
1da177e4
LT
2109 int err;
2110
aaca0bdc 2111 if (call < 1 || call > SYS_PACCEPT)
1da177e4
LT
2112 return -EINVAL;
2113
2114 /* copy_from_user should be SMP safe. */
2115 if (copy_from_user(a, args, nargs[call]))
2116 return -EFAULT;
3ec3b2fb 2117
89bddce5 2118 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2119 if (err)
2120 return err;
2121
89bddce5
SH
2122 a0 = a[0];
2123 a1 = a[1];
2124
2125 switch (call) {
2126 case SYS_SOCKET:
2127 err = sys_socket(a0, a1, a[2]);
2128 break;
2129 case SYS_BIND:
2130 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2131 break;
2132 case SYS_CONNECT:
2133 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2134 break;
2135 case SYS_LISTEN:
2136 err = sys_listen(a0, a1);
2137 break;
2138 case SYS_ACCEPT:
2139 err =
aaca0bdc
UD
2140 do_accept(a0, (struct sockaddr __user *)a1,
2141 (int __user *)a[2], 0);
89bddce5
SH
2142 break;
2143 case SYS_GETSOCKNAME:
2144 err =
2145 sys_getsockname(a0, (struct sockaddr __user *)a1,
2146 (int __user *)a[2]);
2147 break;
2148 case SYS_GETPEERNAME:
2149 err =
2150 sys_getpeername(a0, (struct sockaddr __user *)a1,
2151 (int __user *)a[2]);
2152 break;
2153 case SYS_SOCKETPAIR:
2154 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2155 break;
2156 case SYS_SEND:
2157 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2158 break;
2159 case SYS_SENDTO:
2160 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2161 (struct sockaddr __user *)a[4], a[5]);
2162 break;
2163 case SYS_RECV:
2164 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2165 break;
2166 case SYS_RECVFROM:
2167 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2168 (struct sockaddr __user *)a[4],
2169 (int __user *)a[5]);
2170 break;
2171 case SYS_SHUTDOWN:
2172 err = sys_shutdown(a0, a1);
2173 break;
2174 case SYS_SETSOCKOPT:
2175 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2176 break;
2177 case SYS_GETSOCKOPT:
2178 err =
2179 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2180 (int __user *)a[4]);
2181 break;
2182 case SYS_SENDMSG:
2183 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2184 break;
2185 case SYS_RECVMSG:
2186 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2187 break;
aaca0bdc
UD
2188 case SYS_PACCEPT:
2189 err =
2190 sys_paccept(a0, (struct sockaddr __user *)a1,
2191 (int __user *)a[2],
2192 (const sigset_t __user *) a[3],
2193 a[4], a[5]);
2194 break;
89bddce5
SH
2195 default:
2196 err = -EINVAL;
2197 break;
1da177e4
LT
2198 }
2199 return err;
2200}
2201
89bddce5 2202#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2203
55737fda
SH
2204/**
2205 * sock_register - add a socket protocol handler
2206 * @ops: description of protocol
2207 *
1da177e4
LT
2208 * This function is called by a protocol handler that wants to
2209 * advertise its address family, and have it linked into the
55737fda
SH
2210 * socket interface. The value ops->family coresponds to the
2211 * socket system call protocol family.
1da177e4 2212 */
f0fd27d4 2213int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2214{
2215 int err;
2216
2217 if (ops->family >= NPROTO) {
89bddce5
SH
2218 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2219 NPROTO);
1da177e4
LT
2220 return -ENOBUFS;
2221 }
55737fda
SH
2222
2223 spin_lock(&net_family_lock);
2224 if (net_families[ops->family])
2225 err = -EEXIST;
2226 else {
89bddce5 2227 net_families[ops->family] = ops;
1da177e4
LT
2228 err = 0;
2229 }
55737fda
SH
2230 spin_unlock(&net_family_lock);
2231
89bddce5 2232 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2233 return err;
2234}
2235
55737fda
SH
2236/**
2237 * sock_unregister - remove a protocol handler
2238 * @family: protocol family to remove
2239 *
1da177e4
LT
2240 * This function is called by a protocol handler that wants to
2241 * remove its address family, and have it unlinked from the
55737fda
SH
2242 * new socket creation.
2243 *
2244 * If protocol handler is a module, then it can use module reference
2245 * counts to protect against new references. If protocol handler is not
2246 * a module then it needs to provide its own protection in
2247 * the ops->create routine.
1da177e4 2248 */
f0fd27d4 2249void sock_unregister(int family)
1da177e4 2250{
f0fd27d4 2251 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2252
55737fda 2253 spin_lock(&net_family_lock);
89bddce5 2254 net_families[family] = NULL;
55737fda
SH
2255 spin_unlock(&net_family_lock);
2256
2257 synchronize_rcu();
2258
89bddce5 2259 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2260}
2261
77d76ea3 2262static int __init sock_init(void)
1da177e4
LT
2263{
2264 /*
89bddce5 2265 * Initialize sock SLAB cache.
1da177e4 2266 */
89bddce5 2267
1da177e4
LT
2268 sk_init();
2269
1da177e4 2270 /*
89bddce5 2271 * Initialize skbuff SLAB cache
1da177e4
LT
2272 */
2273 skb_init();
1da177e4
LT
2274
2275 /*
89bddce5 2276 * Initialize the protocols module.
1da177e4
LT
2277 */
2278
2279 init_inodecache();
2280 register_filesystem(&sock_fs_type);
2281 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2282
2283 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2284 */
2285
2286#ifdef CONFIG_NETFILTER
2287 netfilter_init();
2288#endif
cbeb321a
DM
2289
2290 return 0;
1da177e4
LT
2291}
2292
77d76ea3
AK
2293core_initcall(sock_init); /* early initcall */
2294
1da177e4
LT
2295#ifdef CONFIG_PROC_FS
2296void socket_seq_show(struct seq_file *seq)
2297{
2298 int cpu;
2299 int counter = 0;
2300
6f912042 2301 for_each_possible_cpu(cpu)
89bddce5 2302 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2303
2304 /* It can be negative, by the way. 8) */
2305 if (counter < 0)
2306 counter = 0;
2307
2308 seq_printf(seq, "sockets: used %d\n", counter);
2309}
89bddce5 2310#endif /* CONFIG_PROC_FS */
1da177e4 2311
89bbfc95
SP
2312#ifdef CONFIG_COMPAT
2313static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2314 unsigned long arg)
89bbfc95
SP
2315{
2316 struct socket *sock = file->private_data;
2317 int ret = -ENOIOCTLCMD;
87de87d5
DM
2318 struct sock *sk;
2319 struct net *net;
2320
2321 sk = sock->sk;
2322 net = sock_net(sk);
89bbfc95
SP
2323
2324 if (sock->ops->compat_ioctl)
2325 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2326
87de87d5
DM
2327 if (ret == -ENOIOCTLCMD &&
2328 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
2329 ret = compat_wext_handle_ioctl(net, cmd, arg);
2330
89bbfc95
SP
2331 return ret;
2332}
2333#endif
2334
ac5a488e
SS
2335int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2336{
2337 return sock->ops->bind(sock, addr, addrlen);
2338}
2339
2340int kernel_listen(struct socket *sock, int backlog)
2341{
2342 return sock->ops->listen(sock, backlog);
2343}
2344
2345int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2346{
2347 struct sock *sk = sock->sk;
2348 int err;
2349
2350 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2351 newsock);
2352 if (err < 0)
2353 goto done;
2354
2355 err = sock->ops->accept(sock, *newsock, flags);
2356 if (err < 0) {
2357 sock_release(*newsock);
fa8705b0 2358 *newsock = NULL;
ac5a488e
SS
2359 goto done;
2360 }
2361
2362 (*newsock)->ops = sock->ops;
2363
2364done:
2365 return err;
2366}
2367
2368int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2369 int flags)
ac5a488e
SS
2370{
2371 return sock->ops->connect(sock, addr, addrlen, flags);
2372}
2373
2374int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2375 int *addrlen)
2376{
2377 return sock->ops->getname(sock, addr, addrlen, 0);
2378}
2379
2380int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2381 int *addrlen)
2382{
2383 return sock->ops->getname(sock, addr, addrlen, 1);
2384}
2385
2386int kernel_getsockopt(struct socket *sock, int level, int optname,
2387 char *optval, int *optlen)
2388{
2389 mm_segment_t oldfs = get_fs();
2390 int err;
2391
2392 set_fs(KERNEL_DS);
2393 if (level == SOL_SOCKET)
2394 err = sock_getsockopt(sock, level, optname, optval, optlen);
2395 else
2396 err = sock->ops->getsockopt(sock, level, optname, optval,
2397 optlen);
2398 set_fs(oldfs);
2399 return err;
2400}
2401
2402int kernel_setsockopt(struct socket *sock, int level, int optname,
2403 char *optval, int optlen)
2404{
2405 mm_segment_t oldfs = get_fs();
2406 int err;
2407
2408 set_fs(KERNEL_DS);
2409 if (level == SOL_SOCKET)
2410 err = sock_setsockopt(sock, level, optname, optval, optlen);
2411 else
2412 err = sock->ops->setsockopt(sock, level, optname, optval,
2413 optlen);
2414 set_fs(oldfs);
2415 return err;
2416}
2417
2418int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2419 size_t size, int flags)
2420{
2421 if (sock->ops->sendpage)
2422 return sock->ops->sendpage(sock, page, offset, size, flags);
2423
2424 return sock_no_sendpage(sock, page, offset, size, flags);
2425}
2426
2427int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2428{
2429 mm_segment_t oldfs = get_fs();
2430 int err;
2431
2432 set_fs(KERNEL_DS);
2433 err = sock->ops->ioctl(sock, cmd, arg);
2434 set_fs(oldfs);
2435
2436 return err;
2437}
2438
91cf45f0
TM
2439int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2440{
2441 return sock->ops->shutdown(sock, how);
2442}
2443
1da177e4
LT
/* Core socket-layer entry points. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers for in-kernel socket users. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);