bnx2: Fix bug in bnx2_free_rx_mem().
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
c019bbc6 72#include <linux/thread_info.h>
1da177e4
LT
73#include <linux/wanrouter.h>
74#include <linux/if_bridge.h>
20380731
ACM
75#include <linux/if_frad.h>
76#include <linux/if_vlan.h>
1da177e4
LT
77#include <linux/init.h>
78#include <linux/poll.h>
79#include <linux/cache.h>
80#include <linux/module.h>
81#include <linux/highmem.h>
1da177e4
LT
82#include <linux/mount.h>
83#include <linux/security.h>
84#include <linux/syscalls.h>
85#include <linux/compat.h>
86#include <linux/kmod.h>
3ec3b2fb 87#include <linux/audit.h>
d86b5e0e 88#include <linux/wireless.h>
1b8d7ae4 89#include <linux/nsproxy.h>
1da177e4
LT
90
91#include <asm/uaccess.h>
92#include <asm/unistd.h>
93
94#include <net/compat.h>
87de87d5 95#include <net/wext.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
100static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
101static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
102 unsigned long nr_segs, loff_t pos);
103static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
104 unsigned long nr_segs, loff_t pos);
89bddce5 105static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
106
107static int sock_close(struct inode *inode, struct file *file);
108static unsigned int sock_poll(struct file *file,
109 struct poll_table_struct *wait);
89bddce5 110static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
111#ifdef CONFIG_COMPAT
112static long compat_sock_ioctl(struct file *file,
89bddce5 113 unsigned int cmd, unsigned long arg);
89bbfc95 114#endif
1da177e4 115static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
116static ssize_t sock_sendpage(struct file *file, struct page *page,
117 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
118static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
119 struct pipe_inode_info *pipe, size_t len,
120 unsigned int flags);
1da177e4 121
1da177e4
LT
122/*
123 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
124 * in the operation structures but are done directly via the socketcall() multiplexor.
125 */
126
da7071d7 127static const struct file_operations socket_file_ops = {
1da177e4
LT
128 .owner = THIS_MODULE,
129 .llseek = no_llseek,
130 .aio_read = sock_aio_read,
131 .aio_write = sock_aio_write,
132 .poll = sock_poll,
133 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
134#ifdef CONFIG_COMPAT
135 .compat_ioctl = compat_sock_ioctl,
136#endif
1da177e4
LT
137 .mmap = sock_mmap,
138 .open = sock_no_open, /* special open code to disallow open via /proc */
139 .release = sock_close,
140 .fasync = sock_fasync,
5274f052
JA
141 .sendpage = sock_sendpage,
142 .splice_write = generic_splice_sendpage,
9c55e01c 143 .splice_read = sock_splice_read,
1da177e4
LT
144};
145
146/*
147 * The protocol list. Each protocol is registered in here.
148 */
149
1da177e4 150static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 151static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 152
1da177e4
LT
153/*
154 * Statistics counters of the socket lists
155 */
156
157static DEFINE_PER_CPU(int, sockets_in_use) = 0;
158
159/*
89bddce5
SH
160 * Support routines.
161 * Move socket addresses back and forth across the kernel/user
162 * divide and look after the messy bits.
1da177e4
LT
163 */
164
89bddce5 165#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
166 16 for IP, 16 for IPX,
167 24 for IPv6,
89bddce5 168 about 80 for AX.25
1da177e4
LT
169 must be at least one bigger than
170 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 171 :unix_mkname()).
1da177e4 172 */
89bddce5 173
1da177e4
LT
174/**
175 * move_addr_to_kernel - copy a socket address into kernel space
176 * @uaddr: Address in user space
177 * @kaddr: Address in kernel space
178 * @ulen: Length in user space
179 *
180 * The address is copied into kernel space. If the provided address is
181 * too long an error code of -EINVAL is returned. If the copy gives
182 * invalid addresses -EFAULT is returned. On a success 0 is returned.
183 */
184
230b1839 185int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 186{
230b1839 187 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 188 return -EINVAL;
89bddce5 189 if (ulen == 0)
1da177e4 190 return 0;
89bddce5 191 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 192 return -EFAULT;
3ec3b2fb 193 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
194}
195
196/**
197 * move_addr_to_user - copy an address to user space
198 * @kaddr: kernel space address
199 * @klen: length of address in kernel
200 * @uaddr: user space address
201 * @ulen: pointer to user length field
202 *
203 * The value pointed to by ulen on entry is the buffer length available.
204 * This is overwritten with the buffer space used. -EINVAL is returned
205 * if an overlong buffer is specified or a negative buffer size. -EFAULT
206 * is returned if either the buffer or the length field are not
207 * accessible.
208 * After copying the data up to the limit the user specifies, the true
209 * length of the data is written over the length limit the user
210 * specified. Zero is returned for a success.
211 */
89bddce5 212
230b1839 213int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 214 int __user *ulen)
1da177e4
LT
215{
216 int err;
217 int len;
218
89bddce5
SH
219 err = get_user(len, ulen);
220 if (err)
1da177e4 221 return err;
89bddce5
SH
222 if (len > klen)
223 len = klen;
230b1839 224 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 225 return -EINVAL;
89bddce5 226 if (len) {
d6fe3945
SG
227 if (audit_sockaddr(klen, kaddr))
228 return -ENOMEM;
89bddce5 229 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
230 return -EFAULT;
231 }
232 /*
89bddce5
SH
233 * "fromlen shall refer to the value before truncation.."
234 * 1003.1g
1da177e4
LT
235 */
236 return __put_user(klen, ulen);
237}
238
239#define SOCKFS_MAGIC 0x534F434B
240
e18b890b 241static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
242
243static struct inode *sock_alloc_inode(struct super_block *sb)
244{
245 struct socket_alloc *ei;
89bddce5 246
e94b1766 247 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
248 if (!ei)
249 return NULL;
250 init_waitqueue_head(&ei->socket.wait);
89bddce5 251
1da177e4
LT
252 ei->socket.fasync_list = NULL;
253 ei->socket.state = SS_UNCONNECTED;
254 ei->socket.flags = 0;
255 ei->socket.ops = NULL;
256 ei->socket.sk = NULL;
257 ei->socket.file = NULL;
1da177e4
LT
258
259 return &ei->vfs_inode;
260}
261
262static void sock_destroy_inode(struct inode *inode)
263{
264 kmem_cache_free(sock_inode_cachep,
265 container_of(inode, struct socket_alloc, vfs_inode));
266}
267
51cc5068 268static void init_once(void *foo)
1da177e4 269{
89bddce5 270 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 271
a35afb83 272 inode_init_once(&ei->vfs_inode);
1da177e4 273}
89bddce5 274
1da177e4
LT
275static int init_inodecache(void)
276{
277 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
278 sizeof(struct socket_alloc),
279 0,
280 (SLAB_HWCACHE_ALIGN |
281 SLAB_RECLAIM_ACCOUNT |
282 SLAB_MEM_SPREAD),
20c2df83 283 init_once);
1da177e4
LT
284 if (sock_inode_cachep == NULL)
285 return -ENOMEM;
286 return 0;
287}
288
289static struct super_operations sockfs_ops = {
290 .alloc_inode = sock_alloc_inode,
291 .destroy_inode =sock_destroy_inode,
292 .statfs = simple_statfs,
293};
294
454e2398 295static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
296 int flags, const char *dev_name, void *data,
297 struct vfsmount *mnt)
1da177e4 298{
454e2398
DH
299 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
300 mnt);
1da177e4
LT
301}
302
ba89966c 303static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
304
305static struct file_system_type sock_fs_type = {
306 .name = "sockfs",
307 .get_sb = sockfs_get_sb,
308 .kill_sb = kill_anon_super,
309};
89bddce5 310
1da177e4
LT
311static int sockfs_delete_dentry(struct dentry *dentry)
312{
304e61e6
ED
313 /*
314 * At creation time, we pretended this dentry was hashed
315 * (by clearing DCACHE_UNHASHED bit in d_flags)
316 * At delete time, we restore the truth : not hashed.
317 * (so that dput() can proceed correctly)
318 */
319 dentry->d_flags |= DCACHE_UNHASHED;
320 return 0;
1da177e4 321}
c23fbb6b
ED
322
323/*
324 * sockfs_dname() is called from d_path().
325 */
326static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
327{
328 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
329 dentry->d_inode->i_ino);
330}
331
1da177e4 332static struct dentry_operations sockfs_dentry_operations = {
89bddce5 333 .d_delete = sockfs_delete_dentry,
c23fbb6b 334 .d_dname = sockfs_dname,
1da177e4
LT
335};
336
337/*
338 * Obtains the first available file descriptor and sets it up for use.
339 *
39d8c1b6
DM
340 * These functions create file structures and maps them to fd space
341 * of the current process. On success it returns file descriptor
1da177e4
LT
342 * and file struct implicitly stored in sock->file.
343 * Note that another thread may close file descriptor before we return
344 * from this function. We use the fact that now we do not refer
345 * to socket after mapping. If one day we will need it, this
346 * function will increment ref. count on file by 1.
347 *
348 * In any case returned fd MAY BE not valid!
349 * This race condition is unavoidable
350 * with shared fd spaces, we cannot solve it inside kernel,
351 * but we take care of internal coherence yet.
352 */
353
a677a039 354static int sock_alloc_fd(struct file **filep, int flags)
1da177e4
LT
355{
356 int fd;
1da177e4 357
a677a039 358 fd = get_unused_fd_flags(flags);
39d8c1b6 359 if (likely(fd >= 0)) {
1da177e4
LT
360 struct file *file = get_empty_filp();
361
39d8c1b6
DM
362 *filep = file;
363 if (unlikely(!file)) {
1da177e4 364 put_unused_fd(fd);
39d8c1b6 365 return -ENFILE;
1da177e4 366 }
39d8c1b6
DM
367 } else
368 *filep = NULL;
369 return fd;
370}
1da177e4 371
77d27200 372static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
39d8c1b6 373{
ce8d2cdf 374 struct dentry *dentry;
c23fbb6b 375 struct qstr name = { .name = "" };
39d8c1b6 376
ce8d2cdf
DH
377 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
378 if (unlikely(!dentry))
39d8c1b6
DM
379 return -ENOMEM;
380
ce8d2cdf 381 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
382 /*
383 * We dont want to push this dentry into global dentry hash table.
384 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
385 * This permits a working /proc/$pid/fd/XXX on sockets
386 */
ce8d2cdf
DH
387 dentry->d_flags &= ~DCACHE_UNHASHED;
388 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
389
390 sock->file = file;
ce8d2cdf
DH
391 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
392 &socket_file_ops);
393 SOCK_INODE(sock)->i_fop = &socket_file_ops;
77d27200 394 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
395 file->f_pos = 0;
396 file->private_data = sock;
1da177e4 397
39d8c1b6
DM
398 return 0;
399}
400
/*
 * Allocate a descriptor and file for @sock, attach them, and install the
 * file in the descriptor table.
 *
 * Returns the new descriptor, or a negative error from sock_alloc_fd()
 * or sock_attach_fd(); on an attach failure the file and the descriptor
 * are released again.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile, flags);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
418
6cb153ca
BL
419static struct socket *sock_from_file(struct file *file, int *err)
420{
6cb153ca
BL
421 if (file->f_op == &socket_file_ops)
422 return file->private_data; /* set in sock_map_fd */
423
23bb80d2
ED
424 *err = -ENOTSOCK;
425 return NULL;
6cb153ca
BL
426}
427
1da177e4
LT
428/**
429 * sockfd_lookup - Go from a file number to its socket slot
430 * @fd: file handle
431 * @err: pointer to an error code return
432 *
433 * The file handle passed in is locked and the socket it is bound
434 * too is returned. If an error occurs the err pointer is overwritten
435 * with a negative errno code and NULL is returned. The function checks
436 * for both invalid handles and passing a handle which is not a socket.
437 *
438 * On a success the socket object pointer is returned.
439 */
440
441struct socket *sockfd_lookup(int fd, int *err)
442{
443 struct file *file;
1da177e4
LT
444 struct socket *sock;
445
89bddce5
SH
446 file = fget(fd);
447 if (!file) {
1da177e4
LT
448 *err = -EBADF;
449 return NULL;
450 }
89bddce5 451
6cb153ca
BL
452 sock = sock_from_file(file, err);
453 if (!sock)
1da177e4 454 fput(file);
6cb153ca
BL
455 return sock;
456}
1da177e4 457
6cb153ca
BL
458static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
459{
460 struct file *file;
461 struct socket *sock;
462
3672558c 463 *err = -EBADF;
6cb153ca
BL
464 file = fget_light(fd, fput_needed);
465 if (file) {
466 sock = sock_from_file(file, err);
467 if (sock)
468 return sock;
469 fput_light(file, *fput_needed);
1da177e4 470 }
6cb153ca 471 return NULL;
1da177e4
LT
472}
473
474/**
475 * sock_alloc - allocate a socket
89bddce5 476 *
1da177e4
LT
477 * Allocate a new inode and socket object. The two are bound together
478 * and initialised. The socket is then returned. If we are out of inodes
479 * NULL is returned.
480 */
481
482static struct socket *sock_alloc(void)
483{
89bddce5
SH
484 struct inode *inode;
485 struct socket *sock;
1da177e4
LT
486
487 inode = new_inode(sock_mnt->mnt_sb);
488 if (!inode)
489 return NULL;
490
491 sock = SOCKET_I(inode);
492
89bddce5 493 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
494 inode->i_uid = current->fsuid;
495 inode->i_gid = current->fsgid;
496
497 get_cpu_var(sockets_in_use)++;
498 put_cpu_var(sockets_in_use);
499 return sock;
500}
501
502/*
503 * In theory you can't get an open on this inode, but /proc provides
504 * a back door. Remember to keep it shut otherwise you'll let the
505 * creepy crawlies in.
506 */
89bddce5 507
1da177e4
LT
508static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
509{
510 return -ENXIO;
511}
512
4b6f5d20 513const struct file_operations bad_sock_fops = {
1da177e4
LT
514 .owner = THIS_MODULE,
515 .open = sock_no_open,
516};
517
518/**
519 * sock_release - close a socket
520 * @sock: socket to close
521 *
522 * The socket is released from the protocol stack if it has a release
523 * callback, and the inode is then released if the socket is bound to
89bddce5 524 * an inode not a file.
1da177e4 525 */
89bddce5 526
1da177e4
LT
527void sock_release(struct socket *sock)
528{
529 if (sock->ops) {
530 struct module *owner = sock->ops->owner;
531
532 sock->ops->release(sock);
533 sock->ops = NULL;
534 module_put(owner);
535 }
536
537 if (sock->fasync_list)
538 printk(KERN_ERR "sock_release: fasync list not empty!\n");
539
540 get_cpu_var(sockets_in_use)--;
541 put_cpu_var(sockets_in_use);
542 if (!sock->file) {
543 iput(SOCK_INODE(sock));
544 return;
545 }
89bddce5 546 sock->file = NULL;
1da177e4
LT
547}
548
89bddce5 549static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
550 struct msghdr *msg, size_t size)
551{
552 struct sock_iocb *si = kiocb_to_siocb(iocb);
553 int err;
554
555 si->sock = sock;
556 si->scm = NULL;
557 si->msg = msg;
558 si->size = size;
559
560 err = security_socket_sendmsg(sock, msg, size);
561 if (err)
562 return err;
563
564 return sock->ops->sendmsg(iocb, sock, msg, size);
565}
566
567int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
568{
569 struct kiocb iocb;
570 struct sock_iocb siocb;
571 int ret;
572
573 init_sync_kiocb(&iocb, NULL);
574 iocb.private = &siocb;
575 ret = __sock_sendmsg(&iocb, sock, msg, size);
576 if (-EIOCBQUEUED == ret)
577 ret = wait_on_sync_kiocb(&iocb);
578 return ret;
579}
580
581int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
582 struct kvec *vec, size_t num, size_t size)
583{
584 mm_segment_t oldfs = get_fs();
585 int result;
586
587 set_fs(KERNEL_DS);
588 /*
589 * the following is safe, since for compiler definitions of kvec and
590 * iovec are identical, yielding the same in-core layout and alignment
591 */
89bddce5 592 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
593 msg->msg_iovlen = num;
594 result = sock_sendmsg(sock, msg, size);
595 set_fs(oldfs);
596 return result;
597}
598
92f37fd2
ED
599/*
600 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
601 */
602void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
603 struct sk_buff *skb)
604{
605 ktime_t kt = skb->tstamp;
606
607 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
608 struct timeval tv;
609 /* Race occurred between timestamp enabling and packet
610 receiving. Fill in the current time for now. */
611 if (kt.tv64 == 0)
612 kt = ktime_get_real();
613 skb->tstamp = kt;
614 tv = ktime_to_timeval(kt);
615 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
616 } else {
617 struct timespec ts;
618 /* Race occurred between timestamp enabling and packet
619 receiving. Fill in the current time for now. */
620 if (kt.tv64 == 0)
621 kt = ktime_get_real();
622 skb->tstamp = kt;
623 ts = ktime_to_timespec(kt);
624 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
625 }
626}
627
7c81fd8b
ACM
628EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
629
89bddce5 630static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
631 struct msghdr *msg, size_t size, int flags)
632{
633 int err;
634 struct sock_iocb *si = kiocb_to_siocb(iocb);
635
636 si->sock = sock;
637 si->scm = NULL;
638 si->msg = msg;
639 si->size = size;
640 si->flags = flags;
641
642 err = security_socket_recvmsg(sock, msg, size, flags);
643 if (err)
644 return err;
645
646 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
647}
648
89bddce5 649int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
650 size_t size, int flags)
651{
652 struct kiocb iocb;
653 struct sock_iocb siocb;
654 int ret;
655
89bddce5 656 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
657 iocb.private = &siocb;
658 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
659 if (-EIOCBQUEUED == ret)
660 ret = wait_on_sync_kiocb(&iocb);
661 return ret;
662}
663
89bddce5
SH
664int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
665 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
666{
667 mm_segment_t oldfs = get_fs();
668 int result;
669
670 set_fs(KERNEL_DS);
671 /*
672 * the following is safe, since for compiler definitions of kvec and
673 * iovec are identical, yielding the same in-core layout and alignment
674 */
89bddce5 675 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
676 result = sock_recvmsg(sock, msg, size, flags);
677 set_fs(oldfs);
678 return result;
679}
680
681static void sock_aio_dtor(struct kiocb *iocb)
682{
683 kfree(iocb->private);
684}
685
ce1d4d3e
CH
686static ssize_t sock_sendpage(struct file *file, struct page *page,
687 int offset, size_t size, loff_t *ppos, int more)
1da177e4 688{
1da177e4
LT
689 struct socket *sock;
690 int flags;
691
ce1d4d3e
CH
692 sock = file->private_data;
693
694 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
695 if (more)
696 flags |= MSG_MORE;
697
698 return sock->ops->sendpage(sock, page, offset, size, flags);
699}
1da177e4 700
9c55e01c
JA
701static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
702 struct pipe_inode_info *pipe, size_t len,
703 unsigned int flags)
704{
705 struct socket *sock = file->private_data;
706
997b37da
RDC
707 if (unlikely(!sock->ops->splice_read))
708 return -EINVAL;
709
9c55e01c
JA
710 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
711}
712
ce1d4d3e 713static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 714 struct sock_iocb *siocb)
ce1d4d3e
CH
715{
716 if (!is_sync_kiocb(iocb)) {
717 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
718 if (!siocb)
719 return NULL;
1da177e4
LT
720 iocb->ki_dtor = sock_aio_dtor;
721 }
1da177e4 722
ce1d4d3e 723 siocb->kiocb = iocb;
ce1d4d3e
CH
724 iocb->private = siocb;
725 return siocb;
1da177e4
LT
726}
727
ce1d4d3e 728static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
729 struct file *file, const struct iovec *iov,
730 unsigned long nr_segs)
ce1d4d3e
CH
731{
732 struct socket *sock = file->private_data;
733 size_t size = 0;
734 int i;
1da177e4 735
89bddce5
SH
736 for (i = 0; i < nr_segs; i++)
737 size += iov[i].iov_len;
1da177e4 738
ce1d4d3e
CH
739 msg->msg_name = NULL;
740 msg->msg_namelen = 0;
741 msg->msg_control = NULL;
742 msg->msg_controllen = 0;
89bddce5 743 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
744 msg->msg_iovlen = nr_segs;
745 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
746
747 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
748}
749
027445c3
BP
750static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
751 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
752{
753 struct sock_iocb siocb, *x;
754
1da177e4
LT
755 if (pos != 0)
756 return -ESPIPE;
027445c3
BP
757
758 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
759 return 0;
760
027445c3
BP
761
762 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
763 if (!x)
764 return -ENOMEM;
027445c3 765 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
766}
767
ce1d4d3e 768static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
769 struct file *file, const struct iovec *iov,
770 unsigned long nr_segs)
1da177e4 771{
ce1d4d3e
CH
772 struct socket *sock = file->private_data;
773 size_t size = 0;
774 int i;
1da177e4 775
89bddce5
SH
776 for (i = 0; i < nr_segs; i++)
777 size += iov[i].iov_len;
1da177e4 778
ce1d4d3e
CH
779 msg->msg_name = NULL;
780 msg->msg_namelen = 0;
781 msg->msg_control = NULL;
782 msg->msg_controllen = 0;
89bddce5 783 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
784 msg->msg_iovlen = nr_segs;
785 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
786 if (sock->type == SOCK_SEQPACKET)
787 msg->msg_flags |= MSG_EOR;
1da177e4 788
ce1d4d3e 789 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
790}
791
027445c3
BP
792static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
793 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
794{
795 struct sock_iocb siocb, *x;
1da177e4 796
ce1d4d3e
CH
797 if (pos != 0)
798 return -ESPIPE;
027445c3 799
027445c3 800 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
801 if (!x)
802 return -ENOMEM;
1da177e4 803
027445c3 804 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
805}
806
1da177e4
LT
807/*
808 * Atomic setting of ioctl hooks to avoid race
809 * with module unload.
810 */
811
4a3e2f71 812static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 813static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 814
881d966b 815void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 816{
4a3e2f71 817 mutex_lock(&br_ioctl_mutex);
1da177e4 818 br_ioctl_hook = hook;
4a3e2f71 819 mutex_unlock(&br_ioctl_mutex);
1da177e4 820}
89bddce5 821
1da177e4
LT
822EXPORT_SYMBOL(brioctl_set);
823
4a3e2f71 824static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 825static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 826
881d966b 827void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 828{
4a3e2f71 829 mutex_lock(&vlan_ioctl_mutex);
1da177e4 830 vlan_ioctl_hook = hook;
4a3e2f71 831 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 832}
89bddce5 833
1da177e4
LT
834EXPORT_SYMBOL(vlan_ioctl_set);
835
4a3e2f71 836static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 837static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 838
89bddce5 839void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 840{
4a3e2f71 841 mutex_lock(&dlci_ioctl_mutex);
1da177e4 842 dlci_ioctl_hook = hook;
4a3e2f71 843 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 844}
89bddce5 845
1da177e4
LT
846EXPORT_SYMBOL(dlci_ioctl_set);
847
848/*
849 * With an ioctl, arg may well be a user mode pointer, but we don't know
850 * what to do with it - that's up to the protocol still.
851 */
852
853static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
854{
855 struct socket *sock;
881d966b 856 struct sock *sk;
1da177e4
LT
857 void __user *argp = (void __user *)arg;
858 int pid, err;
881d966b 859 struct net *net;
1da177e4 860
b69aee04 861 sock = file->private_data;
881d966b 862 sk = sock->sk;
3b1e0a65 863 net = sock_net(sk);
1da177e4 864 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 865 err = dev_ioctl(net, cmd, argp);
1da177e4 866 } else
d86b5e0e 867#ifdef CONFIG_WIRELESS_EXT
1da177e4 868 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 869 err = dev_ioctl(net, cmd, argp);
1da177e4 870 } else
89bddce5
SH
871#endif /* CONFIG_WIRELESS_EXT */
872 switch (cmd) {
1da177e4
LT
873 case FIOSETOWN:
874 case SIOCSPGRP:
875 err = -EFAULT;
876 if (get_user(pid, (int __user *)argp))
877 break;
878 err = f_setown(sock->file, pid, 1);
879 break;
880 case FIOGETOWN:
881 case SIOCGPGRP:
609d7fa9 882 err = put_user(f_getown(sock->file),
89bddce5 883 (int __user *)argp);
1da177e4
LT
884 break;
885 case SIOCGIFBR:
886 case SIOCSIFBR:
887 case SIOCBRADDBR:
888 case SIOCBRDELBR:
889 err = -ENOPKG;
890 if (!br_ioctl_hook)
891 request_module("bridge");
892
4a3e2f71 893 mutex_lock(&br_ioctl_mutex);
89bddce5 894 if (br_ioctl_hook)
881d966b 895 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 896 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
897 break;
898 case SIOCGIFVLAN:
899 case SIOCSIFVLAN:
900 err = -ENOPKG;
901 if (!vlan_ioctl_hook)
902 request_module("8021q");
903
4a3e2f71 904 mutex_lock(&vlan_ioctl_mutex);
1da177e4 905 if (vlan_ioctl_hook)
881d966b 906 err = vlan_ioctl_hook(net, argp);
4a3e2f71 907 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 908 break;
1da177e4
LT
909 case SIOCADDDLCI:
910 case SIOCDELDLCI:
911 err = -ENOPKG;
912 if (!dlci_ioctl_hook)
913 request_module("dlci");
914
7512cbf6
PE
915 mutex_lock(&dlci_ioctl_mutex);
916 if (dlci_ioctl_hook)
1da177e4 917 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 918 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
919 break;
920 default:
921 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
922
923 /*
924 * If this ioctl is unknown try to hand it down
925 * to the NIC driver.
926 */
927 if (err == -ENOIOCTLCMD)
881d966b 928 err = dev_ioctl(net, cmd, argp);
1da177e4 929 break;
89bddce5 930 }
1da177e4
LT
931 return err;
932}
933
934int sock_create_lite(int family, int type, int protocol, struct socket **res)
935{
936 int err;
937 struct socket *sock = NULL;
89bddce5 938
1da177e4
LT
939 err = security_socket_create(family, type, protocol, 1);
940 if (err)
941 goto out;
942
943 sock = sock_alloc();
944 if (!sock) {
945 err = -ENOMEM;
946 goto out;
947 }
948
1da177e4 949 sock->type = type;
7420ed23
VY
950 err = security_socket_post_create(sock, family, type, protocol, 1);
951 if (err)
952 goto out_release;
953
1da177e4
LT
954out:
955 *res = sock;
956 return err;
7420ed23
VY
957out_release:
958 sock_release(sock);
959 sock = NULL;
960 goto out;
1da177e4
LT
961}
962
963/* No kernel lock held - perfect */
89bddce5 964static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
965{
966 struct socket *sock;
967
968 /*
89bddce5 969 * We can't return errors to poll, so it's either yes or no.
1da177e4 970 */
b69aee04 971 sock = file->private_data;
1da177e4
LT
972 return sock->ops->poll(file, sock, wait);
973}
974
89bddce5 975static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 976{
b69aee04 977 struct socket *sock = file->private_data;
1da177e4
LT
978
979 return sock->ops->mmap(file, sock, vma);
980}
981
20380731 982static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
983{
984 /*
89bddce5
SH
985 * It was possible the inode is NULL we were
986 * closing an unfinished socket.
1da177e4
LT
987 */
988
89bddce5 989 if (!inode) {
1da177e4
LT
990 printk(KERN_DEBUG "sock_close: NULL inode\n");
991 return 0;
992 }
1da177e4
LT
993 sock_release(SOCKET_I(inode));
994 return 0;
995}
996
997/*
998 * Update the socket async list
999 *
1000 * Fasync_list locking strategy.
1001 *
1002 * 1. fasync_list is modified only under process context socket lock
1003 * i.e. under semaphore.
1004 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1005 * or under socket lock.
1006 * 3. fasync_list can be used from softirq context, so that
1007 * modification under socket lock have to be enhanced with
1008 * write_lock_bh(&sk->sk_callback_lock).
1009 * --ANK (990710)
1010 */
1011
1012static int sock_fasync(int fd, struct file *filp, int on)
1013{
89bddce5 1014 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1015 struct socket *sock;
1016 struct sock *sk;
1017
89bddce5 1018 if (on) {
8b3a7005 1019 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1020 if (fna == NULL)
1da177e4
LT
1021 return -ENOMEM;
1022 }
1023
b69aee04 1024 sock = filp->private_data;
1da177e4 1025
89bddce5
SH
1026 sk = sock->sk;
1027 if (sk == NULL) {
1da177e4
LT
1028 kfree(fna);
1029 return -EINVAL;
1030 }
1031
1032 lock_sock(sk);
1033
89bddce5 1034 prev = &(sock->fasync_list);
1da177e4 1035
89bddce5
SH
1036 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1037 if (fa->fa_file == filp)
1da177e4
LT
1038 break;
1039
89bddce5
SH
1040 if (on) {
1041 if (fa != NULL) {
1da177e4 1042 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1043 fa->fa_fd = fd;
1da177e4
LT
1044 write_unlock_bh(&sk->sk_callback_lock);
1045
1046 kfree(fna);
1047 goto out;
1048 }
89bddce5
SH
1049 fna->fa_file = filp;
1050 fna->fa_fd = fd;
1051 fna->magic = FASYNC_MAGIC;
1052 fna->fa_next = sock->fasync_list;
1da177e4 1053 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1054 sock->fasync_list = fna;
1da177e4 1055 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1056 } else {
1057 if (fa != NULL) {
1da177e4 1058 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1059 *prev = fa->fa_next;
1da177e4
LT
1060 write_unlock_bh(&sk->sk_callback_lock);
1061 kfree(fa);
1062 }
1063 }
1064
1065out:
1066 release_sock(sock->sk);
1067 return 0;
1068}
1069
1070/* This function may be called only under socket lock or callback_lock */
1071
1072int sock_wake_async(struct socket *sock, int how, int band)
1073{
1074 if (!sock || !sock->fasync_list)
1075 return -1;
89bddce5 1076 switch (how) {
8d8ad9d7 1077 case SOCK_WAKE_WAITD:
1da177e4
LT
1078 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1079 break;
1080 goto call_kill;
8d8ad9d7 1081 case SOCK_WAKE_SPACE:
1da177e4
LT
1082 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1083 break;
1084 /* fall through */
8d8ad9d7 1085 case SOCK_WAKE_IO:
89bddce5 1086call_kill:
1da177e4
LT
1087 __kill_fasync(sock->fasync_list, SIGIO, band);
1088 break;
8d8ad9d7 1089 case SOCK_WAKE_URG:
1da177e4
LT
1090 __kill_fasync(sock->fasync_list, SIGURG, band);
1091 }
1092 return 0;
1093}
1094
1b8d7ae4 1095static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1096 struct socket **res, int kern)
1da177e4
LT
1097{
1098 int err;
1099 struct socket *sock;
55737fda 1100 const struct net_proto_family *pf;
1da177e4
LT
1101
1102 /*
89bddce5 1103 * Check protocol is in range
1da177e4
LT
1104 */
1105 if (family < 0 || family >= NPROTO)
1106 return -EAFNOSUPPORT;
1107 if (type < 0 || type >= SOCK_MAX)
1108 return -EINVAL;
1109
1110 /* Compatibility.
1111
1112 This uglymoron is moved from INET layer to here to avoid
1113 deadlock in module load.
1114 */
1115 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1116 static int warned;
1da177e4
LT
1117 if (!warned) {
1118 warned = 1;
89bddce5
SH
1119 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1120 current->comm);
1da177e4
LT
1121 }
1122 family = PF_PACKET;
1123 }
1124
1125 err = security_socket_create(family, type, protocol, kern);
1126 if (err)
1127 return err;
89bddce5 1128
55737fda
SH
1129 /*
1130 * Allocate the socket and allow the family to set things up. if
1131 * the protocol is 0, the family is instructed to select an appropriate
1132 * default.
1133 */
1134 sock = sock_alloc();
1135 if (!sock) {
1136 if (net_ratelimit())
1137 printk(KERN_WARNING "socket: no more sockets\n");
1138 return -ENFILE; /* Not exactly a match, but its the
1139 closest posix thing */
1140 }
1141
1142 sock->type = type;
1143
95a5afca 1144#ifdef CONFIG_MODULES
89bddce5
SH
1145 /* Attempt to load a protocol module if the find failed.
1146 *
1147 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1148 * requested real, full-featured networking support upon configuration.
1149 * Otherwise module support will break!
1150 */
55737fda 1151 if (net_families[family] == NULL)
89bddce5 1152 request_module("net-pf-%d", family);
1da177e4
LT
1153#endif
1154
55737fda
SH
1155 rcu_read_lock();
1156 pf = rcu_dereference(net_families[family]);
1157 err = -EAFNOSUPPORT;
1158 if (!pf)
1159 goto out_release;
1da177e4
LT
1160
1161 /*
1162 * We will call the ->create function, that possibly is in a loadable
1163 * module, so we have to bump that loadable module refcnt first.
1164 */
55737fda 1165 if (!try_module_get(pf->owner))
1da177e4
LT
1166 goto out_release;
1167
55737fda
SH
1168 /* Now protected by module ref count */
1169 rcu_read_unlock();
1170
1b8d7ae4 1171 err = pf->create(net, sock, protocol);
55737fda 1172 if (err < 0)
1da177e4 1173 goto out_module_put;
a79af59e 1174
1da177e4
LT
1175 /*
1176 * Now to bump the refcnt of the [loadable] module that owns this
1177 * socket at sock_release time we decrement its refcnt.
1178 */
55737fda
SH
1179 if (!try_module_get(sock->ops->owner))
1180 goto out_module_busy;
1181
1da177e4
LT
1182 /*
1183 * Now that we're done with the ->create function, the [loadable]
1184 * module can have its refcnt decremented
1185 */
55737fda 1186 module_put(pf->owner);
7420ed23
VY
1187 err = security_socket_post_create(sock, family, type, protocol, kern);
1188 if (err)
3b185525 1189 goto out_sock_release;
55737fda 1190 *res = sock;
1da177e4 1191
55737fda
SH
1192 return 0;
1193
1194out_module_busy:
1195 err = -EAFNOSUPPORT;
1da177e4 1196out_module_put:
55737fda
SH
1197 sock->ops = NULL;
1198 module_put(pf->owner);
1199out_sock_release:
1da177e4 1200 sock_release(sock);
55737fda
SH
1201 return err;
1202
1203out_release:
1204 rcu_read_unlock();
1205 goto out_sock_release;
1da177e4
LT
1206}
1207
1208int sock_create(int family, int type, int protocol, struct socket **res)
1209{
1b8d7ae4 1210 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1211}
1212
1213int sock_create_kern(int family, int type, int protocol, struct socket **res)
1214{
1b8d7ae4 1215 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1216}
1217
1218asmlinkage long sys_socket(int family, int type, int protocol)
1219{
1220 int retval;
1221 struct socket *sock;
a677a039
UD
1222 int flags;
1223
e38b36f3
UD
1224 /* Check the SOCK_* constants for consistency. */
1225 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1226 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1227 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1228 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1229
a677a039 1230 flags = type & ~SOCK_TYPE_MASK;
77d27200 1231 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1232 return -EINVAL;
1233 type &= SOCK_TYPE_MASK;
1da177e4 1234
aaca0bdc
UD
1235 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1236 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1237
1da177e4
LT
1238 retval = sock_create(family, type, protocol, &sock);
1239 if (retval < 0)
1240 goto out;
1241
77d27200 1242 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1243 if (retval < 0)
1244 goto out_release;
1245
1246out:
1247 /* It may be already another descriptor 8) Not kernel problem. */
1248 return retval;
1249
1250out_release:
1251 sock_release(sock);
1252 return retval;
1253}
1254
1255/*
1256 * Create a pair of connected sockets.
1257 */
1258
89bddce5
SH
1259asmlinkage long sys_socketpair(int family, int type, int protocol,
1260 int __user *usockvec)
1da177e4
LT
1261{
1262 struct socket *sock1, *sock2;
1263 int fd1, fd2, err;
db349509 1264 struct file *newfile1, *newfile2;
a677a039
UD
1265 int flags;
1266
1267 flags = type & ~SOCK_TYPE_MASK;
77d27200 1268 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1269 return -EINVAL;
1270 type &= SOCK_TYPE_MASK;
1da177e4 1271
aaca0bdc
UD
1272 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1273 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1274
1da177e4
LT
1275 /*
1276 * Obtain the first socket and check if the underlying protocol
1277 * supports the socketpair call.
1278 */
1279
1280 err = sock_create(family, type, protocol, &sock1);
1281 if (err < 0)
1282 goto out;
1283
1284 err = sock_create(family, type, protocol, &sock2);
1285 if (err < 0)
1286 goto out_release_1;
1287
1288 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1289 if (err < 0)
1da177e4
LT
1290 goto out_release_both;
1291
a677a039 1292 fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
bf3c23d1
DM
1293 if (unlikely(fd1 < 0)) {
1294 err = fd1;
db349509 1295 goto out_release_both;
bf3c23d1 1296 }
1da177e4 1297
a677a039 1298 fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
db349509 1299 if (unlikely(fd2 < 0)) {
bf3c23d1 1300 err = fd2;
db349509
AV
1301 put_filp(newfile1);
1302 put_unused_fd(fd1);
1da177e4 1303 goto out_release_both;
db349509 1304 }
1da177e4 1305
77d27200 1306 err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
db349509
AV
1307 if (unlikely(err < 0)) {
1308 goto out_fd2;
1309 }
1310
77d27200 1311 err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
db349509
AV
1312 if (unlikely(err < 0)) {
1313 fput(newfile1);
1314 goto out_fd1;
1315 }
1316
1317 err = audit_fd_pair(fd1, fd2);
1318 if (err < 0) {
1319 fput(newfile1);
1320 fput(newfile2);
1321 goto out_fd;
1322 }
1da177e4 1323
db349509
AV
1324 fd_install(fd1, newfile1);
1325 fd_install(fd2, newfile2);
1da177e4
LT
1326 /* fd1 and fd2 may be already another descriptors.
1327 * Not kernel problem.
1328 */
1329
89bddce5 1330 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1331 if (!err)
1332 err = put_user(fd2, &usockvec[1]);
1333 if (!err)
1334 return 0;
1335
1336 sys_close(fd2);
1337 sys_close(fd1);
1338 return err;
1339
1da177e4 1340out_release_both:
89bddce5 1341 sock_release(sock2);
1da177e4 1342out_release_1:
89bddce5 1343 sock_release(sock1);
1da177e4
LT
1344out:
1345 return err;
db349509
AV
1346
1347out_fd2:
1348 put_filp(newfile1);
1349 sock_release(sock1);
1350out_fd1:
1351 put_filp(newfile2);
1352 sock_release(sock2);
1353out_fd:
1354 put_unused_fd(fd1);
1355 put_unused_fd(fd2);
1356 goto out;
1da177e4
LT
1357}
1358
1da177e4
LT
1359/*
1360 * Bind a name to a socket. Nothing much to do here since it's
1361 * the protocol's responsibility to handle the local address.
1362 *
1363 * We move the socket address to kernel space before we call
1364 * the protocol layer (having also checked the address is ok).
1365 */
1366
1367asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1368{
1369 struct socket *sock;
230b1839 1370 struct sockaddr_storage address;
6cb153ca 1371 int err, fput_needed;
1da177e4 1372
89bddce5 1373 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1374 if (sock) {
230b1839 1375 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1376 if (err >= 0) {
1377 err = security_socket_bind(sock,
230b1839 1378 (struct sockaddr *)&address,
89bddce5 1379 addrlen);
6cb153ca
BL
1380 if (!err)
1381 err = sock->ops->bind(sock,
89bddce5 1382 (struct sockaddr *)
230b1839 1383 &address, addrlen);
1da177e4 1384 }
6cb153ca 1385 fput_light(sock->file, fput_needed);
89bddce5 1386 }
1da177e4
LT
1387 return err;
1388}
1389
1da177e4
LT
1390/*
1391 * Perform a listen. Basically, we allow the protocol to do anything
1392 * necessary for a listen, and if that works, we mark the socket as
1393 * ready for listening.
1394 */
1395
1da177e4
LT
1396asmlinkage long sys_listen(int fd, int backlog)
1397{
1398 struct socket *sock;
6cb153ca 1399 int err, fput_needed;
b8e1f9b5 1400 int somaxconn;
89bddce5
SH
1401
1402 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1403 if (sock) {
8efa6e93 1404 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1405 if ((unsigned)backlog > somaxconn)
1406 backlog = somaxconn;
1da177e4
LT
1407
1408 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1409 if (!err)
1410 err = sock->ops->listen(sock, backlog);
1da177e4 1411
6cb153ca 1412 fput_light(sock->file, fput_needed);
1da177e4
LT
1413 }
1414 return err;
1415}
1416
1da177e4
LT
1417/*
1418 * For accept, we attempt to create a new socket, set up the link
1419 * with the client, wake up the client, then return the new
1420 * connected fd. We collect the address of the connector in kernel
1421 * space and move it to user at the very end. This is unclean because
1422 * we open the socket then return an error.
1423 *
1424 * 1003.1g adds the ability to recvmsg() to query connection pending
1425 * status to recvmsg. We need to add that support in a way thats
1426 * clean when we restucture accept also.
1427 */
1428
de11defe
UD
1429asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1430 int __user *upeer_addrlen, int flags)
1da177e4
LT
1431{
1432 struct socket *sock, *newsock;
39d8c1b6 1433 struct file *newfile;
6cb153ca 1434 int err, len, newfd, fput_needed;
230b1839 1435 struct sockaddr_storage address;
1da177e4 1436
77d27200 1437 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1438 return -EINVAL;
1439
1440 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1441 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1442
6cb153ca 1443 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1444 if (!sock)
1445 goto out;
1446
1447 err = -ENFILE;
89bddce5 1448 if (!(newsock = sock_alloc()))
1da177e4
LT
1449 goto out_put;
1450
1451 newsock->type = sock->type;
1452 newsock->ops = sock->ops;
1453
1da177e4
LT
1454 /*
1455 * We don't need try_module_get here, as the listening socket (sock)
1456 * has the protocol module (sock->ops->owner) held.
1457 */
1458 __module_get(newsock->ops->owner);
1459
aaca0bdc 1460 newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
39d8c1b6
DM
1461 if (unlikely(newfd < 0)) {
1462 err = newfd;
9a1875e6
DM
1463 sock_release(newsock);
1464 goto out_put;
39d8c1b6
DM
1465 }
1466
77d27200 1467 err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
39d8c1b6 1468 if (err < 0)
79f4f642 1469 goto out_fd_simple;
39d8c1b6 1470
a79af59e
FF
1471 err = security_socket_accept(sock, newsock);
1472 if (err)
39d8c1b6 1473 goto out_fd;
a79af59e 1474
1da177e4
LT
1475 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1476 if (err < 0)
39d8c1b6 1477 goto out_fd;
1da177e4
LT
1478
1479 if (upeer_sockaddr) {
230b1839 1480 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1481 &len, 2) < 0) {
1da177e4 1482 err = -ECONNABORTED;
39d8c1b6 1483 goto out_fd;
1da177e4 1484 }
230b1839
YH
1485 err = move_addr_to_user((struct sockaddr *)&address,
1486 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1487 if (err < 0)
39d8c1b6 1488 goto out_fd;
1da177e4
LT
1489 }
1490
1491 /* File flags are not inherited via accept() unlike another OSes. */
1492
39d8c1b6
DM
1493 fd_install(newfd, newfile);
1494 err = newfd;
1da177e4
LT
1495
1496 security_socket_post_accept(sock, newsock);
1497
1498out_put:
6cb153ca 1499 fput_light(sock->file, fput_needed);
1da177e4
LT
1500out:
1501 return err;
79f4f642
AD
1502out_fd_simple:
1503 sock_release(newsock);
1504 put_filp(newfile);
1505 put_unused_fd(newfd);
1506 goto out_put;
39d8c1b6 1507out_fd:
9606a216 1508 fput(newfile);
39d8c1b6 1509 put_unused_fd(newfd);
1da177e4
LT
1510 goto out_put;
1511}
1512
aaca0bdc
UD
1513asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1514 int __user *upeer_addrlen)
1515{
de11defe 1516 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1517}
1518
1da177e4
LT
1519/*
1520 * Attempt to connect to a socket with the server address. The address
1521 * is in user space so we verify it is OK and move it to kernel space.
1522 *
1523 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1524 * break bindings
1525 *
1526 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1527 * other SEQPACKET protocols that take time to connect() as it doesn't
1528 * include the -EINPROGRESS status for such sockets.
1529 */
1530
89bddce5
SH
1531asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1532 int addrlen)
1da177e4
LT
1533{
1534 struct socket *sock;
230b1839 1535 struct sockaddr_storage address;
6cb153ca 1536 int err, fput_needed;
1da177e4 1537
6cb153ca 1538 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1539 if (!sock)
1540 goto out;
230b1839 1541 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1542 if (err < 0)
1543 goto out_put;
1544
89bddce5 1545 err =
230b1839 1546 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1547 if (err)
1548 goto out_put;
1549
230b1839 1550 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1551 sock->file->f_flags);
1552out_put:
6cb153ca 1553 fput_light(sock->file, fput_needed);
1da177e4
LT
1554out:
1555 return err;
1556}
1557
1558/*
1559 * Get the local address ('name') of a socket object. Move the obtained
1560 * name to user space.
1561 */
1562
89bddce5
SH
1563asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1564 int __user *usockaddr_len)
1da177e4
LT
1565{
1566 struct socket *sock;
230b1839 1567 struct sockaddr_storage address;
6cb153ca 1568 int len, err, fput_needed;
89bddce5 1569
6cb153ca 1570 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1571 if (!sock)
1572 goto out;
1573
1574 err = security_socket_getsockname(sock);
1575 if (err)
1576 goto out_put;
1577
230b1839 1578 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1579 if (err)
1580 goto out_put;
230b1839 1581 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1582
1583out_put:
6cb153ca 1584 fput_light(sock->file, fput_needed);
1da177e4
LT
1585out:
1586 return err;
1587}
1588
1589/*
1590 * Get the remote address ('name') of a socket object. Move the obtained
1591 * name to user space.
1592 */
1593
89bddce5
SH
1594asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1595 int __user *usockaddr_len)
1da177e4
LT
1596{
1597 struct socket *sock;
230b1839 1598 struct sockaddr_storage address;
6cb153ca 1599 int len, err, fput_needed;
1da177e4 1600
89bddce5
SH
1601 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1602 if (sock != NULL) {
1da177e4
LT
1603 err = security_socket_getpeername(sock);
1604 if (err) {
6cb153ca 1605 fput_light(sock->file, fput_needed);
1da177e4
LT
1606 return err;
1607 }
1608
89bddce5 1609 err =
230b1839 1610 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1611 1);
1da177e4 1612 if (!err)
230b1839 1613 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1614 usockaddr_len);
6cb153ca 1615 fput_light(sock->file, fput_needed);
1da177e4
LT
1616 }
1617 return err;
1618}
1619
1620/*
1621 * Send a datagram to a given address. We move the address into kernel
1622 * space and check the user space data area is readable before invoking
1623 * the protocol.
1624 */
1625
89bddce5
SH
1626asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1627 unsigned flags, struct sockaddr __user *addr,
1628 int addr_len)
1da177e4
LT
1629{
1630 struct socket *sock;
230b1839 1631 struct sockaddr_storage address;
1da177e4
LT
1632 int err;
1633 struct msghdr msg;
1634 struct iovec iov;
6cb153ca 1635 int fput_needed;
6cb153ca 1636
de0fa95c
PE
1637 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1638 if (!sock)
4387ff75 1639 goto out;
6cb153ca 1640
89bddce5
SH
1641 iov.iov_base = buff;
1642 iov.iov_len = len;
1643 msg.msg_name = NULL;
1644 msg.msg_iov = &iov;
1645 msg.msg_iovlen = 1;
1646 msg.msg_control = NULL;
1647 msg.msg_controllen = 0;
1648 msg.msg_namelen = 0;
6cb153ca 1649 if (addr) {
230b1839 1650 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1651 if (err < 0)
1652 goto out_put;
230b1839 1653 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1654 msg.msg_namelen = addr_len;
1da177e4
LT
1655 }
1656 if (sock->file->f_flags & O_NONBLOCK)
1657 flags |= MSG_DONTWAIT;
1658 msg.msg_flags = flags;
1659 err = sock_sendmsg(sock, &msg, len);
1660
89bddce5 1661out_put:
de0fa95c 1662 fput_light(sock->file, fput_needed);
4387ff75 1663out:
1da177e4
LT
1664 return err;
1665}
1666
1667/*
89bddce5 1668 * Send a datagram down a socket.
1da177e4
LT
1669 */
1670
89bddce5 1671asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1672{
1673 return sys_sendto(fd, buff, len, flags, NULL, 0);
1674}
1675
1676/*
89bddce5 1677 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1678 * sender. We verify the buffers are writable and if needed move the
1679 * sender address from kernel to user space.
1680 */
1681
89bddce5
SH
1682asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1683 unsigned flags, struct sockaddr __user *addr,
1684 int __user *addr_len)
1da177e4
LT
1685{
1686 struct socket *sock;
1687 struct iovec iov;
1688 struct msghdr msg;
230b1839 1689 struct sockaddr_storage address;
89bddce5 1690 int err, err2;
6cb153ca
BL
1691 int fput_needed;
1692
de0fa95c 1693 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1694 if (!sock)
de0fa95c 1695 goto out;
1da177e4 1696
89bddce5
SH
1697 msg.msg_control = NULL;
1698 msg.msg_controllen = 0;
1699 msg.msg_iovlen = 1;
1700 msg.msg_iov = &iov;
1701 iov.iov_len = size;
1702 iov.iov_base = ubuf;
230b1839
YH
1703 msg.msg_name = (struct sockaddr *)&address;
1704 msg.msg_namelen = sizeof(address);
1da177e4
LT
1705 if (sock->file->f_flags & O_NONBLOCK)
1706 flags |= MSG_DONTWAIT;
89bddce5 1707 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1708
89bddce5 1709 if (err >= 0 && addr != NULL) {
230b1839
YH
1710 err2 = move_addr_to_user((struct sockaddr *)&address,
1711 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1712 if (err2 < 0)
1713 err = err2;
1da177e4 1714 }
de0fa95c
PE
1715
1716 fput_light(sock->file, fput_needed);
4387ff75 1717out:
1da177e4
LT
1718 return err;
1719}
1720
1721/*
89bddce5 1722 * Receive a datagram from a socket.
1da177e4
LT
1723 */
1724
89bddce5
SH
1725asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1726 unsigned flags)
1da177e4
LT
1727{
1728 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1729}
1730
1731/*
1732 * Set a socket option. Because we don't know the option lengths we have
1733 * to pass the user mode parameter for the protocols to sort out.
1734 */
1735
89bddce5
SH
1736asmlinkage long sys_setsockopt(int fd, int level, int optname,
1737 char __user *optval, int optlen)
1da177e4 1738{
6cb153ca 1739 int err, fput_needed;
1da177e4
LT
1740 struct socket *sock;
1741
1742 if (optlen < 0)
1743 return -EINVAL;
89bddce5
SH
1744
1745 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1746 if (sock != NULL) {
1747 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1748 if (err)
1749 goto out_put;
1da177e4
LT
1750
1751 if (level == SOL_SOCKET)
89bddce5
SH
1752 err =
1753 sock_setsockopt(sock, level, optname, optval,
1754 optlen);
1da177e4 1755 else
89bddce5
SH
1756 err =
1757 sock->ops->setsockopt(sock, level, optname, optval,
1758 optlen);
6cb153ca
BL
1759out_put:
1760 fput_light(sock->file, fput_needed);
1da177e4
LT
1761 }
1762 return err;
1763}
1764
1765/*
1766 * Get a socket option. Because we don't know the option lengths we have
1767 * to pass a user mode parameter for the protocols to sort out.
1768 */
1769
89bddce5
SH
1770asmlinkage long sys_getsockopt(int fd, int level, int optname,
1771 char __user *optval, int __user *optlen)
1da177e4 1772{
6cb153ca 1773 int err, fput_needed;
1da177e4
LT
1774 struct socket *sock;
1775
89bddce5
SH
1776 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1777 if (sock != NULL) {
6cb153ca
BL
1778 err = security_socket_getsockopt(sock, level, optname);
1779 if (err)
1780 goto out_put;
1da177e4
LT
1781
1782 if (level == SOL_SOCKET)
89bddce5
SH
1783 err =
1784 sock_getsockopt(sock, level, optname, optval,
1785 optlen);
1da177e4 1786 else
89bddce5
SH
1787 err =
1788 sock->ops->getsockopt(sock, level, optname, optval,
1789 optlen);
6cb153ca
BL
1790out_put:
1791 fput_light(sock->file, fput_needed);
1da177e4
LT
1792 }
1793 return err;
1794}
1795
1da177e4
LT
1796/*
1797 * Shutdown a socket.
1798 */
1799
1800asmlinkage long sys_shutdown(int fd, int how)
1801{
6cb153ca 1802 int err, fput_needed;
1da177e4
LT
1803 struct socket *sock;
1804
89bddce5
SH
1805 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1806 if (sock != NULL) {
1da177e4 1807 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1808 if (!err)
1809 err = sock->ops->shutdown(sock, how);
1810 fput_light(sock->file, fput_needed);
1da177e4
LT
1811 }
1812 return err;
1813}
1814
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These expand in the caller's scope: they read a local named 'flags' and,
 * for an argument 'msg', a local named 'msg_compat' (via msg##_compat).
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1821
1da177e4
LT
1822/*
1823 * BSD sendmsg interface
1824 */
1825
1826asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1827{
89bddce5
SH
1828 struct compat_msghdr __user *msg_compat =
1829 (struct compat_msghdr __user *)msg;
1da177e4 1830 struct socket *sock;
230b1839 1831 struct sockaddr_storage address;
1da177e4 1832 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1833 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1834 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1835 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1836 unsigned char *ctl_buf = ctl;
1837 struct msghdr msg_sys;
1838 int err, ctl_len, iov_size, total_len;
6cb153ca 1839 int fput_needed;
89bddce5 1840
1da177e4
LT
1841 err = -EFAULT;
1842 if (MSG_CMSG_COMPAT & flags) {
1843 if (get_compat_msghdr(&msg_sys, msg_compat))
1844 return -EFAULT;
89bddce5
SH
1845 }
1846 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1847 return -EFAULT;
1848
6cb153ca 1849 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1850 if (!sock)
1da177e4
LT
1851 goto out;
1852
1853 /* do not move before msg_sys is valid */
1854 err = -EMSGSIZE;
1855 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1856 goto out_put;
1857
89bddce5 1858 /* Check whether to allocate the iovec area */
1da177e4
LT
1859 err = -ENOMEM;
1860 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1861 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1862 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1863 if (!iov)
1864 goto out_put;
1865 }
1866
1867 /* This will also move the address data into kernel space */
1868 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1869 err = verify_compat_iovec(&msg_sys, iov,
1870 (struct sockaddr *)&address,
1871 VERIFY_READ);
1da177e4 1872 } else
230b1839
YH
1873 err = verify_iovec(&msg_sys, iov,
1874 (struct sockaddr *)&address,
1875 VERIFY_READ);
89bddce5 1876 if (err < 0)
1da177e4
LT
1877 goto out_freeiov;
1878 total_len = err;
1879
1880 err = -ENOBUFS;
1881
1882 if (msg_sys.msg_controllen > INT_MAX)
1883 goto out_freeiov;
89bddce5 1884 ctl_len = msg_sys.msg_controllen;
1da177e4 1885 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1886 err =
1887 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1888 sizeof(ctl));
1da177e4
LT
1889 if (err)
1890 goto out_freeiov;
1891 ctl_buf = msg_sys.msg_control;
8920e8f9 1892 ctl_len = msg_sys.msg_controllen;
1da177e4 1893 } else if (ctl_len) {
89bddce5 1894 if (ctl_len > sizeof(ctl)) {
1da177e4 1895 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1896 if (ctl_buf == NULL)
1da177e4
LT
1897 goto out_freeiov;
1898 }
1899 err = -EFAULT;
1900 /*
1901 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1902 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1903 * checking falls down on this.
1904 */
89bddce5
SH
1905 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1906 ctl_len))
1da177e4
LT
1907 goto out_freectl;
1908 msg_sys.msg_control = ctl_buf;
1909 }
1910 msg_sys.msg_flags = flags;
1911
1912 if (sock->file->f_flags & O_NONBLOCK)
1913 msg_sys.msg_flags |= MSG_DONTWAIT;
1914 err = sock_sendmsg(sock, &msg_sys, total_len);
1915
1916out_freectl:
89bddce5 1917 if (ctl_buf != ctl)
1da177e4
LT
1918 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1919out_freeiov:
1920 if (iov != iovstack)
1921 sock_kfree_s(sock->sk, iov, iov_size);
1922out_put:
6cb153ca 1923 fput_light(sock->file, fput_needed);
89bddce5 1924out:
1da177e4
LT
1925 return err;
1926}
1927
1928/*
1929 * BSD recvmsg interface
1930 */
1931
89bddce5
SH
1932asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1933 unsigned int flags)
1da177e4 1934{
89bddce5
SH
1935 struct compat_msghdr __user *msg_compat =
1936 (struct compat_msghdr __user *)msg;
1da177e4
LT
1937 struct socket *sock;
1938 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1939 struct iovec *iov = iovstack;
1da177e4
LT
1940 struct msghdr msg_sys;
1941 unsigned long cmsg_ptr;
1942 int err, iov_size, total_len, len;
6cb153ca 1943 int fput_needed;
1da177e4
LT
1944
1945 /* kernel mode address */
230b1839 1946 struct sockaddr_storage addr;
1da177e4
LT
1947
1948 /* user mode address pointers */
1949 struct sockaddr __user *uaddr;
1950 int __user *uaddr_len;
89bddce5 1951
1da177e4
LT
1952 if (MSG_CMSG_COMPAT & flags) {
1953 if (get_compat_msghdr(&msg_sys, msg_compat))
1954 return -EFAULT;
89bddce5
SH
1955 }
1956 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1957 return -EFAULT;
1da177e4 1958
6cb153ca 1959 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1960 if (!sock)
1961 goto out;
1962
1963 err = -EMSGSIZE;
1964 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1965 goto out_put;
89bddce5
SH
1966
1967 /* Check whether to allocate the iovec area */
1da177e4
LT
1968 err = -ENOMEM;
1969 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1970 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1971 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1972 if (!iov)
1973 goto out_put;
1974 }
1975
1976 /*
89bddce5
SH
1977 * Save the user-mode address (verify_iovec will change the
1978 * kernel msghdr to use the kernel address space)
1da177e4 1979 */
89bddce5 1980
cfcabdcc 1981 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1982 uaddr_len = COMPAT_NAMELEN(msg);
1983 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1984 err = verify_compat_iovec(&msg_sys, iov,
1985 (struct sockaddr *)&addr,
1986 VERIFY_WRITE);
1da177e4 1987 } else
230b1839
YH
1988 err = verify_iovec(&msg_sys, iov,
1989 (struct sockaddr *)&addr,
1990 VERIFY_WRITE);
1da177e4
LT
1991 if (err < 0)
1992 goto out_freeiov;
89bddce5 1993 total_len = err;
1da177e4
LT
1994
1995 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1996 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1997
1da177e4
LT
1998 if (sock->file->f_flags & O_NONBLOCK)
1999 flags |= MSG_DONTWAIT;
2000 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
2001 if (err < 0)
2002 goto out_freeiov;
2003 len = err;
2004
2005 if (uaddr != NULL) {
230b1839
YH
2006 err = move_addr_to_user((struct sockaddr *)&addr,
2007 msg_sys.msg_namelen, uaddr,
89bddce5 2008 uaddr_len);
1da177e4
LT
2009 if (err < 0)
2010 goto out_freeiov;
2011 }
37f7f421
DM
2012 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
2013 COMPAT_FLAGS(msg));
1da177e4
LT
2014 if (err)
2015 goto out_freeiov;
2016 if (MSG_CMSG_COMPAT & flags)
89bddce5 2017 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
2018 &msg_compat->msg_controllen);
2019 else
89bddce5 2020 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
2021 &msg->msg_controllen);
2022 if (err)
2023 goto out_freeiov;
2024 err = len;
2025
2026out_freeiov:
2027 if (iov != iovstack)
2028 sock_kfree_s(sock->sk, iov, iov_size);
2029out_put:
6cb153ca 2030 fput_light(sock->file, fput_needed);
1da177e4
LT
2031out:
2032 return err;
2033}
2034
2035#ifdef __ARCH_WANT_SYS_SOCKETCALL
2036
/*
 * Argument list sizes for sys_socketcall.
 *
 * nargs[call] is the number of bytes of arguments to copy from user space
 * for socketcall number 'call': the argument count times
 * sizeof(unsigned long).  Slot 0 is a placeholder; sys_socketcall rejects
 * call numbers below 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[19] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
};

#undef AL
2047
2048/*
89bddce5 2049 * System call vectors.
1da177e4
LT
2050 *
2051 * Argument checking cleaned up. Saved 20% in size.
2052 * This function doesn't need to set the kernel lock because
89bddce5 2053 * it is set by the callees.
1da177e4
LT
2054 */
2055
2056asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2057{
2058 unsigned long a[6];
89bddce5 2059 unsigned long a0, a1;
1da177e4
LT
2060 int err;
2061
de11defe 2062 if (call < 1 || call > SYS_ACCEPT4)
1da177e4
LT
2063 return -EINVAL;
2064
2065 /* copy_from_user should be SMP safe. */
2066 if (copy_from_user(a, args, nargs[call]))
2067 return -EFAULT;
3ec3b2fb 2068
89bddce5 2069 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2070 if (err)
2071 return err;
2072
89bddce5
SH
2073 a0 = a[0];
2074 a1 = a[1];
2075
2076 switch (call) {
2077 case SYS_SOCKET:
2078 err = sys_socket(a0, a1, a[2]);
2079 break;
2080 case SYS_BIND:
2081 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2082 break;
2083 case SYS_CONNECT:
2084 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2085 break;
2086 case SYS_LISTEN:
2087 err = sys_listen(a0, a1);
2088 break;
2089 case SYS_ACCEPT:
de11defe
UD
2090 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2091 (int __user *)a[2], 0);
89bddce5
SH
2092 break;
2093 case SYS_GETSOCKNAME:
2094 err =
2095 sys_getsockname(a0, (struct sockaddr __user *)a1,
2096 (int __user *)a[2]);
2097 break;
2098 case SYS_GETPEERNAME:
2099 err =
2100 sys_getpeername(a0, (struct sockaddr __user *)a1,
2101 (int __user *)a[2]);
2102 break;
2103 case SYS_SOCKETPAIR:
2104 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2105 break;
2106 case SYS_SEND:
2107 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2108 break;
2109 case SYS_SENDTO:
2110 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2111 (struct sockaddr __user *)a[4], a[5]);
2112 break;
2113 case SYS_RECV:
2114 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2115 break;
2116 case SYS_RECVFROM:
2117 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2118 (struct sockaddr __user *)a[4],
2119 (int __user *)a[5]);
2120 break;
2121 case SYS_SHUTDOWN:
2122 err = sys_shutdown(a0, a1);
2123 break;
2124 case SYS_SETSOCKOPT:
2125 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2126 break;
2127 case SYS_GETSOCKOPT:
2128 err =
2129 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2130 (int __user *)a[4]);
2131 break;
2132 case SYS_SENDMSG:
2133 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2134 break;
2135 case SYS_RECVMSG:
2136 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2137 break;
de11defe
UD
2138 case SYS_ACCEPT4:
2139 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2140 (int __user *)a[2], a[3]);
aaca0bdc 2141 break;
89bddce5
SH
2142 default:
2143 err = -EINVAL;
2144 break;
1da177e4
LT
2145 }
2146 return err;
2147}
2148
89bddce5 2149#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2150
55737fda
SH
2151/**
2152 * sock_register - add a socket protocol handler
2153 * @ops: description of protocol
2154 *
1da177e4
LT
2155 * This function is called by a protocol handler that wants to
2156 * advertise its address family, and have it linked into the
55737fda
SH
2157 * socket interface. The value ops->family coresponds to the
2158 * socket system call protocol family.
1da177e4 2159 */
f0fd27d4 2160int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2161{
2162 int err;
2163
2164 if (ops->family >= NPROTO) {
89bddce5
SH
2165 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2166 NPROTO);
1da177e4
LT
2167 return -ENOBUFS;
2168 }
55737fda
SH
2169
2170 spin_lock(&net_family_lock);
2171 if (net_families[ops->family])
2172 err = -EEXIST;
2173 else {
89bddce5 2174 net_families[ops->family] = ops;
1da177e4
LT
2175 err = 0;
2176 }
55737fda
SH
2177 spin_unlock(&net_family_lock);
2178
89bddce5 2179 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2180 return err;
2181}
2182
55737fda
SH
2183/**
2184 * sock_unregister - remove a protocol handler
2185 * @family: protocol family to remove
2186 *
1da177e4
LT
2187 * This function is called by a protocol handler that wants to
2188 * remove its address family, and have it unlinked from the
55737fda
SH
2189 * new socket creation.
2190 *
2191 * If protocol handler is a module, then it can use module reference
2192 * counts to protect against new references. If protocol handler is not
2193 * a module then it needs to provide its own protection in
2194 * the ops->create routine.
1da177e4 2195 */
f0fd27d4 2196void sock_unregister(int family)
1da177e4 2197{
f0fd27d4 2198 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2199
55737fda 2200 spin_lock(&net_family_lock);
89bddce5 2201 net_families[family] = NULL;
55737fda
SH
2202 spin_unlock(&net_family_lock);
2203
2204 synchronize_rcu();
2205
89bddce5 2206 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2207}
2208
77d76ea3 2209static int __init sock_init(void)
1da177e4
LT
2210{
2211 /*
89bddce5 2212 * Initialize sock SLAB cache.
1da177e4 2213 */
89bddce5 2214
1da177e4
LT
2215 sk_init();
2216
1da177e4 2217 /*
89bddce5 2218 * Initialize skbuff SLAB cache
1da177e4
LT
2219 */
2220 skb_init();
1da177e4
LT
2221
2222 /*
89bddce5 2223 * Initialize the protocols module.
1da177e4
LT
2224 */
2225
2226 init_inodecache();
2227 register_filesystem(&sock_fs_type);
2228 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2229
2230 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2231 */
2232
2233#ifdef CONFIG_NETFILTER
2234 netfilter_init();
2235#endif
cbeb321a
DM
2236
2237 return 0;
1da177e4
LT
2238}
2239
77d76ea3
AK
2240core_initcall(sock_init); /* early initcall */
2241
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the number of sockets in use to @seq.  The figure is the sum of
 * the per-CPU sockets_in_use counters over every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2258
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for sockets.
 *
 * The request is first offered to the protocol's compat_ioctl hook, if
 * it has one.  When that leaves the command unhandled (-ENOIOCTLCMD)
 * and the command lies in the SIOCIWFIRST..SIOCIWLAST range, it is
 * passed on to compat_wext_handle_ioctl().  Otherwise the result so far
 * is returned, which is -ENOIOCTLCMD when no hook exists.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	struct net *net = sock_net(sock->sk);
	int ret = -ENOIOCTLCMD;

	if (sock->ops->compat_ioctl)
		ret = sock->ops->compat_ioctl(sock, cmd, arg);

	if (ret != -ENOIOCTLCMD)
		return ret;

	if (cmd < SIOCIWFIRST || cmd > SIOCIWLAST)
		return ret;

	ret = compat_wext_handle_ioctl(net, cmd, arg);
	return ret;
}
#endif
2281
ac5a488e
SS
2282int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2283{
2284 return sock->ops->bind(sock, addr, addrlen);
2285}
2286
2287int kernel_listen(struct socket *sock, int backlog)
2288{
2289 return sock->ops->listen(sock, backlog);
2290}
2291
2292int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2293{
2294 struct sock *sk = sock->sk;
2295 int err;
2296
2297 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2298 newsock);
2299 if (err < 0)
2300 goto done;
2301
2302 err = sock->ops->accept(sock, *newsock, flags);
2303 if (err < 0) {
2304 sock_release(*newsock);
fa8705b0 2305 *newsock = NULL;
ac5a488e
SS
2306 goto done;
2307 }
2308
2309 (*newsock)->ops = sock->ops;
2310
2311done:
2312 return err;
2313}
2314
2315int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2316 int flags)
ac5a488e
SS
2317{
2318 return sock->ops->connect(sock, addr, addrlen, flags);
2319}
2320
2321int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2322 int *addrlen)
2323{
2324 return sock->ops->getname(sock, addr, addrlen, 0);
2325}
2326
2327int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2328 int *addrlen)
2329{
2330 return sock->ops->getname(sock, addr, addrlen, 1);
2331}
2332
2333int kernel_getsockopt(struct socket *sock, int level, int optname,
2334 char *optval, int *optlen)
2335{
2336 mm_segment_t oldfs = get_fs();
2337 int err;
2338
2339 set_fs(KERNEL_DS);
2340 if (level == SOL_SOCKET)
2341 err = sock_getsockopt(sock, level, optname, optval, optlen);
2342 else
2343 err = sock->ops->getsockopt(sock, level, optname, optval,
2344 optlen);
2345 set_fs(oldfs);
2346 return err;
2347}
2348
2349int kernel_setsockopt(struct socket *sock, int level, int optname,
2350 char *optval, int optlen)
2351{
2352 mm_segment_t oldfs = get_fs();
2353 int err;
2354
2355 set_fs(KERNEL_DS);
2356 if (level == SOL_SOCKET)
2357 err = sock_setsockopt(sock, level, optname, optval, optlen);
2358 else
2359 err = sock->ops->setsockopt(sock, level, optname, optval,
2360 optlen);
2361 set_fs(oldfs);
2362 return err;
2363}
2364
2365int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2366 size_t size, int flags)
2367{
2368 if (sock->ops->sendpage)
2369 return sock->ops->sendpage(sock, page, offset, size, flags);
2370
2371 return sock_no_sendpage(sock, page, offset, size, flags);
2372}
2373
2374int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2375{
2376 mm_segment_t oldfs = get_fs();
2377 int err;
2378
2379 set_fs(KERNEL_DS);
2380 err = sock->ops->ioctl(sock, cmd, arg);
2381 set_fs(oldfs);
2382
2383 return err;
2384}
2385
91cf45f0
TM
2386int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2387{
2388 return sock->ops->shutdown(sock, how);
2389}
2390
1da177e4
LT
/* Socket creation, registration and core helpers. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* In-kernel socket API (kernel_* wrappers). */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);