vxlan: use htonl when snooping for loopback address
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1fd7317d 88#include <linux/magic.h>
5a0e3ad6 89#include <linux/slab.h>
600e1779 90#include <linux/xattr.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
/*
 *	The protocol list. Each protocol is registered in here.
 *
 *	net_families[] has NPROTO slots, each an RCU-annotated pointer to a
 *	struct net_proto_family. net_family_lock is the spinlock declared
 *	alongside it; presumably it serialises updates to the table --
 *	NOTE(review): confirm against the registration code, which is not
 *	in view here.
 */

static DEFINE_SPINLOCK(net_family_lock);
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
/*
 *	Statistics counters of the socket lists
 *
 *	Per-CPU counter: sock_alloc() adds one and sock_release() subtracts
 *	one, each on whichever CPU it happens to run.
 */

static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
43db362d 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
43db362d 212static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
/*
 * Slab cache for struct socket_alloc objects. Created by init_inodecache();
 * sock_alloc_inode() allocates from it and sock_destroy_inode() frees to it.
 */
static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
/*
 * Dentry operations for sockfs: only the name is special, and it is
 * generated by sockfs_dname().
 */
static const struct dentry_operations sockfs_dentry_operations = {
	.d_dname  = sockfs_dname,
};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and maps them to fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
aab174f0 349struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354
600e1779
MY
355 if (dname) {
356 name.name = dname;
357 name.len = strlen(name.name);
358 } else if (sock->sk) {
359 name.name = sock->sk->sk_prot_creator->name;
360 name.len = strlen(name.name);
361 }
4b936885 362 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
363 if (unlikely(!path.dentry))
364 return ERR_PTR(-ENOMEM);
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
39b65252 372 if (unlikely(IS_ERR(file))) {
cc3808f8 373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
39b65252 376 return file;
cc3808f8
AV
377 }
378
379 sock->file = file;
77d27200 380 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 381 file->private_data = sock;
28407630 382 return file;
39d8c1b6 383}
56b31d1c 384EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 385
56b31d1c 386static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
387{
388 struct file *newfile;
28407630
AV
389 int fd = get_unused_fd_flags(flags);
390 if (unlikely(fd < 0))
391 return fd;
39d8c1b6 392
aab174f0 393 newfile = sock_alloc_file(sock, flags, NULL);
28407630 394 if (likely(!IS_ERR(newfile))) {
39d8c1b6 395 fd_install(fd, newfile);
28407630
AV
396 return fd;
397 }
7cbe66b6 398
28407630
AV
399 put_unused_fd(fd);
400 return PTR_ERR(newfile);
1da177e4
LT
401}
402
406a3c63 403struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 404{
6cb153ca
BL
405 if (file->f_op == &socket_file_ops)
406 return file->private_data; /* set in sock_map_fd */
407
23bb80d2
ED
408 *err = -ENOTSOCK;
409 return NULL;
6cb153ca 410}
406a3c63 411EXPORT_SYMBOL(sock_from_file);
6cb153ca 412
1da177e4 413/**
c6d409cf 414 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
415 * @fd: file handle
416 * @err: pointer to an error code return
417 *
418 * The file handle passed in is locked and the socket it is bound
419 * too is returned. If an error occurs the err pointer is overwritten
420 * with a negative errno code and NULL is returned. The function checks
421 * for both invalid handles and passing a handle which is not a socket.
422 *
423 * On a success the socket object pointer is returned.
424 */
425
426struct socket *sockfd_lookup(int fd, int *err)
427{
428 struct file *file;
1da177e4
LT
429 struct socket *sock;
430
89bddce5
SH
431 file = fget(fd);
432 if (!file) {
1da177e4
LT
433 *err = -EBADF;
434 return NULL;
435 }
89bddce5 436
6cb153ca
BL
437 sock = sock_from_file(file, err);
438 if (!sock)
1da177e4 439 fput(file);
6cb153ca
BL
440 return sock;
441}
c6d409cf 442EXPORT_SYMBOL(sockfd_lookup);
1da177e4 443
6cb153ca
BL
444static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
445{
446 struct file *file;
447 struct socket *sock;
448
3672558c 449 *err = -EBADF;
6cb153ca
BL
450 file = fget_light(fd, fput_needed);
451 if (file) {
452 sock = sock_from_file(file, err);
453 if (sock)
454 return sock;
455 fput_light(file, *fput_needed);
1da177e4 456 }
6cb153ca 457 return NULL;
1da177e4
LT
458}
459
600e1779
MY
460#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
461#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
462#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
463static ssize_t sockfs_getxattr(struct dentry *dentry,
464 const char *name, void *value, size_t size)
465{
466 const char *proto_name;
467 size_t proto_size;
468 int error;
469
470 error = -ENODATA;
471 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
472 proto_name = dentry->d_name.name;
473 proto_size = strlen(proto_name);
474
475 if (value) {
476 error = -ERANGE;
477 if (proto_size + 1 > size)
478 goto out;
479
480 strncpy(value, proto_name, proto_size + 1);
481 }
482 error = proto_size + 1;
483 }
484
485out:
486 return error;
487}
488
489static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
490 size_t size)
491{
492 ssize_t len;
493 ssize_t used = 0;
494
495 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
496 if (len < 0)
497 return len;
498 used += len;
499 if (buffer) {
500 if (size < used)
501 return -ERANGE;
502 buffer += len;
503 }
504
505 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
506 used += len;
507 if (buffer) {
508 if (size < used)
509 return -ERANGE;
510 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
511 buffer += len;
512 }
513
514 return used;
515}
516
517static const struct inode_operations sockfs_inode_ops = {
518 .getxattr = sockfs_getxattr,
519 .listxattr = sockfs_listxattr,
520};
521
1da177e4
LT
522/**
523 * sock_alloc - allocate a socket
89bddce5 524 *
1da177e4
LT
525 * Allocate a new inode and socket object. The two are bound together
526 * and initialised. The socket is then returned. If we are out of inodes
527 * NULL is returned.
528 */
529
530static struct socket *sock_alloc(void)
531{
89bddce5
SH
532 struct inode *inode;
533 struct socket *sock;
1da177e4 534
a209dfc7 535 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
536 if (!inode)
537 return NULL;
538
539 sock = SOCKET_I(inode);
540
29a020d3 541 kmemcheck_annotate_bitfield(sock, type);
85fe4025 542 inode->i_ino = get_next_ino();
89bddce5 543 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
544 inode->i_uid = current_fsuid();
545 inode->i_gid = current_fsgid();
600e1779 546 inode->i_op = &sockfs_inode_ops;
1da177e4 547
19e8d69c 548 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
549 return sock;
550}
551
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt fails with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	return -ENXIO;
}
562
4b6f5d20 563const struct file_operations bad_sock_fops = {
1da177e4
LT
564 .owner = THIS_MODULE,
565 .open = sock_no_open,
6038f373 566 .llseek = noop_llseek,
1da177e4
LT
567};
568
569/**
570 * sock_release - close a socket
571 * @sock: socket to close
572 *
573 * The socket is released from the protocol stack if it has a release
574 * callback, and the inode is then released if the socket is bound to
89bddce5 575 * an inode not a file.
1da177e4 576 */
89bddce5 577
1da177e4
LT
578void sock_release(struct socket *sock)
579{
580 if (sock->ops) {
581 struct module *owner = sock->ops->owner;
582
583 sock->ops->release(sock);
584 sock->ops = NULL;
585 module_put(owner);
586 }
587
eaefd110 588 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
589 printk(KERN_ERR "sock_release: fasync list not empty!\n");
590
b09e786b
MP
591 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
592 return;
593
19e8d69c 594 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
595 if (!sock->file) {
596 iput(SOCK_INODE(sock));
597 return;
598 }
89bddce5 599 sock->file = NULL;
1da177e4 600}
c6d409cf 601EXPORT_SYMBOL(sock_release);
1da177e4 602
2244d07b 603int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 604{
2244d07b 605 *tx_flags = 0;
20d49473 606 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 607 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 608 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 609 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
610 if (sock_flag(sk, SOCK_WIFI_STATUS))
611 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
612 return 0;
613}
614EXPORT_SYMBOL(sock_tx_timestamp);
615
228e548e
AB
616static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
617 struct msghdr *msg, size_t size)
1da177e4
LT
618{
619 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4
LT
620
621 si->sock = sock;
622 si->scm = NULL;
623 si->msg = msg;
624 si->size = size;
625
1da177e4
LT
626 return sock->ops->sendmsg(iocb, sock, msg, size);
627}
628
228e548e
AB
629static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
630 struct msghdr *msg, size_t size)
631{
632 int err = security_socket_sendmsg(sock, msg, size);
633
634 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
635}
636
1da177e4
LT
637int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
638{
639 struct kiocb iocb;
640 struct sock_iocb siocb;
641 int ret;
642
643 init_sync_kiocb(&iocb, NULL);
644 iocb.private = &siocb;
645 ret = __sock_sendmsg(&iocb, sock, msg, size);
646 if (-EIOCBQUEUED == ret)
647 ret = wait_on_sync_kiocb(&iocb);
648 return ret;
649}
c6d409cf 650EXPORT_SYMBOL(sock_sendmsg);
1da177e4 651
894dc24c 652static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
653{
654 struct kiocb iocb;
655 struct sock_iocb siocb;
656 int ret;
657
658 init_sync_kiocb(&iocb, NULL);
659 iocb.private = &siocb;
660 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
661 if (-EIOCBQUEUED == ret)
662 ret = wait_on_sync_kiocb(&iocb);
663 return ret;
664}
665
1da177e4
LT
666int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
667 struct kvec *vec, size_t num, size_t size)
668{
669 mm_segment_t oldfs = get_fs();
670 int result;
671
672 set_fs(KERNEL_DS);
673 /*
674 * the following is safe, since for compiler definitions of kvec and
675 * iovec are identical, yielding the same in-core layout and alignment
676 */
89bddce5 677 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
678 msg->msg_iovlen = num;
679 result = sock_sendmsg(sock, msg, size);
680 set_fs(oldfs);
681 return result;
682}
c6d409cf 683EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 684
20d49473
PO
685static int ktime2ts(ktime_t kt, struct timespec *ts)
686{
687 if (kt.tv64) {
688 *ts = ktime_to_timespec(kt);
689 return 1;
690 } else {
691 return 0;
692 }
693}
694
92f37fd2
ED
695/*
696 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
697 */
698void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
699 struct sk_buff *skb)
700{
20d49473
PO
701 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
702 struct timespec ts[3];
703 int empty = 1;
704 struct skb_shared_hwtstamps *shhwtstamps =
705 skb_hwtstamps(skb);
706
707 /* Race occurred between timestamp enabling and packet
708 receiving. Fill in the current time for now. */
709 if (need_software_tstamp && skb->tstamp.tv64 == 0)
710 __net_timestamp(skb);
711
712 if (need_software_tstamp) {
713 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
714 struct timeval tv;
715 skb_get_timestamp(skb, &tv);
716 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
717 sizeof(tv), &tv);
718 } else {
842509b8 719 skb_get_timestampns(skb, &ts[0]);
20d49473 720 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 721 sizeof(ts[0]), &ts[0]);
20d49473
PO
722 }
723 }
724
725
726 memset(ts, 0, sizeof(ts));
727 if (skb->tstamp.tv64 &&
728 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
729 skb_get_timestampns(skb, ts + 0);
730 empty = 0;
731 }
732 if (shhwtstamps) {
733 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
734 ktime2ts(shhwtstamps->syststamp, ts + 1))
735 empty = 0;
736 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
737 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
738 empty = 0;
92f37fd2 739 }
20d49473
PO
740 if (!empty)
741 put_cmsg(msg, SOL_SOCKET,
742 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 743}
7c81fd8b
ACM
744EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
745
6e3e939f
JB
746void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
747 struct sk_buff *skb)
748{
749 int ack;
750
751 if (!sock_flag(sk, SOCK_WIFI_STATUS))
752 return;
753 if (!skb->wifi_acked_valid)
754 return;
755
756 ack = skb->wifi_acked;
757
758 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
759}
760EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
761
11165f14 762static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
763 struct sk_buff *skb)
3b885787
NH
764{
765 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
766 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
767 sizeof(__u32), &skb->dropcount);
768}
769
/*
 * Convenience wrapper: emit the timestamp control messages via
 * sock_recv_timestamp() and then the drop-count control message via
 * sock_recv_drops() for @skb.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 777
a2e27255
ACM
778static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
779 struct msghdr *msg, size_t size, int flags)
1da177e4 780{
1da177e4
LT
781 struct sock_iocb *si = kiocb_to_siocb(iocb);
782
783 si->sock = sock;
784 si->scm = NULL;
785 si->msg = msg;
786 si->size = size;
787 si->flags = flags;
788
1da177e4
LT
789 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
790}
791
a2e27255
ACM
792static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
793 struct msghdr *msg, size_t size, int flags)
794{
795 int err = security_socket_recvmsg(sock, msg, size, flags);
796
797 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
798}
799
89bddce5 800int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
801 size_t size, int flags)
802{
803 struct kiocb iocb;
804 struct sock_iocb siocb;
805 int ret;
806
89bddce5 807 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
808 iocb.private = &siocb;
809 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
810 if (-EIOCBQUEUED == ret)
811 ret = wait_on_sync_kiocb(&iocb);
812 return ret;
813}
c6d409cf 814EXPORT_SYMBOL(sock_recvmsg);
1da177e4 815
a2e27255
ACM
816static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
817 size_t size, int flags)
818{
819 struct kiocb iocb;
820 struct sock_iocb siocb;
821 int ret;
822
823 init_sync_kiocb(&iocb, NULL);
824 iocb.private = &siocb;
825 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
826 if (-EIOCBQUEUED == ret)
827 ret = wait_on_sync_kiocb(&iocb);
828 return ret;
829}
830
c1249c0a
ML
831/**
832 * kernel_recvmsg - Receive a message from a socket (kernel space)
833 * @sock: The socket to receive the message from
834 * @msg: Received message
835 * @vec: Input s/g array for message data
836 * @num: Size of input s/g array
837 * @size: Number of bytes to read
838 * @flags: Message flags (MSG_DONTWAIT, etc...)
839 *
840 * On return the msg structure contains the scatter/gather array passed in the
841 * vec argument. The array is modified so that it consists of the unfilled
842 * portion of the original array.
843 *
844 * The returned value is the total number of bytes received, or an error.
845 */
89bddce5
SH
846int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
847 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
848{
849 mm_segment_t oldfs = get_fs();
850 int result;
851
852 set_fs(KERNEL_DS);
853 /*
854 * the following is safe, since for compiler definitions of kvec and
855 * iovec are identical, yielding the same in-core layout and alignment
856 */
89bddce5 857 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
858 result = sock_recvmsg(sock, msg, size, flags);
859 set_fs(oldfs);
860 return result;
861}
c6d409cf 862EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
863
/*
 * kiocb destructor installed by alloc_sock_iocb() for non-sync requests:
 * frees the sock_iocb that was kmalloc'ed there and stored in iocb->private.
 */
static void sock_aio_dtor(struct kiocb *iocb)
{
	kfree(iocb->private);
}
868
ce1d4d3e
CH
869static ssize_t sock_sendpage(struct file *file, struct page *page,
870 int offset, size_t size, loff_t *ppos, int more)
1da177e4 871{
1da177e4
LT
872 struct socket *sock;
873 int flags;
874
ce1d4d3e
CH
875 sock = file->private_data;
876
35f9c09f
ED
877 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
878 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
879 flags |= more;
ce1d4d3e 880
e6949583 881 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 882}
1da177e4 883
9c55e01c 884static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 885 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
886 unsigned int flags)
887{
888 struct socket *sock = file->private_data;
889
997b37da
RDC
890 if (unlikely(!sock->ops->splice_read))
891 return -EINVAL;
892
9c55e01c
JA
893 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
894}
895
ce1d4d3e 896static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 897 struct sock_iocb *siocb)
ce1d4d3e
CH
898{
899 if (!is_sync_kiocb(iocb)) {
900 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
901 if (!siocb)
902 return NULL;
1da177e4
LT
903 iocb->ki_dtor = sock_aio_dtor;
904 }
1da177e4 905
ce1d4d3e 906 siocb->kiocb = iocb;
ce1d4d3e
CH
907 iocb->private = siocb;
908 return siocb;
1da177e4
LT
909}
910
ce1d4d3e 911static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
912 struct file *file, const struct iovec *iov,
913 unsigned long nr_segs)
ce1d4d3e
CH
914{
915 struct socket *sock = file->private_data;
916 size_t size = 0;
917 int i;
1da177e4 918
89bddce5
SH
919 for (i = 0; i < nr_segs; i++)
920 size += iov[i].iov_len;
1da177e4 921
ce1d4d3e
CH
922 msg->msg_name = NULL;
923 msg->msg_namelen = 0;
924 msg->msg_control = NULL;
925 msg->msg_controllen = 0;
89bddce5 926 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
927 msg->msg_iovlen = nr_segs;
928 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
929
930 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
931}
932
027445c3
BP
933static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
934 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
935{
936 struct sock_iocb siocb, *x;
937
1da177e4
LT
938 if (pos != 0)
939 return -ESPIPE;
027445c3
BP
940
941 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
942 return 0;
943
027445c3
BP
944
945 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
946 if (!x)
947 return -ENOMEM;
027445c3 948 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
949}
950
ce1d4d3e 951static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
952 struct file *file, const struct iovec *iov,
953 unsigned long nr_segs)
1da177e4 954{
ce1d4d3e
CH
955 struct socket *sock = file->private_data;
956 size_t size = 0;
957 int i;
1da177e4 958
89bddce5
SH
959 for (i = 0; i < nr_segs; i++)
960 size += iov[i].iov_len;
1da177e4 961
ce1d4d3e
CH
962 msg->msg_name = NULL;
963 msg->msg_namelen = 0;
964 msg->msg_control = NULL;
965 msg->msg_controllen = 0;
89bddce5 966 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
967 msg->msg_iovlen = nr_segs;
968 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
969 if (sock->type == SOCK_SEQPACKET)
970 msg->msg_flags |= MSG_EOR;
1da177e4 971
ce1d4d3e 972 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
973}
974
027445c3
BP
975static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
976 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
977{
978 struct sock_iocb siocb, *x;
1da177e4 979
ce1d4d3e
CH
980 if (pos != 0)
981 return -ESPIPE;
027445c3 982
027445c3 983 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
984 if (!x)
985 return -ENOMEM;
1da177e4 986
027445c3 987 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
988}
989
1da177e4
LT
/*
 * Atomic setting of ioctl hooks to avoid race
 * with module unload.
 */

/* Mutex taken by brioctl_set() around every update of br_ioctl_hook. */
static DEFINE_MUTEX(br_ioctl_mutex);
/* Bridge ioctl handler; NULL until a hook is installed via brioctl_set(). */
static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);

/*
 * Install @hook as the bridge ioctl handler. The store is done with
 * br_ioctl_mutex held. NOTE(review): callers presumably pass NULL to remove
 * the hook on module unload -- confirm against the bridge module.
 */
void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
{
	mutex_lock(&br_ioctl_mutex);
	br_ioctl_hook = hook;
	mutex_unlock(&br_ioctl_mutex);
}
EXPORT_SYMBOL(brioctl_set);
1005
4a3e2f71 1006static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1007static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1008
881d966b 1009void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1010{
4a3e2f71 1011 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1012 vlan_ioctl_hook = hook;
4a3e2f71 1013 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1014}
1015EXPORT_SYMBOL(vlan_ioctl_set);
1016
4a3e2f71 1017static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1018static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1019
89bddce5 1020void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1021{
4a3e2f71 1022 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1023 dlci_ioctl_hook = hook;
4a3e2f71 1024 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1025}
1026EXPORT_SYMBOL(dlci_ioctl_set);
1027
6b96018b
AB
1028static long sock_do_ioctl(struct net *net, struct socket *sock,
1029 unsigned int cmd, unsigned long arg)
1030{
1031 int err;
1032 void __user *argp = (void __user *)arg;
1033
1034 err = sock->ops->ioctl(sock, cmd, arg);
1035
1036 /*
1037 * If this ioctl is unknown try to hand it down
1038 * to the NIC driver.
1039 */
1040 if (err == -ENOIOCTLCMD)
1041 err = dev_ioctl(net, cmd, argp);
1042
1043 return err;
1044}
1045
1da177e4
LT
1046/*
1047 * With an ioctl, arg may well be a user mode pointer, but we don't know
1048 * what to do with it - that's up to the protocol still.
1049 */
1050
1051static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1052{
1053 struct socket *sock;
881d966b 1054 struct sock *sk;
1da177e4
LT
1055 void __user *argp = (void __user *)arg;
1056 int pid, err;
881d966b 1057 struct net *net;
1da177e4 1058
b69aee04 1059 sock = file->private_data;
881d966b 1060 sk = sock->sk;
3b1e0a65 1061 net = sock_net(sk);
1da177e4 1062 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1063 err = dev_ioctl(net, cmd, argp);
1da177e4 1064 } else
3d23e349 1065#ifdef CONFIG_WEXT_CORE
1da177e4 1066 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1067 err = dev_ioctl(net, cmd, argp);
1da177e4 1068 } else
3d23e349 1069#endif
89bddce5 1070 switch (cmd) {
1da177e4
LT
1071 case FIOSETOWN:
1072 case SIOCSPGRP:
1073 err = -EFAULT;
1074 if (get_user(pid, (int __user *)argp))
1075 break;
1076 err = f_setown(sock->file, pid, 1);
1077 break;
1078 case FIOGETOWN:
1079 case SIOCGPGRP:
609d7fa9 1080 err = put_user(f_getown(sock->file),
89bddce5 1081 (int __user *)argp);
1da177e4
LT
1082 break;
1083 case SIOCGIFBR:
1084 case SIOCSIFBR:
1085 case SIOCBRADDBR:
1086 case SIOCBRDELBR:
1087 err = -ENOPKG;
1088 if (!br_ioctl_hook)
1089 request_module("bridge");
1090
4a3e2f71 1091 mutex_lock(&br_ioctl_mutex);
89bddce5 1092 if (br_ioctl_hook)
881d966b 1093 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1094 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1095 break;
1096 case SIOCGIFVLAN:
1097 case SIOCSIFVLAN:
1098 err = -ENOPKG;
1099 if (!vlan_ioctl_hook)
1100 request_module("8021q");
1101
4a3e2f71 1102 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1103 if (vlan_ioctl_hook)
881d966b 1104 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1105 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1106 break;
1da177e4
LT
1107 case SIOCADDDLCI:
1108 case SIOCDELDLCI:
1109 err = -ENOPKG;
1110 if (!dlci_ioctl_hook)
1111 request_module("dlci");
1112
7512cbf6
PE
1113 mutex_lock(&dlci_ioctl_mutex);
1114 if (dlci_ioctl_hook)
1da177e4 1115 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1116 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1117 break;
1118 default:
6b96018b 1119 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1120 break;
89bddce5 1121 }
1da177e4
LT
1122 return err;
1123}
1124
1125int sock_create_lite(int family, int type, int protocol, struct socket **res)
1126{
1127 int err;
1128 struct socket *sock = NULL;
89bddce5 1129
1da177e4
LT
1130 err = security_socket_create(family, type, protocol, 1);
1131 if (err)
1132 goto out;
1133
1134 sock = sock_alloc();
1135 if (!sock) {
1136 err = -ENOMEM;
1137 goto out;
1138 }
1139
1da177e4 1140 sock->type = type;
7420ed23
VY
1141 err = security_socket_post_create(sock, family, type, protocol, 1);
1142 if (err)
1143 goto out_release;
1144
1da177e4
LT
1145out:
1146 *res = sock;
1147 return err;
7420ed23
VY
1148out_release:
1149 sock_release(sock);
1150 sock = NULL;
1151 goto out;
1da177e4 1152}
c6d409cf 1153EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1154
1155/* No kernel lock held - perfect */
89bddce5 1156static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1157{
1158 struct socket *sock;
1159
1160 /*
89bddce5 1161 * We can't return errors to poll, so it's either yes or no.
1da177e4 1162 */
b69aee04 1163 sock = file->private_data;
1da177e4
LT
1164 return sock->ops->poll(file, sock, wait);
1165}
1166
89bddce5 1167static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1168{
b69aee04 1169 struct socket *sock = file->private_data;
1da177e4
LT
1170
1171 return sock->ops->mmap(file, sock, vma);
1172}
1173
20380731 1174static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1175{
1176 /*
89bddce5
SH
1177 * It was possible the inode is NULL we were
1178 * closing an unfinished socket.
1da177e4
LT
1179 */
1180
89bddce5 1181 if (!inode) {
1da177e4
LT
1182 printk(KERN_DEBUG "sock_close: NULL inode\n");
1183 return 0;
1184 }
1da177e4
LT
1185 sock_release(SOCKET_I(inode));
1186 return 0;
1187}
1188
1189/*
1190 * Update the socket async list
1191 *
1192 * Fasync_list locking strategy.
1193 *
1194 * 1. fasync_list is modified only under process context socket lock
1195 * i.e. under semaphore.
1196 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1197 * or under socket lock
1da177e4
LT
1198 */
1199
1200static int sock_fasync(int fd, struct file *filp, int on)
1201{
989a2979
ED
1202 struct socket *sock = filp->private_data;
1203 struct sock *sk = sock->sk;
eaefd110 1204 struct socket_wq *wq;
1da177e4 1205
989a2979 1206 if (sk == NULL)
1da177e4 1207 return -EINVAL;
1da177e4
LT
1208
1209 lock_sock(sk);
eaefd110
ED
1210 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1211 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1212
eaefd110 1213 if (!wq->fasync_list)
989a2979
ED
1214 sock_reset_flag(sk, SOCK_FASYNC);
1215 else
bcdce719 1216 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1217
989a2979 1218 release_sock(sk);
1da177e4
LT
1219 return 0;
1220}
1221
43815482 1222/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1223
1224int sock_wake_async(struct socket *sock, int how, int band)
1225{
43815482
ED
1226 struct socket_wq *wq;
1227
1228 if (!sock)
1229 return -1;
1230 rcu_read_lock();
1231 wq = rcu_dereference(sock->wq);
1232 if (!wq || !wq->fasync_list) {
1233 rcu_read_unlock();
1da177e4 1234 return -1;
43815482 1235 }
89bddce5 1236 switch (how) {
8d8ad9d7 1237 case SOCK_WAKE_WAITD:
1da177e4
LT
1238 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1239 break;
1240 goto call_kill;
8d8ad9d7 1241 case SOCK_WAKE_SPACE:
1da177e4
LT
1242 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1243 break;
1244 /* fall through */
8d8ad9d7 1245 case SOCK_WAKE_IO:
89bddce5 1246call_kill:
43815482 1247 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1248 break;
8d8ad9d7 1249 case SOCK_WAKE_URG:
43815482 1250 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1251 }
43815482 1252 rcu_read_unlock();
1da177e4
LT
1253 return 0;
1254}
c6d409cf 1255EXPORT_SYMBOL(sock_wake_async);
1da177e4 1256
721db93a 1257int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1258 struct socket **res, int kern)
1da177e4
LT
1259{
1260 int err;
1261 struct socket *sock;
55737fda 1262 const struct net_proto_family *pf;
1da177e4
LT
1263
1264 /*
89bddce5 1265 * Check protocol is in range
1da177e4
LT
1266 */
1267 if (family < 0 || family >= NPROTO)
1268 return -EAFNOSUPPORT;
1269 if (type < 0 || type >= SOCK_MAX)
1270 return -EINVAL;
1271
1272 /* Compatibility.
1273
1274 This uglymoron is moved from INET layer to here to avoid
1275 deadlock in module load.
1276 */
1277 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1278 static int warned;
1da177e4
LT
1279 if (!warned) {
1280 warned = 1;
89bddce5
SH
1281 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1282 current->comm);
1da177e4
LT
1283 }
1284 family = PF_PACKET;
1285 }
1286
1287 err = security_socket_create(family, type, protocol, kern);
1288 if (err)
1289 return err;
89bddce5 1290
55737fda
SH
1291 /*
1292 * Allocate the socket and allow the family to set things up. if
1293 * the protocol is 0, the family is instructed to select an appropriate
1294 * default.
1295 */
1296 sock = sock_alloc();
1297 if (!sock) {
e87cc472 1298 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1299 return -ENFILE; /* Not exactly a match, but its the
1300 closest posix thing */
1301 }
1302
1303 sock->type = type;
1304
95a5afca 1305#ifdef CONFIG_MODULES
89bddce5
SH
1306 /* Attempt to load a protocol module if the find failed.
1307 *
1308 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1309 * requested real, full-featured networking support upon configuration.
1310 * Otherwise module support will break!
1311 */
190683a9 1312 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1313 request_module("net-pf-%d", family);
1da177e4
LT
1314#endif
1315
55737fda
SH
1316 rcu_read_lock();
1317 pf = rcu_dereference(net_families[family]);
1318 err = -EAFNOSUPPORT;
1319 if (!pf)
1320 goto out_release;
1da177e4
LT
1321
1322 /*
1323 * We will call the ->create function, that possibly is in a loadable
1324 * module, so we have to bump that loadable module refcnt first.
1325 */
55737fda 1326 if (!try_module_get(pf->owner))
1da177e4
LT
1327 goto out_release;
1328
55737fda
SH
1329 /* Now protected by module ref count */
1330 rcu_read_unlock();
1331
3f378b68 1332 err = pf->create(net, sock, protocol, kern);
55737fda 1333 if (err < 0)
1da177e4 1334 goto out_module_put;
a79af59e 1335
1da177e4
LT
1336 /*
1337 * Now to bump the refcnt of the [loadable] module that owns this
1338 * socket at sock_release time we decrement its refcnt.
1339 */
55737fda
SH
1340 if (!try_module_get(sock->ops->owner))
1341 goto out_module_busy;
1342
1da177e4
LT
1343 /*
1344 * Now that we're done with the ->create function, the [loadable]
1345 * module can have its refcnt decremented
1346 */
55737fda 1347 module_put(pf->owner);
7420ed23
VY
1348 err = security_socket_post_create(sock, family, type, protocol, kern);
1349 if (err)
3b185525 1350 goto out_sock_release;
55737fda 1351 *res = sock;
1da177e4 1352
55737fda
SH
1353 return 0;
1354
1355out_module_busy:
1356 err = -EAFNOSUPPORT;
1da177e4 1357out_module_put:
55737fda
SH
1358 sock->ops = NULL;
1359 module_put(pf->owner);
1360out_sock_release:
1da177e4 1361 sock_release(sock);
55737fda
SH
1362 return err;
1363
1364out_release:
1365 rcu_read_unlock();
1366 goto out_sock_release;
1da177e4 1367}
721db93a 1368EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1369
1370int sock_create(int family, int type, int protocol, struct socket **res)
1371{
1b8d7ae4 1372 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1373}
c6d409cf 1374EXPORT_SYMBOL(sock_create);
1da177e4
LT
1375
1376int sock_create_kern(int family, int type, int protocol, struct socket **res)
1377{
1b8d7ae4 1378 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1379}
c6d409cf 1380EXPORT_SYMBOL(sock_create_kern);
1da177e4 1381
3e0fa65f 1382SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1383{
1384 int retval;
1385 struct socket *sock;
a677a039
UD
1386 int flags;
1387
e38b36f3
UD
1388 /* Check the SOCK_* constants for consistency. */
1389 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1390 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1391 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1392 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1393
a677a039 1394 flags = type & ~SOCK_TYPE_MASK;
77d27200 1395 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1396 return -EINVAL;
1397 type &= SOCK_TYPE_MASK;
1da177e4 1398
aaca0bdc
UD
1399 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1400 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1401
1da177e4
LT
1402 retval = sock_create(family, type, protocol, &sock);
1403 if (retval < 0)
1404 goto out;
1405
77d27200 1406 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1407 if (retval < 0)
1408 goto out_release;
1409
1410out:
1411 /* It may be already another descriptor 8) Not kernel problem. */
1412 return retval;
1413
1414out_release:
1415 sock_release(sock);
1416 return retval;
1417}
1418
1419/*
1420 * Create a pair of connected sockets.
1421 */
1422
3e0fa65f
HC
1423SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1424 int __user *, usockvec)
1da177e4
LT
1425{
1426 struct socket *sock1, *sock2;
1427 int fd1, fd2, err;
db349509 1428 struct file *newfile1, *newfile2;
a677a039
UD
1429 int flags;
1430
1431 flags = type & ~SOCK_TYPE_MASK;
77d27200 1432 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1433 return -EINVAL;
1434 type &= SOCK_TYPE_MASK;
1da177e4 1435
aaca0bdc
UD
1436 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1437 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1438
1da177e4
LT
1439 /*
1440 * Obtain the first socket and check if the underlying protocol
1441 * supports the socketpair call.
1442 */
1443
1444 err = sock_create(family, type, protocol, &sock1);
1445 if (err < 0)
1446 goto out;
1447
1448 err = sock_create(family, type, protocol, &sock2);
1449 if (err < 0)
1450 goto out_release_1;
1451
1452 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1453 if (err < 0)
1da177e4
LT
1454 goto out_release_both;
1455
28407630 1456 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1457 if (unlikely(fd1 < 0)) {
1458 err = fd1;
db349509 1459 goto out_release_both;
bf3c23d1 1460 }
28407630 1461 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1462 if (unlikely(fd2 < 0)) {
1463 err = fd2;
28407630
AV
1464 put_unused_fd(fd1);
1465 goto out_release_both;
1466 }
1467
aab174f0 1468 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1469 if (unlikely(IS_ERR(newfile1))) {
1470 err = PTR_ERR(newfile1);
1471 put_unused_fd(fd1);
1472 put_unused_fd(fd2);
1473 goto out_release_both;
1474 }
1475
aab174f0 1476 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1477 if (IS_ERR(newfile2)) {
1478 err = PTR_ERR(newfile2);
198de4d7
AV
1479 fput(newfile1);
1480 put_unused_fd(fd1);
28407630 1481 put_unused_fd(fd2);
198de4d7
AV
1482 sock_release(sock2);
1483 goto out;
db349509
AV
1484 }
1485
157cf649 1486 audit_fd_pair(fd1, fd2);
db349509
AV
1487 fd_install(fd1, newfile1);
1488 fd_install(fd2, newfile2);
1da177e4
LT
1489 /* fd1 and fd2 may be already another descriptors.
1490 * Not kernel problem.
1491 */
1492
89bddce5 1493 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1494 if (!err)
1495 err = put_user(fd2, &usockvec[1]);
1496 if (!err)
1497 return 0;
1498
1499 sys_close(fd2);
1500 sys_close(fd1);
1501 return err;
1502
1da177e4 1503out_release_both:
89bddce5 1504 sock_release(sock2);
1da177e4 1505out_release_1:
89bddce5 1506 sock_release(sock1);
1da177e4
LT
1507out:
1508 return err;
1509}
1510
1da177e4
LT
1511/*
1512 * Bind a name to a socket. Nothing much to do here since it's
1513 * the protocol's responsibility to handle the local address.
1514 *
1515 * We move the socket address to kernel space before we call
1516 * the protocol layer (having also checked the address is ok).
1517 */
1518
20f37034 1519SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1520{
1521 struct socket *sock;
230b1839 1522 struct sockaddr_storage address;
6cb153ca 1523 int err, fput_needed;
1da177e4 1524
89bddce5 1525 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1526 if (sock) {
43db362d 1527 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1528 if (err >= 0) {
1529 err = security_socket_bind(sock,
230b1839 1530 (struct sockaddr *)&address,
89bddce5 1531 addrlen);
6cb153ca
BL
1532 if (!err)
1533 err = sock->ops->bind(sock,
89bddce5 1534 (struct sockaddr *)
230b1839 1535 &address, addrlen);
1da177e4 1536 }
6cb153ca 1537 fput_light(sock->file, fput_needed);
89bddce5 1538 }
1da177e4
LT
1539 return err;
1540}
1541
1da177e4
LT
1542/*
1543 * Perform a listen. Basically, we allow the protocol to do anything
1544 * necessary for a listen, and if that works, we mark the socket as
1545 * ready for listening.
1546 */
1547
3e0fa65f 1548SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1549{
1550 struct socket *sock;
6cb153ca 1551 int err, fput_needed;
b8e1f9b5 1552 int somaxconn;
89bddce5
SH
1553
1554 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1555 if (sock) {
8efa6e93 1556 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1557 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1558 backlog = somaxconn;
1da177e4
LT
1559
1560 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1561 if (!err)
1562 err = sock->ops->listen(sock, backlog);
1da177e4 1563
6cb153ca 1564 fput_light(sock->file, fput_needed);
1da177e4
LT
1565 }
1566 return err;
1567}
1568
1da177e4
LT
1569/*
1570 * For accept, we attempt to create a new socket, set up the link
1571 * with the client, wake up the client, then return the new
1572 * connected fd. We collect the address of the connector in kernel
1573 * space and move it to user at the very end. This is unclean because
1574 * we open the socket then return an error.
1575 *
1576 * 1003.1g adds the ability to recvmsg() to query connection pending
1577 * status to recvmsg. We need to add that support in a way that's
1578 * clean when we restructure accept also.
1579 */
1580
20f37034
HC
1581SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1582 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1583{
1584 struct socket *sock, *newsock;
39d8c1b6 1585 struct file *newfile;
6cb153ca 1586 int err, len, newfd, fput_needed;
230b1839 1587 struct sockaddr_storage address;
1da177e4 1588
77d27200 1589 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1590 return -EINVAL;
1591
1592 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1593 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1594
6cb153ca 1595 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1596 if (!sock)
1597 goto out;
1598
1599 err = -ENFILE;
c6d409cf
ED
1600 newsock = sock_alloc();
1601 if (!newsock)
1da177e4
LT
1602 goto out_put;
1603
1604 newsock->type = sock->type;
1605 newsock->ops = sock->ops;
1606
1da177e4
LT
1607 /*
1608 * We don't need try_module_get here, as the listening socket (sock)
1609 * has the protocol module (sock->ops->owner) held.
1610 */
1611 __module_get(newsock->ops->owner);
1612
28407630 1613 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1614 if (unlikely(newfd < 0)) {
1615 err = newfd;
9a1875e6
DM
1616 sock_release(newsock);
1617 goto out_put;
39d8c1b6 1618 }
aab174f0 1619 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1620 if (unlikely(IS_ERR(newfile))) {
1621 err = PTR_ERR(newfile);
1622 put_unused_fd(newfd);
1623 sock_release(newsock);
1624 goto out_put;
1625 }
39d8c1b6 1626
a79af59e
FF
1627 err = security_socket_accept(sock, newsock);
1628 if (err)
39d8c1b6 1629 goto out_fd;
a79af59e 1630
1da177e4
LT
1631 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1632 if (err < 0)
39d8c1b6 1633 goto out_fd;
1da177e4
LT
1634
1635 if (upeer_sockaddr) {
230b1839 1636 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1637 &len, 2) < 0) {
1da177e4 1638 err = -ECONNABORTED;
39d8c1b6 1639 goto out_fd;
1da177e4 1640 }
43db362d 1641 err = move_addr_to_user(&address,
230b1839 1642 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1643 if (err < 0)
39d8c1b6 1644 goto out_fd;
1da177e4
LT
1645 }
1646
1647 /* File flags are not inherited via accept() unlike another OSes. */
1648
39d8c1b6
DM
1649 fd_install(newfd, newfile);
1650 err = newfd;
1da177e4 1651
1da177e4 1652out_put:
6cb153ca 1653 fput_light(sock->file, fput_needed);
1da177e4
LT
1654out:
1655 return err;
39d8c1b6 1656out_fd:
9606a216 1657 fput(newfile);
39d8c1b6 1658 put_unused_fd(newfd);
1da177e4
LT
1659 goto out_put;
1660}
1661
20f37034
HC
1662SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1663 int __user *, upeer_addrlen)
aaca0bdc 1664{
de11defe 1665 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1666}
1667
1da177e4
LT
1668/*
1669 * Attempt to connect to a socket with the server address. The address
1670 * is in user space so we verify it is OK and move it to kernel space.
1671 *
1672 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1673 * break bindings
1674 *
1675 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1676 * other SEQPACKET protocols that take time to connect() as it doesn't
1677 * include the -EINPROGRESS status for such sockets.
1678 */
1679
20f37034
HC
1680SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1681 int, addrlen)
1da177e4
LT
1682{
1683 struct socket *sock;
230b1839 1684 struct sockaddr_storage address;
6cb153ca 1685 int err, fput_needed;
1da177e4 1686
6cb153ca 1687 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1688 if (!sock)
1689 goto out;
43db362d 1690 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1691 if (err < 0)
1692 goto out_put;
1693
89bddce5 1694 err =
230b1839 1695 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1696 if (err)
1697 goto out_put;
1698
230b1839 1699 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1700 sock->file->f_flags);
1701out_put:
6cb153ca 1702 fput_light(sock->file, fput_needed);
1da177e4
LT
1703out:
1704 return err;
1705}
1706
1707/*
1708 * Get the local address ('name') of a socket object. Move the obtained
1709 * name to user space.
1710 */
1711
20f37034
HC
1712SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1713 int __user *, usockaddr_len)
1da177e4
LT
1714{
1715 struct socket *sock;
230b1839 1716 struct sockaddr_storage address;
6cb153ca 1717 int len, err, fput_needed;
89bddce5 1718
6cb153ca 1719 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1720 if (!sock)
1721 goto out;
1722
1723 err = security_socket_getsockname(sock);
1724 if (err)
1725 goto out_put;
1726
230b1839 1727 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1728 if (err)
1729 goto out_put;
43db362d 1730 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1731
1732out_put:
6cb153ca 1733 fput_light(sock->file, fput_needed);
1da177e4
LT
1734out:
1735 return err;
1736}
1737
1738/*
1739 * Get the remote address ('name') of a socket object. Move the obtained
1740 * name to user space.
1741 */
1742
20f37034
HC
1743SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1744 int __user *, usockaddr_len)
1da177e4
LT
1745{
1746 struct socket *sock;
230b1839 1747 struct sockaddr_storage address;
6cb153ca 1748 int len, err, fput_needed;
1da177e4 1749
89bddce5
SH
1750 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1751 if (sock != NULL) {
1da177e4
LT
1752 err = security_socket_getpeername(sock);
1753 if (err) {
6cb153ca 1754 fput_light(sock->file, fput_needed);
1da177e4
LT
1755 return err;
1756 }
1757
89bddce5 1758 err =
230b1839 1759 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1760 1);
1da177e4 1761 if (!err)
43db362d 1762 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1763 usockaddr_len);
6cb153ca 1764 fput_light(sock->file, fput_needed);
1da177e4
LT
1765 }
1766 return err;
1767}
1768
1769/*
1770 * Send a datagram to a given address. We move the address into kernel
1771 * space and check the user space data area is readable before invoking
1772 * the protocol.
1773 */
1774
3e0fa65f 1775SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1776 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1777 int, addr_len)
1da177e4
LT
1778{
1779 struct socket *sock;
230b1839 1780 struct sockaddr_storage address;
1da177e4
LT
1781 int err;
1782 struct msghdr msg;
1783 struct iovec iov;
6cb153ca 1784 int fput_needed;
6cb153ca 1785
253eacc0
LT
1786 if (len > INT_MAX)
1787 len = INT_MAX;
de0fa95c
PE
1788 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1789 if (!sock)
4387ff75 1790 goto out;
6cb153ca 1791
89bddce5
SH
1792 iov.iov_base = buff;
1793 iov.iov_len = len;
1794 msg.msg_name = NULL;
1795 msg.msg_iov = &iov;
1796 msg.msg_iovlen = 1;
1797 msg.msg_control = NULL;
1798 msg.msg_controllen = 0;
1799 msg.msg_namelen = 0;
6cb153ca 1800 if (addr) {
43db362d 1801 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1802 if (err < 0)
1803 goto out_put;
230b1839 1804 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1805 msg.msg_namelen = addr_len;
1da177e4
LT
1806 }
1807 if (sock->file->f_flags & O_NONBLOCK)
1808 flags |= MSG_DONTWAIT;
1809 msg.msg_flags = flags;
1810 err = sock_sendmsg(sock, &msg, len);
1811
89bddce5 1812out_put:
de0fa95c 1813 fput_light(sock->file, fput_needed);
4387ff75 1814out:
1da177e4
LT
1815 return err;
1816}
1817
1818/*
89bddce5 1819 * Send a datagram down a socket.
1da177e4
LT
1820 */
1821
3e0fa65f 1822SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1823 unsigned int, flags)
1da177e4
LT
1824{
1825 return sys_sendto(fd, buff, len, flags, NULL, 0);
1826}
1827
1828/*
89bddce5 1829 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1830 * sender. We verify the buffers are writable and if needed move the
1831 * sender address from kernel to user space.
1832 */
1833
3e0fa65f 1834SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1835 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1836 int __user *, addr_len)
1da177e4
LT
1837{
1838 struct socket *sock;
1839 struct iovec iov;
1840 struct msghdr msg;
230b1839 1841 struct sockaddr_storage address;
89bddce5 1842 int err, err2;
6cb153ca
BL
1843 int fput_needed;
1844
253eacc0
LT
1845 if (size > INT_MAX)
1846 size = INT_MAX;
de0fa95c 1847 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1848 if (!sock)
de0fa95c 1849 goto out;
1da177e4 1850
89bddce5
SH
1851 msg.msg_control = NULL;
1852 msg.msg_controllen = 0;
1853 msg.msg_iovlen = 1;
1854 msg.msg_iov = &iov;
1855 iov.iov_len = size;
1856 iov.iov_base = ubuf;
230b1839
YH
1857 msg.msg_name = (struct sockaddr *)&address;
1858 msg.msg_namelen = sizeof(address);
1da177e4
LT
1859 if (sock->file->f_flags & O_NONBLOCK)
1860 flags |= MSG_DONTWAIT;
89bddce5 1861 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1862
89bddce5 1863 if (err >= 0 && addr != NULL) {
43db362d 1864 err2 = move_addr_to_user(&address,
230b1839 1865 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1866 if (err2 < 0)
1867 err = err2;
1da177e4 1868 }
de0fa95c
PE
1869
1870 fput_light(sock->file, fput_needed);
4387ff75 1871out:
1da177e4
LT
1872 return err;
1873}
1874
1875/*
89bddce5 1876 * Receive a datagram from a socket.
1da177e4
LT
1877 */
1878
89bddce5 1879asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1880 unsigned int flags)
1da177e4
LT
1881{
1882 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1883}
1884
1885/*
1886 * Set a socket option. Because we don't know the option lengths we have
1887 * to pass the user mode parameter for the protocols to sort out.
1888 */
1889
20f37034
HC
1890SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1891 char __user *, optval, int, optlen)
1da177e4 1892{
6cb153ca 1893 int err, fput_needed;
1da177e4
LT
1894 struct socket *sock;
1895
1896 if (optlen < 0)
1897 return -EINVAL;
89bddce5
SH
1898
1899 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1900 if (sock != NULL) {
1901 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1902 if (err)
1903 goto out_put;
1da177e4
LT
1904
1905 if (level == SOL_SOCKET)
89bddce5
SH
1906 err =
1907 sock_setsockopt(sock, level, optname, optval,
1908 optlen);
1da177e4 1909 else
89bddce5
SH
1910 err =
1911 sock->ops->setsockopt(sock, level, optname, optval,
1912 optlen);
6cb153ca
BL
1913out_put:
1914 fput_light(sock->file, fput_needed);
1da177e4
LT
1915 }
1916 return err;
1917}
1918
1919/*
1920 * Get a socket option. Because we don't know the option lengths we have
1921 * to pass a user mode parameter for the protocols to sort out.
1922 */
1923
20f37034
HC
1924SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1925 char __user *, optval, int __user *, optlen)
1da177e4 1926{
6cb153ca 1927 int err, fput_needed;
1da177e4
LT
1928 struct socket *sock;
1929
89bddce5
SH
1930 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1931 if (sock != NULL) {
6cb153ca
BL
1932 err = security_socket_getsockopt(sock, level, optname);
1933 if (err)
1934 goto out_put;
1da177e4
LT
1935
1936 if (level == SOL_SOCKET)
89bddce5
SH
1937 err =
1938 sock_getsockopt(sock, level, optname, optval,
1939 optlen);
1da177e4 1940 else
89bddce5
SH
1941 err =
1942 sock->ops->getsockopt(sock, level, optname, optval,
1943 optlen);
6cb153ca
BL
1944out_put:
1945 fput_light(sock->file, fput_needed);
1da177e4
LT
1946 }
1947 return err;
1948}
1949
1da177e4
LT
1950/*
1951 * Shutdown a socket.
1952 */
1953
754fe8d2 1954SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1955{
6cb153ca 1956 int err, fput_needed;
1da177e4
LT
1957 struct socket *sock;
1958
89bddce5
SH
1959 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1960 if (sock != NULL) {
1da177e4 1961 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1962 if (!err)
1963 err = sock->ops->shutdown(sock, how);
1964 fput_light(sock->file, fput_needed);
1da177e4
LT
1965 }
1966 return err;
1967}
1968
89bddce5 1969/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1970 * fields which are the same type (int / unsigned) on our platforms.
1971 */
/*
 * These expand in the caller's scope: a variable named `flags` and, for a
 * message pointer `msg`, a companion pointer named `msg_compat` must both
 * be visible where the macro is used.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1975
c71d8ebe
TH
1976struct used_address {
1977 struct sockaddr_storage name;
1978 unsigned int name_len;
1979};
1980
228e548e 1981static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1982 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1983 struct used_address *used_address)
1da177e4 1984{
89bddce5
SH
1985 struct compat_msghdr __user *msg_compat =
1986 (struct compat_msghdr __user *)msg;
230b1839 1987 struct sockaddr_storage address;
1da177e4 1988 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1989 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1990 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1991 /* 20 is size of ipv6_pktinfo */
1da177e4 1992 unsigned char *ctl_buf = ctl;
a74e9106 1993 int err, ctl_len, total_len;
89bddce5 1994
1da177e4
LT
1995 err = -EFAULT;
1996 if (MSG_CMSG_COMPAT & flags) {
228e548e 1997 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1998 return -EFAULT;
228e548e 1999 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
2000 return -EFAULT;
2001
228e548e 2002 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2003 err = -EMSGSIZE;
2004 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2005 goto out;
2006 err = -ENOMEM;
2007 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2008 GFP_KERNEL);
1da177e4 2009 if (!iov)
228e548e 2010 goto out;
1da177e4
LT
2011 }
2012
2013 /* This will also move the address data into kernel space */
2014 if (MSG_CMSG_COMPAT & flags) {
43db362d 2015 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 2016 } else
43db362d 2017 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 2018 if (err < 0)
1da177e4
LT
2019 goto out_freeiov;
2020 total_len = err;
2021
2022 err = -ENOBUFS;
2023
228e548e 2024 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2025 goto out_freeiov;
228e548e 2026 ctl_len = msg_sys->msg_controllen;
1da177e4 2027 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2028 err =
228e548e 2029 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2030 sizeof(ctl));
1da177e4
LT
2031 if (err)
2032 goto out_freeiov;
228e548e
AB
2033 ctl_buf = msg_sys->msg_control;
2034 ctl_len = msg_sys->msg_controllen;
1da177e4 2035 } else if (ctl_len) {
89bddce5 2036 if (ctl_len > sizeof(ctl)) {
1da177e4 2037 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2038 if (ctl_buf == NULL)
1da177e4
LT
2039 goto out_freeiov;
2040 }
2041 err = -EFAULT;
2042 /*
228e548e 2043 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2044 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2045 * checking falls down on this.
2046 */
fb8621bb 2047 if (copy_from_user(ctl_buf,
228e548e 2048 (void __user __force *)msg_sys->msg_control,
89bddce5 2049 ctl_len))
1da177e4 2050 goto out_freectl;
228e548e 2051 msg_sys->msg_control = ctl_buf;
1da177e4 2052 }
228e548e 2053 msg_sys->msg_flags = flags;
1da177e4
LT
2054
2055 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2056 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2057 /*
2058 * If this is sendmmsg() and current destination address is same as
2059 * previously succeeded address, omit asking LSM's decision.
2060 * used_address->name_len is initialized to UINT_MAX so that the first
2061 * destination address never matches.
2062 */
bc909d9d
MD
2063 if (used_address && msg_sys->msg_name &&
2064 used_address->name_len == msg_sys->msg_namelen &&
2065 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2066 used_address->name_len)) {
2067 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2068 goto out_freectl;
2069 }
2070 err = sock_sendmsg(sock, msg_sys, total_len);
2071 /*
2072 * If this is sendmmsg() and sending to current destination address was
2073 * successful, remember it.
2074 */
2075 if (used_address && err >= 0) {
2076 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2077 if (msg_sys->msg_name)
2078 memcpy(&used_address->name, msg_sys->msg_name,
2079 used_address->name_len);
c71d8ebe 2080 }
1da177e4
LT
2081
2082out_freectl:
89bddce5 2083 if (ctl_buf != ctl)
1da177e4
LT
2084 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2085out_freeiov:
2086 if (iov != iovstack)
a74e9106 2087 kfree(iov);
228e548e
AB
2088out:
2089 return err;
2090}
2091
2092/*
2093 * BSD sendmsg interface
2094 */
2095
95c96174 2096SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
228e548e
AB
2097{
2098 int fput_needed, err;
2099 struct msghdr msg_sys;
2100 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2101
2102 if (!sock)
2103 goto out;
2104
c71d8ebe 2105 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2106
6cb153ca 2107 fput_light(sock->file, fput_needed);
89bddce5 2108out:
1da177e4
LT
2109 return err;
2110}
2111
228e548e
AB
2112/*
2113 * Linux sendmmsg interface
2114 */
2115
2116int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2117 unsigned int flags)
2118{
2119 int fput_needed, err, datagrams;
2120 struct socket *sock;
2121 struct mmsghdr __user *entry;
2122 struct compat_mmsghdr __user *compat_entry;
2123 struct msghdr msg_sys;
c71d8ebe 2124 struct used_address used_address;
228e548e 2125
98382f41
AB
2126 if (vlen > UIO_MAXIOV)
2127 vlen = UIO_MAXIOV;
228e548e
AB
2128
2129 datagrams = 0;
2130
2131 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2132 if (!sock)
2133 return err;
2134
c71d8ebe 2135 used_address.name_len = UINT_MAX;
228e548e
AB
2136 entry = mmsg;
2137 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2138 err = 0;
228e548e
AB
2139
2140 while (datagrams < vlen) {
228e548e
AB
2141 if (MSG_CMSG_COMPAT & flags) {
2142 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2143 &msg_sys, flags, &used_address);
228e548e
AB
2144 if (err < 0)
2145 break;
2146 err = __put_user(err, &compat_entry->msg_len);
2147 ++compat_entry;
2148 } else {
2149 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2150 &msg_sys, flags, &used_address);
228e548e
AB
2151 if (err < 0)
2152 break;
2153 err = put_user(err, &entry->msg_len);
2154 ++entry;
2155 }
2156
2157 if (err)
2158 break;
2159 ++datagrams;
2160 }
2161
228e548e
AB
2162 fput_light(sock->file, fput_needed);
2163
728ffb86
AB
2164 /* We only return an error if no datagrams were able to be sent */
2165 if (datagrams != 0)
228e548e
AB
2166 return datagrams;
2167
228e548e
AB
2168 return err;
2169}
2170
2171SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2172 unsigned int, vlen, unsigned int, flags)
2173{
2174 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2175}
2176
a2e27255 2177static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2178 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2179{
89bddce5
SH
2180 struct compat_msghdr __user *msg_compat =
2181 (struct compat_msghdr __user *)msg;
1da177e4 2182 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2183 struct iovec *iov = iovstack;
1da177e4 2184 unsigned long cmsg_ptr;
a74e9106 2185 int err, total_len, len;
1da177e4
LT
2186
2187 /* kernel mode address */
230b1839 2188 struct sockaddr_storage addr;
1da177e4
LT
2189
2190 /* user mode address pointers */
2191 struct sockaddr __user *uaddr;
2192 int __user *uaddr_len;
89bddce5 2193
1da177e4 2194 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2195 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2196 return -EFAULT;
c6d409cf 2197 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2198 return -EFAULT;
1da177e4 2199
a2e27255 2200 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2201 err = -EMSGSIZE;
2202 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2203 goto out;
2204 err = -ENOMEM;
2205 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2206 GFP_KERNEL);
1da177e4 2207 if (!iov)
a2e27255 2208 goto out;
1da177e4
LT
2209 }
2210
2211 /*
89bddce5
SH
2212 * Save the user-mode address (verify_iovec will change the
2213 * kernel msghdr to use the kernel address space)
1da177e4 2214 */
89bddce5 2215
a2e27255 2216 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2217 uaddr_len = COMPAT_NAMELEN(msg);
2218 if (MSG_CMSG_COMPAT & flags) {
43db362d 2219 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4 2220 } else
43db362d 2221 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2222 if (err < 0)
2223 goto out_freeiov;
89bddce5 2224 total_len = err;
1da177e4 2225
a2e27255
ACM
2226 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2227 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2228
1da177e4
LT
2229 if (sock->file->f_flags & O_NONBLOCK)
2230 flags |= MSG_DONTWAIT;
a2e27255
ACM
2231 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2232 total_len, flags);
1da177e4
LT
2233 if (err < 0)
2234 goto out_freeiov;
2235 len = err;
2236
2237 if (uaddr != NULL) {
43db362d 2238 err = move_addr_to_user(&addr,
a2e27255 2239 msg_sys->msg_namelen, uaddr,
89bddce5 2240 uaddr_len);
1da177e4
LT
2241 if (err < 0)
2242 goto out_freeiov;
2243 }
a2e27255 2244 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2245 COMPAT_FLAGS(msg));
1da177e4
LT
2246 if (err)
2247 goto out_freeiov;
2248 if (MSG_CMSG_COMPAT & flags)
a2e27255 2249 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2250 &msg_compat->msg_controllen);
2251 else
a2e27255 2252 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2253 &msg->msg_controllen);
2254 if (err)
2255 goto out_freeiov;
2256 err = len;
2257
2258out_freeiov:
2259 if (iov != iovstack)
a74e9106 2260 kfree(iov);
a2e27255
ACM
2261out:
2262 return err;
2263}
2264
2265/*
2266 * BSD recvmsg interface
2267 */
2268
2269SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2270 unsigned int, flags)
2271{
2272 int fput_needed, err;
2273 struct msghdr msg_sys;
2274 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2275
2276 if (!sock)
2277 goto out;
2278
2279 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2280
6cb153ca 2281 fput_light(sock->file, fput_needed);
1da177e4
LT
2282out:
2283 return err;
2284}
2285
a2e27255
ACM
2286/*
2287 * Linux recvmmsg interface
2288 */
2289
2290int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2291 unsigned int flags, struct timespec *timeout)
2292{
2293 int fput_needed, err, datagrams;
2294 struct socket *sock;
2295 struct mmsghdr __user *entry;
d7256d0e 2296 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2297 struct msghdr msg_sys;
2298 struct timespec end_time;
2299
2300 if (timeout &&
2301 poll_select_set_timeout(&end_time, timeout->tv_sec,
2302 timeout->tv_nsec))
2303 return -EINVAL;
2304
2305 datagrams = 0;
2306
2307 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2308 if (!sock)
2309 return err;
2310
2311 err = sock_error(sock->sk);
2312 if (err)
2313 goto out_put;
2314
2315 entry = mmsg;
d7256d0e 2316 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2317
2318 while (datagrams < vlen) {
2319 /*
2320 * No need to ask LSM for more than the first datagram.
2321 */
d7256d0e
JMG
2322 if (MSG_CMSG_COMPAT & flags) {
2323 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2324 &msg_sys, flags & ~MSG_WAITFORONE,
2325 datagrams);
d7256d0e
JMG
2326 if (err < 0)
2327 break;
2328 err = __put_user(err, &compat_entry->msg_len);
2329 ++compat_entry;
2330 } else {
2331 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2332 &msg_sys, flags & ~MSG_WAITFORONE,
2333 datagrams);
d7256d0e
JMG
2334 if (err < 0)
2335 break;
2336 err = put_user(err, &entry->msg_len);
2337 ++entry;
2338 }
2339
a2e27255
ACM
2340 if (err)
2341 break;
a2e27255
ACM
2342 ++datagrams;
2343
71c5c159
BB
2344 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2345 if (flags & MSG_WAITFORONE)
2346 flags |= MSG_DONTWAIT;
2347
a2e27255
ACM
2348 if (timeout) {
2349 ktime_get_ts(timeout);
2350 *timeout = timespec_sub(end_time, *timeout);
2351 if (timeout->tv_sec < 0) {
2352 timeout->tv_sec = timeout->tv_nsec = 0;
2353 break;
2354 }
2355
2356 /* Timeout, return less than vlen datagrams */
2357 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2358 break;
2359 }
2360
2361 /* Out of band data, return right away */
2362 if (msg_sys.msg_flags & MSG_OOB)
2363 break;
2364 }
2365
2366out_put:
2367 fput_light(sock->file, fput_needed);
1da177e4 2368
a2e27255
ACM
2369 if (err == 0)
2370 return datagrams;
2371
2372 if (datagrams != 0) {
2373 /*
2374 * We may return less entries than requested (vlen) if the
2375 * sock is non block and there aren't enough datagrams...
2376 */
2377 if (err != -EAGAIN) {
2378 /*
2379 * ... or if recvmsg returns an error after we
2380 * received some datagrams, where we record the
2381 * error to return on the next call or if the
2382 * app asks about it using getsockopt(SO_ERROR).
2383 */
2384 sock->sk->sk_err = -err;
2385 }
2386
2387 return datagrams;
2388 }
2389
2390 return err;
2391}
2392
2393SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2394 unsigned int, vlen, unsigned int, flags,
2395 struct timespec __user *, timeout)
2396{
2397 int datagrams;
2398 struct timespec timeout_sys;
2399
2400 if (!timeout)
2401 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2402
2403 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2404 return -EFAULT;
2405
2406 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2407
2408 if (datagrams > 0 &&
2409 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2410 datagrams = -EFAULT;
2411
2412 return datagrams;
2413}
2414
#ifdef __ARCH_WANT_SYS_SOCKETCALL
/* Argument list sizes for sys_socketcall, in bytes, indexed by call number */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Multiplexer for the socket system calls: @call selects the operation
 *	and @args points to its arguments, packed as an array of unsigned
 *	long in user space.  nargs[] says how many bytes to fetch for each
 *	call.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 */

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
	unsigned long a[6];
	unsigned int len;
	int err;

	if (call < 1 || call > SYS_SENDMMSG)
		return -EINVAL;

	len = nargs[call];
	if (len > sizeof(a))
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, len))
		return -EFAULT;

	audit_socketcall(len / sizeof(unsigned long), a);

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept4(a[0], (struct sockaddr __user *)a[1],
				  (int __user *)a[2], 0);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_SENDMMSG:
		err = sys_sendmmsg(a[0], (struct mmsghdr __user *)a[1], a[2],
				   a[3]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMMSG:
		err = sys_recvmmsg(a[0], (struct mmsghdr __user *)a[1], a[2],
				   a[3], (struct timespec __user *)a[4]);
		break;
	case SYS_ACCEPT4:
		err = sys_accept4(a[0], (struct sockaddr __user *)a[1],
				  (int __user *)a[2], a[3]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2539
55737fda
SH
2540/**
2541 * sock_register - add a socket protocol handler
2542 * @ops: description of protocol
2543 *
1da177e4
LT
2544 * This function is called by a protocol handler that wants to
2545 * advertise its address family, and have it linked into the
55737fda
SH
2546 * socket interface. The value ops->family coresponds to the
2547 * socket system call protocol family.
1da177e4 2548 */
f0fd27d4 2549int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2550{
2551 int err;
2552
2553 if (ops->family >= NPROTO) {
89bddce5
SH
2554 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2555 NPROTO);
1da177e4
LT
2556 return -ENOBUFS;
2557 }
55737fda
SH
2558
2559 spin_lock(&net_family_lock);
190683a9
ED
2560 if (rcu_dereference_protected(net_families[ops->family],
2561 lockdep_is_held(&net_family_lock)))
55737fda
SH
2562 err = -EEXIST;
2563 else {
cf778b00 2564 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2565 err = 0;
2566 }
55737fda
SH
2567 spin_unlock(&net_family_lock);
2568
89bddce5 2569 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2570 return err;
2571}
c6d409cf 2572EXPORT_SYMBOL(sock_register);
1da177e4 2573
55737fda
SH
2574/**
2575 * sock_unregister - remove a protocol handler
2576 * @family: protocol family to remove
2577 *
1da177e4
LT
2578 * This function is called by a protocol handler that wants to
2579 * remove its address family, and have it unlinked from the
55737fda
SH
2580 * new socket creation.
2581 *
2582 * If protocol handler is a module, then it can use module reference
2583 * counts to protect against new references. If protocol handler is not
2584 * a module then it needs to provide its own protection in
2585 * the ops->create routine.
1da177e4 2586 */
f0fd27d4 2587void sock_unregister(int family)
1da177e4 2588{
f0fd27d4 2589 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2590
55737fda 2591 spin_lock(&net_family_lock);
a9b3cd7f 2592 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2593 spin_unlock(&net_family_lock);
2594
2595 synchronize_rcu();
2596
89bddce5 2597 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2598}
c6d409cf 2599EXPORT_SYMBOL(sock_unregister);
1da177e4 2600
77d76ea3 2601static int __init sock_init(void)
1da177e4 2602{
b3e19d92 2603 int err;
2ca794e5
EB
2604 /*
2605 * Initialize the network sysctl infrastructure.
2606 */
2607 err = net_sysctl_init();
2608 if (err)
2609 goto out;
b3e19d92 2610
1da177e4 2611 /*
89bddce5 2612 * Initialize skbuff SLAB cache
1da177e4
LT
2613 */
2614 skb_init();
1da177e4
LT
2615
2616 /*
89bddce5 2617 * Initialize the protocols module.
1da177e4
LT
2618 */
2619
2620 init_inodecache();
b3e19d92
NP
2621
2622 err = register_filesystem(&sock_fs_type);
2623 if (err)
2624 goto out_fs;
1da177e4 2625 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2626 if (IS_ERR(sock_mnt)) {
2627 err = PTR_ERR(sock_mnt);
2628 goto out_mount;
2629 }
77d76ea3
AK
2630
2631 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2632 */
2633
2634#ifdef CONFIG_NETFILTER
2635 netfilter_init();
2636#endif
cbeb321a 2637
c1f19b51
RC
2638#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2639 skb_timestamping_init();
2640#endif
2641
b3e19d92
NP
2642out:
2643 return err;
2644
2645out_mount:
2646 unregister_filesystem(&sock_fs_type);
2647out_fs:
2648 goto out;
1da177e4
LT
2649}
2650
77d76ea3
AK
2651core_initcall(sock_init); /* early initcall */
2652
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the "sockets: used N" line to @seq, where N is the sum of the
 * per-cpu sockets_in_use counters over all possible CPUs.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2669
89bbfc95 2670#ifdef CONFIG_COMPAT
6b96018b 2671static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2672 unsigned int cmd, void __user *up)
7a229387 2673{
7a229387
AB
2674 mm_segment_t old_fs = get_fs();
2675 struct timeval ktv;
2676 int err;
2677
2678 set_fs(KERNEL_DS);
6b96018b 2679 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2680 set_fs(old_fs);
644595f8 2681 if (!err)
ed6fe9d6 2682 err = compat_put_timeval(&ktv, up);
644595f8 2683
7a229387
AB
2684 return err;
2685}
2686
6b96018b 2687static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2688 unsigned int cmd, void __user *up)
7a229387 2689{
7a229387
AB
2690 mm_segment_t old_fs = get_fs();
2691 struct timespec kts;
2692 int err;
2693
2694 set_fs(KERNEL_DS);
6b96018b 2695 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2696 set_fs(old_fs);
644595f8 2697 if (!err)
ed6fe9d6 2698 err = compat_put_timespec(&kts, up);
644595f8 2699
7a229387
AB
2700 return err;
2701}
2702
6b96018b 2703static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2704{
2705 struct ifreq __user *uifr;
2706 int err;
2707
2708 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2709 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2710 return -EFAULT;
2711
6b96018b 2712 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2713 if (err)
2714 return err;
2715
6b96018b 2716 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2717 return -EFAULT;
2718
2719 return 0;
2720}
2721
6b96018b 2722static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2723{
6b96018b 2724 struct compat_ifconf ifc32;
7a229387
AB
2725 struct ifconf ifc;
2726 struct ifconf __user *uifc;
6b96018b 2727 struct compat_ifreq __user *ifr32;
7a229387
AB
2728 struct ifreq __user *ifr;
2729 unsigned int i, j;
2730 int err;
2731
6b96018b 2732 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2733 return -EFAULT;
2734
43da5f2e 2735 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2736 if (ifc32.ifcbuf == 0) {
2737 ifc32.ifc_len = 0;
2738 ifc.ifc_len = 0;
2739 ifc.ifc_req = NULL;
2740 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2741 } else {
c6d409cf
ED
2742 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2743 sizeof(struct ifreq);
7a229387
AB
2744 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2745 ifc.ifc_len = len;
2746 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2747 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2748 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2749 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2750 return -EFAULT;
2751 ifr++;
2752 ifr32++;
2753 }
2754 }
2755 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2756 return -EFAULT;
2757
6b96018b 2758 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2759 if (err)
2760 return err;
2761
2762 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2763 return -EFAULT;
2764
2765 ifr = ifc.ifc_req;
2766 ifr32 = compat_ptr(ifc32.ifcbuf);
2767 for (i = 0, j = 0;
c6d409cf
ED
2768 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2769 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2770 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2771 return -EFAULT;
2772 ifr32++;
2773 ifr++;
2774 }
2775
2776 if (ifc32.ifcbuf == 0) {
2777 /* Translate from 64-bit structure multiple to
2778 * a 32-bit one.
2779 */
2780 i = ifc.ifc_len;
6b96018b 2781 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2782 ifc32.ifc_len = i;
2783 } else {
2784 ifc32.ifc_len = i;
2785 }
6b96018b 2786 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2787 return -EFAULT;
2788
2789 return 0;
2790}
2791
6b96018b 2792static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2793{
3a7da39d
BH
2794 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2795 bool convert_in = false, convert_out = false;
2796 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2797 struct ethtool_rxnfc __user *rxnfc;
7a229387 2798 struct ifreq __user *ifr;
3a7da39d
BH
2799 u32 rule_cnt = 0, actual_rule_cnt;
2800 u32 ethcmd;
7a229387 2801 u32 data;
3a7da39d 2802 int ret;
7a229387 2803
3a7da39d
BH
2804 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2805 return -EFAULT;
7a229387 2806
3a7da39d
BH
2807 compat_rxnfc = compat_ptr(data);
2808
2809 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2810 return -EFAULT;
2811
3a7da39d
BH
2812 /* Most ethtool structures are defined without padding.
2813 * Unfortunately struct ethtool_rxnfc is an exception.
2814 */
2815 switch (ethcmd) {
2816 default:
2817 break;
2818 case ETHTOOL_GRXCLSRLALL:
2819 /* Buffer size is variable */
2820 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2821 return -EFAULT;
2822 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2823 return -ENOMEM;
2824 buf_size += rule_cnt * sizeof(u32);
2825 /* fall through */
2826 case ETHTOOL_GRXRINGS:
2827 case ETHTOOL_GRXCLSRLCNT:
2828 case ETHTOOL_GRXCLSRULE:
55664f32 2829 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2830 convert_out = true;
2831 /* fall through */
2832 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2833 buf_size += sizeof(struct ethtool_rxnfc);
2834 convert_in = true;
2835 break;
2836 }
2837
2838 ifr = compat_alloc_user_space(buf_size);
954b1244 2839 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2840
2841 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2842 return -EFAULT;
2843
3a7da39d
BH
2844 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2845 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2846 return -EFAULT;
2847
3a7da39d 2848 if (convert_in) {
127fe533 2849 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2850 * fs.ring_cookie and at the end of fs, but nowhere else.
2851 */
127fe533
AD
2852 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2853 sizeof(compat_rxnfc->fs.m_ext) !=
2854 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2855 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2856 BUILD_BUG_ON(
2857 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2858 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2859 offsetof(struct ethtool_rxnfc, fs.location) -
2860 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2861
2862 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2863 (void __user *)(&rxnfc->fs.m_ext + 1) -
2864 (void __user *)rxnfc) ||
3a7da39d
BH
2865 copy_in_user(&rxnfc->fs.ring_cookie,
2866 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2867 (void __user *)(&rxnfc->fs.location + 1) -
2868 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2869 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2870 sizeof(rxnfc->rule_cnt)))
2871 return -EFAULT;
2872 }
2873
2874 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2875 if (ret)
2876 return ret;
2877
2878 if (convert_out) {
2879 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2880 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2881 (const void __user *)rxnfc) ||
3a7da39d
BH
2882 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2883 &rxnfc->fs.ring_cookie,
954b1244
SH
2884 (const void __user *)(&rxnfc->fs.location + 1) -
2885 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2886 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2887 sizeof(rxnfc->rule_cnt)))
2888 return -EFAULT;
2889
2890 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2891 /* As an optimisation, we only copy the actual
2892 * number of rules that the underlying
2893 * function returned. Since Mallory might
2894 * change the rule count in user memory, we
2895 * check that it is less than the rule count
2896 * originally given (as the user buffer size),
2897 * which has been range-checked.
2898 */
2899 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2900 return -EFAULT;
2901 if (actual_rule_cnt < rule_cnt)
2902 rule_cnt = actual_rule_cnt;
2903 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2904 &rxnfc->rule_locs[0],
2905 rule_cnt * sizeof(u32)))
2906 return -EFAULT;
2907 }
2908 }
2909
2910 return 0;
7a229387
AB
2911}
2912
7a50a240
AB
2913static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2914{
2915 void __user *uptr;
2916 compat_uptr_t uptr32;
2917 struct ifreq __user *uifr;
2918
c6d409cf 2919 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2920 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2921 return -EFAULT;
2922
2923 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2924 return -EFAULT;
2925
2926 uptr = compat_ptr(uptr32);
2927
2928 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2929 return -EFAULT;
2930
2931 return dev_ioctl(net, SIOCWANDEV, uifr);
2932}
2933
6b96018b
AB
2934static int bond_ioctl(struct net *net, unsigned int cmd,
2935 struct compat_ifreq __user *ifr32)
7a229387
AB
2936{
2937 struct ifreq kifr;
2938 struct ifreq __user *uifr;
7a229387
AB
2939 mm_segment_t old_fs;
2940 int err;
2941 u32 data;
2942 void __user *datap;
2943
2944 switch (cmd) {
2945 case SIOCBONDENSLAVE:
2946 case SIOCBONDRELEASE:
2947 case SIOCBONDSETHWADDR:
2948 case SIOCBONDCHANGEACTIVE:
6b96018b 2949 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2950 return -EFAULT;
2951
2952 old_fs = get_fs();
c6d409cf 2953 set_fs(KERNEL_DS);
c3f52ae6 2954 err = dev_ioctl(net, cmd,
2955 (struct ifreq __user __force *) &kifr);
c6d409cf 2956 set_fs(old_fs);
7a229387
AB
2957
2958 return err;
2959 case SIOCBONDSLAVEINFOQUERY:
2960 case SIOCBONDINFOQUERY:
2961 uifr = compat_alloc_user_space(sizeof(*uifr));
2962 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2963 return -EFAULT;
2964
2965 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2966 return -EFAULT;
2967
2968 datap = compat_ptr(data);
2969 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2970 return -EFAULT;
2971
6b96018b 2972 return dev_ioctl(net, cmd, uifr);
7a229387 2973 default:
07d106d0 2974 return -ENOIOCTLCMD;
ccbd6a5a 2975 }
7a229387
AB
2976}
2977
6b96018b
AB
2978static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2979 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2980{
2981 struct ifreq __user *u_ifreq64;
7a229387
AB
2982 char tmp_buf[IFNAMSIZ];
2983 void __user *data64;
2984 u32 data32;
2985
2986 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2987 IFNAMSIZ))
2988 return -EFAULT;
2989 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2990 return -EFAULT;
2991 data64 = compat_ptr(data32);
2992
2993 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2994
2995 /* Don't check these user accesses, just let that get trapped
2996 * in the ioctl handler instead.
2997 */
2998 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2999 IFNAMSIZ))
3000 return -EFAULT;
3001 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
3002 return -EFAULT;
3003
6b96018b 3004 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3005}
3006
6b96018b
AB
3007static int dev_ifsioc(struct net *net, struct socket *sock,
3008 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3009{
a2116ed2 3010 struct ifreq __user *uifr;
7a229387
AB
3011 int err;
3012
a2116ed2
AB
3013 uifr = compat_alloc_user_space(sizeof(*uifr));
3014 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3015 return -EFAULT;
3016
3017 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3018
7a229387
AB
3019 if (!err) {
3020 switch (cmd) {
3021 case SIOCGIFFLAGS:
3022 case SIOCGIFMETRIC:
3023 case SIOCGIFMTU:
3024 case SIOCGIFMEM:
3025 case SIOCGIFHWADDR:
3026 case SIOCGIFINDEX:
3027 case SIOCGIFADDR:
3028 case SIOCGIFBRDADDR:
3029 case SIOCGIFDSTADDR:
3030 case SIOCGIFNETMASK:
fab2532b 3031 case SIOCGIFPFLAGS:
7a229387 3032 case SIOCGIFTXQLEN:
fab2532b
AB
3033 case SIOCGMIIPHY:
3034 case SIOCGMIIREG:
a2116ed2 3035 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3036 err = -EFAULT;
3037 break;
3038 }
3039 }
3040 return err;
3041}
3042
a2116ed2
AB
3043static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3044 struct compat_ifreq __user *uifr32)
3045{
3046 struct ifreq ifr;
3047 struct compat_ifmap __user *uifmap32;
3048 mm_segment_t old_fs;
3049 int err;
3050
3051 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3052 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3053 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3054 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3055 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3056 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
3057 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
3058 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
3059 if (err)
3060 return -EFAULT;
3061
3062 old_fs = get_fs();
c6d409cf 3063 set_fs(KERNEL_DS);
c3f52ae6 3064 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3065 set_fs(old_fs);
a2116ed2
AB
3066
3067 if (cmd == SIOCGIFMAP && !err) {
3068 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3069 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3070 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3071 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3072 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
3073 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
3074 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
3075 if (err)
3076 err = -EFAULT;
3077 }
3078 return err;
3079}
3080
3081static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3082{
3083 void __user *uptr;
3084 compat_uptr_t uptr32;
3085 struct ifreq __user *uifr;
3086
c6d409cf 3087 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3088 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3089 return -EFAULT;
3090
3091 if (get_user(uptr32, &uifr32->ifr_data))
3092 return -EFAULT;
3093
3094 uptr = compat_ptr(uptr32);
3095
3096 if (put_user(uptr, &uifr->ifr_data))
3097 return -EFAULT;
3098
3099 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3100}
3101
7a229387 3102struct rtentry32 {
c6d409cf 3103 u32 rt_pad1;
7a229387
AB
3104 struct sockaddr rt_dst; /* target address */
3105 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3106 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3107 unsigned short rt_flags;
3108 short rt_pad2;
3109 u32 rt_pad3;
3110 unsigned char rt_tos;
3111 unsigned char rt_class;
3112 short rt_pad4;
3113 short rt_metric; /* +1 for binary compatibility! */
7a229387 3114 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3115 u32 rt_mtu; /* per route MTU/Window */
3116 u32 rt_window; /* Window clamping */
7a229387
AB
3117 unsigned short rt_irtt; /* Initial RTT */
3118};
3119
3120struct in6_rtmsg32 {
3121 struct in6_addr rtmsg_dst;
3122 struct in6_addr rtmsg_src;
3123 struct in6_addr rtmsg_gateway;
3124 u32 rtmsg_type;
3125 u16 rtmsg_dst_len;
3126 u16 rtmsg_src_len;
3127 u32 rtmsg_metric;
3128 u32 rtmsg_info;
3129 u32 rtmsg_flags;
3130 s32 rtmsg_ifindex;
3131};
3132
6b96018b
AB
3133static int routing_ioctl(struct net *net, struct socket *sock,
3134 unsigned int cmd, void __user *argp)
7a229387
AB
3135{
3136 int ret;
3137 void *r = NULL;
3138 struct in6_rtmsg r6;
3139 struct rtentry r4;
3140 char devname[16];
3141 u32 rtdev;
3142 mm_segment_t old_fs = get_fs();
3143
6b96018b
AB
3144 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3145 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3146 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3147 3 * sizeof(struct in6_addr));
c6d409cf
ED
3148 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3149 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3150 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3151 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3152 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3153 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3154 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3155
3156 r = (void *) &r6;
3157 } else { /* ipv4 */
6b96018b 3158 struct rtentry32 __user *ur4 = argp;
c6d409cf 3159 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3160 3 * sizeof(struct sockaddr));
c6d409cf
ED
3161 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3162 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3163 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3164 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3165 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3166 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3167 if (rtdev) {
c6d409cf 3168 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3169 r4.rt_dev = (char __user __force *)devname;
3170 devname[15] = 0;
7a229387
AB
3171 } else
3172 r4.rt_dev = NULL;
3173
3174 r = (void *) &r4;
3175 }
3176
3177 if (ret) {
3178 ret = -EFAULT;
3179 goto out;
3180 }
3181
c6d409cf 3182 set_fs(KERNEL_DS);
6b96018b 3183 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3184 set_fs(old_fs);
7a229387
AB
3185
3186out:
7a229387
AB
3187 return ret;
3188}
3189
3190/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3191 * for some operations; this forces use of the newer bridge-utils that
25985edc 3192 * use compatible ioctls
7a229387 3193 */
6b96018b 3194static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3195{
6b96018b 3196 compat_ulong_t tmp;
7a229387 3197
6b96018b 3198 if (get_user(tmp, argp))
7a229387
AB
3199 return -EFAULT;
3200 if (tmp == BRCTL_GET_VERSION)
3201 return BRCTL_VERSION + 1;
3202 return -EINVAL;
3203}
3204
6b96018b
AB
3205static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3206 unsigned int cmd, unsigned long arg)
3207{
3208 void __user *argp = compat_ptr(arg);
3209 struct sock *sk = sock->sk;
3210 struct net *net = sock_net(sk);
7a229387 3211
6b96018b
AB
3212 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3213 return siocdevprivate_ioctl(net, cmd, argp);
3214
3215 switch (cmd) {
3216 case SIOCSIFBR:
3217 case SIOCGIFBR:
3218 return old_bridge_ioctl(argp);
3219 case SIOCGIFNAME:
3220 return dev_ifname32(net, argp);
3221 case SIOCGIFCONF:
3222 return dev_ifconf(net, argp);
3223 case SIOCETHTOOL:
3224 return ethtool_ioctl(net, argp);
7a50a240
AB
3225 case SIOCWANDEV:
3226 return compat_siocwandev(net, argp);
a2116ed2
AB
3227 case SIOCGIFMAP:
3228 case SIOCSIFMAP:
3229 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3230 case SIOCBONDENSLAVE:
3231 case SIOCBONDRELEASE:
3232 case SIOCBONDSETHWADDR:
3233 case SIOCBONDSLAVEINFOQUERY:
3234 case SIOCBONDINFOQUERY:
3235 case SIOCBONDCHANGEACTIVE:
3236 return bond_ioctl(net, cmd, argp);
3237 case SIOCADDRT:
3238 case SIOCDELRT:
3239 return routing_ioctl(net, sock, cmd, argp);
3240 case SIOCGSTAMP:
3241 return do_siocgstamp(net, sock, cmd, argp);
3242 case SIOCGSTAMPNS:
3243 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3244 case SIOCSHWTSTAMP:
3245 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3246
3247 case FIOSETOWN:
3248 case SIOCSPGRP:
3249 case FIOGETOWN:
3250 case SIOCGPGRP:
3251 case SIOCBRADDBR:
3252 case SIOCBRDELBR:
3253 case SIOCGIFVLAN:
3254 case SIOCSIFVLAN:
3255 case SIOCADDDLCI:
3256 case SIOCDELDLCI:
3257 return sock_ioctl(file, cmd, arg);
3258
3259 case SIOCGIFFLAGS:
3260 case SIOCSIFFLAGS:
3261 case SIOCGIFMETRIC:
3262 case SIOCSIFMETRIC:
3263 case SIOCGIFMTU:
3264 case SIOCSIFMTU:
3265 case SIOCGIFMEM:
3266 case SIOCSIFMEM:
3267 case SIOCGIFHWADDR:
3268 case SIOCSIFHWADDR:
3269 case SIOCADDMULTI:
3270 case SIOCDELMULTI:
3271 case SIOCGIFINDEX:
6b96018b
AB
3272 case SIOCGIFADDR:
3273 case SIOCSIFADDR:
3274 case SIOCSIFHWBROADCAST:
6b96018b 3275 case SIOCDIFADDR:
6b96018b
AB
3276 case SIOCGIFBRDADDR:
3277 case SIOCSIFBRDADDR:
3278 case SIOCGIFDSTADDR:
3279 case SIOCSIFDSTADDR:
3280 case SIOCGIFNETMASK:
3281 case SIOCSIFNETMASK:
3282 case SIOCSIFPFLAGS:
3283 case SIOCGIFPFLAGS:
3284 case SIOCGIFTXQLEN:
3285 case SIOCSIFTXQLEN:
3286 case SIOCBRADDIF:
3287 case SIOCBRDELIF:
9177efd3
AB
3288 case SIOCSIFNAME:
3289 case SIOCGMIIPHY:
3290 case SIOCGMIIREG:
3291 case SIOCSMIIREG:
6b96018b 3292 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3293
6b96018b
AB
3294 case SIOCSARP:
3295 case SIOCGARP:
3296 case SIOCDARP:
6b96018b 3297 case SIOCATMARK:
9177efd3
AB
3298 return sock_do_ioctl(net, sock, cmd, arg);
3299 }
3300
6b96018b
AB
3301 return -ENOIOCTLCMD;
3302}
7a229387 3303
95c96174 3304static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3305 unsigned long arg)
89bbfc95
SP
3306{
3307 struct socket *sock = file->private_data;
3308 int ret = -ENOIOCTLCMD;
87de87d5
DM
3309 struct sock *sk;
3310 struct net *net;
3311
3312 sk = sock->sk;
3313 net = sock_net(sk);
89bbfc95
SP
3314
3315 if (sock->ops->compat_ioctl)
3316 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3317
87de87d5
DM
3318 if (ret == -ENOIOCTLCMD &&
3319 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3320 ret = compat_wext_handle_ioctl(net, cmd, arg);
3321
6b96018b
AB
3322 if (ret == -ENOIOCTLCMD)
3323 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3324
89bbfc95
SP
3325 return ret;
3326}
3327#endif
3328
ac5a488e
SS
3329int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3330{
3331 return sock->ops->bind(sock, addr, addrlen);
3332}
c6d409cf 3333EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3334
3335int kernel_listen(struct socket *sock, int backlog)
3336{
3337 return sock->ops->listen(sock, backlog);
3338}
c6d409cf 3339EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3340
3341int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3342{
3343 struct sock *sk = sock->sk;
3344 int err;
3345
3346 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3347 newsock);
3348 if (err < 0)
3349 goto done;
3350
3351 err = sock->ops->accept(sock, *newsock, flags);
3352 if (err < 0) {
3353 sock_release(*newsock);
fa8705b0 3354 *newsock = NULL;
ac5a488e
SS
3355 goto done;
3356 }
3357
3358 (*newsock)->ops = sock->ops;
1b08534e 3359 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3360
3361done:
3362 return err;
3363}
c6d409cf 3364EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3365
3366int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3367 int flags)
ac5a488e
SS
3368{
3369 return sock->ops->connect(sock, addr, addrlen, flags);
3370}
c6d409cf 3371EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3372
3373int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3374 int *addrlen)
3375{
3376 return sock->ops->getname(sock, addr, addrlen, 0);
3377}
c6d409cf 3378EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3379
3380int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3381 int *addrlen)
3382{
3383 return sock->ops->getname(sock, addr, addrlen, 1);
3384}
c6d409cf 3385EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3386
3387int kernel_getsockopt(struct socket *sock, int level, int optname,
3388 char *optval, int *optlen)
3389{
3390 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3391 char __user *uoptval;
3392 int __user *uoptlen;
ac5a488e
SS
3393 int err;
3394
fb8621bb
NK
3395 uoptval = (char __user __force *) optval;
3396 uoptlen = (int __user __force *) optlen;
3397
ac5a488e
SS
3398 set_fs(KERNEL_DS);
3399 if (level == SOL_SOCKET)
fb8621bb 3400 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3401 else
fb8621bb
NK
3402 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3403 uoptlen);
ac5a488e
SS
3404 set_fs(oldfs);
3405 return err;
3406}
c6d409cf 3407EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3408
3409int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3410 char *optval, unsigned int optlen)
ac5a488e
SS
3411{
3412 mm_segment_t oldfs = get_fs();
fb8621bb 3413 char __user *uoptval;
ac5a488e
SS
3414 int err;
3415
fb8621bb
NK
3416 uoptval = (char __user __force *) optval;
3417
ac5a488e
SS
3418 set_fs(KERNEL_DS);
3419 if (level == SOL_SOCKET)
fb8621bb 3420 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3421 else
fb8621bb 3422 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3423 optlen);
3424 set_fs(oldfs);
3425 return err;
3426}
c6d409cf 3427EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3428
3429int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3430 size_t size, int flags)
3431{
3432 if (sock->ops->sendpage)
3433 return sock->ops->sendpage(sock, page, offset, size, flags);
3434
3435 return sock_no_sendpage(sock, page, offset, size, flags);
3436}
c6d409cf 3437EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3438
3439int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3440{
3441 mm_segment_t oldfs = get_fs();
3442 int err;
3443
3444 set_fs(KERNEL_DS);
3445 err = sock->ops->ioctl(sock, cmd, arg);
3446 set_fs(oldfs);
3447
3448 return err;
3449}
c6d409cf 3450EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3451
91cf45f0
TM
3452int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3453{
3454 return sock->ops->shutdown(sock, how);
3455}
91cf45f0 3456EXPORT_SYMBOL(kernel_sock_shutdown);