net: Add net_ratelimited_function and net_<level>_ratelimited macros
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
43db362d 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
43db362d 212static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
3ba13d17 313static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 314 .d_dname = sockfs_dname,
1da177e4
LT
315};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and maps them to fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
7cbe66b6 349static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
357 if (unlikely(fd < 0))
358 return fd;
1da177e4 359
4b936885 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
2c48b9c4 361 if (unlikely(!path.dentry)) {
7cbe66b6 362 put_unused_fd(fd);
39d8c1b6 363 return -ENOMEM;
7cbe66b6 364 }
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
cc3808f8
AV
372 if (unlikely(!file)) {
373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
cc3808f8
AV
376 put_unused_fd(fd);
377 return -ENFILE;
378 }
379
380 sock->file = file;
77d27200 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
382 file->f_pos = 0;
383 file->private_data = sock;
1da177e4 384
7cbe66b6
AV
385 *f = file;
386 return fd;
39d8c1b6
DM
387}
388
/*
 * Allocate a file for the socket and install it in the caller's fd table.
 * Returns the new descriptor, or the negative errno from sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
EXPORT_SYMBOL(sock_map_fd);
1da177e4 400
6cb153ca
BL
401static struct socket *sock_from_file(struct file *file, int *err)
402{
6cb153ca
BL
403 if (file->f_op == &socket_file_ops)
404 return file->private_data; /* set in sock_map_fd */
405
23bb80d2
ED
406 *err = -ENOTSOCK;
407 return NULL;
6cb153ca
BL
408}
409
1da177e4 410/**
c6d409cf 411 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
412 * @fd: file handle
413 * @err: pointer to an error code return
414 *
415 * The file handle passed in is locked and the socket it is bound
416 * too is returned. If an error occurs the err pointer is overwritten
417 * with a negative errno code and NULL is returned. The function checks
418 * for both invalid handles and passing a handle which is not a socket.
419 *
420 * On a success the socket object pointer is returned.
421 */
422
423struct socket *sockfd_lookup(int fd, int *err)
424{
425 struct file *file;
1da177e4
LT
426 struct socket *sock;
427
89bddce5
SH
428 file = fget(fd);
429 if (!file) {
1da177e4
LT
430 *err = -EBADF;
431 return NULL;
432 }
89bddce5 433
6cb153ca
BL
434 sock = sock_from_file(file, err);
435 if (!sock)
1da177e4 436 fput(file);
6cb153ca
BL
437 return sock;
438}
c6d409cf 439EXPORT_SYMBOL(sockfd_lookup);
1da177e4 440
6cb153ca
BL
441static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
442{
443 struct file *file;
444 struct socket *sock;
445
3672558c 446 *err = -EBADF;
6cb153ca
BL
447 file = fget_light(fd, fput_needed);
448 if (file) {
449 sock = sock_from_file(file, err);
450 if (sock)
451 return sock;
452 fput_light(file, *fput_needed);
1da177e4 453 }
6cb153ca 454 return NULL;
1da177e4
LT
455}
456
457/**
458 * sock_alloc - allocate a socket
89bddce5 459 *
1da177e4
LT
460 * Allocate a new inode and socket object. The two are bound together
461 * and initialised. The socket is then returned. If we are out of inodes
462 * NULL is returned.
463 */
464
465static struct socket *sock_alloc(void)
466{
89bddce5
SH
467 struct inode *inode;
468 struct socket *sock;
1da177e4 469
a209dfc7 470 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
471 if (!inode)
472 return NULL;
473
474 sock = SOCKET_I(inode);
475
29a020d3 476 kmemcheck_annotate_bitfield(sock, type);
85fe4025 477 inode->i_ino = get_next_ino();
89bddce5 478 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
479 inode->i_uid = current_fsuid();
480 inode->i_gid = current_fsgid();
1da177e4 481
4e69489a 482 percpu_add(sockets_in_use, 1);
1da177e4
LT
483 return sock;
484}
485
/*
 *	A socket inode is not supposed to be opened directly, but /proc
 *	offers a way to reach it. This handler keeps that door shut by
 *	failing every open attempt with -ENXIO; both arguments are ignored.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
496
4b6f5d20 497const struct file_operations bad_sock_fops = {
1da177e4
LT
498 .owner = THIS_MODULE,
499 .open = sock_no_open,
6038f373 500 .llseek = noop_llseek,
1da177e4
LT
501};
502
503/**
504 * sock_release - close a socket
505 * @sock: socket to close
506 *
507 * The socket is released from the protocol stack if it has a release
508 * callback, and the inode is then released if the socket is bound to
89bddce5 509 * an inode not a file.
1da177e4 510 */
89bddce5 511
1da177e4
LT
512void sock_release(struct socket *sock)
513{
514 if (sock->ops) {
515 struct module *owner = sock->ops->owner;
516
517 sock->ops->release(sock);
518 sock->ops = NULL;
519 module_put(owner);
520 }
521
eaefd110 522 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
523 printk(KERN_ERR "sock_release: fasync list not empty!\n");
524
4e69489a 525 percpu_sub(sockets_in_use, 1);
1da177e4
LT
526 if (!sock->file) {
527 iput(SOCK_INODE(sock));
528 return;
529 }
89bddce5 530 sock->file = NULL;
1da177e4 531}
c6d409cf 532EXPORT_SYMBOL(sock_release);
1da177e4 533
2244d07b 534int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 535{
2244d07b 536 *tx_flags = 0;
20d49473 537 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 538 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 539 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 540 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
541 if (sock_flag(sk, SOCK_WIFI_STATUS))
542 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
543 return 0;
544}
545EXPORT_SYMBOL(sock_tx_timestamp);
546
228e548e
AB
547static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
548 struct msghdr *msg, size_t size)
1da177e4
LT
549{
550 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4 551
f8451725
HX
552 sock_update_classid(sock->sk);
553
5bc1421e
NH
554 sock_update_netprioidx(sock->sk);
555
1da177e4
LT
556 si->sock = sock;
557 si->scm = NULL;
558 si->msg = msg;
559 si->size = size;
560
1da177e4
LT
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
228e548e
AB
564static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
565 struct msghdr *msg, size_t size)
566{
567 int err = security_socket_sendmsg(sock, msg, size);
568
569 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
570}
571
1da177e4
LT
572int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
573{
574 struct kiocb iocb;
575 struct sock_iocb siocb;
576 int ret;
577
578 init_sync_kiocb(&iocb, NULL);
579 iocb.private = &siocb;
580 ret = __sock_sendmsg(&iocb, sock, msg, size);
581 if (-EIOCBQUEUED == ret)
582 ret = wait_on_sync_kiocb(&iocb);
583 return ret;
584}
c6d409cf 585EXPORT_SYMBOL(sock_sendmsg);
1da177e4 586
894dc24c 587static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
588{
589 struct kiocb iocb;
590 struct sock_iocb siocb;
591 int ret;
592
593 init_sync_kiocb(&iocb, NULL);
594 iocb.private = &siocb;
595 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
596 if (-EIOCBQUEUED == ret)
597 ret = wait_on_sync_kiocb(&iocb);
598 return ret;
599}
600
1da177e4
LT
601int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
602 struct kvec *vec, size_t num, size_t size)
603{
604 mm_segment_t oldfs = get_fs();
605 int result;
606
607 set_fs(KERNEL_DS);
608 /*
609 * the following is safe, since for compiler definitions of kvec and
610 * iovec are identical, yielding the same in-core layout and alignment
611 */
89bddce5 612 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
613 msg->msg_iovlen = num;
614 result = sock_sendmsg(sock, msg, size);
615 set_fs(oldfs);
616 return result;
617}
c6d409cf 618EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 619
20d49473
PO
620static int ktime2ts(ktime_t kt, struct timespec *ts)
621{
622 if (kt.tv64) {
623 *ts = ktime_to_timespec(kt);
624 return 1;
625 } else {
626 return 0;
627 }
628}
629
92f37fd2
ED
630/*
631 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
632 */
633void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
634 struct sk_buff *skb)
635{
20d49473
PO
636 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
637 struct timespec ts[3];
638 int empty = 1;
639 struct skb_shared_hwtstamps *shhwtstamps =
640 skb_hwtstamps(skb);
641
642 /* Race occurred between timestamp enabling and packet
643 receiving. Fill in the current time for now. */
644 if (need_software_tstamp && skb->tstamp.tv64 == 0)
645 __net_timestamp(skb);
646
647 if (need_software_tstamp) {
648 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
649 struct timeval tv;
650 skb_get_timestamp(skb, &tv);
651 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
652 sizeof(tv), &tv);
653 } else {
842509b8 654 skb_get_timestampns(skb, &ts[0]);
20d49473 655 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 656 sizeof(ts[0]), &ts[0]);
20d49473
PO
657 }
658 }
659
660
661 memset(ts, 0, sizeof(ts));
662 if (skb->tstamp.tv64 &&
663 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
664 skb_get_timestampns(skb, ts + 0);
665 empty = 0;
666 }
667 if (shhwtstamps) {
668 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
669 ktime2ts(shhwtstamps->syststamp, ts + 1))
670 empty = 0;
671 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
672 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
673 empty = 0;
92f37fd2 674 }
20d49473
PO
675 if (!empty)
676 put_cmsg(msg, SOL_SOCKET,
677 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 678}
7c81fd8b
ACM
679EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
680
6e3e939f
JB
681void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
682 struct sk_buff *skb)
683{
684 int ack;
685
686 if (!sock_flag(sk, SOCK_WIFI_STATUS))
687 return;
688 if (!skb->wifi_acked_valid)
689 return;
690
691 ack = skb->wifi_acked;
692
693 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
694}
695EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
696
11165f14 697static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
698 struct sk_buff *skb)
3b885787
NH
699{
700 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
701 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
702 sizeof(__u32), &skb->dropcount);
703}
704
/* Deliver both the timestamp and the drop-count control messages. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 712
a2e27255
ACM
713static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
714 struct msghdr *msg, size_t size, int flags)
1da177e4 715{
1da177e4
LT
716 struct sock_iocb *si = kiocb_to_siocb(iocb);
717
f8451725
HX
718 sock_update_classid(sock->sk);
719
1da177e4
LT
720 si->sock = sock;
721 si->scm = NULL;
722 si->msg = msg;
723 si->size = size;
724 si->flags = flags;
725
1da177e4
LT
726 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
727}
728
a2e27255
ACM
729static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
730 struct msghdr *msg, size_t size, int flags)
731{
732 int err = security_socket_recvmsg(sock, msg, size, flags);
733
734 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
735}
736
89bddce5 737int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
738 size_t size, int flags)
739{
740 struct kiocb iocb;
741 struct sock_iocb siocb;
742 int ret;
743
89bddce5 744 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
745 iocb.private = &siocb;
746 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
747 if (-EIOCBQUEUED == ret)
748 ret = wait_on_sync_kiocb(&iocb);
749 return ret;
750}
c6d409cf 751EXPORT_SYMBOL(sock_recvmsg);
1da177e4 752
a2e27255
ACM
753static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
754 size_t size, int flags)
755{
756 struct kiocb iocb;
757 struct sock_iocb siocb;
758 int ret;
759
760 init_sync_kiocb(&iocb, NULL);
761 iocb.private = &siocb;
762 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
763 if (-EIOCBQUEUED == ret)
764 ret = wait_on_sync_kiocb(&iocb);
765 return ret;
766}
767
c1249c0a
ML
768/**
769 * kernel_recvmsg - Receive a message from a socket (kernel space)
770 * @sock: The socket to receive the message from
771 * @msg: Received message
772 * @vec: Input s/g array for message data
773 * @num: Size of input s/g array
774 * @size: Number of bytes to read
775 * @flags: Message flags (MSG_DONTWAIT, etc...)
776 *
777 * On return the msg structure contains the scatter/gather array passed in the
778 * vec argument. The array is modified so that it consists of the unfilled
779 * portion of the original array.
780 *
781 * The returned value is the total number of bytes received, or an error.
782 */
89bddce5
SH
783int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
784 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
785{
786 mm_segment_t oldfs = get_fs();
787 int result;
788
789 set_fs(KERNEL_DS);
790 /*
791 * the following is safe, since for compiler definitions of kvec and
792 * iovec are identical, yielding the same in-core layout and alignment
793 */
89bddce5 794 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
795 result = sock_recvmsg(sock, msg, size, flags);
796 set_fs(oldfs);
797 return result;
798}
c6d409cf 799EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
800
801static void sock_aio_dtor(struct kiocb *iocb)
802{
803 kfree(iocb->private);
804}
805
ce1d4d3e
CH
806static ssize_t sock_sendpage(struct file *file, struct page *page,
807 int offset, size_t size, loff_t *ppos, int more)
1da177e4 808{
1da177e4
LT
809 struct socket *sock;
810 int flags;
811
ce1d4d3e
CH
812 sock = file->private_data;
813
35f9c09f
ED
814 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
815 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
816 flags |= more;
ce1d4d3e 817
e6949583 818 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 819}
1da177e4 820
9c55e01c 821static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 822 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
823 unsigned int flags)
824{
825 struct socket *sock = file->private_data;
826
997b37da
RDC
827 if (unlikely(!sock->ops->splice_read))
828 return -EINVAL;
829
f8451725
HX
830 sock_update_classid(sock->sk);
831
9c55e01c
JA
832 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
833}
834
ce1d4d3e 835static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 836 struct sock_iocb *siocb)
ce1d4d3e
CH
837{
838 if (!is_sync_kiocb(iocb)) {
839 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
840 if (!siocb)
841 return NULL;
1da177e4
LT
842 iocb->ki_dtor = sock_aio_dtor;
843 }
1da177e4 844
ce1d4d3e 845 siocb->kiocb = iocb;
ce1d4d3e
CH
846 iocb->private = siocb;
847 return siocb;
1da177e4
LT
848}
849
ce1d4d3e 850static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
851 struct file *file, const struct iovec *iov,
852 unsigned long nr_segs)
ce1d4d3e
CH
853{
854 struct socket *sock = file->private_data;
855 size_t size = 0;
856 int i;
1da177e4 857
89bddce5
SH
858 for (i = 0; i < nr_segs; i++)
859 size += iov[i].iov_len;
1da177e4 860
ce1d4d3e
CH
861 msg->msg_name = NULL;
862 msg->msg_namelen = 0;
863 msg->msg_control = NULL;
864 msg->msg_controllen = 0;
89bddce5 865 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
866 msg->msg_iovlen = nr_segs;
867 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
868
869 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
870}
871
027445c3
BP
872static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
873 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
874{
875 struct sock_iocb siocb, *x;
876
1da177e4
LT
877 if (pos != 0)
878 return -ESPIPE;
027445c3
BP
879
880 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
881 return 0;
882
027445c3
BP
883
884 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
885 if (!x)
886 return -ENOMEM;
027445c3 887 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
888}
889
ce1d4d3e 890static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
891 struct file *file, const struct iovec *iov,
892 unsigned long nr_segs)
1da177e4 893{
ce1d4d3e
CH
894 struct socket *sock = file->private_data;
895 size_t size = 0;
896 int i;
1da177e4 897
89bddce5
SH
898 for (i = 0; i < nr_segs; i++)
899 size += iov[i].iov_len;
1da177e4 900
ce1d4d3e
CH
901 msg->msg_name = NULL;
902 msg->msg_namelen = 0;
903 msg->msg_control = NULL;
904 msg->msg_controllen = 0;
89bddce5 905 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
906 msg->msg_iovlen = nr_segs;
907 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
908 if (sock->type == SOCK_SEQPACKET)
909 msg->msg_flags |= MSG_EOR;
1da177e4 910
ce1d4d3e 911 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
912}
913
027445c3
BP
914static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
915 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
916{
917 struct sock_iocb siocb, *x;
1da177e4 918
ce1d4d3e
CH
919 if (pos != 0)
920 return -ESPIPE;
027445c3 921
027445c3 922 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
923 if (!x)
924 return -ENOMEM;
1da177e4 925
027445c3 926 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
927}
928
1da177e4
LT
929/*
930 * Atomic setting of ioctl hooks to avoid race
931 * with module unload.
932 */
933
4a3e2f71 934static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 935static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 936
881d966b 937void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 938{
4a3e2f71 939 mutex_lock(&br_ioctl_mutex);
1da177e4 940 br_ioctl_hook = hook;
4a3e2f71 941 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
942}
943EXPORT_SYMBOL(brioctl_set);
944
4a3e2f71 945static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 946static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 947
881d966b 948void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 949{
4a3e2f71 950 mutex_lock(&vlan_ioctl_mutex);
1da177e4 951 vlan_ioctl_hook = hook;
4a3e2f71 952 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
953}
954EXPORT_SYMBOL(vlan_ioctl_set);
955
4a3e2f71 956static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 957static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 958
89bddce5 959void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 960{
4a3e2f71 961 mutex_lock(&dlci_ioctl_mutex);
1da177e4 962 dlci_ioctl_hook = hook;
4a3e2f71 963 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
964}
965EXPORT_SYMBOL(dlci_ioctl_set);
966
6b96018b
AB
967static long sock_do_ioctl(struct net *net, struct socket *sock,
968 unsigned int cmd, unsigned long arg)
969{
970 int err;
971 void __user *argp = (void __user *)arg;
972
973 err = sock->ops->ioctl(sock, cmd, arg);
974
975 /*
976 * If this ioctl is unknown try to hand it down
977 * to the NIC driver.
978 */
979 if (err == -ENOIOCTLCMD)
980 err = dev_ioctl(net, cmd, argp);
981
982 return err;
983}
984
1da177e4
LT
985/*
986 * With an ioctl, arg may well be a user mode pointer, but we don't know
987 * what to do with it - that's up to the protocol still.
988 */
989
990static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
991{
992 struct socket *sock;
881d966b 993 struct sock *sk;
1da177e4
LT
994 void __user *argp = (void __user *)arg;
995 int pid, err;
881d966b 996 struct net *net;
1da177e4 997
b69aee04 998 sock = file->private_data;
881d966b 999 sk = sock->sk;
3b1e0a65 1000 net = sock_net(sk);
1da177e4 1001 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1002 err = dev_ioctl(net, cmd, argp);
1da177e4 1003 } else
3d23e349 1004#ifdef CONFIG_WEXT_CORE
1da177e4 1005 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1006 err = dev_ioctl(net, cmd, argp);
1da177e4 1007 } else
3d23e349 1008#endif
89bddce5 1009 switch (cmd) {
1da177e4
LT
1010 case FIOSETOWN:
1011 case SIOCSPGRP:
1012 err = -EFAULT;
1013 if (get_user(pid, (int __user *)argp))
1014 break;
1015 err = f_setown(sock->file, pid, 1);
1016 break;
1017 case FIOGETOWN:
1018 case SIOCGPGRP:
609d7fa9 1019 err = put_user(f_getown(sock->file),
89bddce5 1020 (int __user *)argp);
1da177e4
LT
1021 break;
1022 case SIOCGIFBR:
1023 case SIOCSIFBR:
1024 case SIOCBRADDBR:
1025 case SIOCBRDELBR:
1026 err = -ENOPKG;
1027 if (!br_ioctl_hook)
1028 request_module("bridge");
1029
4a3e2f71 1030 mutex_lock(&br_ioctl_mutex);
89bddce5 1031 if (br_ioctl_hook)
881d966b 1032 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1033 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1034 break;
1035 case SIOCGIFVLAN:
1036 case SIOCSIFVLAN:
1037 err = -ENOPKG;
1038 if (!vlan_ioctl_hook)
1039 request_module("8021q");
1040
4a3e2f71 1041 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1042 if (vlan_ioctl_hook)
881d966b 1043 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1044 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1045 break;
1da177e4
LT
1046 case SIOCADDDLCI:
1047 case SIOCDELDLCI:
1048 err = -ENOPKG;
1049 if (!dlci_ioctl_hook)
1050 request_module("dlci");
1051
7512cbf6
PE
1052 mutex_lock(&dlci_ioctl_mutex);
1053 if (dlci_ioctl_hook)
1da177e4 1054 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1055 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1056 break;
1057 default:
6b96018b 1058 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1059 break;
89bddce5 1060 }
1da177e4
LT
1061 return err;
1062}
1063
1064int sock_create_lite(int family, int type, int protocol, struct socket **res)
1065{
1066 int err;
1067 struct socket *sock = NULL;
89bddce5 1068
1da177e4
LT
1069 err = security_socket_create(family, type, protocol, 1);
1070 if (err)
1071 goto out;
1072
1073 sock = sock_alloc();
1074 if (!sock) {
1075 err = -ENOMEM;
1076 goto out;
1077 }
1078
1da177e4 1079 sock->type = type;
7420ed23
VY
1080 err = security_socket_post_create(sock, family, type, protocol, 1);
1081 if (err)
1082 goto out_release;
1083
1da177e4
LT
1084out:
1085 *res = sock;
1086 return err;
7420ed23
VY
1087out_release:
1088 sock_release(sock);
1089 sock = NULL;
1090 goto out;
1da177e4 1091}
c6d409cf 1092EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1093
1094/* No kernel lock held - perfect */
89bddce5 1095static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1096{
1097 struct socket *sock;
1098
1099 /*
89bddce5 1100 * We can't return errors to poll, so it's either yes or no.
1da177e4 1101 */
b69aee04 1102 sock = file->private_data;
1da177e4
LT
1103 return sock->ops->poll(file, sock, wait);
1104}
1105
89bddce5 1106static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1107{
b69aee04 1108 struct socket *sock = file->private_data;
1da177e4
LT
1109
1110 return sock->ops->mmap(file, sock, vma);
1111}
1112
20380731 1113static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1114{
1115 /*
89bddce5
SH
1116 * It was possible the inode is NULL we were
1117 * closing an unfinished socket.
1da177e4
LT
1118 */
1119
89bddce5 1120 if (!inode) {
1da177e4
LT
1121 printk(KERN_DEBUG "sock_close: NULL inode\n");
1122 return 0;
1123 }
1da177e4
LT
1124 sock_release(SOCKET_I(inode));
1125 return 0;
1126}
1127
1128/*
1129 * Update the socket async list
1130 *
1131 * Fasync_list locking strategy.
1132 *
1133 * 1. fasync_list is modified only under process context socket lock
1134 * i.e. under semaphore.
1135 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1136 * or under socket lock
1da177e4
LT
1137 */
1138
1139static int sock_fasync(int fd, struct file *filp, int on)
1140{
989a2979
ED
1141 struct socket *sock = filp->private_data;
1142 struct sock *sk = sock->sk;
eaefd110 1143 struct socket_wq *wq;
1da177e4 1144
989a2979 1145 if (sk == NULL)
1da177e4 1146 return -EINVAL;
1da177e4
LT
1147
1148 lock_sock(sk);
eaefd110
ED
1149 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1150 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1151
eaefd110 1152 if (!wq->fasync_list)
989a2979
ED
1153 sock_reset_flag(sk, SOCK_FASYNC);
1154 else
bcdce719 1155 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1156
989a2979 1157 release_sock(sk);
1da177e4
LT
1158 return 0;
1159}
1160
43815482 1161/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1162
1163int sock_wake_async(struct socket *sock, int how, int band)
1164{
43815482
ED
1165 struct socket_wq *wq;
1166
1167 if (!sock)
1168 return -1;
1169 rcu_read_lock();
1170 wq = rcu_dereference(sock->wq);
1171 if (!wq || !wq->fasync_list) {
1172 rcu_read_unlock();
1da177e4 1173 return -1;
43815482 1174 }
89bddce5 1175 switch (how) {
8d8ad9d7 1176 case SOCK_WAKE_WAITD:
1da177e4
LT
1177 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1178 break;
1179 goto call_kill;
8d8ad9d7 1180 case SOCK_WAKE_SPACE:
1da177e4
LT
1181 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1182 break;
1183 /* fall through */
8d8ad9d7 1184 case SOCK_WAKE_IO:
89bddce5 1185call_kill:
43815482 1186 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1187 break;
8d8ad9d7 1188 case SOCK_WAKE_URG:
43815482 1189 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1190 }
43815482 1191 rcu_read_unlock();
1da177e4
LT
1192 return 0;
1193}
c6d409cf 1194EXPORT_SYMBOL(sock_wake_async);
1da177e4 1195
721db93a 1196int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1197 struct socket **res, int kern)
1da177e4
LT
1198{
1199 int err;
1200 struct socket *sock;
55737fda 1201 const struct net_proto_family *pf;
1da177e4
LT
1202
1203 /*
89bddce5 1204 * Check protocol is in range
1da177e4
LT
1205 */
1206 if (family < 0 || family >= NPROTO)
1207 return -EAFNOSUPPORT;
1208 if (type < 0 || type >= SOCK_MAX)
1209 return -EINVAL;
1210
1211 /* Compatibility.
1212
1213 This uglymoron is moved from INET layer to here to avoid
1214 deadlock in module load.
1215 */
1216 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1217 static int warned;
1da177e4
LT
1218 if (!warned) {
1219 warned = 1;
89bddce5
SH
1220 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1221 current->comm);
1da177e4
LT
1222 }
1223 family = PF_PACKET;
1224 }
1225
1226 err = security_socket_create(family, type, protocol, kern);
1227 if (err)
1228 return err;
89bddce5 1229
55737fda
SH
1230 /*
1231 * Allocate the socket and allow the family to set things up. if
1232 * the protocol is 0, the family is instructed to select an appropriate
1233 * default.
1234 */
1235 sock = sock_alloc();
1236 if (!sock) {
1237 if (net_ratelimit())
1238 printk(KERN_WARNING "socket: no more sockets\n");
1239 return -ENFILE; /* Not exactly a match, but its the
1240 closest posix thing */
1241 }
1242
1243 sock->type = type;
1244
95a5afca 1245#ifdef CONFIG_MODULES
89bddce5
SH
1246 /* Attempt to load a protocol module if the find failed.
1247 *
1248 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1249 * requested real, full-featured networking support upon configuration.
1250 * Otherwise module support will break!
1251 */
190683a9 1252 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1253 request_module("net-pf-%d", family);
1da177e4
LT
1254#endif
1255
55737fda
SH
1256 rcu_read_lock();
1257 pf = rcu_dereference(net_families[family]);
1258 err = -EAFNOSUPPORT;
1259 if (!pf)
1260 goto out_release;
1da177e4
LT
1261
1262 /*
1263 * We will call the ->create function, that possibly is in a loadable
1264 * module, so we have to bump that loadable module refcnt first.
1265 */
55737fda 1266 if (!try_module_get(pf->owner))
1da177e4
LT
1267 goto out_release;
1268
55737fda
SH
1269 /* Now protected by module ref count */
1270 rcu_read_unlock();
1271
3f378b68 1272 err = pf->create(net, sock, protocol, kern);
55737fda 1273 if (err < 0)
1da177e4 1274 goto out_module_put;
a79af59e 1275
1da177e4
LT
1276 /*
1277 * Now to bump the refcnt of the [loadable] module that owns this
1278 * socket at sock_release time we decrement its refcnt.
1279 */
55737fda
SH
1280 if (!try_module_get(sock->ops->owner))
1281 goto out_module_busy;
1282
1da177e4
LT
1283 /*
1284 * Now that we're done with the ->create function, the [loadable]
1285 * module can have its refcnt decremented
1286 */
55737fda 1287 module_put(pf->owner);
7420ed23
VY
1288 err = security_socket_post_create(sock, family, type, protocol, kern);
1289 if (err)
3b185525 1290 goto out_sock_release;
55737fda 1291 *res = sock;
1da177e4 1292
55737fda
SH
1293 return 0;
1294
1295out_module_busy:
1296 err = -EAFNOSUPPORT;
1da177e4 1297out_module_put:
55737fda
SH
1298 sock->ops = NULL;
1299 module_put(pf->owner);
1300out_sock_release:
1da177e4 1301 sock_release(sock);
55737fda
SH
1302 return err;
1303
1304out_release:
1305 rcu_read_unlock();
1306 goto out_sock_release;
1da177e4 1307}
721db93a 1308EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1309
1310int sock_create(int family, int type, int protocol, struct socket **res)
1311{
1b8d7ae4 1312 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1313}
c6d409cf 1314EXPORT_SYMBOL(sock_create);
1da177e4
LT
1315
1316int sock_create_kern(int family, int type, int protocol, struct socket **res)
1317{
1b8d7ae4 1318 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1319}
c6d409cf 1320EXPORT_SYMBOL(sock_create_kern);
1da177e4 1321
3e0fa65f 1322SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1323{
1324 int retval;
1325 struct socket *sock;
a677a039
UD
1326 int flags;
1327
e38b36f3
UD
1328 /* Check the SOCK_* constants for consistency. */
1329 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1330 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1331 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1332 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1333
a677a039 1334 flags = type & ~SOCK_TYPE_MASK;
77d27200 1335 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1336 return -EINVAL;
1337 type &= SOCK_TYPE_MASK;
1da177e4 1338
aaca0bdc
UD
1339 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1340 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1341
1da177e4
LT
1342 retval = sock_create(family, type, protocol, &sock);
1343 if (retval < 0)
1344 goto out;
1345
77d27200 1346 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1347 if (retval < 0)
1348 goto out_release;
1349
1350out:
1351 /* It may be already another descriptor 8) Not kernel problem. */
1352 return retval;
1353
1354out_release:
1355 sock_release(sock);
1356 return retval;
1357}
1358
1359/*
1360 * Create a pair of connected sockets.
1361 */
1362
3e0fa65f
HC
1363SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1364 int __user *, usockvec)
1da177e4
LT
1365{
1366 struct socket *sock1, *sock2;
1367 int fd1, fd2, err;
db349509 1368 struct file *newfile1, *newfile2;
a677a039
UD
1369 int flags;
1370
1371 flags = type & ~SOCK_TYPE_MASK;
77d27200 1372 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1373 return -EINVAL;
1374 type &= SOCK_TYPE_MASK;
1da177e4 1375
aaca0bdc
UD
1376 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1377 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1378
1da177e4
LT
1379 /*
1380 * Obtain the first socket and check if the underlying protocol
1381 * supports the socketpair call.
1382 */
1383
1384 err = sock_create(family, type, protocol, &sock1);
1385 if (err < 0)
1386 goto out;
1387
1388 err = sock_create(family, type, protocol, &sock2);
1389 if (err < 0)
1390 goto out_release_1;
1391
1392 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1393 if (err < 0)
1da177e4
LT
1394 goto out_release_both;
1395
7cbe66b6 1396 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1397 if (unlikely(fd1 < 0)) {
1398 err = fd1;
db349509 1399 goto out_release_both;
bf3c23d1 1400 }
1da177e4 1401
7cbe66b6 1402 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1403 if (unlikely(fd2 < 0)) {
1404 err = fd2;
1405 fput(newfile1);
1406 put_unused_fd(fd1);
1407 sock_release(sock2);
1408 goto out;
db349509
AV
1409 }
1410
157cf649 1411 audit_fd_pair(fd1, fd2);
db349509
AV
1412 fd_install(fd1, newfile1);
1413 fd_install(fd2, newfile2);
1da177e4
LT
1414 /* fd1 and fd2 may be already another descriptors.
1415 * Not kernel problem.
1416 */
1417
89bddce5 1418 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1419 if (!err)
1420 err = put_user(fd2, &usockvec[1]);
1421 if (!err)
1422 return 0;
1423
1424 sys_close(fd2);
1425 sys_close(fd1);
1426 return err;
1427
1da177e4 1428out_release_both:
89bddce5 1429 sock_release(sock2);
1da177e4 1430out_release_1:
89bddce5 1431 sock_release(sock1);
1da177e4
LT
1432out:
1433 return err;
1434}
1435
1da177e4
LT
1436/*
1437 * Bind a name to a socket. Nothing much to do here since it's
1438 * the protocol's responsibility to handle the local address.
1439 *
1440 * We move the socket address to kernel space before we call
1441 * the protocol layer (having also checked the address is ok).
1442 */
1443
20f37034 1444SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1445{
1446 struct socket *sock;
230b1839 1447 struct sockaddr_storage address;
6cb153ca 1448 int err, fput_needed;
1da177e4 1449
89bddce5 1450 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1451 if (sock) {
43db362d 1452 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1453 if (err >= 0) {
1454 err = security_socket_bind(sock,
230b1839 1455 (struct sockaddr *)&address,
89bddce5 1456 addrlen);
6cb153ca
BL
1457 if (!err)
1458 err = sock->ops->bind(sock,
89bddce5 1459 (struct sockaddr *)
230b1839 1460 &address, addrlen);
1da177e4 1461 }
6cb153ca 1462 fput_light(sock->file, fput_needed);
89bddce5 1463 }
1da177e4
LT
1464 return err;
1465}
1466
1da177e4
LT
1467/*
1468 * Perform a listen. Basically, we allow the protocol to do anything
1469 * necessary for a listen, and if that works, we mark the socket as
1470 * ready for listening.
1471 */
1472
3e0fa65f 1473SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1474{
1475 struct socket *sock;
6cb153ca 1476 int err, fput_needed;
b8e1f9b5 1477 int somaxconn;
89bddce5
SH
1478
1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1480 if (sock) {
8efa6e93 1481 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1482 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1483 backlog = somaxconn;
1da177e4
LT
1484
1485 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1486 if (!err)
1487 err = sock->ops->listen(sock, backlog);
1da177e4 1488
6cb153ca 1489 fput_light(sock->file, fput_needed);
1da177e4
LT
1490 }
1491 return err;
1492}
1493
1da177e4
LT
1494/*
1495 * For accept, we attempt to create a new socket, set up the link
1496 * with the client, wake up the client, then return the new
1497 * connected fd. We collect the address of the connector in kernel
1498 * space and move it to user at the very end. This is unclean because
1499 * we open the socket then return an error.
1500 *
1501 * 1003.1g adds the ability to recvmsg() to query connection pending
1502 * status to recvmsg. We need to add that support in a way thats
1503 * clean when we restucture accept also.
1504 */
1505
20f37034
HC
1506SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1507 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1508{
1509 struct socket *sock, *newsock;
39d8c1b6 1510 struct file *newfile;
6cb153ca 1511 int err, len, newfd, fput_needed;
230b1839 1512 struct sockaddr_storage address;
1da177e4 1513
77d27200 1514 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1515 return -EINVAL;
1516
1517 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1518 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1519
6cb153ca 1520 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1521 if (!sock)
1522 goto out;
1523
1524 err = -ENFILE;
c6d409cf
ED
1525 newsock = sock_alloc();
1526 if (!newsock)
1da177e4
LT
1527 goto out_put;
1528
1529 newsock->type = sock->type;
1530 newsock->ops = sock->ops;
1531
1da177e4
LT
1532 /*
1533 * We don't need try_module_get here, as the listening socket (sock)
1534 * has the protocol module (sock->ops->owner) held.
1535 */
1536 __module_get(newsock->ops->owner);
1537
7cbe66b6 1538 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1539 if (unlikely(newfd < 0)) {
1540 err = newfd;
9a1875e6
DM
1541 sock_release(newsock);
1542 goto out_put;
39d8c1b6
DM
1543 }
1544
a79af59e
FF
1545 err = security_socket_accept(sock, newsock);
1546 if (err)
39d8c1b6 1547 goto out_fd;
a79af59e 1548
1da177e4
LT
1549 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1550 if (err < 0)
39d8c1b6 1551 goto out_fd;
1da177e4
LT
1552
1553 if (upeer_sockaddr) {
230b1839 1554 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1555 &len, 2) < 0) {
1da177e4 1556 err = -ECONNABORTED;
39d8c1b6 1557 goto out_fd;
1da177e4 1558 }
43db362d 1559 err = move_addr_to_user(&address,
230b1839 1560 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1561 if (err < 0)
39d8c1b6 1562 goto out_fd;
1da177e4
LT
1563 }
1564
1565 /* File flags are not inherited via accept() unlike another OSes. */
1566
39d8c1b6
DM
1567 fd_install(newfd, newfile);
1568 err = newfd;
1da177e4 1569
1da177e4 1570out_put:
6cb153ca 1571 fput_light(sock->file, fput_needed);
1da177e4
LT
1572out:
1573 return err;
39d8c1b6 1574out_fd:
9606a216 1575 fput(newfile);
39d8c1b6 1576 put_unused_fd(newfd);
1da177e4
LT
1577 goto out_put;
1578}
1579
20f37034
HC
1580SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1581 int __user *, upeer_addrlen)
aaca0bdc 1582{
de11defe 1583 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1584}
1585
1da177e4
LT
1586/*
1587 * Attempt to connect to a socket with the server address. The address
1588 * is in user space so we verify it is OK and move it to kernel space.
1589 *
1590 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1591 * break bindings
1592 *
1593 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1594 * other SEQPACKET protocols that take time to connect() as it doesn't
1595 * include the -EINPROGRESS status for such sockets.
1596 */
1597
20f37034
HC
1598SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1599 int, addrlen)
1da177e4
LT
1600{
1601 struct socket *sock;
230b1839 1602 struct sockaddr_storage address;
6cb153ca 1603 int err, fput_needed;
1da177e4 1604
6cb153ca 1605 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1606 if (!sock)
1607 goto out;
43db362d 1608 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1609 if (err < 0)
1610 goto out_put;
1611
89bddce5 1612 err =
230b1839 1613 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1614 if (err)
1615 goto out_put;
1616
230b1839 1617 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1618 sock->file->f_flags);
1619out_put:
6cb153ca 1620 fput_light(sock->file, fput_needed);
1da177e4
LT
1621out:
1622 return err;
1623}
1624
1625/*
1626 * Get the local address ('name') of a socket object. Move the obtained
1627 * name to user space.
1628 */
1629
20f37034
HC
1630SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1631 int __user *, usockaddr_len)
1da177e4
LT
1632{
1633 struct socket *sock;
230b1839 1634 struct sockaddr_storage address;
6cb153ca 1635 int len, err, fput_needed;
89bddce5 1636
6cb153ca 1637 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1638 if (!sock)
1639 goto out;
1640
1641 err = security_socket_getsockname(sock);
1642 if (err)
1643 goto out_put;
1644
230b1839 1645 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1646 if (err)
1647 goto out_put;
43db362d 1648 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1649
1650out_put:
6cb153ca 1651 fput_light(sock->file, fput_needed);
1da177e4
LT
1652out:
1653 return err;
1654}
1655
1656/*
1657 * Get the remote address ('name') of a socket object. Move the obtained
1658 * name to user space.
1659 */
1660
20f37034
HC
1661SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1662 int __user *, usockaddr_len)
1da177e4
LT
1663{
1664 struct socket *sock;
230b1839 1665 struct sockaddr_storage address;
6cb153ca 1666 int len, err, fput_needed;
1da177e4 1667
89bddce5
SH
1668 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1669 if (sock != NULL) {
1da177e4
LT
1670 err = security_socket_getpeername(sock);
1671 if (err) {
6cb153ca 1672 fput_light(sock->file, fput_needed);
1da177e4
LT
1673 return err;
1674 }
1675
89bddce5 1676 err =
230b1839 1677 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1678 1);
1da177e4 1679 if (!err)
43db362d 1680 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1681 usockaddr_len);
6cb153ca 1682 fput_light(sock->file, fput_needed);
1da177e4
LT
1683 }
1684 return err;
1685}
1686
1687/*
1688 * Send a datagram to a given address. We move the address into kernel
1689 * space and check the user space data area is readable before invoking
1690 * the protocol.
1691 */
1692
3e0fa65f 1693SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1694 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1695 int, addr_len)
1da177e4
LT
1696{
1697 struct socket *sock;
230b1839 1698 struct sockaddr_storage address;
1da177e4
LT
1699 int err;
1700 struct msghdr msg;
1701 struct iovec iov;
6cb153ca 1702 int fput_needed;
6cb153ca 1703
253eacc0
LT
1704 if (len > INT_MAX)
1705 len = INT_MAX;
de0fa95c
PE
1706 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1707 if (!sock)
4387ff75 1708 goto out;
6cb153ca 1709
89bddce5
SH
1710 iov.iov_base = buff;
1711 iov.iov_len = len;
1712 msg.msg_name = NULL;
1713 msg.msg_iov = &iov;
1714 msg.msg_iovlen = 1;
1715 msg.msg_control = NULL;
1716 msg.msg_controllen = 0;
1717 msg.msg_namelen = 0;
6cb153ca 1718 if (addr) {
43db362d 1719 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1720 if (err < 0)
1721 goto out_put;
230b1839 1722 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1723 msg.msg_namelen = addr_len;
1da177e4
LT
1724 }
1725 if (sock->file->f_flags & O_NONBLOCK)
1726 flags |= MSG_DONTWAIT;
1727 msg.msg_flags = flags;
1728 err = sock_sendmsg(sock, &msg, len);
1729
89bddce5 1730out_put:
de0fa95c 1731 fput_light(sock->file, fput_needed);
4387ff75 1732out:
1da177e4
LT
1733 return err;
1734}
1735
1736/*
89bddce5 1737 * Send a datagram down a socket.
1da177e4
LT
1738 */
1739
3e0fa65f 1740SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1741 unsigned int, flags)
1da177e4
LT
1742{
1743 return sys_sendto(fd, buff, len, flags, NULL, 0);
1744}
1745
1746/*
89bddce5 1747 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1748 * sender. We verify the buffers are writable and if needed move the
1749 * sender address from kernel to user space.
1750 */
1751
3e0fa65f 1752SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1753 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1754 int __user *, addr_len)
1da177e4
LT
1755{
1756 struct socket *sock;
1757 struct iovec iov;
1758 struct msghdr msg;
230b1839 1759 struct sockaddr_storage address;
89bddce5 1760 int err, err2;
6cb153ca
BL
1761 int fput_needed;
1762
253eacc0
LT
1763 if (size > INT_MAX)
1764 size = INT_MAX;
de0fa95c 1765 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1766 if (!sock)
de0fa95c 1767 goto out;
1da177e4 1768
89bddce5
SH
1769 msg.msg_control = NULL;
1770 msg.msg_controllen = 0;
1771 msg.msg_iovlen = 1;
1772 msg.msg_iov = &iov;
1773 iov.iov_len = size;
1774 iov.iov_base = ubuf;
230b1839
YH
1775 msg.msg_name = (struct sockaddr *)&address;
1776 msg.msg_namelen = sizeof(address);
1da177e4
LT
1777 if (sock->file->f_flags & O_NONBLOCK)
1778 flags |= MSG_DONTWAIT;
89bddce5 1779 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1780
89bddce5 1781 if (err >= 0 && addr != NULL) {
43db362d 1782 err2 = move_addr_to_user(&address,
230b1839 1783 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1784 if (err2 < 0)
1785 err = err2;
1da177e4 1786 }
de0fa95c
PE
1787
1788 fput_light(sock->file, fput_needed);
4387ff75 1789out:
1da177e4
LT
1790 return err;
1791}
1792
1793/*
89bddce5 1794 * Receive a datagram from a socket.
1da177e4
LT
1795 */
1796
89bddce5 1797asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1798 unsigned int flags)
1da177e4
LT
1799{
1800 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1801}
1802
1803/*
1804 * Set a socket option. Because we don't know the option lengths we have
1805 * to pass the user mode parameter for the protocols to sort out.
1806 */
1807
20f37034
HC
1808SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1809 char __user *, optval, int, optlen)
1da177e4 1810{
6cb153ca 1811 int err, fput_needed;
1da177e4
LT
1812 struct socket *sock;
1813
1814 if (optlen < 0)
1815 return -EINVAL;
89bddce5
SH
1816
1817 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1818 if (sock != NULL) {
1819 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1820 if (err)
1821 goto out_put;
1da177e4
LT
1822
1823 if (level == SOL_SOCKET)
89bddce5
SH
1824 err =
1825 sock_setsockopt(sock, level, optname, optval,
1826 optlen);
1da177e4 1827 else
89bddce5
SH
1828 err =
1829 sock->ops->setsockopt(sock, level, optname, optval,
1830 optlen);
6cb153ca
BL
1831out_put:
1832 fput_light(sock->file, fput_needed);
1da177e4
LT
1833 }
1834 return err;
1835}
1836
1837/*
1838 * Get a socket option. Because we don't know the option lengths we have
1839 * to pass a user mode parameter for the protocols to sort out.
1840 */
1841
20f37034
HC
1842SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1843 char __user *, optval, int __user *, optlen)
1da177e4 1844{
6cb153ca 1845 int err, fput_needed;
1da177e4
LT
1846 struct socket *sock;
1847
89bddce5
SH
1848 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1849 if (sock != NULL) {
6cb153ca
BL
1850 err = security_socket_getsockopt(sock, level, optname);
1851 if (err)
1852 goto out_put;
1da177e4
LT
1853
1854 if (level == SOL_SOCKET)
89bddce5
SH
1855 err =
1856 sock_getsockopt(sock, level, optname, optval,
1857 optlen);
1da177e4 1858 else
89bddce5
SH
1859 err =
1860 sock->ops->getsockopt(sock, level, optname, optval,
1861 optlen);
6cb153ca
BL
1862out_put:
1863 fput_light(sock->file, fput_needed);
1da177e4
LT
1864 }
1865 return err;
1866}
1867
1da177e4
LT
1868/*
1869 * Shutdown a socket.
1870 */
1871
754fe8d2 1872SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1873{
6cb153ca 1874 int err, fput_needed;
1da177e4
LT
1875 struct socket *sock;
1876
89bddce5
SH
1877 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1878 if (sock != NULL) {
1da177e4 1879 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1880 if (!err)
1881 err = sock->ops->shutdown(sock, how);
1882 fput_light(sock->file, fput_needed);
1da177e4
LT
1883 }
1884 return err;
1885}
1886
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG(msg, member) expands to the address of @member in either
 * msg##_compat or msg, chosen by MSG_CMSG_COMPAT in a variable named
 * 'flags'; both 'flags' and msg##_compat must be in scope at the point
 * of use.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1893
c71d8ebe
TH
1894struct used_address {
1895 struct sockaddr_storage name;
1896 unsigned int name_len;
1897};
1898
228e548e 1899static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1900 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1901 struct used_address *used_address)
1da177e4 1902{
89bddce5
SH
1903 struct compat_msghdr __user *msg_compat =
1904 (struct compat_msghdr __user *)msg;
230b1839 1905 struct sockaddr_storage address;
1da177e4 1906 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1907 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1908 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1909 /* 20 is size of ipv6_pktinfo */
1da177e4 1910 unsigned char *ctl_buf = ctl;
a74e9106 1911 int err, ctl_len, total_len;
89bddce5 1912
1da177e4
LT
1913 err = -EFAULT;
1914 if (MSG_CMSG_COMPAT & flags) {
228e548e 1915 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1916 return -EFAULT;
228e548e 1917 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1918 return -EFAULT;
1919
228e548e 1920 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
1921 err = -EMSGSIZE;
1922 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1923 goto out;
1924 err = -ENOMEM;
1925 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
1926 GFP_KERNEL);
1da177e4 1927 if (!iov)
228e548e 1928 goto out;
1da177e4
LT
1929 }
1930
1931 /* This will also move the address data into kernel space */
1932 if (MSG_CMSG_COMPAT & flags) {
43db362d 1933 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 1934 } else
43db362d 1935 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 1936 if (err < 0)
1da177e4
LT
1937 goto out_freeiov;
1938 total_len = err;
1939
1940 err = -ENOBUFS;
1941
228e548e 1942 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1943 goto out_freeiov;
228e548e 1944 ctl_len = msg_sys->msg_controllen;
1da177e4 1945 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1946 err =
228e548e 1947 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1948 sizeof(ctl));
1da177e4
LT
1949 if (err)
1950 goto out_freeiov;
228e548e
AB
1951 ctl_buf = msg_sys->msg_control;
1952 ctl_len = msg_sys->msg_controllen;
1da177e4 1953 } else if (ctl_len) {
89bddce5 1954 if (ctl_len > sizeof(ctl)) {
1da177e4 1955 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1956 if (ctl_buf == NULL)
1da177e4
LT
1957 goto out_freeiov;
1958 }
1959 err = -EFAULT;
1960 /*
228e548e 1961 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1962 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1963 * checking falls down on this.
1964 */
fb8621bb 1965 if (copy_from_user(ctl_buf,
228e548e 1966 (void __user __force *)msg_sys->msg_control,
89bddce5 1967 ctl_len))
1da177e4 1968 goto out_freectl;
228e548e 1969 msg_sys->msg_control = ctl_buf;
1da177e4 1970 }
228e548e 1971 msg_sys->msg_flags = flags;
1da177e4
LT
1972
1973 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1974 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1975 /*
1976 * If this is sendmmsg() and current destination address is same as
1977 * previously succeeded address, omit asking LSM's decision.
1978 * used_address->name_len is initialized to UINT_MAX so that the first
1979 * destination address never matches.
1980 */
bc909d9d
MD
1981 if (used_address && msg_sys->msg_name &&
1982 used_address->name_len == msg_sys->msg_namelen &&
1983 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
1984 used_address->name_len)) {
1985 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1986 goto out_freectl;
1987 }
1988 err = sock_sendmsg(sock, msg_sys, total_len);
1989 /*
1990 * If this is sendmmsg() and sending to current destination address was
1991 * successful, remember it.
1992 */
1993 if (used_address && err >= 0) {
1994 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1995 if (msg_sys->msg_name)
1996 memcpy(&used_address->name, msg_sys->msg_name,
1997 used_address->name_len);
c71d8ebe 1998 }
1da177e4
LT
1999
2000out_freectl:
89bddce5 2001 if (ctl_buf != ctl)
1da177e4
LT
2002 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2003out_freeiov:
2004 if (iov != iovstack)
a74e9106 2005 kfree(iov);
228e548e
AB
2006out:
2007 return err;
2008}
2009
2010/*
2011 * BSD sendmsg interface
2012 */
2013
95c96174 2014SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
228e548e
AB
2015{
2016 int fput_needed, err;
2017 struct msghdr msg_sys;
2018 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2019
2020 if (!sock)
2021 goto out;
2022
c71d8ebe 2023 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2024
6cb153ca 2025 fput_light(sock->file, fput_needed);
89bddce5 2026out:
1da177e4
LT
2027 return err;
2028}
2029
228e548e
AB
2030/*
2031 * Linux sendmmsg interface
2032 */
2033
2034int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2035 unsigned int flags)
2036{
2037 int fput_needed, err, datagrams;
2038 struct socket *sock;
2039 struct mmsghdr __user *entry;
2040 struct compat_mmsghdr __user *compat_entry;
2041 struct msghdr msg_sys;
c71d8ebe 2042 struct used_address used_address;
228e548e 2043
98382f41
AB
2044 if (vlen > UIO_MAXIOV)
2045 vlen = UIO_MAXIOV;
228e548e
AB
2046
2047 datagrams = 0;
2048
2049 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2050 if (!sock)
2051 return err;
2052
c71d8ebe 2053 used_address.name_len = UINT_MAX;
228e548e
AB
2054 entry = mmsg;
2055 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2056 err = 0;
228e548e
AB
2057
2058 while (datagrams < vlen) {
228e548e
AB
2059 if (MSG_CMSG_COMPAT & flags) {
2060 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2061 &msg_sys, flags, &used_address);
228e548e
AB
2062 if (err < 0)
2063 break;
2064 err = __put_user(err, &compat_entry->msg_len);
2065 ++compat_entry;
2066 } else {
2067 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2068 &msg_sys, flags, &used_address);
228e548e
AB
2069 if (err < 0)
2070 break;
2071 err = put_user(err, &entry->msg_len);
2072 ++entry;
2073 }
2074
2075 if (err)
2076 break;
2077 ++datagrams;
2078 }
2079
228e548e
AB
2080 fput_light(sock->file, fput_needed);
2081
728ffb86
AB
2082 /* We only return an error if no datagrams were able to be sent */
2083 if (datagrams != 0)
228e548e
AB
2084 return datagrams;
2085
228e548e
AB
2086 return err;
2087}
2088
2089SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2090 unsigned int, vlen, unsigned int, flags)
2091{
2092 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2093}
2094
a2e27255 2095static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2096 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2097{
89bddce5
SH
2098 struct compat_msghdr __user *msg_compat =
2099 (struct compat_msghdr __user *)msg;
1da177e4 2100 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2101 struct iovec *iov = iovstack;
1da177e4 2102 unsigned long cmsg_ptr;
a74e9106 2103 int err, total_len, len;
1da177e4
LT
2104
2105 /* kernel mode address */
230b1839 2106 struct sockaddr_storage addr;
1da177e4
LT
2107
2108 /* user mode address pointers */
2109 struct sockaddr __user *uaddr;
2110 int __user *uaddr_len;
89bddce5 2111
1da177e4 2112 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2113 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2114 return -EFAULT;
c6d409cf 2115 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2116 return -EFAULT;
1da177e4 2117
a2e27255 2118 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2119 err = -EMSGSIZE;
2120 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2121 goto out;
2122 err = -ENOMEM;
2123 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2124 GFP_KERNEL);
1da177e4 2125 if (!iov)
a2e27255 2126 goto out;
1da177e4
LT
2127 }
2128
2129 /*
89bddce5
SH
2130 * Save the user-mode address (verify_iovec will change the
2131 * kernel msghdr to use the kernel address space)
1da177e4 2132 */
89bddce5 2133
a2e27255 2134 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2135 uaddr_len = COMPAT_NAMELEN(msg);
2136 if (MSG_CMSG_COMPAT & flags) {
43db362d 2137 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4 2138 } else
43db362d 2139 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2140 if (err < 0)
2141 goto out_freeiov;
89bddce5 2142 total_len = err;
1da177e4 2143
a2e27255
ACM
2144 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2145 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2146
1da177e4
LT
2147 if (sock->file->f_flags & O_NONBLOCK)
2148 flags |= MSG_DONTWAIT;
a2e27255
ACM
2149 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2150 total_len, flags);
1da177e4
LT
2151 if (err < 0)
2152 goto out_freeiov;
2153 len = err;
2154
2155 if (uaddr != NULL) {
43db362d 2156 err = move_addr_to_user(&addr,
a2e27255 2157 msg_sys->msg_namelen, uaddr,
89bddce5 2158 uaddr_len);
1da177e4
LT
2159 if (err < 0)
2160 goto out_freeiov;
2161 }
a2e27255 2162 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2163 COMPAT_FLAGS(msg));
1da177e4
LT
2164 if (err)
2165 goto out_freeiov;
2166 if (MSG_CMSG_COMPAT & flags)
a2e27255 2167 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2168 &msg_compat->msg_controllen);
2169 else
a2e27255 2170 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2171 &msg->msg_controllen);
2172 if (err)
2173 goto out_freeiov;
2174 err = len;
2175
2176out_freeiov:
2177 if (iov != iovstack)
a74e9106 2178 kfree(iov);
a2e27255
ACM
2179out:
2180 return err;
2181}
2182
2183/*
2184 * BSD recvmsg interface
2185 */
2186
2187SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2188 unsigned int, flags)
2189{
2190 int fput_needed, err;
2191 struct msghdr msg_sys;
2192 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2193
2194 if (!sock)
2195 goto out;
2196
2197 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2198
6cb153ca 2199 fput_light(sock->file, fput_needed);
1da177e4
LT
2200out:
2201 return err;
2202}
2203
a2e27255
ACM
2204/*
2205 * Linux recvmmsg interface
2206 */
2207
2208int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2209 unsigned int flags, struct timespec *timeout)
2210{
2211 int fput_needed, err, datagrams;
2212 struct socket *sock;
2213 struct mmsghdr __user *entry;
d7256d0e 2214 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2215 struct msghdr msg_sys;
2216 struct timespec end_time;
2217
2218 if (timeout &&
2219 poll_select_set_timeout(&end_time, timeout->tv_sec,
2220 timeout->tv_nsec))
2221 return -EINVAL;
2222
2223 datagrams = 0;
2224
2225 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2226 if (!sock)
2227 return err;
2228
2229 err = sock_error(sock->sk);
2230 if (err)
2231 goto out_put;
2232
2233 entry = mmsg;
d7256d0e 2234 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2235
2236 while (datagrams < vlen) {
2237 /*
2238 * No need to ask LSM for more than the first datagram.
2239 */
d7256d0e
JMG
2240 if (MSG_CMSG_COMPAT & flags) {
2241 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2242 &msg_sys, flags & ~MSG_WAITFORONE,
2243 datagrams);
d7256d0e
JMG
2244 if (err < 0)
2245 break;
2246 err = __put_user(err, &compat_entry->msg_len);
2247 ++compat_entry;
2248 } else {
2249 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2250 &msg_sys, flags & ~MSG_WAITFORONE,
2251 datagrams);
d7256d0e
JMG
2252 if (err < 0)
2253 break;
2254 err = put_user(err, &entry->msg_len);
2255 ++entry;
2256 }
2257
a2e27255
ACM
2258 if (err)
2259 break;
a2e27255
ACM
2260 ++datagrams;
2261
71c5c159
BB
2262 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2263 if (flags & MSG_WAITFORONE)
2264 flags |= MSG_DONTWAIT;
2265
a2e27255
ACM
2266 if (timeout) {
2267 ktime_get_ts(timeout);
2268 *timeout = timespec_sub(end_time, *timeout);
2269 if (timeout->tv_sec < 0) {
2270 timeout->tv_sec = timeout->tv_nsec = 0;
2271 break;
2272 }
2273
2274 /* Timeout, return less than vlen datagrams */
2275 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2276 break;
2277 }
2278
2279 /* Out of band data, return right away */
2280 if (msg_sys.msg_flags & MSG_OOB)
2281 break;
2282 }
2283
2284out_put:
2285 fput_light(sock->file, fput_needed);
1da177e4 2286
a2e27255
ACM
2287 if (err == 0)
2288 return datagrams;
2289
2290 if (datagrams != 0) {
2291 /*
2292 * We may return less entries than requested (vlen) if the
2293 * sock is non block and there aren't enough datagrams...
2294 */
2295 if (err != -EAGAIN) {
2296 /*
2297 * ... or if recvmsg returns an error after we
2298 * received some datagrams, where we record the
2299 * error to return on the next call or if the
2300 * app asks about it using getsockopt(SO_ERROR).
2301 */
2302 sock->sk->sk_err = -err;
2303 }
2304
2305 return datagrams;
2306 }
2307
2308 return err;
2309}
2310
2311SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2312 unsigned int, vlen, unsigned int, flags,
2313 struct timespec __user *, timeout)
2314{
2315 int datagrams;
2316 struct timespec timeout_sys;
2317
2318 if (!timeout)
2319 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2320
2321 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2322 return -EFAULT;
2323
2324 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2325
2326 if (datagrams > 0 &&
2327 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2328 datagrams = -EFAULT;
2329
2330 return datagrams;
2331}
2332
2333#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the
 * socketcall number.  Each argument takes one unsigned long.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3),
	AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4),
	AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5),
	AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL
2344
2345/*
89bddce5 2346 * System call vectors.
1da177e4
LT
2347 *
2348 * Argument checking cleaned up. Saved 20% in size.
2349 * This function doesn't need to set the kernel lock because
89bddce5 2350 * it is set by the callees.
1da177e4
LT
2351 */
2352
3e0fa65f 2353SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2354{
2355 unsigned long a[6];
89bddce5 2356 unsigned long a0, a1;
1da177e4 2357 int err;
47379052 2358 unsigned int len;
1da177e4 2359
228e548e 2360 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2361 return -EINVAL;
2362
47379052
AV
2363 len = nargs[call];
2364 if (len > sizeof(a))
2365 return -EINVAL;
2366
1da177e4 2367 /* copy_from_user should be SMP safe. */
47379052 2368 if (copy_from_user(a, args, len))
1da177e4 2369 return -EFAULT;
3ec3b2fb 2370
f3298dc4 2371 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2372
89bddce5
SH
2373 a0 = a[0];
2374 a1 = a[1];
2375
2376 switch (call) {
2377 case SYS_SOCKET:
2378 err = sys_socket(a0, a1, a[2]);
2379 break;
2380 case SYS_BIND:
2381 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2382 break;
2383 case SYS_CONNECT:
2384 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2385 break;
2386 case SYS_LISTEN:
2387 err = sys_listen(a0, a1);
2388 break;
2389 case SYS_ACCEPT:
de11defe
UD
2390 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2391 (int __user *)a[2], 0);
89bddce5
SH
2392 break;
2393 case SYS_GETSOCKNAME:
2394 err =
2395 sys_getsockname(a0, (struct sockaddr __user *)a1,
2396 (int __user *)a[2]);
2397 break;
2398 case SYS_GETPEERNAME:
2399 err =
2400 sys_getpeername(a0, (struct sockaddr __user *)a1,
2401 (int __user *)a[2]);
2402 break;
2403 case SYS_SOCKETPAIR:
2404 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2405 break;
2406 case SYS_SEND:
2407 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2408 break;
2409 case SYS_SENDTO:
2410 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2411 (struct sockaddr __user *)a[4], a[5]);
2412 break;
2413 case SYS_RECV:
2414 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2415 break;
2416 case SYS_RECVFROM:
2417 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2418 (struct sockaddr __user *)a[4],
2419 (int __user *)a[5]);
2420 break;
2421 case SYS_SHUTDOWN:
2422 err = sys_shutdown(a0, a1);
2423 break;
2424 case SYS_SETSOCKOPT:
2425 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2426 break;
2427 case SYS_GETSOCKOPT:
2428 err =
2429 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2430 (int __user *)a[4]);
2431 break;
2432 case SYS_SENDMSG:
2433 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2434 break;
228e548e
AB
2435 case SYS_SENDMMSG:
2436 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2437 break;
89bddce5
SH
2438 case SYS_RECVMSG:
2439 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2440 break;
a2e27255
ACM
2441 case SYS_RECVMMSG:
2442 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2443 (struct timespec __user *)a[4]);
2444 break;
de11defe
UD
2445 case SYS_ACCEPT4:
2446 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2447 (int __user *)a[2], a[3]);
aaca0bdc 2448 break;
89bddce5
SH
2449 default:
2450 err = -EINVAL;
2451 break;
1da177e4
LT
2452 }
2453 return err;
2454}
2455
89bddce5 2456#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2457
55737fda
SH
2458/**
2459 * sock_register - add a socket protocol handler
2460 * @ops: description of protocol
2461 *
1da177e4
LT
2462 * This function is called by a protocol handler that wants to
2463 * advertise its address family, and have it linked into the
55737fda
SH
2464 * socket interface. The value ops->family coresponds to the
2465 * socket system call protocol family.
1da177e4 2466 */
f0fd27d4 2467int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2468{
2469 int err;
2470
2471 if (ops->family >= NPROTO) {
89bddce5
SH
2472 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2473 NPROTO);
1da177e4
LT
2474 return -ENOBUFS;
2475 }
55737fda
SH
2476
2477 spin_lock(&net_family_lock);
190683a9
ED
2478 if (rcu_dereference_protected(net_families[ops->family],
2479 lockdep_is_held(&net_family_lock)))
55737fda
SH
2480 err = -EEXIST;
2481 else {
cf778b00 2482 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2483 err = 0;
2484 }
55737fda
SH
2485 spin_unlock(&net_family_lock);
2486
89bddce5 2487 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2488 return err;
2489}
c6d409cf 2490EXPORT_SYMBOL(sock_register);
1da177e4 2491
55737fda
SH
2492/**
2493 * sock_unregister - remove a protocol handler
2494 * @family: protocol family to remove
2495 *
1da177e4
LT
2496 * This function is called by a protocol handler that wants to
2497 * remove its address family, and have it unlinked from the
55737fda
SH
2498 * new socket creation.
2499 *
2500 * If protocol handler is a module, then it can use module reference
2501 * counts to protect against new references. If protocol handler is not
2502 * a module then it needs to provide its own protection in
2503 * the ops->create routine.
1da177e4 2504 */
f0fd27d4 2505void sock_unregister(int family)
1da177e4 2506{
f0fd27d4 2507 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2508
55737fda 2509 spin_lock(&net_family_lock);
a9b3cd7f 2510 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2511 spin_unlock(&net_family_lock);
2512
2513 synchronize_rcu();
2514
89bddce5 2515 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2516}
c6d409cf 2517EXPORT_SYMBOL(sock_unregister);
1da177e4 2518
77d76ea3 2519static int __init sock_init(void)
1da177e4 2520{
b3e19d92 2521 int err;
2ca794e5
EB
2522 /*
2523 * Initialize the network sysctl infrastructure.
2524 */
2525 err = net_sysctl_init();
2526 if (err)
2527 goto out;
b3e19d92 2528
1da177e4 2529 /*
89bddce5 2530 * Initialize sock SLAB cache.
1da177e4 2531 */
89bddce5 2532
1da177e4
LT
2533 sk_init();
2534
1da177e4 2535 /*
89bddce5 2536 * Initialize skbuff SLAB cache
1da177e4
LT
2537 */
2538 skb_init();
1da177e4
LT
2539
2540 /*
89bddce5 2541 * Initialize the protocols module.
1da177e4
LT
2542 */
2543
2544 init_inodecache();
b3e19d92
NP
2545
2546 err = register_filesystem(&sock_fs_type);
2547 if (err)
2548 goto out_fs;
1da177e4 2549 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2550 if (IS_ERR(sock_mnt)) {
2551 err = PTR_ERR(sock_mnt);
2552 goto out_mount;
2553 }
77d76ea3
AK
2554
2555 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2556 */
2557
2558#ifdef CONFIG_NETFILTER
2559 netfilter_init();
2560#endif
cbeb321a 2561
c1f19b51
RC
2562#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2563 skb_timestamping_init();
2564#endif
2565
b3e19d92
NP
2566out:
2567 return err;
2568
2569out_mount:
2570 unregister_filesystem(&sock_fs_type);
2571out_fs:
2572 goto out;
1da177e4
LT
2573}
2574
77d76ea3
AK
2575core_initcall(sock_init); /* early initcall */
2576
1da177e4
LT
2577#ifdef CONFIG_PROC_FS
2578void socket_seq_show(struct seq_file *seq)
2579{
2580 int cpu;
2581 int counter = 0;
2582
6f912042 2583 for_each_possible_cpu(cpu)
89bddce5 2584 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2585
2586 /* It can be negative, by the way. 8) */
2587 if (counter < 0)
2588 counter = 0;
2589
2590 seq_printf(seq, "sockets: used %d\n", counter);
2591}
89bddce5 2592#endif /* CONFIG_PROC_FS */
1da177e4 2593
89bbfc95 2594#ifdef CONFIG_COMPAT
6b96018b 2595static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2596 unsigned int cmd, void __user *up)
7a229387 2597{
7a229387
AB
2598 mm_segment_t old_fs = get_fs();
2599 struct timeval ktv;
2600 int err;
2601
2602 set_fs(KERNEL_DS);
6b96018b 2603 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2604 set_fs(old_fs);
644595f8
PA
2605 if (!err)
2606 err = compat_put_timeval(up, &ktv);
2607
7a229387
AB
2608 return err;
2609}
2610
6b96018b 2611static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2612 unsigned int cmd, void __user *up)
7a229387 2613{
7a229387
AB
2614 mm_segment_t old_fs = get_fs();
2615 struct timespec kts;
2616 int err;
2617
2618 set_fs(KERNEL_DS);
6b96018b 2619 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2620 set_fs(old_fs);
644595f8
PA
2621 if (!err)
2622 err = compat_put_timespec(up, &kts);
2623
7a229387
AB
2624 return err;
2625}
2626
6b96018b 2627static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2628{
2629 struct ifreq __user *uifr;
2630 int err;
2631
2632 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2633 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2634 return -EFAULT;
2635
6b96018b 2636 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2637 if (err)
2638 return err;
2639
6b96018b 2640 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2641 return -EFAULT;
2642
2643 return 0;
2644}
2645
6b96018b 2646static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2647{
6b96018b 2648 struct compat_ifconf ifc32;
7a229387
AB
2649 struct ifconf ifc;
2650 struct ifconf __user *uifc;
6b96018b 2651 struct compat_ifreq __user *ifr32;
7a229387
AB
2652 struct ifreq __user *ifr;
2653 unsigned int i, j;
2654 int err;
2655
6b96018b 2656 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2657 return -EFAULT;
2658
2659 if (ifc32.ifcbuf == 0) {
2660 ifc32.ifc_len = 0;
2661 ifc.ifc_len = 0;
2662 ifc.ifc_req = NULL;
2663 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2664 } else {
c6d409cf
ED
2665 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2666 sizeof(struct ifreq);
7a229387
AB
2667 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2668 ifc.ifc_len = len;
2669 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2670 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2671 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2672 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2673 return -EFAULT;
2674 ifr++;
2675 ifr32++;
2676 }
2677 }
2678 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2679 return -EFAULT;
2680
6b96018b 2681 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2682 if (err)
2683 return err;
2684
2685 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2686 return -EFAULT;
2687
2688 ifr = ifc.ifc_req;
2689 ifr32 = compat_ptr(ifc32.ifcbuf);
2690 for (i = 0, j = 0;
c6d409cf
ED
2691 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2692 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2693 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2694 return -EFAULT;
2695 ifr32++;
2696 ifr++;
2697 }
2698
2699 if (ifc32.ifcbuf == 0) {
2700 /* Translate from 64-bit structure multiple to
2701 * a 32-bit one.
2702 */
2703 i = ifc.ifc_len;
6b96018b 2704 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2705 ifc32.ifc_len = i;
2706 } else {
2707 ifc32.ifc_len = i;
2708 }
6b96018b 2709 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2710 return -EFAULT;
2711
2712 return 0;
2713}
2714
6b96018b 2715static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2716{
3a7da39d
BH
2717 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2718 bool convert_in = false, convert_out = false;
2719 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2720 struct ethtool_rxnfc __user *rxnfc;
7a229387 2721 struct ifreq __user *ifr;
3a7da39d
BH
2722 u32 rule_cnt = 0, actual_rule_cnt;
2723 u32 ethcmd;
7a229387 2724 u32 data;
3a7da39d 2725 int ret;
7a229387 2726
3a7da39d
BH
2727 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2728 return -EFAULT;
7a229387 2729
3a7da39d
BH
2730 compat_rxnfc = compat_ptr(data);
2731
2732 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2733 return -EFAULT;
2734
3a7da39d
BH
2735 /* Most ethtool structures are defined without padding.
2736 * Unfortunately struct ethtool_rxnfc is an exception.
2737 */
2738 switch (ethcmd) {
2739 default:
2740 break;
2741 case ETHTOOL_GRXCLSRLALL:
2742 /* Buffer size is variable */
2743 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2744 return -EFAULT;
2745 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2746 return -ENOMEM;
2747 buf_size += rule_cnt * sizeof(u32);
2748 /* fall through */
2749 case ETHTOOL_GRXRINGS:
2750 case ETHTOOL_GRXCLSRLCNT:
2751 case ETHTOOL_GRXCLSRULE:
55664f32 2752 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2753 convert_out = true;
2754 /* fall through */
2755 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2756 buf_size += sizeof(struct ethtool_rxnfc);
2757 convert_in = true;
2758 break;
2759 }
2760
2761 ifr = compat_alloc_user_space(buf_size);
2762 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2763
2764 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2765 return -EFAULT;
2766
3a7da39d
BH
2767 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2768 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2769 return -EFAULT;
2770
3a7da39d 2771 if (convert_in) {
127fe533 2772 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2773 * fs.ring_cookie and at the end of fs, but nowhere else.
2774 */
127fe533
AD
2775 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2776 sizeof(compat_rxnfc->fs.m_ext) !=
2777 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2778 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2779 BUILD_BUG_ON(
2780 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2781 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2782 offsetof(struct ethtool_rxnfc, fs.location) -
2783 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2784
2785 if (copy_in_user(rxnfc, compat_rxnfc,
127fe533 2786 (void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2787 (void *)rxnfc) ||
2788 copy_in_user(&rxnfc->fs.ring_cookie,
2789 &compat_rxnfc->fs.ring_cookie,
2790 (void *)(&rxnfc->fs.location + 1) -
2791 (void *)&rxnfc->fs.ring_cookie) ||
2792 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2793 sizeof(rxnfc->rule_cnt)))
2794 return -EFAULT;
2795 }
2796
2797 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2798 if (ret)
2799 return ret;
2800
2801 if (convert_out) {
2802 if (copy_in_user(compat_rxnfc, rxnfc,
127fe533 2803 (const void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2804 (const void *)rxnfc) ||
2805 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2806 &rxnfc->fs.ring_cookie,
2807 (const void *)(&rxnfc->fs.location + 1) -
2808 (const void *)&rxnfc->fs.ring_cookie) ||
2809 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2810 sizeof(rxnfc->rule_cnt)))
2811 return -EFAULT;
2812
2813 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2814 /* As an optimisation, we only copy the actual
2815 * number of rules that the underlying
2816 * function returned. Since Mallory might
2817 * change the rule count in user memory, we
2818 * check that it is less than the rule count
2819 * originally given (as the user buffer size),
2820 * which has been range-checked.
2821 */
2822 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2823 return -EFAULT;
2824 if (actual_rule_cnt < rule_cnt)
2825 rule_cnt = actual_rule_cnt;
2826 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2827 &rxnfc->rule_locs[0],
2828 rule_cnt * sizeof(u32)))
2829 return -EFAULT;
2830 }
2831 }
2832
2833 return 0;
7a229387
AB
2834}
2835
7a50a240
AB
2836static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2837{
2838 void __user *uptr;
2839 compat_uptr_t uptr32;
2840 struct ifreq __user *uifr;
2841
c6d409cf 2842 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2843 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2844 return -EFAULT;
2845
2846 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2847 return -EFAULT;
2848
2849 uptr = compat_ptr(uptr32);
2850
2851 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2852 return -EFAULT;
2853
2854 return dev_ioctl(net, SIOCWANDEV, uifr);
2855}
2856
6b96018b
AB
2857static int bond_ioctl(struct net *net, unsigned int cmd,
2858 struct compat_ifreq __user *ifr32)
7a229387
AB
2859{
2860 struct ifreq kifr;
2861 struct ifreq __user *uifr;
7a229387
AB
2862 mm_segment_t old_fs;
2863 int err;
2864 u32 data;
2865 void __user *datap;
2866
2867 switch (cmd) {
2868 case SIOCBONDENSLAVE:
2869 case SIOCBONDRELEASE:
2870 case SIOCBONDSETHWADDR:
2871 case SIOCBONDCHANGEACTIVE:
6b96018b 2872 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2873 return -EFAULT;
2874
2875 old_fs = get_fs();
c6d409cf 2876 set_fs(KERNEL_DS);
c3f52ae6 2877 err = dev_ioctl(net, cmd,
2878 (struct ifreq __user __force *) &kifr);
c6d409cf 2879 set_fs(old_fs);
7a229387
AB
2880
2881 return err;
2882 case SIOCBONDSLAVEINFOQUERY:
2883 case SIOCBONDINFOQUERY:
2884 uifr = compat_alloc_user_space(sizeof(*uifr));
2885 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2886 return -EFAULT;
2887
2888 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2889 return -EFAULT;
2890
2891 datap = compat_ptr(data);
2892 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2893 return -EFAULT;
2894
6b96018b 2895 return dev_ioctl(net, cmd, uifr);
7a229387 2896 default:
07d106d0 2897 return -ENOIOCTLCMD;
ccbd6a5a 2898 }
7a229387
AB
2899}
2900
6b96018b
AB
2901static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2902 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2903{
2904 struct ifreq __user *u_ifreq64;
7a229387
AB
2905 char tmp_buf[IFNAMSIZ];
2906 void __user *data64;
2907 u32 data32;
2908
2909 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2910 IFNAMSIZ))
2911 return -EFAULT;
2912 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2913 return -EFAULT;
2914 data64 = compat_ptr(data32);
2915
2916 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2917
2918 /* Don't check these user accesses, just let that get trapped
2919 * in the ioctl handler instead.
2920 */
2921 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2922 IFNAMSIZ))
2923 return -EFAULT;
2924 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2925 return -EFAULT;
2926
6b96018b 2927 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2928}
2929
6b96018b
AB
2930static int dev_ifsioc(struct net *net, struct socket *sock,
2931 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2932{
a2116ed2 2933 struct ifreq __user *uifr;
7a229387
AB
2934 int err;
2935
a2116ed2
AB
2936 uifr = compat_alloc_user_space(sizeof(*uifr));
2937 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2938 return -EFAULT;
2939
2940 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2941
7a229387
AB
2942 if (!err) {
2943 switch (cmd) {
2944 case SIOCGIFFLAGS:
2945 case SIOCGIFMETRIC:
2946 case SIOCGIFMTU:
2947 case SIOCGIFMEM:
2948 case SIOCGIFHWADDR:
2949 case SIOCGIFINDEX:
2950 case SIOCGIFADDR:
2951 case SIOCGIFBRDADDR:
2952 case SIOCGIFDSTADDR:
2953 case SIOCGIFNETMASK:
fab2532b 2954 case SIOCGIFPFLAGS:
7a229387 2955 case SIOCGIFTXQLEN:
fab2532b
AB
2956 case SIOCGMIIPHY:
2957 case SIOCGMIIREG:
a2116ed2 2958 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2959 err = -EFAULT;
2960 break;
2961 }
2962 }
2963 return err;
2964}
2965
a2116ed2
AB
2966static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2967 struct compat_ifreq __user *uifr32)
2968{
2969 struct ifreq ifr;
2970 struct compat_ifmap __user *uifmap32;
2971 mm_segment_t old_fs;
2972 int err;
2973
2974 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2975 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2976 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2977 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2978 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2979 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2980 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2981 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2982 if (err)
2983 return -EFAULT;
2984
2985 old_fs = get_fs();
c6d409cf 2986 set_fs(KERNEL_DS);
c3f52ae6 2987 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2988 set_fs(old_fs);
a2116ed2
AB
2989
2990 if (cmd == SIOCGIFMAP && !err) {
2991 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2992 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2993 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2994 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2995 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2996 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2997 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2998 if (err)
2999 err = -EFAULT;
3000 }
3001 return err;
3002}
3003
3004static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3005{
3006 void __user *uptr;
3007 compat_uptr_t uptr32;
3008 struct ifreq __user *uifr;
3009
c6d409cf 3010 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3011 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3012 return -EFAULT;
3013
3014 if (get_user(uptr32, &uifr32->ifr_data))
3015 return -EFAULT;
3016
3017 uptr = compat_ptr(uptr32);
3018
3019 if (put_user(uptr, &uifr->ifr_data))
3020 return -EFAULT;
3021
3022 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3023}
3024
7a229387 3025struct rtentry32 {
c6d409cf 3026 u32 rt_pad1;
7a229387
AB
3027 struct sockaddr rt_dst; /* target address */
3028 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3029 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3030 unsigned short rt_flags;
3031 short rt_pad2;
3032 u32 rt_pad3;
3033 unsigned char rt_tos;
3034 unsigned char rt_class;
3035 short rt_pad4;
3036 short rt_metric; /* +1 for binary compatibility! */
7a229387 3037 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3038 u32 rt_mtu; /* per route MTU/Window */
3039 u32 rt_window; /* Window clamping */
7a229387
AB
3040 unsigned short rt_irtt; /* Initial RTT */
3041};
3042
3043struct in6_rtmsg32 {
3044 struct in6_addr rtmsg_dst;
3045 struct in6_addr rtmsg_src;
3046 struct in6_addr rtmsg_gateway;
3047 u32 rtmsg_type;
3048 u16 rtmsg_dst_len;
3049 u16 rtmsg_src_len;
3050 u32 rtmsg_metric;
3051 u32 rtmsg_info;
3052 u32 rtmsg_flags;
3053 s32 rtmsg_ifindex;
3054};
3055
6b96018b
AB
3056static int routing_ioctl(struct net *net, struct socket *sock,
3057 unsigned int cmd, void __user *argp)
7a229387
AB
3058{
3059 int ret;
3060 void *r = NULL;
3061 struct in6_rtmsg r6;
3062 struct rtentry r4;
3063 char devname[16];
3064 u32 rtdev;
3065 mm_segment_t old_fs = get_fs();
3066
6b96018b
AB
3067 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3068 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3069 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3070 3 * sizeof(struct in6_addr));
c6d409cf
ED
3071 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3072 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3073 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3074 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3075 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3076 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3077 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3078
3079 r = (void *) &r6;
3080 } else { /* ipv4 */
6b96018b 3081 struct rtentry32 __user *ur4 = argp;
c6d409cf 3082 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3083 3 * sizeof(struct sockaddr));
c6d409cf
ED
3084 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3085 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3086 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3087 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3088 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3089 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3090 if (rtdev) {
c6d409cf 3091 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3092 r4.rt_dev = (char __user __force *)devname;
3093 devname[15] = 0;
7a229387
AB
3094 } else
3095 r4.rt_dev = NULL;
3096
3097 r = (void *) &r4;
3098 }
3099
3100 if (ret) {
3101 ret = -EFAULT;
3102 goto out;
3103 }
3104
c6d409cf 3105 set_fs(KERNEL_DS);
6b96018b 3106 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3107 set_fs(old_fs);
7a229387
AB
3108
3109out:
7a229387
AB
3110 return ret;
3111}
3112
3113/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3114 * for some operations; this forces use of the newer bridge-utils that
25985edc 3115 * use compatible ioctls
7a229387 3116 */
6b96018b 3117static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3118{
6b96018b 3119 compat_ulong_t tmp;
7a229387 3120
6b96018b 3121 if (get_user(tmp, argp))
7a229387
AB
3122 return -EFAULT;
3123 if (tmp == BRCTL_GET_VERSION)
3124 return BRCTL_VERSION + 1;
3125 return -EINVAL;
3126}
3127
6b96018b
AB
3128static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3129 unsigned int cmd, unsigned long arg)
3130{
3131 void __user *argp = compat_ptr(arg);
3132 struct sock *sk = sock->sk;
3133 struct net *net = sock_net(sk);
7a229387 3134
6b96018b
AB
3135 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3136 return siocdevprivate_ioctl(net, cmd, argp);
3137
3138 switch (cmd) {
3139 case SIOCSIFBR:
3140 case SIOCGIFBR:
3141 return old_bridge_ioctl(argp);
3142 case SIOCGIFNAME:
3143 return dev_ifname32(net, argp);
3144 case SIOCGIFCONF:
3145 return dev_ifconf(net, argp);
3146 case SIOCETHTOOL:
3147 return ethtool_ioctl(net, argp);
7a50a240
AB
3148 case SIOCWANDEV:
3149 return compat_siocwandev(net, argp);
a2116ed2
AB
3150 case SIOCGIFMAP:
3151 case SIOCSIFMAP:
3152 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3153 case SIOCBONDENSLAVE:
3154 case SIOCBONDRELEASE:
3155 case SIOCBONDSETHWADDR:
3156 case SIOCBONDSLAVEINFOQUERY:
3157 case SIOCBONDINFOQUERY:
3158 case SIOCBONDCHANGEACTIVE:
3159 return bond_ioctl(net, cmd, argp);
3160 case SIOCADDRT:
3161 case SIOCDELRT:
3162 return routing_ioctl(net, sock, cmd, argp);
3163 case SIOCGSTAMP:
3164 return do_siocgstamp(net, sock, cmd, argp);
3165 case SIOCGSTAMPNS:
3166 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3167 case SIOCSHWTSTAMP:
3168 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3169
3170 case FIOSETOWN:
3171 case SIOCSPGRP:
3172 case FIOGETOWN:
3173 case SIOCGPGRP:
3174 case SIOCBRADDBR:
3175 case SIOCBRDELBR:
3176 case SIOCGIFVLAN:
3177 case SIOCSIFVLAN:
3178 case SIOCADDDLCI:
3179 case SIOCDELDLCI:
3180 return sock_ioctl(file, cmd, arg);
3181
3182 case SIOCGIFFLAGS:
3183 case SIOCSIFFLAGS:
3184 case SIOCGIFMETRIC:
3185 case SIOCSIFMETRIC:
3186 case SIOCGIFMTU:
3187 case SIOCSIFMTU:
3188 case SIOCGIFMEM:
3189 case SIOCSIFMEM:
3190 case SIOCGIFHWADDR:
3191 case SIOCSIFHWADDR:
3192 case SIOCADDMULTI:
3193 case SIOCDELMULTI:
3194 case SIOCGIFINDEX:
6b96018b
AB
3195 case SIOCGIFADDR:
3196 case SIOCSIFADDR:
3197 case SIOCSIFHWBROADCAST:
6b96018b 3198 case SIOCDIFADDR:
6b96018b
AB
3199 case SIOCGIFBRDADDR:
3200 case SIOCSIFBRDADDR:
3201 case SIOCGIFDSTADDR:
3202 case SIOCSIFDSTADDR:
3203 case SIOCGIFNETMASK:
3204 case SIOCSIFNETMASK:
3205 case SIOCSIFPFLAGS:
3206 case SIOCGIFPFLAGS:
3207 case SIOCGIFTXQLEN:
3208 case SIOCSIFTXQLEN:
3209 case SIOCBRADDIF:
3210 case SIOCBRDELIF:
9177efd3
AB
3211 case SIOCSIFNAME:
3212 case SIOCGMIIPHY:
3213 case SIOCGMIIREG:
3214 case SIOCSMIIREG:
6b96018b 3215 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3216
6b96018b
AB
3217 case SIOCSARP:
3218 case SIOCGARP:
3219 case SIOCDARP:
6b96018b 3220 case SIOCATMARK:
9177efd3
AB
3221 return sock_do_ioctl(net, sock, cmd, arg);
3222 }
3223
6b96018b
AB
3224 return -ENOIOCTLCMD;
3225}
7a229387 3226
95c96174 3227static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3228 unsigned long arg)
89bbfc95
SP
3229{
3230 struct socket *sock = file->private_data;
3231 int ret = -ENOIOCTLCMD;
87de87d5
DM
3232 struct sock *sk;
3233 struct net *net;
3234
3235 sk = sock->sk;
3236 net = sock_net(sk);
89bbfc95
SP
3237
3238 if (sock->ops->compat_ioctl)
3239 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3240
87de87d5
DM
3241 if (ret == -ENOIOCTLCMD &&
3242 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3243 ret = compat_wext_handle_ioctl(net, cmd, arg);
3244
6b96018b
AB
3245 if (ret == -ENOIOCTLCMD)
3246 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3247
89bbfc95
SP
3248 return ret;
3249}
3250#endif
3251
ac5a488e
SS
3252int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3253{
3254 return sock->ops->bind(sock, addr, addrlen);
3255}
c6d409cf 3256EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3257
3258int kernel_listen(struct socket *sock, int backlog)
3259{
3260 return sock->ops->listen(sock, backlog);
3261}
c6d409cf 3262EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3263
3264int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3265{
3266 struct sock *sk = sock->sk;
3267 int err;
3268
3269 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3270 newsock);
3271 if (err < 0)
3272 goto done;
3273
3274 err = sock->ops->accept(sock, *newsock, flags);
3275 if (err < 0) {
3276 sock_release(*newsock);
fa8705b0 3277 *newsock = NULL;
ac5a488e
SS
3278 goto done;
3279 }
3280
3281 (*newsock)->ops = sock->ops;
1b08534e 3282 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3283
3284done:
3285 return err;
3286}
c6d409cf 3287EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3288
3289int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3290 int flags)
ac5a488e
SS
3291{
3292 return sock->ops->connect(sock, addr, addrlen, flags);
3293}
c6d409cf 3294EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3295
3296int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3297 int *addrlen)
3298{
3299 return sock->ops->getname(sock, addr, addrlen, 0);
3300}
c6d409cf 3301EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3302
3303int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3304 int *addrlen)
3305{
3306 return sock->ops->getname(sock, addr, addrlen, 1);
3307}
c6d409cf 3308EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3309
3310int kernel_getsockopt(struct socket *sock, int level, int optname,
3311 char *optval, int *optlen)
3312{
3313 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3314 char __user *uoptval;
3315 int __user *uoptlen;
ac5a488e
SS
3316 int err;
3317
fb8621bb
NK
3318 uoptval = (char __user __force *) optval;
3319 uoptlen = (int __user __force *) optlen;
3320
ac5a488e
SS
3321 set_fs(KERNEL_DS);
3322 if (level == SOL_SOCKET)
fb8621bb 3323 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3324 else
fb8621bb
NK
3325 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3326 uoptlen);
ac5a488e
SS
3327 set_fs(oldfs);
3328 return err;
3329}
c6d409cf 3330EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3331
3332int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3333 char *optval, unsigned int optlen)
ac5a488e
SS
3334{
3335 mm_segment_t oldfs = get_fs();
fb8621bb 3336 char __user *uoptval;
ac5a488e
SS
3337 int err;
3338
fb8621bb
NK
3339 uoptval = (char __user __force *) optval;
3340
ac5a488e
SS
3341 set_fs(KERNEL_DS);
3342 if (level == SOL_SOCKET)
fb8621bb 3343 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3344 else
fb8621bb 3345 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3346 optlen);
3347 set_fs(oldfs);
3348 return err;
3349}
c6d409cf 3350EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3351
3352int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3353 size_t size, int flags)
3354{
f8451725
HX
3355 sock_update_classid(sock->sk);
3356
ac5a488e
SS
3357 if (sock->ops->sendpage)
3358 return sock->ops->sendpage(sock, page, offset, size, flags);
3359
3360 return sock_no_sendpage(sock, page, offset, size, flags);
3361}
c6d409cf 3362EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3363
3364int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3365{
3366 mm_segment_t oldfs = get_fs();
3367 int err;
3368
3369 set_fs(KERNEL_DS);
3370 err = sock->ops->ioctl(sock, cmd, arg);
3371 set_fs(oldfs);
3372
3373 return err;
3374}
c6d409cf 3375EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3376
91cf45f0
TM
3377int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3378{
3379 return sock->ops->shutdown(sock, how);
3380}
91cf45f0 3381EXPORT_SYMBOL(kernel_sock_shutdown);