ipv4: show pmtu in route list
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
43db362d 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
43db362d 212static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
3ba13d17 313static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 314 .d_dname = sockfs_dname,
1da177e4
LT
315};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and map them into the fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
7cbe66b6 349static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
357 if (unlikely(fd < 0))
358 return fd;
1da177e4 359
4b936885 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
2c48b9c4 361 if (unlikely(!path.dentry)) {
7cbe66b6 362 put_unused_fd(fd);
39d8c1b6 363 return -ENOMEM;
7cbe66b6 364 }
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
cc3808f8
AV
372 if (unlikely(!file)) {
373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
cc3808f8
AV
376 put_unused_fd(fd);
377 return -ENFILE;
378 }
379
380 sock->file = file;
77d27200 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
382 file->f_pos = 0;
383 file->private_data = sock;
1da177e4 384
7cbe66b6
AV
385 *f = file;
386 return fd;
39d8c1b6
DM
387}
388
/*
 * Create a file for @sock and install it in the descriptor table.
 * Returns the new descriptor, or the negative errno reported by
 * sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
EXPORT_SYMBOL(sock_map_fd);
1da177e4 400
6cb153ca
BL
401static struct socket *sock_from_file(struct file *file, int *err)
402{
6cb153ca
BL
403 if (file->f_op == &socket_file_ops)
404 return file->private_data; /* set in sock_map_fd */
405
23bb80d2
ED
406 *err = -ENOTSOCK;
407 return NULL;
6cb153ca
BL
408}
409
1da177e4 410/**
c6d409cf 411 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
412 * @fd: file handle
413 * @err: pointer to an error code return
414 *
415 * The file handle passed in is locked and the socket it is bound
416 * too is returned. If an error occurs the err pointer is overwritten
417 * with a negative errno code and NULL is returned. The function checks
418 * for both invalid handles and passing a handle which is not a socket.
419 *
420 * On a success the socket object pointer is returned.
421 */
422
423struct socket *sockfd_lookup(int fd, int *err)
424{
425 struct file *file;
1da177e4
LT
426 struct socket *sock;
427
89bddce5
SH
428 file = fget(fd);
429 if (!file) {
1da177e4
LT
430 *err = -EBADF;
431 return NULL;
432 }
89bddce5 433
6cb153ca
BL
434 sock = sock_from_file(file, err);
435 if (!sock)
1da177e4 436 fput(file);
6cb153ca
BL
437 return sock;
438}
c6d409cf 439EXPORT_SYMBOL(sockfd_lookup);
1da177e4 440
6cb153ca
BL
441static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
442{
443 struct file *file;
444 struct socket *sock;
445
3672558c 446 *err = -EBADF;
6cb153ca
BL
447 file = fget_light(fd, fput_needed);
448 if (file) {
449 sock = sock_from_file(file, err);
450 if (sock)
451 return sock;
452 fput_light(file, *fput_needed);
1da177e4 453 }
6cb153ca 454 return NULL;
1da177e4
LT
455}
456
457/**
458 * sock_alloc - allocate a socket
89bddce5 459 *
1da177e4
LT
460 * Allocate a new inode and socket object. The two are bound together
461 * and initialised. The socket is then returned. If we are out of inodes
462 * NULL is returned.
463 */
464
465static struct socket *sock_alloc(void)
466{
89bddce5
SH
467 struct inode *inode;
468 struct socket *sock;
1da177e4 469
a209dfc7 470 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
471 if (!inode)
472 return NULL;
473
474 sock = SOCKET_I(inode);
475
29a020d3 476 kmemcheck_annotate_bitfield(sock, type);
85fe4025 477 inode->i_ino = get_next_ino();
89bddce5 478 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
479 inode->i_uid = current_fsuid();
480 inode->i_gid = current_fsgid();
1da177e4 481
19e8d69c 482 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
483 return sock;
484}
485
486/*
487 * In theory you can't get an open on this inode, but /proc provides
488 * a back door. Remember to keep it shut otherwise you'll let the
489 * creepy crawlies in.
490 */
89bddce5 491
1da177e4
LT
492static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
493{
494 return -ENXIO;
495}
496
4b6f5d20 497const struct file_operations bad_sock_fops = {
1da177e4
LT
498 .owner = THIS_MODULE,
499 .open = sock_no_open,
6038f373 500 .llseek = noop_llseek,
1da177e4
LT
501};
502
503/**
504 * sock_release - close a socket
505 * @sock: socket to close
506 *
507 * The socket is released from the protocol stack if it has a release
508 * callback, and the inode is then released if the socket is bound to
89bddce5 509 * an inode not a file.
1da177e4 510 */
89bddce5 511
1da177e4
LT
512void sock_release(struct socket *sock)
513{
514 if (sock->ops) {
515 struct module *owner = sock->ops->owner;
516
517 sock->ops->release(sock);
518 sock->ops = NULL;
519 module_put(owner);
520 }
521
eaefd110 522 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
523 printk(KERN_ERR "sock_release: fasync list not empty!\n");
524
19e8d69c 525 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
526 if (!sock->file) {
527 iput(SOCK_INODE(sock));
528 return;
529 }
89bddce5 530 sock->file = NULL;
1da177e4 531}
c6d409cf 532EXPORT_SYMBOL(sock_release);
1da177e4 533
2244d07b 534int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 535{
2244d07b 536 *tx_flags = 0;
20d49473 537 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 538 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 539 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 540 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
541 if (sock_flag(sk, SOCK_WIFI_STATUS))
542 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
543 return 0;
544}
545EXPORT_SYMBOL(sock_tx_timestamp);
546
228e548e
AB
547static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
548 struct msghdr *msg, size_t size)
1da177e4
LT
549{
550 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4 551
f8451725
HX
552 sock_update_classid(sock->sk);
553
5bc1421e
NH
554 sock_update_netprioidx(sock->sk);
555
1da177e4
LT
556 si->sock = sock;
557 si->scm = NULL;
558 si->msg = msg;
559 si->size = size;
560
1da177e4
LT
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
228e548e
AB
564static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
565 struct msghdr *msg, size_t size)
566{
567 int err = security_socket_sendmsg(sock, msg, size);
568
569 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
570}
571
1da177e4
LT
572int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
573{
574 struct kiocb iocb;
575 struct sock_iocb siocb;
576 int ret;
577
578 init_sync_kiocb(&iocb, NULL);
579 iocb.private = &siocb;
580 ret = __sock_sendmsg(&iocb, sock, msg, size);
581 if (-EIOCBQUEUED == ret)
582 ret = wait_on_sync_kiocb(&iocb);
583 return ret;
584}
c6d409cf 585EXPORT_SYMBOL(sock_sendmsg);
1da177e4 586
894dc24c 587static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
588{
589 struct kiocb iocb;
590 struct sock_iocb siocb;
591 int ret;
592
593 init_sync_kiocb(&iocb, NULL);
594 iocb.private = &siocb;
595 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
596 if (-EIOCBQUEUED == ret)
597 ret = wait_on_sync_kiocb(&iocb);
598 return ret;
599}
600
1da177e4
LT
601int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
602 struct kvec *vec, size_t num, size_t size)
603{
604 mm_segment_t oldfs = get_fs();
605 int result;
606
607 set_fs(KERNEL_DS);
608 /*
609 * the following is safe, since for compiler definitions of kvec and
610 * iovec are identical, yielding the same in-core layout and alignment
611 */
89bddce5 612 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
613 msg->msg_iovlen = num;
614 result = sock_sendmsg(sock, msg, size);
615 set_fs(oldfs);
616 return result;
617}
c6d409cf 618EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 619
20d49473
PO
620static int ktime2ts(ktime_t kt, struct timespec *ts)
621{
622 if (kt.tv64) {
623 *ts = ktime_to_timespec(kt);
624 return 1;
625 } else {
626 return 0;
627 }
628}
629
92f37fd2
ED
630/*
631 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
632 */
633void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
634 struct sk_buff *skb)
635{
20d49473
PO
636 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
637 struct timespec ts[3];
638 int empty = 1;
639 struct skb_shared_hwtstamps *shhwtstamps =
640 skb_hwtstamps(skb);
641
642 /* Race occurred between timestamp enabling and packet
643 receiving. Fill in the current time for now. */
644 if (need_software_tstamp && skb->tstamp.tv64 == 0)
645 __net_timestamp(skb);
646
647 if (need_software_tstamp) {
648 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
649 struct timeval tv;
650 skb_get_timestamp(skb, &tv);
651 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
652 sizeof(tv), &tv);
653 } else {
842509b8 654 skb_get_timestampns(skb, &ts[0]);
20d49473 655 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 656 sizeof(ts[0]), &ts[0]);
20d49473
PO
657 }
658 }
659
660
661 memset(ts, 0, sizeof(ts));
662 if (skb->tstamp.tv64 &&
663 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
664 skb_get_timestampns(skb, ts + 0);
665 empty = 0;
666 }
667 if (shhwtstamps) {
668 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
669 ktime2ts(shhwtstamps->syststamp, ts + 1))
670 empty = 0;
671 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
672 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
673 empty = 0;
92f37fd2 674 }
20d49473
PO
675 if (!empty)
676 put_cmsg(msg, SOL_SOCKET,
677 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 678}
7c81fd8b
ACM
679EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
680
6e3e939f
JB
681void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
682 struct sk_buff *skb)
683{
684 int ack;
685
686 if (!sock_flag(sk, SOCK_WIFI_STATUS))
687 return;
688 if (!skb->wifi_acked_valid)
689 return;
690
691 ack = skb->wifi_acked;
692
693 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
694}
695EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
696
11165f14 697static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
698 struct sk_buff *skb)
3b885787
NH
699{
700 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
701 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
702 sizeof(__u32), &skb->dropcount);
703}
704
/*
 * Emit the receive-side control messages for @skb: first the timestamp
 * (sock_recv_timestamp()), then the drop counter (sock_recv_drops()).
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 712
a2e27255
ACM
713static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
714 struct msghdr *msg, size_t size, int flags)
1da177e4 715{
1da177e4
LT
716 struct sock_iocb *si = kiocb_to_siocb(iocb);
717
f8451725
HX
718 sock_update_classid(sock->sk);
719
1da177e4
LT
720 si->sock = sock;
721 si->scm = NULL;
722 si->msg = msg;
723 si->size = size;
724 si->flags = flags;
725
1da177e4
LT
726 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
727}
728
a2e27255
ACM
729static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
730 struct msghdr *msg, size_t size, int flags)
731{
732 int err = security_socket_recvmsg(sock, msg, size, flags);
733
734 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
735}
736
89bddce5 737int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
738 size_t size, int flags)
739{
740 struct kiocb iocb;
741 struct sock_iocb siocb;
742 int ret;
743
89bddce5 744 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
745 iocb.private = &siocb;
746 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
747 if (-EIOCBQUEUED == ret)
748 ret = wait_on_sync_kiocb(&iocb);
749 return ret;
750}
c6d409cf 751EXPORT_SYMBOL(sock_recvmsg);
1da177e4 752
a2e27255
ACM
753static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
754 size_t size, int flags)
755{
756 struct kiocb iocb;
757 struct sock_iocb siocb;
758 int ret;
759
760 init_sync_kiocb(&iocb, NULL);
761 iocb.private = &siocb;
762 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
763 if (-EIOCBQUEUED == ret)
764 ret = wait_on_sync_kiocb(&iocb);
765 return ret;
766}
767
c1249c0a
ML
768/**
769 * kernel_recvmsg - Receive a message from a socket (kernel space)
770 * @sock: The socket to receive the message from
771 * @msg: Received message
772 * @vec: Input s/g array for message data
773 * @num: Size of input s/g array
774 * @size: Number of bytes to read
775 * @flags: Message flags (MSG_DONTWAIT, etc...)
776 *
777 * On return the msg structure contains the scatter/gather array passed in the
778 * vec argument. The array is modified so that it consists of the unfilled
779 * portion of the original array.
780 *
781 * The returned value is the total number of bytes received, or an error.
782 */
89bddce5
SH
783int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
784 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
785{
786 mm_segment_t oldfs = get_fs();
787 int result;
788
789 set_fs(KERNEL_DS);
790 /*
791 * the following is safe, since for compiler definitions of kvec and
792 * iovec are identical, yielding the same in-core layout and alignment
793 */
89bddce5 794 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
795 result = sock_recvmsg(sock, msg, size, flags);
796 set_fs(oldfs);
797 return result;
798}
c6d409cf 799EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
800
801static void sock_aio_dtor(struct kiocb *iocb)
802{
803 kfree(iocb->private);
804}
805
ce1d4d3e
CH
806static ssize_t sock_sendpage(struct file *file, struct page *page,
807 int offset, size_t size, loff_t *ppos, int more)
1da177e4 808{
1da177e4
LT
809 struct socket *sock;
810 int flags;
811
ce1d4d3e
CH
812 sock = file->private_data;
813
35f9c09f
ED
814 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
815 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
816 flags |= more;
ce1d4d3e 817
e6949583 818 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 819}
1da177e4 820
9c55e01c 821static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 822 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
823 unsigned int flags)
824{
825 struct socket *sock = file->private_data;
826
997b37da
RDC
827 if (unlikely(!sock->ops->splice_read))
828 return -EINVAL;
829
f8451725
HX
830 sock_update_classid(sock->sk);
831
9c55e01c
JA
832 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
833}
834
ce1d4d3e 835static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 836 struct sock_iocb *siocb)
ce1d4d3e
CH
837{
838 if (!is_sync_kiocb(iocb)) {
839 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
840 if (!siocb)
841 return NULL;
1da177e4
LT
842 iocb->ki_dtor = sock_aio_dtor;
843 }
1da177e4 844
ce1d4d3e 845 siocb->kiocb = iocb;
ce1d4d3e
CH
846 iocb->private = siocb;
847 return siocb;
1da177e4
LT
848}
849
ce1d4d3e 850static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
851 struct file *file, const struct iovec *iov,
852 unsigned long nr_segs)
ce1d4d3e
CH
853{
854 struct socket *sock = file->private_data;
855 size_t size = 0;
856 int i;
1da177e4 857
89bddce5
SH
858 for (i = 0; i < nr_segs; i++)
859 size += iov[i].iov_len;
1da177e4 860
ce1d4d3e
CH
861 msg->msg_name = NULL;
862 msg->msg_namelen = 0;
863 msg->msg_control = NULL;
864 msg->msg_controllen = 0;
89bddce5 865 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
866 msg->msg_iovlen = nr_segs;
867 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
868
869 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
870}
871
027445c3
BP
872static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
873 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
874{
875 struct sock_iocb siocb, *x;
876
1da177e4
LT
877 if (pos != 0)
878 return -ESPIPE;
027445c3
BP
879
880 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
881 return 0;
882
027445c3
BP
883
884 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
885 if (!x)
886 return -ENOMEM;
027445c3 887 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
888}
889
ce1d4d3e 890static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
891 struct file *file, const struct iovec *iov,
892 unsigned long nr_segs)
1da177e4 893{
ce1d4d3e
CH
894 struct socket *sock = file->private_data;
895 size_t size = 0;
896 int i;
1da177e4 897
89bddce5
SH
898 for (i = 0; i < nr_segs; i++)
899 size += iov[i].iov_len;
1da177e4 900
ce1d4d3e
CH
901 msg->msg_name = NULL;
902 msg->msg_namelen = 0;
903 msg->msg_control = NULL;
904 msg->msg_controllen = 0;
89bddce5 905 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
906 msg->msg_iovlen = nr_segs;
907 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
908 if (sock->type == SOCK_SEQPACKET)
909 msg->msg_flags |= MSG_EOR;
1da177e4 910
ce1d4d3e 911 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
912}
913
027445c3
BP
914static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
915 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
916{
917 struct sock_iocb siocb, *x;
1da177e4 918
ce1d4d3e
CH
919 if (pos != 0)
920 return -ESPIPE;
027445c3 921
027445c3 922 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
923 if (!x)
924 return -ENOMEM;
1da177e4 925
027445c3 926 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
927}
928
1da177e4
LT
929/*
930 * Atomic setting of ioctl hooks to avoid race
931 * with module unload.
932 */
933
4a3e2f71 934static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 935static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 936
881d966b 937void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 938{
4a3e2f71 939 mutex_lock(&br_ioctl_mutex);
1da177e4 940 br_ioctl_hook = hook;
4a3e2f71 941 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
942}
943EXPORT_SYMBOL(brioctl_set);
944
4a3e2f71 945static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 946static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 947
881d966b 948void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 949{
4a3e2f71 950 mutex_lock(&vlan_ioctl_mutex);
1da177e4 951 vlan_ioctl_hook = hook;
4a3e2f71 952 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
953}
954EXPORT_SYMBOL(vlan_ioctl_set);
955
4a3e2f71 956static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 957static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 958
89bddce5 959void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 960{
4a3e2f71 961 mutex_lock(&dlci_ioctl_mutex);
1da177e4 962 dlci_ioctl_hook = hook;
4a3e2f71 963 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
964}
965EXPORT_SYMBOL(dlci_ioctl_set);
966
6b96018b
AB
967static long sock_do_ioctl(struct net *net, struct socket *sock,
968 unsigned int cmd, unsigned long arg)
969{
970 int err;
971 void __user *argp = (void __user *)arg;
972
973 err = sock->ops->ioctl(sock, cmd, arg);
974
975 /*
976 * If this ioctl is unknown try to hand it down
977 * to the NIC driver.
978 */
979 if (err == -ENOIOCTLCMD)
980 err = dev_ioctl(net, cmd, argp);
981
982 return err;
983}
984
1da177e4
LT
985/*
986 * With an ioctl, arg may well be a user mode pointer, but we don't know
987 * what to do with it - that's up to the protocol still.
988 */
989
990static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
991{
992 struct socket *sock;
881d966b 993 struct sock *sk;
1da177e4
LT
994 void __user *argp = (void __user *)arg;
995 int pid, err;
881d966b 996 struct net *net;
1da177e4 997
b69aee04 998 sock = file->private_data;
881d966b 999 sk = sock->sk;
3b1e0a65 1000 net = sock_net(sk);
1da177e4 1001 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1002 err = dev_ioctl(net, cmd, argp);
1da177e4 1003 } else
3d23e349 1004#ifdef CONFIG_WEXT_CORE
1da177e4 1005 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1006 err = dev_ioctl(net, cmd, argp);
1da177e4 1007 } else
3d23e349 1008#endif
89bddce5 1009 switch (cmd) {
1da177e4
LT
1010 case FIOSETOWN:
1011 case SIOCSPGRP:
1012 err = -EFAULT;
1013 if (get_user(pid, (int __user *)argp))
1014 break;
1015 err = f_setown(sock->file, pid, 1);
1016 break;
1017 case FIOGETOWN:
1018 case SIOCGPGRP:
609d7fa9 1019 err = put_user(f_getown(sock->file),
89bddce5 1020 (int __user *)argp);
1da177e4
LT
1021 break;
1022 case SIOCGIFBR:
1023 case SIOCSIFBR:
1024 case SIOCBRADDBR:
1025 case SIOCBRDELBR:
1026 err = -ENOPKG;
1027 if (!br_ioctl_hook)
1028 request_module("bridge");
1029
4a3e2f71 1030 mutex_lock(&br_ioctl_mutex);
89bddce5 1031 if (br_ioctl_hook)
881d966b 1032 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1033 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1034 break;
1035 case SIOCGIFVLAN:
1036 case SIOCSIFVLAN:
1037 err = -ENOPKG;
1038 if (!vlan_ioctl_hook)
1039 request_module("8021q");
1040
4a3e2f71 1041 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1042 if (vlan_ioctl_hook)
881d966b 1043 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1044 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1045 break;
1da177e4
LT
1046 case SIOCADDDLCI:
1047 case SIOCDELDLCI:
1048 err = -ENOPKG;
1049 if (!dlci_ioctl_hook)
1050 request_module("dlci");
1051
7512cbf6
PE
1052 mutex_lock(&dlci_ioctl_mutex);
1053 if (dlci_ioctl_hook)
1da177e4 1054 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1055 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1056 break;
1057 default:
6b96018b 1058 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1059 break;
89bddce5 1060 }
1da177e4
LT
1061 return err;
1062}
1063
1064int sock_create_lite(int family, int type, int protocol, struct socket **res)
1065{
1066 int err;
1067 struct socket *sock = NULL;
89bddce5 1068
1da177e4
LT
1069 err = security_socket_create(family, type, protocol, 1);
1070 if (err)
1071 goto out;
1072
1073 sock = sock_alloc();
1074 if (!sock) {
1075 err = -ENOMEM;
1076 goto out;
1077 }
1078
1da177e4 1079 sock->type = type;
7420ed23
VY
1080 err = security_socket_post_create(sock, family, type, protocol, 1);
1081 if (err)
1082 goto out_release;
1083
1da177e4
LT
1084out:
1085 *res = sock;
1086 return err;
7420ed23
VY
1087out_release:
1088 sock_release(sock);
1089 sock = NULL;
1090 goto out;
1da177e4 1091}
c6d409cf 1092EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1093
1094/* No kernel lock held - perfect */
89bddce5 1095static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1096{
1097 struct socket *sock;
1098
1099 /*
89bddce5 1100 * We can't return errors to poll, so it's either yes or no.
1da177e4 1101 */
b69aee04 1102 sock = file->private_data;
1da177e4
LT
1103 return sock->ops->poll(file, sock, wait);
1104}
1105
89bddce5 1106static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1107{
b69aee04 1108 struct socket *sock = file->private_data;
1da177e4
LT
1109
1110 return sock->ops->mmap(file, sock, vma);
1111}
1112
20380731 1113static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1114{
1115 /*
89bddce5
SH
1116 * It was possible the inode is NULL we were
1117 * closing an unfinished socket.
1da177e4
LT
1118 */
1119
89bddce5 1120 if (!inode) {
1da177e4
LT
1121 printk(KERN_DEBUG "sock_close: NULL inode\n");
1122 return 0;
1123 }
1da177e4
LT
1124 sock_release(SOCKET_I(inode));
1125 return 0;
1126}
1127
1128/*
1129 * Update the socket async list
1130 *
1131 * Fasync_list locking strategy.
1132 *
1133 * 1. fasync_list is modified only under process context socket lock
1134 * i.e. under semaphore.
1135 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1136 * or under socket lock
1da177e4
LT
1137 */
1138
1139static int sock_fasync(int fd, struct file *filp, int on)
1140{
989a2979
ED
1141 struct socket *sock = filp->private_data;
1142 struct sock *sk = sock->sk;
eaefd110 1143 struct socket_wq *wq;
1da177e4 1144
989a2979 1145 if (sk == NULL)
1da177e4 1146 return -EINVAL;
1da177e4
LT
1147
1148 lock_sock(sk);
eaefd110
ED
1149 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1150 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1151
eaefd110 1152 if (!wq->fasync_list)
989a2979
ED
1153 sock_reset_flag(sk, SOCK_FASYNC);
1154 else
bcdce719 1155 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1156
989a2979 1157 release_sock(sk);
1da177e4
LT
1158 return 0;
1159}
1160
43815482 1161/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1162
1163int sock_wake_async(struct socket *sock, int how, int band)
1164{
43815482
ED
1165 struct socket_wq *wq;
1166
1167 if (!sock)
1168 return -1;
1169 rcu_read_lock();
1170 wq = rcu_dereference(sock->wq);
1171 if (!wq || !wq->fasync_list) {
1172 rcu_read_unlock();
1da177e4 1173 return -1;
43815482 1174 }
89bddce5 1175 switch (how) {
8d8ad9d7 1176 case SOCK_WAKE_WAITD:
1da177e4
LT
1177 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1178 break;
1179 goto call_kill;
8d8ad9d7 1180 case SOCK_WAKE_SPACE:
1da177e4
LT
1181 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1182 break;
1183 /* fall through */
8d8ad9d7 1184 case SOCK_WAKE_IO:
89bddce5 1185call_kill:
43815482 1186 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1187 break;
8d8ad9d7 1188 case SOCK_WAKE_URG:
43815482 1189 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1190 }
43815482 1191 rcu_read_unlock();
1da177e4
LT
1192 return 0;
1193}
c6d409cf 1194EXPORT_SYMBOL(sock_wake_async);
1da177e4 1195
721db93a 1196int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1197 struct socket **res, int kern)
1da177e4
LT
1198{
1199 int err;
1200 struct socket *sock;
55737fda 1201 const struct net_proto_family *pf;
1da177e4
LT
1202
1203 /*
89bddce5 1204 * Check protocol is in range
1da177e4
LT
1205 */
1206 if (family < 0 || family >= NPROTO)
1207 return -EAFNOSUPPORT;
1208 if (type < 0 || type >= SOCK_MAX)
1209 return -EINVAL;
1210
1211 /* Compatibility.
1212
1213 This uglymoron is moved from INET layer to here to avoid
1214 deadlock in module load.
1215 */
1216 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1217 static int warned;
1da177e4
LT
1218 if (!warned) {
1219 warned = 1;
89bddce5
SH
1220 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1221 current->comm);
1da177e4
LT
1222 }
1223 family = PF_PACKET;
1224 }
1225
1226 err = security_socket_create(family, type, protocol, kern);
1227 if (err)
1228 return err;
89bddce5 1229
55737fda
SH
1230 /*
1231 * Allocate the socket and allow the family to set things up. if
1232 * the protocol is 0, the family is instructed to select an appropriate
1233 * default.
1234 */
1235 sock = sock_alloc();
1236 if (!sock) {
e87cc472 1237 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1238 return -ENFILE; /* Not exactly a match, but its the
1239 closest posix thing */
1240 }
1241
1242 sock->type = type;
1243
95a5afca 1244#ifdef CONFIG_MODULES
89bddce5
SH
1245 /* Attempt to load a protocol module if the find failed.
1246 *
1247 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1248 * requested real, full-featured networking support upon configuration.
1249 * Otherwise module support will break!
1250 */
190683a9 1251 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1252 request_module("net-pf-%d", family);
1da177e4
LT
1253#endif
1254
55737fda
SH
1255 rcu_read_lock();
1256 pf = rcu_dereference(net_families[family]);
1257 err = -EAFNOSUPPORT;
1258 if (!pf)
1259 goto out_release;
1da177e4
LT
1260
1261 /*
1262 * We will call the ->create function, that possibly is in a loadable
1263 * module, so we have to bump that loadable module refcnt first.
1264 */
55737fda 1265 if (!try_module_get(pf->owner))
1da177e4
LT
1266 goto out_release;
1267
55737fda
SH
1268 /* Now protected by module ref count */
1269 rcu_read_unlock();
1270
3f378b68 1271 err = pf->create(net, sock, protocol, kern);
55737fda 1272 if (err < 0)
1da177e4 1273 goto out_module_put;
a79af59e 1274
1da177e4
LT
1275 /*
1276 * Now to bump the refcnt of the [loadable] module that owns this
1277 * socket at sock_release time we decrement its refcnt.
1278 */
55737fda
SH
1279 if (!try_module_get(sock->ops->owner))
1280 goto out_module_busy;
1281
1da177e4
LT
1282 /*
1283 * Now that we're done with the ->create function, the [loadable]
1284 * module can have its refcnt decremented
1285 */
55737fda 1286 module_put(pf->owner);
7420ed23
VY
1287 err = security_socket_post_create(sock, family, type, protocol, kern);
1288 if (err)
3b185525 1289 goto out_sock_release;
55737fda 1290 *res = sock;
1da177e4 1291
55737fda
SH
1292 return 0;
1293
1294out_module_busy:
1295 err = -EAFNOSUPPORT;
1da177e4 1296out_module_put:
55737fda
SH
1297 sock->ops = NULL;
1298 module_put(pf->owner);
1299out_sock_release:
1da177e4 1300 sock_release(sock);
55737fda
SH
1301 return err;
1302
1303out_release:
1304 rcu_read_unlock();
1305 goto out_sock_release;
1da177e4 1306}
721db93a 1307EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1308
1309int sock_create(int family, int type, int protocol, struct socket **res)
1310{
1b8d7ae4 1311 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1312}
c6d409cf 1313EXPORT_SYMBOL(sock_create);
1da177e4
LT
1314
1315int sock_create_kern(int family, int type, int protocol, struct socket **res)
1316{
1b8d7ae4 1317 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1318}
c6d409cf 1319EXPORT_SYMBOL(sock_create_kern);
1da177e4 1320
3e0fa65f 1321SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1322{
1323 int retval;
1324 struct socket *sock;
a677a039
UD
1325 int flags;
1326
e38b36f3
UD
1327 /* Check the SOCK_* constants for consistency. */
1328 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1329 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1330 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1331 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1332
a677a039 1333 flags = type & ~SOCK_TYPE_MASK;
77d27200 1334 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1335 return -EINVAL;
1336 type &= SOCK_TYPE_MASK;
1da177e4 1337
aaca0bdc
UD
1338 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1339 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1340
1da177e4
LT
1341 retval = sock_create(family, type, protocol, &sock);
1342 if (retval < 0)
1343 goto out;
1344
77d27200 1345 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1346 if (retval < 0)
1347 goto out_release;
1348
1349out:
1350 /* It may be already another descriptor 8) Not kernel problem. */
1351 return retval;
1352
1353out_release:
1354 sock_release(sock);
1355 return retval;
1356}
1357
1358/*
1359 * Create a pair of connected sockets.
1360 */
1361
3e0fa65f
HC
1362SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1363 int __user *, usockvec)
1da177e4
LT
1364{
1365 struct socket *sock1, *sock2;
1366 int fd1, fd2, err;
db349509 1367 struct file *newfile1, *newfile2;
a677a039
UD
1368 int flags;
1369
1370 flags = type & ~SOCK_TYPE_MASK;
77d27200 1371 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1372 return -EINVAL;
1373 type &= SOCK_TYPE_MASK;
1da177e4 1374
aaca0bdc
UD
1375 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1376 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1377
1da177e4
LT
1378 /*
1379 * Obtain the first socket and check if the underlying protocol
1380 * supports the socketpair call.
1381 */
1382
1383 err = sock_create(family, type, protocol, &sock1);
1384 if (err < 0)
1385 goto out;
1386
1387 err = sock_create(family, type, protocol, &sock2);
1388 if (err < 0)
1389 goto out_release_1;
1390
1391 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1392 if (err < 0)
1da177e4
LT
1393 goto out_release_both;
1394
7cbe66b6 1395 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1396 if (unlikely(fd1 < 0)) {
1397 err = fd1;
db349509 1398 goto out_release_both;
bf3c23d1 1399 }
1da177e4 1400
7cbe66b6 1401 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1402 if (unlikely(fd2 < 0)) {
1403 err = fd2;
1404 fput(newfile1);
1405 put_unused_fd(fd1);
1406 sock_release(sock2);
1407 goto out;
db349509
AV
1408 }
1409
157cf649 1410 audit_fd_pair(fd1, fd2);
db349509
AV
1411 fd_install(fd1, newfile1);
1412 fd_install(fd2, newfile2);
1da177e4
LT
1413 /* fd1 and fd2 may be already another descriptors.
1414 * Not kernel problem.
1415 */
1416
89bddce5 1417 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1418 if (!err)
1419 err = put_user(fd2, &usockvec[1]);
1420 if (!err)
1421 return 0;
1422
1423 sys_close(fd2);
1424 sys_close(fd1);
1425 return err;
1426
1da177e4 1427out_release_both:
89bddce5 1428 sock_release(sock2);
1da177e4 1429out_release_1:
89bddce5 1430 sock_release(sock1);
1da177e4
LT
1431out:
1432 return err;
1433}
1434
1da177e4
LT
1435/*
1436 * Bind a name to a socket. Nothing much to do here since it's
1437 * the protocol's responsibility to handle the local address.
1438 *
1439 * We move the socket address to kernel space before we call
1440 * the protocol layer (having also checked the address is ok).
1441 */
1442
20f37034 1443SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1444{
1445 struct socket *sock;
230b1839 1446 struct sockaddr_storage address;
6cb153ca 1447 int err, fput_needed;
1da177e4 1448
89bddce5 1449 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1450 if (sock) {
43db362d 1451 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1452 if (err >= 0) {
1453 err = security_socket_bind(sock,
230b1839 1454 (struct sockaddr *)&address,
89bddce5 1455 addrlen);
6cb153ca
BL
1456 if (!err)
1457 err = sock->ops->bind(sock,
89bddce5 1458 (struct sockaddr *)
230b1839 1459 &address, addrlen);
1da177e4 1460 }
6cb153ca 1461 fput_light(sock->file, fput_needed);
89bddce5 1462 }
1da177e4
LT
1463 return err;
1464}
1465
1da177e4
LT
1466/*
1467 * Perform a listen. Basically, we allow the protocol to do anything
1468 * necessary for a listen, and if that works, we mark the socket as
1469 * ready for listening.
1470 */
1471
3e0fa65f 1472SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1473{
1474 struct socket *sock;
6cb153ca 1475 int err, fput_needed;
b8e1f9b5 1476 int somaxconn;
89bddce5
SH
1477
1478 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1479 if (sock) {
8efa6e93 1480 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1481 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1482 backlog = somaxconn;
1da177e4
LT
1483
1484 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1485 if (!err)
1486 err = sock->ops->listen(sock, backlog);
1da177e4 1487
6cb153ca 1488 fput_light(sock->file, fput_needed);
1da177e4
LT
1489 }
1490 return err;
1491}
1492
1da177e4
LT
1493/*
1494 * For accept, we attempt to create a new socket, set up the link
1495 * with the client, wake up the client, then return the new
1496 * connected fd. We collect the address of the connector in kernel
1497 * space and move it to user at the very end. This is unclean because
1498 * we open the socket then return an error.
1499 *
1500 * 1003.1g adds the ability to recvmsg() to query connection pending
1501 * status to recvmsg. We need to add that support in a way thats
1502 * clean when we restucture accept also.
1503 */
1504
20f37034
HC
1505SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1506 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1507{
1508 struct socket *sock, *newsock;
39d8c1b6 1509 struct file *newfile;
6cb153ca 1510 int err, len, newfd, fput_needed;
230b1839 1511 struct sockaddr_storage address;
1da177e4 1512
77d27200 1513 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1514 return -EINVAL;
1515
1516 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1517 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1518
6cb153ca 1519 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1520 if (!sock)
1521 goto out;
1522
1523 err = -ENFILE;
c6d409cf
ED
1524 newsock = sock_alloc();
1525 if (!newsock)
1da177e4
LT
1526 goto out_put;
1527
1528 newsock->type = sock->type;
1529 newsock->ops = sock->ops;
1530
1da177e4
LT
1531 /*
1532 * We don't need try_module_get here, as the listening socket (sock)
1533 * has the protocol module (sock->ops->owner) held.
1534 */
1535 __module_get(newsock->ops->owner);
1536
7cbe66b6 1537 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1538 if (unlikely(newfd < 0)) {
1539 err = newfd;
9a1875e6
DM
1540 sock_release(newsock);
1541 goto out_put;
39d8c1b6
DM
1542 }
1543
a79af59e
FF
1544 err = security_socket_accept(sock, newsock);
1545 if (err)
39d8c1b6 1546 goto out_fd;
a79af59e 1547
1da177e4
LT
1548 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1549 if (err < 0)
39d8c1b6 1550 goto out_fd;
1da177e4
LT
1551
1552 if (upeer_sockaddr) {
230b1839 1553 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1554 &len, 2) < 0) {
1da177e4 1555 err = -ECONNABORTED;
39d8c1b6 1556 goto out_fd;
1da177e4 1557 }
43db362d 1558 err = move_addr_to_user(&address,
230b1839 1559 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1560 if (err < 0)
39d8c1b6 1561 goto out_fd;
1da177e4
LT
1562 }
1563
1564 /* File flags are not inherited via accept() unlike another OSes. */
1565
39d8c1b6
DM
1566 fd_install(newfd, newfile);
1567 err = newfd;
1da177e4 1568
1da177e4 1569out_put:
6cb153ca 1570 fput_light(sock->file, fput_needed);
1da177e4
LT
1571out:
1572 return err;
39d8c1b6 1573out_fd:
9606a216 1574 fput(newfile);
39d8c1b6 1575 put_unused_fd(newfd);
1da177e4
LT
1576 goto out_put;
1577}
1578
20f37034
HC
1579SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1580 int __user *, upeer_addrlen)
aaca0bdc 1581{
de11defe 1582 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1583}
1584
1da177e4
LT
1585/*
1586 * Attempt to connect to a socket with the server address. The address
1587 * is in user space so we verify it is OK and move it to kernel space.
1588 *
1589 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1590 * break bindings
1591 *
1592 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1593 * other SEQPACKET protocols that take time to connect() as it doesn't
1594 * include the -EINPROGRESS status for such sockets.
1595 */
1596
20f37034
HC
1597SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1598 int, addrlen)
1da177e4
LT
1599{
1600 struct socket *sock;
230b1839 1601 struct sockaddr_storage address;
6cb153ca 1602 int err, fput_needed;
1da177e4 1603
6cb153ca 1604 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1605 if (!sock)
1606 goto out;
43db362d 1607 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1608 if (err < 0)
1609 goto out_put;
1610
89bddce5 1611 err =
230b1839 1612 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1613 if (err)
1614 goto out_put;
1615
230b1839 1616 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1617 sock->file->f_flags);
1618out_put:
6cb153ca 1619 fput_light(sock->file, fput_needed);
1da177e4
LT
1620out:
1621 return err;
1622}
1623
1624/*
1625 * Get the local address ('name') of a socket object. Move the obtained
1626 * name to user space.
1627 */
1628
20f37034
HC
1629SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1630 int __user *, usockaddr_len)
1da177e4
LT
1631{
1632 struct socket *sock;
230b1839 1633 struct sockaddr_storage address;
6cb153ca 1634 int len, err, fput_needed;
89bddce5 1635
6cb153ca 1636 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1637 if (!sock)
1638 goto out;
1639
1640 err = security_socket_getsockname(sock);
1641 if (err)
1642 goto out_put;
1643
230b1839 1644 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1645 if (err)
1646 goto out_put;
43db362d 1647 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1648
1649out_put:
6cb153ca 1650 fput_light(sock->file, fput_needed);
1da177e4
LT
1651out:
1652 return err;
1653}
1654
1655/*
1656 * Get the remote address ('name') of a socket object. Move the obtained
1657 * name to user space.
1658 */
1659
20f37034
HC
1660SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1661 int __user *, usockaddr_len)
1da177e4
LT
1662{
1663 struct socket *sock;
230b1839 1664 struct sockaddr_storage address;
6cb153ca 1665 int len, err, fput_needed;
1da177e4 1666
89bddce5
SH
1667 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1668 if (sock != NULL) {
1da177e4
LT
1669 err = security_socket_getpeername(sock);
1670 if (err) {
6cb153ca 1671 fput_light(sock->file, fput_needed);
1da177e4
LT
1672 return err;
1673 }
1674
89bddce5 1675 err =
230b1839 1676 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1677 1);
1da177e4 1678 if (!err)
43db362d 1679 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1680 usockaddr_len);
6cb153ca 1681 fput_light(sock->file, fput_needed);
1da177e4
LT
1682 }
1683 return err;
1684}
1685
1686/*
1687 * Send a datagram to a given address. We move the address into kernel
1688 * space and check the user space data area is readable before invoking
1689 * the protocol.
1690 */
1691
3e0fa65f 1692SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1693 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1694 int, addr_len)
1da177e4
LT
1695{
1696 struct socket *sock;
230b1839 1697 struct sockaddr_storage address;
1da177e4
LT
1698 int err;
1699 struct msghdr msg;
1700 struct iovec iov;
6cb153ca 1701 int fput_needed;
6cb153ca 1702
253eacc0
LT
1703 if (len > INT_MAX)
1704 len = INT_MAX;
de0fa95c
PE
1705 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1706 if (!sock)
4387ff75 1707 goto out;
6cb153ca 1708
89bddce5
SH
1709 iov.iov_base = buff;
1710 iov.iov_len = len;
1711 msg.msg_name = NULL;
1712 msg.msg_iov = &iov;
1713 msg.msg_iovlen = 1;
1714 msg.msg_control = NULL;
1715 msg.msg_controllen = 0;
1716 msg.msg_namelen = 0;
6cb153ca 1717 if (addr) {
43db362d 1718 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1719 if (err < 0)
1720 goto out_put;
230b1839 1721 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1722 msg.msg_namelen = addr_len;
1da177e4
LT
1723 }
1724 if (sock->file->f_flags & O_NONBLOCK)
1725 flags |= MSG_DONTWAIT;
1726 msg.msg_flags = flags;
1727 err = sock_sendmsg(sock, &msg, len);
1728
89bddce5 1729out_put:
de0fa95c 1730 fput_light(sock->file, fput_needed);
4387ff75 1731out:
1da177e4
LT
1732 return err;
1733}
1734
1735/*
89bddce5 1736 * Send a datagram down a socket.
1da177e4
LT
1737 */
1738
3e0fa65f 1739SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1740 unsigned int, flags)
1da177e4
LT
1741{
1742 return sys_sendto(fd, buff, len, flags, NULL, 0);
1743}
1744
1745/*
89bddce5 1746 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1747 * sender. We verify the buffers are writable and if needed move the
1748 * sender address from kernel to user space.
1749 */
1750
3e0fa65f 1751SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1752 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1753 int __user *, addr_len)
1da177e4
LT
1754{
1755 struct socket *sock;
1756 struct iovec iov;
1757 struct msghdr msg;
230b1839 1758 struct sockaddr_storage address;
89bddce5 1759 int err, err2;
6cb153ca
BL
1760 int fput_needed;
1761
253eacc0
LT
1762 if (size > INT_MAX)
1763 size = INT_MAX;
de0fa95c 1764 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1765 if (!sock)
de0fa95c 1766 goto out;
1da177e4 1767
89bddce5
SH
1768 msg.msg_control = NULL;
1769 msg.msg_controllen = 0;
1770 msg.msg_iovlen = 1;
1771 msg.msg_iov = &iov;
1772 iov.iov_len = size;
1773 iov.iov_base = ubuf;
230b1839
YH
1774 msg.msg_name = (struct sockaddr *)&address;
1775 msg.msg_namelen = sizeof(address);
1da177e4
LT
1776 if (sock->file->f_flags & O_NONBLOCK)
1777 flags |= MSG_DONTWAIT;
89bddce5 1778 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1779
89bddce5 1780 if (err >= 0 && addr != NULL) {
43db362d 1781 err2 = move_addr_to_user(&address,
230b1839 1782 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1783 if (err2 < 0)
1784 err = err2;
1da177e4 1785 }
de0fa95c
PE
1786
1787 fput_light(sock->file, fput_needed);
4387ff75 1788out:
1da177e4
LT
1789 return err;
1790}
1791
1792/*
89bddce5 1793 * Receive a datagram from a socket.
1da177e4
LT
1794 */
1795
89bddce5 1796asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1797 unsigned int flags)
1da177e4
LT
1798{
1799 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1800}
1801
1802/*
1803 * Set a socket option. Because we don't know the option lengths we have
1804 * to pass the user mode parameter for the protocols to sort out.
1805 */
1806
20f37034
HC
1807SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1808 char __user *, optval, int, optlen)
1da177e4 1809{
6cb153ca 1810 int err, fput_needed;
1da177e4
LT
1811 struct socket *sock;
1812
1813 if (optlen < 0)
1814 return -EINVAL;
89bddce5
SH
1815
1816 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1817 if (sock != NULL) {
1818 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1819 if (err)
1820 goto out_put;
1da177e4
LT
1821
1822 if (level == SOL_SOCKET)
89bddce5
SH
1823 err =
1824 sock_setsockopt(sock, level, optname, optval,
1825 optlen);
1da177e4 1826 else
89bddce5
SH
1827 err =
1828 sock->ops->setsockopt(sock, level, optname, optval,
1829 optlen);
6cb153ca
BL
1830out_put:
1831 fput_light(sock->file, fput_needed);
1da177e4
LT
1832 }
1833 return err;
1834}
1835
1836/*
1837 * Get a socket option. Because we don't know the option lengths we have
1838 * to pass a user mode parameter for the protocols to sort out.
1839 */
1840
20f37034
HC
1841SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1842 char __user *, optval, int __user *, optlen)
1da177e4 1843{
6cb153ca 1844 int err, fput_needed;
1da177e4
LT
1845 struct socket *sock;
1846
89bddce5
SH
1847 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1848 if (sock != NULL) {
6cb153ca
BL
1849 err = security_socket_getsockopt(sock, level, optname);
1850 if (err)
1851 goto out_put;
1da177e4
LT
1852
1853 if (level == SOL_SOCKET)
89bddce5
SH
1854 err =
1855 sock_getsockopt(sock, level, optname, optval,
1856 optlen);
1da177e4 1857 else
89bddce5
SH
1858 err =
1859 sock->ops->getsockopt(sock, level, optname, optval,
1860 optlen);
6cb153ca
BL
1861out_put:
1862 fput_light(sock->file, fput_needed);
1da177e4
LT
1863 }
1864 return err;
1865}
1866
1da177e4
LT
1867/*
1868 * Shutdown a socket.
1869 */
1870
754fe8d2 1871SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1872{
6cb153ca 1873 int err, fput_needed;
1da177e4
LT
1874 struct socket *sock;
1875
89bddce5
SH
1876 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1877 if (sock != NULL) {
1da177e4 1878 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1879 if (!err)
1880 err = sock->ops->shutdown(sock, how);
1881 fput_light(sock->file, fput_needed);
1da177e4
LT
1882 }
1883 return err;
1884}
1885
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These expand in the caller's scope: they expect a local named 'flags'
 * and, next to the pointer 'msg', a second pointer named 'msg_compat'
 * (built by token-pasting "_compat" onto the argument).
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? \
				 &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1892
c71d8ebe
TH
1893struct used_address {
1894 struct sockaddr_storage name;
1895 unsigned int name_len;
1896};
1897
228e548e 1898static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1899 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1900 struct used_address *used_address)
1da177e4 1901{
89bddce5
SH
1902 struct compat_msghdr __user *msg_compat =
1903 (struct compat_msghdr __user *)msg;
230b1839 1904 struct sockaddr_storage address;
1da177e4 1905 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1906 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1907 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1908 /* 20 is size of ipv6_pktinfo */
1da177e4 1909 unsigned char *ctl_buf = ctl;
a74e9106 1910 int err, ctl_len, total_len;
89bddce5 1911
1da177e4
LT
1912 err = -EFAULT;
1913 if (MSG_CMSG_COMPAT & flags) {
228e548e 1914 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1915 return -EFAULT;
228e548e 1916 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1917 return -EFAULT;
1918
228e548e 1919 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
1920 err = -EMSGSIZE;
1921 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1922 goto out;
1923 err = -ENOMEM;
1924 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
1925 GFP_KERNEL);
1da177e4 1926 if (!iov)
228e548e 1927 goto out;
1da177e4
LT
1928 }
1929
1930 /* This will also move the address data into kernel space */
1931 if (MSG_CMSG_COMPAT & flags) {
43db362d 1932 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 1933 } else
43db362d 1934 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 1935 if (err < 0)
1da177e4
LT
1936 goto out_freeiov;
1937 total_len = err;
1938
1939 err = -ENOBUFS;
1940
228e548e 1941 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1942 goto out_freeiov;
228e548e 1943 ctl_len = msg_sys->msg_controllen;
1da177e4 1944 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1945 err =
228e548e 1946 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1947 sizeof(ctl));
1da177e4
LT
1948 if (err)
1949 goto out_freeiov;
228e548e
AB
1950 ctl_buf = msg_sys->msg_control;
1951 ctl_len = msg_sys->msg_controllen;
1da177e4 1952 } else if (ctl_len) {
89bddce5 1953 if (ctl_len > sizeof(ctl)) {
1da177e4 1954 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1955 if (ctl_buf == NULL)
1da177e4
LT
1956 goto out_freeiov;
1957 }
1958 err = -EFAULT;
1959 /*
228e548e 1960 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1961 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1962 * checking falls down on this.
1963 */
fb8621bb 1964 if (copy_from_user(ctl_buf,
228e548e 1965 (void __user __force *)msg_sys->msg_control,
89bddce5 1966 ctl_len))
1da177e4 1967 goto out_freectl;
228e548e 1968 msg_sys->msg_control = ctl_buf;
1da177e4 1969 }
228e548e 1970 msg_sys->msg_flags = flags;
1da177e4
LT
1971
1972 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1973 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1974 /*
1975 * If this is sendmmsg() and current destination address is same as
1976 * previously succeeded address, omit asking LSM's decision.
1977 * used_address->name_len is initialized to UINT_MAX so that the first
1978 * destination address never matches.
1979 */
bc909d9d
MD
1980 if (used_address && msg_sys->msg_name &&
1981 used_address->name_len == msg_sys->msg_namelen &&
1982 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
1983 used_address->name_len)) {
1984 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1985 goto out_freectl;
1986 }
1987 err = sock_sendmsg(sock, msg_sys, total_len);
1988 /*
1989 * If this is sendmmsg() and sending to current destination address was
1990 * successful, remember it.
1991 */
1992 if (used_address && err >= 0) {
1993 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1994 if (msg_sys->msg_name)
1995 memcpy(&used_address->name, msg_sys->msg_name,
1996 used_address->name_len);
c71d8ebe 1997 }
1da177e4
LT
1998
1999out_freectl:
89bddce5 2000 if (ctl_buf != ctl)
1da177e4
LT
2001 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2002out_freeiov:
2003 if (iov != iovstack)
a74e9106 2004 kfree(iov);
228e548e
AB
2005out:
2006 return err;
2007}
2008
2009/*
2010 * BSD sendmsg interface
2011 */
2012
95c96174 2013SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
228e548e
AB
2014{
2015 int fput_needed, err;
2016 struct msghdr msg_sys;
2017 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2018
2019 if (!sock)
2020 goto out;
2021
c71d8ebe 2022 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2023
6cb153ca 2024 fput_light(sock->file, fput_needed);
89bddce5 2025out:
1da177e4
LT
2026 return err;
2027}
2028
228e548e
AB
2029/*
2030 * Linux sendmmsg interface
2031 */
2032
2033int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2034 unsigned int flags)
2035{
2036 int fput_needed, err, datagrams;
2037 struct socket *sock;
2038 struct mmsghdr __user *entry;
2039 struct compat_mmsghdr __user *compat_entry;
2040 struct msghdr msg_sys;
c71d8ebe 2041 struct used_address used_address;
228e548e 2042
98382f41
AB
2043 if (vlen > UIO_MAXIOV)
2044 vlen = UIO_MAXIOV;
228e548e
AB
2045
2046 datagrams = 0;
2047
2048 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2049 if (!sock)
2050 return err;
2051
c71d8ebe 2052 used_address.name_len = UINT_MAX;
228e548e
AB
2053 entry = mmsg;
2054 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2055 err = 0;
228e548e
AB
2056
2057 while (datagrams < vlen) {
228e548e
AB
2058 if (MSG_CMSG_COMPAT & flags) {
2059 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2060 &msg_sys, flags, &used_address);
228e548e
AB
2061 if (err < 0)
2062 break;
2063 err = __put_user(err, &compat_entry->msg_len);
2064 ++compat_entry;
2065 } else {
2066 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2067 &msg_sys, flags, &used_address);
228e548e
AB
2068 if (err < 0)
2069 break;
2070 err = put_user(err, &entry->msg_len);
2071 ++entry;
2072 }
2073
2074 if (err)
2075 break;
2076 ++datagrams;
2077 }
2078
228e548e
AB
2079 fput_light(sock->file, fput_needed);
2080
728ffb86
AB
2081 /* We only return an error if no datagrams were able to be sent */
2082 if (datagrams != 0)
228e548e
AB
2083 return datagrams;
2084
228e548e
AB
2085 return err;
2086}
2087
2088SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2089 unsigned int, vlen, unsigned int, flags)
2090{
2091 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2092}
2093
a2e27255 2094static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2095 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2096{
89bddce5
SH
2097 struct compat_msghdr __user *msg_compat =
2098 (struct compat_msghdr __user *)msg;
1da177e4 2099 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2100 struct iovec *iov = iovstack;
1da177e4 2101 unsigned long cmsg_ptr;
a74e9106 2102 int err, total_len, len;
1da177e4
LT
2103
2104 /* kernel mode address */
230b1839 2105 struct sockaddr_storage addr;
1da177e4
LT
2106
2107 /* user mode address pointers */
2108 struct sockaddr __user *uaddr;
2109 int __user *uaddr_len;
89bddce5 2110
1da177e4 2111 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2112 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2113 return -EFAULT;
c6d409cf 2114 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2115 return -EFAULT;
1da177e4 2116
a2e27255 2117 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2118 err = -EMSGSIZE;
2119 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2120 goto out;
2121 err = -ENOMEM;
2122 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2123 GFP_KERNEL);
1da177e4 2124 if (!iov)
a2e27255 2125 goto out;
1da177e4
LT
2126 }
2127
2128 /*
89bddce5
SH
2129 * Save the user-mode address (verify_iovec will change the
2130 * kernel msghdr to use the kernel address space)
1da177e4 2131 */
89bddce5 2132
a2e27255 2133 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2134 uaddr_len = COMPAT_NAMELEN(msg);
2135 if (MSG_CMSG_COMPAT & flags) {
43db362d 2136 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4 2137 } else
43db362d 2138 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2139 if (err < 0)
2140 goto out_freeiov;
89bddce5 2141 total_len = err;
1da177e4 2142
a2e27255
ACM
2143 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2144 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2145
1da177e4
LT
2146 if (sock->file->f_flags & O_NONBLOCK)
2147 flags |= MSG_DONTWAIT;
a2e27255
ACM
2148 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2149 total_len, flags);
1da177e4
LT
2150 if (err < 0)
2151 goto out_freeiov;
2152 len = err;
2153
2154 if (uaddr != NULL) {
43db362d 2155 err = move_addr_to_user(&addr,
a2e27255 2156 msg_sys->msg_namelen, uaddr,
89bddce5 2157 uaddr_len);
1da177e4
LT
2158 if (err < 0)
2159 goto out_freeiov;
2160 }
a2e27255 2161 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2162 COMPAT_FLAGS(msg));
1da177e4
LT
2163 if (err)
2164 goto out_freeiov;
2165 if (MSG_CMSG_COMPAT & flags)
a2e27255 2166 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2167 &msg_compat->msg_controllen);
2168 else
a2e27255 2169 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2170 &msg->msg_controllen);
2171 if (err)
2172 goto out_freeiov;
2173 err = len;
2174
2175out_freeiov:
2176 if (iov != iovstack)
a74e9106 2177 kfree(iov);
a2e27255
ACM
2178out:
2179 return err;
2180}
2181
2182/*
2183 * BSD recvmsg interface
2184 */
2185
2186SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2187 unsigned int, flags)
2188{
2189 int fput_needed, err;
2190 struct msghdr msg_sys;
2191 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2192
2193 if (!sock)
2194 goto out;
2195
2196 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2197
6cb153ca 2198 fput_light(sock->file, fput_needed);
1da177e4
LT
2199out:
2200 return err;
2201}
2202
a2e27255
ACM
2203/*
2204 * Linux recvmmsg interface
2205 */
2206
2207int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2208 unsigned int flags, struct timespec *timeout)
2209{
2210 int fput_needed, err, datagrams;
2211 struct socket *sock;
2212 struct mmsghdr __user *entry;
d7256d0e 2213 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2214 struct msghdr msg_sys;
2215 struct timespec end_time;
2216
2217 if (timeout &&
2218 poll_select_set_timeout(&end_time, timeout->tv_sec,
2219 timeout->tv_nsec))
2220 return -EINVAL;
2221
2222 datagrams = 0;
2223
2224 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2225 if (!sock)
2226 return err;
2227
2228 err = sock_error(sock->sk);
2229 if (err)
2230 goto out_put;
2231
2232 entry = mmsg;
d7256d0e 2233 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2234
2235 while (datagrams < vlen) {
2236 /*
2237 * No need to ask LSM for more than the first datagram.
2238 */
d7256d0e
JMG
2239 if (MSG_CMSG_COMPAT & flags) {
2240 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2241 &msg_sys, flags & ~MSG_WAITFORONE,
2242 datagrams);
d7256d0e
JMG
2243 if (err < 0)
2244 break;
2245 err = __put_user(err, &compat_entry->msg_len);
2246 ++compat_entry;
2247 } else {
2248 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2249 &msg_sys, flags & ~MSG_WAITFORONE,
2250 datagrams);
d7256d0e
JMG
2251 if (err < 0)
2252 break;
2253 err = put_user(err, &entry->msg_len);
2254 ++entry;
2255 }
2256
a2e27255
ACM
2257 if (err)
2258 break;
a2e27255
ACM
2259 ++datagrams;
2260
71c5c159
BB
2261 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2262 if (flags & MSG_WAITFORONE)
2263 flags |= MSG_DONTWAIT;
2264
a2e27255
ACM
2265 if (timeout) {
2266 ktime_get_ts(timeout);
2267 *timeout = timespec_sub(end_time, *timeout);
2268 if (timeout->tv_sec < 0) {
2269 timeout->tv_sec = timeout->tv_nsec = 0;
2270 break;
2271 }
2272
2273 /* Timeout, return less than vlen datagrams */
2274 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2275 break;
2276 }
2277
2278 /* Out of band data, return right away */
2279 if (msg_sys.msg_flags & MSG_OOB)
2280 break;
2281 }
2282
2283out_put:
2284 fput_light(sock->file, fput_needed);
1da177e4 2285
a2e27255
ACM
2286 if (err == 0)
2287 return datagrams;
2288
2289 if (datagrams != 0) {
2290 /*
2291 * We may return less entries than requested (vlen) if the
2292 * sock is non block and there aren't enough datagrams...
2293 */
2294 if (err != -EAGAIN) {
2295 /*
2296 * ... or if recvmsg returns an error after we
2297 * received some datagrams, where we record the
2298 * error to return on the next call or if the
2299 * app asks about it using getsockopt(SO_ERROR).
2300 */
2301 sock->sk->sk_err = -err;
2302 }
2303
2304 return datagrams;
2305 }
2306
2307 return err;
2308}
2309
2310SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2311 unsigned int, vlen, unsigned int, flags,
2312 struct timespec __user *, timeout)
2313{
2314 int datagrams;
2315 struct timespec timeout_sys;
2316
2317 if (!timeout)
2318 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2319
2320 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2321 return -EFAULT;
2322
2323 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2324
2325 if (datagrams > 0 &&
2326 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2327 datagrams = -EFAULT;
2328
2329 return datagrams;
2330}
2331
2332#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument block for each sys_socketcall call
 * number; the array index is the call number and AL(n) is n arguments
 * of type unsigned long.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5),	/* 19 */
	AL(4)	/* 20 */
};

#undef AL
2343
2344/*
89bddce5 2345 * System call vectors.
1da177e4
LT
2346 *
2347 * Argument checking cleaned up. Saved 20% in size.
2348 * This function doesn't need to set the kernel lock because
89bddce5 2349 * it is set by the callees.
1da177e4
LT
2350 */
2351
3e0fa65f 2352SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2353{
2354 unsigned long a[6];
89bddce5 2355 unsigned long a0, a1;
1da177e4 2356 int err;
47379052 2357 unsigned int len;
1da177e4 2358
228e548e 2359 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2360 return -EINVAL;
2361
47379052
AV
2362 len = nargs[call];
2363 if (len > sizeof(a))
2364 return -EINVAL;
2365
1da177e4 2366 /* copy_from_user should be SMP safe. */
47379052 2367 if (copy_from_user(a, args, len))
1da177e4 2368 return -EFAULT;
3ec3b2fb 2369
f3298dc4 2370 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2371
89bddce5
SH
2372 a0 = a[0];
2373 a1 = a[1];
2374
2375 switch (call) {
2376 case SYS_SOCKET:
2377 err = sys_socket(a0, a1, a[2]);
2378 break;
2379 case SYS_BIND:
2380 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2381 break;
2382 case SYS_CONNECT:
2383 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2384 break;
2385 case SYS_LISTEN:
2386 err = sys_listen(a0, a1);
2387 break;
2388 case SYS_ACCEPT:
de11defe
UD
2389 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2390 (int __user *)a[2], 0);
89bddce5
SH
2391 break;
2392 case SYS_GETSOCKNAME:
2393 err =
2394 sys_getsockname(a0, (struct sockaddr __user *)a1,
2395 (int __user *)a[2]);
2396 break;
2397 case SYS_GETPEERNAME:
2398 err =
2399 sys_getpeername(a0, (struct sockaddr __user *)a1,
2400 (int __user *)a[2]);
2401 break;
2402 case SYS_SOCKETPAIR:
2403 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2404 break;
2405 case SYS_SEND:
2406 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2407 break;
2408 case SYS_SENDTO:
2409 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2410 (struct sockaddr __user *)a[4], a[5]);
2411 break;
2412 case SYS_RECV:
2413 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2414 break;
2415 case SYS_RECVFROM:
2416 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2417 (struct sockaddr __user *)a[4],
2418 (int __user *)a[5]);
2419 break;
2420 case SYS_SHUTDOWN:
2421 err = sys_shutdown(a0, a1);
2422 break;
2423 case SYS_SETSOCKOPT:
2424 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2425 break;
2426 case SYS_GETSOCKOPT:
2427 err =
2428 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2429 (int __user *)a[4]);
2430 break;
2431 case SYS_SENDMSG:
2432 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2433 break;
228e548e
AB
2434 case SYS_SENDMMSG:
2435 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2436 break;
89bddce5
SH
2437 case SYS_RECVMSG:
2438 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2439 break;
a2e27255
ACM
2440 case SYS_RECVMMSG:
2441 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2442 (struct timespec __user *)a[4]);
2443 break;
de11defe
UD
2444 case SYS_ACCEPT4:
2445 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2446 (int __user *)a[2], a[3]);
aaca0bdc 2447 break;
89bddce5
SH
2448 default:
2449 err = -EINVAL;
2450 break;
1da177e4
LT
2451 }
2452 return err;
2453}
2454
89bddce5 2455#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2456
55737fda
SH
2457/**
2458 * sock_register - add a socket protocol handler
2459 * @ops: description of protocol
2460 *
1da177e4
LT
2461 * This function is called by a protocol handler that wants to
2462 * advertise its address family, and have it linked into the
55737fda
SH
2463 * socket interface. The value ops->family coresponds to the
2464 * socket system call protocol family.
1da177e4 2465 */
f0fd27d4 2466int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2467{
2468 int err;
2469
2470 if (ops->family >= NPROTO) {
89bddce5
SH
2471 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2472 NPROTO);
1da177e4
LT
2473 return -ENOBUFS;
2474 }
55737fda
SH
2475
2476 spin_lock(&net_family_lock);
190683a9
ED
2477 if (rcu_dereference_protected(net_families[ops->family],
2478 lockdep_is_held(&net_family_lock)))
55737fda
SH
2479 err = -EEXIST;
2480 else {
cf778b00 2481 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2482 err = 0;
2483 }
55737fda
SH
2484 spin_unlock(&net_family_lock);
2485
89bddce5 2486 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2487 return err;
2488}
c6d409cf 2489EXPORT_SYMBOL(sock_register);
1da177e4 2490
55737fda
SH
2491/**
2492 * sock_unregister - remove a protocol handler
2493 * @family: protocol family to remove
2494 *
1da177e4
LT
2495 * This function is called by a protocol handler that wants to
2496 * remove its address family, and have it unlinked from the
55737fda
SH
2497 * new socket creation.
2498 *
2499 * If protocol handler is a module, then it can use module reference
2500 * counts to protect against new references. If protocol handler is not
2501 * a module then it needs to provide its own protection in
2502 * the ops->create routine.
1da177e4 2503 */
f0fd27d4 2504void sock_unregister(int family)
1da177e4 2505{
f0fd27d4 2506 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2507
55737fda 2508 spin_lock(&net_family_lock);
a9b3cd7f 2509 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2510 spin_unlock(&net_family_lock);
2511
2512 synchronize_rcu();
2513
89bddce5 2514 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2515}
c6d409cf 2516EXPORT_SYMBOL(sock_unregister);
1da177e4 2517
77d76ea3 2518static int __init sock_init(void)
1da177e4 2519{
b3e19d92 2520 int err;
2ca794e5
EB
2521 /*
2522 * Initialize the network sysctl infrastructure.
2523 */
2524 err = net_sysctl_init();
2525 if (err)
2526 goto out;
b3e19d92 2527
1da177e4 2528 /*
89bddce5 2529 * Initialize sock SLAB cache.
1da177e4 2530 */
89bddce5 2531
1da177e4
LT
2532 sk_init();
2533
1da177e4 2534 /*
89bddce5 2535 * Initialize skbuff SLAB cache
1da177e4
LT
2536 */
2537 skb_init();
1da177e4
LT
2538
2539 /*
89bddce5 2540 * Initialize the protocols module.
1da177e4
LT
2541 */
2542
2543 init_inodecache();
b3e19d92
NP
2544
2545 err = register_filesystem(&sock_fs_type);
2546 if (err)
2547 goto out_fs;
1da177e4 2548 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2549 if (IS_ERR(sock_mnt)) {
2550 err = PTR_ERR(sock_mnt);
2551 goto out_mount;
2552 }
77d76ea3
AK
2553
2554 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2555 */
2556
2557#ifdef CONFIG_NETFILTER
2558 netfilter_init();
2559#endif
cbeb321a 2560
c1f19b51
RC
2561#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2562 skb_timestamping_init();
2563#endif
2564
b3e19d92
NP
2565out:
2566 return err;
2567
2568out_mount:
2569 unregister_filesystem(&sock_fs_type);
2570out_fs:
2571 goto out;
1da177e4
LT
2572}
2573
77d76ea3
AK
2574core_initcall(sock_init); /* early initcall */
2575
1da177e4
LT
2576#ifdef CONFIG_PROC_FS
2577void socket_seq_show(struct seq_file *seq)
2578{
2579 int cpu;
2580 int counter = 0;
2581
6f912042 2582 for_each_possible_cpu(cpu)
89bddce5 2583 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2584
2585 /* It can be negative, by the way. 8) */
2586 if (counter < 0)
2587 counter = 0;
2588
2589 seq_printf(seq, "sockets: used %d\n", counter);
2590}
89bddce5 2591#endif /* CONFIG_PROC_FS */
1da177e4 2592
89bbfc95 2593#ifdef CONFIG_COMPAT
6b96018b 2594static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2595 unsigned int cmd, void __user *up)
7a229387 2596{
7a229387
AB
2597 mm_segment_t old_fs = get_fs();
2598 struct timeval ktv;
2599 int err;
2600
2601 set_fs(KERNEL_DS);
6b96018b 2602 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2603 set_fs(old_fs);
644595f8
PA
2604 if (!err)
2605 err = compat_put_timeval(up, &ktv);
2606
7a229387
AB
2607 return err;
2608}
2609
6b96018b 2610static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2611 unsigned int cmd, void __user *up)
7a229387 2612{
7a229387
AB
2613 mm_segment_t old_fs = get_fs();
2614 struct timespec kts;
2615 int err;
2616
2617 set_fs(KERNEL_DS);
6b96018b 2618 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2619 set_fs(old_fs);
644595f8
PA
2620 if (!err)
2621 err = compat_put_timespec(up, &kts);
2622
7a229387
AB
2623 return err;
2624}
2625
6b96018b 2626static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2627{
2628 struct ifreq __user *uifr;
2629 int err;
2630
2631 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2632 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2633 return -EFAULT;
2634
6b96018b 2635 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2636 if (err)
2637 return err;
2638
6b96018b 2639 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2640 return -EFAULT;
2641
2642 return 0;
2643}
2644
6b96018b 2645static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2646{
6b96018b 2647 struct compat_ifconf ifc32;
7a229387
AB
2648 struct ifconf ifc;
2649 struct ifconf __user *uifc;
6b96018b 2650 struct compat_ifreq __user *ifr32;
7a229387
AB
2651 struct ifreq __user *ifr;
2652 unsigned int i, j;
2653 int err;
2654
6b96018b 2655 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2656 return -EFAULT;
2657
2658 if (ifc32.ifcbuf == 0) {
2659 ifc32.ifc_len = 0;
2660 ifc.ifc_len = 0;
2661 ifc.ifc_req = NULL;
2662 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2663 } else {
c6d409cf
ED
2664 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2665 sizeof(struct ifreq);
7a229387
AB
2666 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2667 ifc.ifc_len = len;
2668 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2669 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2670 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2671 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2672 return -EFAULT;
2673 ifr++;
2674 ifr32++;
2675 }
2676 }
2677 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2678 return -EFAULT;
2679
6b96018b 2680 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2681 if (err)
2682 return err;
2683
2684 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2685 return -EFAULT;
2686
2687 ifr = ifc.ifc_req;
2688 ifr32 = compat_ptr(ifc32.ifcbuf);
2689 for (i = 0, j = 0;
c6d409cf
ED
2690 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2691 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2692 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2693 return -EFAULT;
2694 ifr32++;
2695 ifr++;
2696 }
2697
2698 if (ifc32.ifcbuf == 0) {
2699 /* Translate from 64-bit structure multiple to
2700 * a 32-bit one.
2701 */
2702 i = ifc.ifc_len;
6b96018b 2703 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2704 ifc32.ifc_len = i;
2705 } else {
2706 ifc32.ifc_len = i;
2707 }
6b96018b 2708 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2709 return -EFAULT;
2710
2711 return 0;
2712}
2713
6b96018b 2714static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2715{
3a7da39d
BH
2716 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2717 bool convert_in = false, convert_out = false;
2718 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2719 struct ethtool_rxnfc __user *rxnfc;
7a229387 2720 struct ifreq __user *ifr;
3a7da39d
BH
2721 u32 rule_cnt = 0, actual_rule_cnt;
2722 u32 ethcmd;
7a229387 2723 u32 data;
3a7da39d 2724 int ret;
7a229387 2725
3a7da39d
BH
2726 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2727 return -EFAULT;
7a229387 2728
3a7da39d
BH
2729 compat_rxnfc = compat_ptr(data);
2730
2731 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2732 return -EFAULT;
2733
3a7da39d
BH
2734 /* Most ethtool structures are defined without padding.
2735 * Unfortunately struct ethtool_rxnfc is an exception.
2736 */
2737 switch (ethcmd) {
2738 default:
2739 break;
2740 case ETHTOOL_GRXCLSRLALL:
2741 /* Buffer size is variable */
2742 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2743 return -EFAULT;
2744 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2745 return -ENOMEM;
2746 buf_size += rule_cnt * sizeof(u32);
2747 /* fall through */
2748 case ETHTOOL_GRXRINGS:
2749 case ETHTOOL_GRXCLSRLCNT:
2750 case ETHTOOL_GRXCLSRULE:
55664f32 2751 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2752 convert_out = true;
2753 /* fall through */
2754 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2755 buf_size += sizeof(struct ethtool_rxnfc);
2756 convert_in = true;
2757 break;
2758 }
2759
2760 ifr = compat_alloc_user_space(buf_size);
2761 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2762
2763 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2764 return -EFAULT;
2765
3a7da39d
BH
2766 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2767 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2768 return -EFAULT;
2769
3a7da39d 2770 if (convert_in) {
127fe533 2771 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2772 * fs.ring_cookie and at the end of fs, but nowhere else.
2773 */
127fe533
AD
2774 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2775 sizeof(compat_rxnfc->fs.m_ext) !=
2776 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2777 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2778 BUILD_BUG_ON(
2779 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2780 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2781 offsetof(struct ethtool_rxnfc, fs.location) -
2782 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2783
2784 if (copy_in_user(rxnfc, compat_rxnfc,
127fe533 2785 (void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2786 (void *)rxnfc) ||
2787 copy_in_user(&rxnfc->fs.ring_cookie,
2788 &compat_rxnfc->fs.ring_cookie,
2789 (void *)(&rxnfc->fs.location + 1) -
2790 (void *)&rxnfc->fs.ring_cookie) ||
2791 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2792 sizeof(rxnfc->rule_cnt)))
2793 return -EFAULT;
2794 }
2795
2796 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2797 if (ret)
2798 return ret;
2799
2800 if (convert_out) {
2801 if (copy_in_user(compat_rxnfc, rxnfc,
127fe533 2802 (const void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2803 (const void *)rxnfc) ||
2804 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2805 &rxnfc->fs.ring_cookie,
2806 (const void *)(&rxnfc->fs.location + 1) -
2807 (const void *)&rxnfc->fs.ring_cookie) ||
2808 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2809 sizeof(rxnfc->rule_cnt)))
2810 return -EFAULT;
2811
2812 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2813 /* As an optimisation, we only copy the actual
2814 * number of rules that the underlying
2815 * function returned. Since Mallory might
2816 * change the rule count in user memory, we
2817 * check that it is less than the rule count
2818 * originally given (as the user buffer size),
2819 * which has been range-checked.
2820 */
2821 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2822 return -EFAULT;
2823 if (actual_rule_cnt < rule_cnt)
2824 rule_cnt = actual_rule_cnt;
2825 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2826 &rxnfc->rule_locs[0],
2827 rule_cnt * sizeof(u32)))
2828 return -EFAULT;
2829 }
2830 }
2831
2832 return 0;
7a229387
AB
2833}
2834
7a50a240
AB
2835static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2836{
2837 void __user *uptr;
2838 compat_uptr_t uptr32;
2839 struct ifreq __user *uifr;
2840
c6d409cf 2841 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2842 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2843 return -EFAULT;
2844
2845 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2846 return -EFAULT;
2847
2848 uptr = compat_ptr(uptr32);
2849
2850 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2851 return -EFAULT;
2852
2853 return dev_ioctl(net, SIOCWANDEV, uifr);
2854}
2855
6b96018b
AB
2856static int bond_ioctl(struct net *net, unsigned int cmd,
2857 struct compat_ifreq __user *ifr32)
7a229387
AB
2858{
2859 struct ifreq kifr;
2860 struct ifreq __user *uifr;
7a229387
AB
2861 mm_segment_t old_fs;
2862 int err;
2863 u32 data;
2864 void __user *datap;
2865
2866 switch (cmd) {
2867 case SIOCBONDENSLAVE:
2868 case SIOCBONDRELEASE:
2869 case SIOCBONDSETHWADDR:
2870 case SIOCBONDCHANGEACTIVE:
6b96018b 2871 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2872 return -EFAULT;
2873
2874 old_fs = get_fs();
c6d409cf 2875 set_fs(KERNEL_DS);
c3f52ae6 2876 err = dev_ioctl(net, cmd,
2877 (struct ifreq __user __force *) &kifr);
c6d409cf 2878 set_fs(old_fs);
7a229387
AB
2879
2880 return err;
2881 case SIOCBONDSLAVEINFOQUERY:
2882 case SIOCBONDINFOQUERY:
2883 uifr = compat_alloc_user_space(sizeof(*uifr));
2884 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2885 return -EFAULT;
2886
2887 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2888 return -EFAULT;
2889
2890 datap = compat_ptr(data);
2891 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2892 return -EFAULT;
2893
6b96018b 2894 return dev_ioctl(net, cmd, uifr);
7a229387 2895 default:
07d106d0 2896 return -ENOIOCTLCMD;
ccbd6a5a 2897 }
7a229387
AB
2898}
2899
6b96018b
AB
2900static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2901 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2902{
2903 struct ifreq __user *u_ifreq64;
7a229387
AB
2904 char tmp_buf[IFNAMSIZ];
2905 void __user *data64;
2906 u32 data32;
2907
2908 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2909 IFNAMSIZ))
2910 return -EFAULT;
2911 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2912 return -EFAULT;
2913 data64 = compat_ptr(data32);
2914
2915 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2916
2917 /* Don't check these user accesses, just let that get trapped
2918 * in the ioctl handler instead.
2919 */
2920 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2921 IFNAMSIZ))
2922 return -EFAULT;
2923 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2924 return -EFAULT;
2925
6b96018b 2926 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2927}
2928
6b96018b
AB
2929static int dev_ifsioc(struct net *net, struct socket *sock,
2930 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2931{
a2116ed2 2932 struct ifreq __user *uifr;
7a229387
AB
2933 int err;
2934
a2116ed2
AB
2935 uifr = compat_alloc_user_space(sizeof(*uifr));
2936 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2937 return -EFAULT;
2938
2939 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2940
7a229387
AB
2941 if (!err) {
2942 switch (cmd) {
2943 case SIOCGIFFLAGS:
2944 case SIOCGIFMETRIC:
2945 case SIOCGIFMTU:
2946 case SIOCGIFMEM:
2947 case SIOCGIFHWADDR:
2948 case SIOCGIFINDEX:
2949 case SIOCGIFADDR:
2950 case SIOCGIFBRDADDR:
2951 case SIOCGIFDSTADDR:
2952 case SIOCGIFNETMASK:
fab2532b 2953 case SIOCGIFPFLAGS:
7a229387 2954 case SIOCGIFTXQLEN:
fab2532b
AB
2955 case SIOCGMIIPHY:
2956 case SIOCGMIIREG:
a2116ed2 2957 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2958 err = -EFAULT;
2959 break;
2960 }
2961 }
2962 return err;
2963}
2964
a2116ed2
AB
2965static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2966 struct compat_ifreq __user *uifr32)
2967{
2968 struct ifreq ifr;
2969 struct compat_ifmap __user *uifmap32;
2970 mm_segment_t old_fs;
2971 int err;
2972
2973 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2974 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2975 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2976 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2977 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2978 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2979 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2980 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2981 if (err)
2982 return -EFAULT;
2983
2984 old_fs = get_fs();
c6d409cf 2985 set_fs(KERNEL_DS);
c3f52ae6 2986 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2987 set_fs(old_fs);
a2116ed2
AB
2988
2989 if (cmd == SIOCGIFMAP && !err) {
2990 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2991 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2992 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2993 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2994 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2995 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2996 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2997 if (err)
2998 err = -EFAULT;
2999 }
3000 return err;
3001}
3002
3003static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3004{
3005 void __user *uptr;
3006 compat_uptr_t uptr32;
3007 struct ifreq __user *uifr;
3008
c6d409cf 3009 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3010 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3011 return -EFAULT;
3012
3013 if (get_user(uptr32, &uifr32->ifr_data))
3014 return -EFAULT;
3015
3016 uptr = compat_ptr(uptr32);
3017
3018 if (put_user(uptr, &uifr->ifr_data))
3019 return -EFAULT;
3020
3021 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3022}
3023
7a229387 3024struct rtentry32 {
c6d409cf 3025 u32 rt_pad1;
7a229387
AB
3026 struct sockaddr rt_dst; /* target address */
3027 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3028 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3029 unsigned short rt_flags;
3030 short rt_pad2;
3031 u32 rt_pad3;
3032 unsigned char rt_tos;
3033 unsigned char rt_class;
3034 short rt_pad4;
3035 short rt_metric; /* +1 for binary compatibility! */
7a229387 3036 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3037 u32 rt_mtu; /* per route MTU/Window */
3038 u32 rt_window; /* Window clamping */
7a229387
AB
3039 unsigned short rt_irtt; /* Initial RTT */
3040};
3041
3042struct in6_rtmsg32 {
3043 struct in6_addr rtmsg_dst;
3044 struct in6_addr rtmsg_src;
3045 struct in6_addr rtmsg_gateway;
3046 u32 rtmsg_type;
3047 u16 rtmsg_dst_len;
3048 u16 rtmsg_src_len;
3049 u32 rtmsg_metric;
3050 u32 rtmsg_info;
3051 u32 rtmsg_flags;
3052 s32 rtmsg_ifindex;
3053};
3054
6b96018b
AB
3055static int routing_ioctl(struct net *net, struct socket *sock,
3056 unsigned int cmd, void __user *argp)
7a229387
AB
3057{
3058 int ret;
3059 void *r = NULL;
3060 struct in6_rtmsg r6;
3061 struct rtentry r4;
3062 char devname[16];
3063 u32 rtdev;
3064 mm_segment_t old_fs = get_fs();
3065
6b96018b
AB
3066 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3067 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3068 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3069 3 * sizeof(struct in6_addr));
c6d409cf
ED
3070 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3071 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3072 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3073 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3074 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3075 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3076 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3077
3078 r = (void *) &r6;
3079 } else { /* ipv4 */
6b96018b 3080 struct rtentry32 __user *ur4 = argp;
c6d409cf 3081 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3082 3 * sizeof(struct sockaddr));
c6d409cf
ED
3083 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3084 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3085 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3086 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3087 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3088 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3089 if (rtdev) {
c6d409cf 3090 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3091 r4.rt_dev = (char __user __force *)devname;
3092 devname[15] = 0;
7a229387
AB
3093 } else
3094 r4.rt_dev = NULL;
3095
3096 r = (void *) &r4;
3097 }
3098
3099 if (ret) {
3100 ret = -EFAULT;
3101 goto out;
3102 }
3103
c6d409cf 3104 set_fs(KERNEL_DS);
6b96018b 3105 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3106 set_fs(old_fs);
7a229387
AB
3107
3108out:
7a229387
AB
3109 return ret;
3110}
3111
3112/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3113 * for some operations; this forces use of the newer bridge-utils that
25985edc 3114 * use compatible ioctls
7a229387 3115 */
6b96018b 3116static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3117{
6b96018b 3118 compat_ulong_t tmp;
7a229387 3119
6b96018b 3120 if (get_user(tmp, argp))
7a229387
AB
3121 return -EFAULT;
3122 if (tmp == BRCTL_GET_VERSION)
3123 return BRCTL_VERSION + 1;
3124 return -EINVAL;
3125}
3126
6b96018b
AB
3127static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3128 unsigned int cmd, unsigned long arg)
3129{
3130 void __user *argp = compat_ptr(arg);
3131 struct sock *sk = sock->sk;
3132 struct net *net = sock_net(sk);
7a229387 3133
6b96018b
AB
3134 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3135 return siocdevprivate_ioctl(net, cmd, argp);
3136
3137 switch (cmd) {
3138 case SIOCSIFBR:
3139 case SIOCGIFBR:
3140 return old_bridge_ioctl(argp);
3141 case SIOCGIFNAME:
3142 return dev_ifname32(net, argp);
3143 case SIOCGIFCONF:
3144 return dev_ifconf(net, argp);
3145 case SIOCETHTOOL:
3146 return ethtool_ioctl(net, argp);
7a50a240
AB
3147 case SIOCWANDEV:
3148 return compat_siocwandev(net, argp);
a2116ed2
AB
3149 case SIOCGIFMAP:
3150 case SIOCSIFMAP:
3151 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3152 case SIOCBONDENSLAVE:
3153 case SIOCBONDRELEASE:
3154 case SIOCBONDSETHWADDR:
3155 case SIOCBONDSLAVEINFOQUERY:
3156 case SIOCBONDINFOQUERY:
3157 case SIOCBONDCHANGEACTIVE:
3158 return bond_ioctl(net, cmd, argp);
3159 case SIOCADDRT:
3160 case SIOCDELRT:
3161 return routing_ioctl(net, sock, cmd, argp);
3162 case SIOCGSTAMP:
3163 return do_siocgstamp(net, sock, cmd, argp);
3164 case SIOCGSTAMPNS:
3165 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3166 case SIOCSHWTSTAMP:
3167 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3168
3169 case FIOSETOWN:
3170 case SIOCSPGRP:
3171 case FIOGETOWN:
3172 case SIOCGPGRP:
3173 case SIOCBRADDBR:
3174 case SIOCBRDELBR:
3175 case SIOCGIFVLAN:
3176 case SIOCSIFVLAN:
3177 case SIOCADDDLCI:
3178 case SIOCDELDLCI:
3179 return sock_ioctl(file, cmd, arg);
3180
3181 case SIOCGIFFLAGS:
3182 case SIOCSIFFLAGS:
3183 case SIOCGIFMETRIC:
3184 case SIOCSIFMETRIC:
3185 case SIOCGIFMTU:
3186 case SIOCSIFMTU:
3187 case SIOCGIFMEM:
3188 case SIOCSIFMEM:
3189 case SIOCGIFHWADDR:
3190 case SIOCSIFHWADDR:
3191 case SIOCADDMULTI:
3192 case SIOCDELMULTI:
3193 case SIOCGIFINDEX:
6b96018b
AB
3194 case SIOCGIFADDR:
3195 case SIOCSIFADDR:
3196 case SIOCSIFHWBROADCAST:
6b96018b 3197 case SIOCDIFADDR:
6b96018b
AB
3198 case SIOCGIFBRDADDR:
3199 case SIOCSIFBRDADDR:
3200 case SIOCGIFDSTADDR:
3201 case SIOCSIFDSTADDR:
3202 case SIOCGIFNETMASK:
3203 case SIOCSIFNETMASK:
3204 case SIOCSIFPFLAGS:
3205 case SIOCGIFPFLAGS:
3206 case SIOCGIFTXQLEN:
3207 case SIOCSIFTXQLEN:
3208 case SIOCBRADDIF:
3209 case SIOCBRDELIF:
9177efd3
AB
3210 case SIOCSIFNAME:
3211 case SIOCGMIIPHY:
3212 case SIOCGMIIREG:
3213 case SIOCSMIIREG:
6b96018b 3214 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3215
6b96018b
AB
3216 case SIOCSARP:
3217 case SIOCGARP:
3218 case SIOCDARP:
6b96018b 3219 case SIOCATMARK:
9177efd3
AB
3220 return sock_do_ioctl(net, sock, cmd, arg);
3221 }
3222
6b96018b
AB
3223 return -ENOIOCTLCMD;
3224}
7a229387 3225
95c96174 3226static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3227 unsigned long arg)
89bbfc95
SP
3228{
3229 struct socket *sock = file->private_data;
3230 int ret = -ENOIOCTLCMD;
87de87d5
DM
3231 struct sock *sk;
3232 struct net *net;
3233
3234 sk = sock->sk;
3235 net = sock_net(sk);
89bbfc95
SP
3236
3237 if (sock->ops->compat_ioctl)
3238 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3239
87de87d5
DM
3240 if (ret == -ENOIOCTLCMD &&
3241 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3242 ret = compat_wext_handle_ioctl(net, cmd, arg);
3243
6b96018b
AB
3244 if (ret == -ENOIOCTLCMD)
3245 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3246
89bbfc95
SP
3247 return ret;
3248}
3249#endif
3250
ac5a488e
SS
3251int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3252{
3253 return sock->ops->bind(sock, addr, addrlen);
3254}
c6d409cf 3255EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3256
3257int kernel_listen(struct socket *sock, int backlog)
3258{
3259 return sock->ops->listen(sock, backlog);
3260}
c6d409cf 3261EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3262
3263int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3264{
3265 struct sock *sk = sock->sk;
3266 int err;
3267
3268 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3269 newsock);
3270 if (err < 0)
3271 goto done;
3272
3273 err = sock->ops->accept(sock, *newsock, flags);
3274 if (err < 0) {
3275 sock_release(*newsock);
fa8705b0 3276 *newsock = NULL;
ac5a488e
SS
3277 goto done;
3278 }
3279
3280 (*newsock)->ops = sock->ops;
1b08534e 3281 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3282
3283done:
3284 return err;
3285}
c6d409cf 3286EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3287
3288int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3289 int flags)
ac5a488e
SS
3290{
3291 return sock->ops->connect(sock, addr, addrlen, flags);
3292}
c6d409cf 3293EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3294
3295int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3296 int *addrlen)
3297{
3298 return sock->ops->getname(sock, addr, addrlen, 0);
3299}
c6d409cf 3300EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3301
3302int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3303 int *addrlen)
3304{
3305 return sock->ops->getname(sock, addr, addrlen, 1);
3306}
c6d409cf 3307EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3308
3309int kernel_getsockopt(struct socket *sock, int level, int optname,
3310 char *optval, int *optlen)
3311{
3312 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3313 char __user *uoptval;
3314 int __user *uoptlen;
ac5a488e
SS
3315 int err;
3316
fb8621bb
NK
3317 uoptval = (char __user __force *) optval;
3318 uoptlen = (int __user __force *) optlen;
3319
ac5a488e
SS
3320 set_fs(KERNEL_DS);
3321 if (level == SOL_SOCKET)
fb8621bb 3322 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3323 else
fb8621bb
NK
3324 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3325 uoptlen);
ac5a488e
SS
3326 set_fs(oldfs);
3327 return err;
3328}
c6d409cf 3329EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3330
3331int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3332 char *optval, unsigned int optlen)
ac5a488e
SS
3333{
3334 mm_segment_t oldfs = get_fs();
fb8621bb 3335 char __user *uoptval;
ac5a488e
SS
3336 int err;
3337
fb8621bb
NK
3338 uoptval = (char __user __force *) optval;
3339
ac5a488e
SS
3340 set_fs(KERNEL_DS);
3341 if (level == SOL_SOCKET)
fb8621bb 3342 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3343 else
fb8621bb 3344 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3345 optlen);
3346 set_fs(oldfs);
3347 return err;
3348}
c6d409cf 3349EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3350
3351int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3352 size_t size, int flags)
3353{
f8451725
HX
3354 sock_update_classid(sock->sk);
3355
ac5a488e
SS
3356 if (sock->ops->sendpage)
3357 return sock->ops->sendpage(sock, page, offset, size, flags);
3358
3359 return sock_no_sendpage(sock, page, offset, size, flags);
3360}
c6d409cf 3361EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3362
3363int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3364{
3365 mm_segment_t oldfs = get_fs();
3366 int err;
3367
3368 set_fs(KERNEL_DS);
3369 err = sock->ops->ioctl(sock, cmd, arg);
3370 set_fs(oldfs);
3371
3372 return err;
3373}
c6d409cf 3374EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3375
91cf45f0
TM
3376int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3377{
3378 return sock->ops->shutdown(sock, how);
3379}
91cf45f0 3380EXPORT_SYMBOL(kernel_sock_shutdown);